From 223ae6aa0b14c071d5719ada0cc6b43e9199a81b Mon Sep 17 00:00:00 2001 From: Joris Date: Tue, 3 Sep 2019 23:25:39 +0200 Subject: Use wreq instead of http-conduit to maintain cookies in a session --- src/lib/haskell/FetchAd.hs | 20 ++++++++++---------- src/lib/haskell/Utils/HTTP.hs | 35 +++++++++++++++++------------------ 2 files changed, 27 insertions(+), 28 deletions(-) (limited to 'src/lib') diff --git a/src/lib/haskell/FetchAd.hs b/src/lib/haskell/FetchAd.hs index 1708fe4..c143d3c 100644 --- a/src/lib/haskell/FetchAd.hs +++ b/src/lib/haskell/FetchAd.hs @@ -5,7 +5,7 @@ module FetchAd ) where import Data.Either (rights) -import Network.HTTP.Conduit (Manager) +import Network.Wreq.Session (Session) import Model.Ad (Ad) import Model.URL (URL) @@ -14,23 +14,23 @@ import qualified Parser.OuestFranceParser as OuestFranceParser import qualified Parser.SeLogerParser as SeLogerParser import qualified Utils.HTTP as HTTP -leboncoin :: Manager -> [URL] -> IO [Ad] -leboncoin manager urls = +leboncoin :: Session -> [URL] -> IO [Ad] +leboncoin session urls = fmap (concat . map LeboncoinParser.parse . rights) . sequence - . map (HTTP.get manager) + . map (HTTP.get session) $ urls -ouestFrance :: Manager -> [URL] -> IO [Ad] -ouestFrance manager urls = +ouestFrance :: Session -> [URL] -> IO [Ad] +ouestFrance session urls = fmap (concat . map OuestFranceParser.parse . rights) . sequence - . map (HTTP.get manager) + . map (HTTP.get session) $ urls -seLoger :: Manager -> [URL] -> IO [Ad] -seLoger manager urls = +seLoger :: Session -> [URL] -> IO [Ad] +seLoger session urls = fmap (concat . map SeLogerParser.parse . rights) . sequence - . map (HTTP.get manager) + . map (HTTP.get session) $ urls diff --git a/src/lib/haskell/Utils/HTTP.hs b/src/lib/haskell/Utils/HTTP.hs index 9bcf5f0..d441f86 100644 --- a/src/lib/haskell/Utils/HTTP.hs +++ b/src/lib/haskell/Utils/HTTP.hs @@ -2,25 +2,25 @@ module Utils.HTTP ( get ) where -import qualified Data.ByteString.Lazy as BS -import Data.Text (Text) -import qualified Data.Text as T -import Data.Text.Encoding as T -import Data.Text.IO as T -import Network.HTTP.Conduit (Manager) -import qualified Network.HTTP.Conduit as H -import qualified Network.HTTP.Simple as HS -import qualified Network.HTTP.Types.Status as Status +import Control.Lens ((^.)) +import qualified Data.ByteString.Lazy as BS +import Data.Text (Text) +import qualified Data.Text as T +import Data.Text.Encoding as T +import qualified Data.Text.IO as T +import qualified Network.Wreq as Wreq +import Network.Wreq.Session (Session) +import qualified Network.Wreq.Session as Session +import qualified Network.Wreq.Types as Types import Model.URL -get :: Manager -> URL -> IO (Either Text Text) -get manager url = do - request <- H.parseRequest (T.unpack url) - - response <- H.httpLbs (HS.setRequestHeaders requestHeaders request) manager - let body = T.decodeUtf8 . BS.toStrict . H.responseBody $ response - let statusCode = Status.statusCode . H.responseStatus $ response +get :: Session -> URL -> IO (Either Text Text) +get session url = do + let options = Wreq.defaults { Types.headers = headers } + response <- Session.getWith options session (T.unpack url) + let body = T.decodeUtf8 . BS.toStrict $ response ^. Wreq.responseBody + let statusCode = response ^. Wreq.responseStatus ^. Wreq.statusCode if statusCode >= 200 && statusCode < 300 then return . Right $ body @@ -36,11 +36,10 @@ get manager url = do return . Left $ body where - requestHeaders = + headers = [ ("User-Agent", "Mozilla/5.0 (X11; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0") , ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") , ("Accept-Language", "en-US,en;fr;q=0.5") - , ("Accept-Encoding", "gzip, deflate, br") , ("Referer", "https://duckduckgo.com/") , ("DNT", "1") , ("Connection", "keep-alive") -- cgit v1.2.3