From 223ae6aa0b14c071d5719ada0cc6b43e9199a81b Mon Sep 17 00:00:00 2001 From: Joris Date: Tue, 3 Sep 2019 23:25:39 +0200 Subject: Use wreq instead of http-conduit to maintain cookies in a session --- src/executable/haskell/Main.hs | 6 ++--- src/executable/haskell/Service/AdListener.hs | 28 +++++++++++----------- src/lib/haskell/FetchAd.hs | 20 ++++++++-------- src/lib/haskell/Utils/HTTP.hs | 35 ++++++++++++++-------------- src/test/haskell/Main.hs | 6 ++--- 5 files changed, 47 insertions(+), 48 deletions(-) (limited to 'src') diff --git a/src/executable/haskell/Main.hs b/src/executable/haskell/Main.hs index d082b94..8c0f37f 100644 --- a/src/executable/haskell/Main.hs +++ b/src/executable/haskell/Main.hs @@ -2,7 +2,7 @@ module Main ( main ) where -import qualified Network.HTTP.Conduit as H +import qualified Network.Wreq.Session as Session import qualified Conf import qualified Service.AdListener as AdListener @@ -10,5 +10,5 @@ import qualified Service.AdListener as AdListener main :: IO () main = do conf <- Conf.parse "application.conf" - manager <- H.newManager H.tlsManagerSettings - AdListener.start conf manager + session <- Session.newSession + AdListener.start conf session diff --git a/src/executable/haskell/Service/AdListener.hs b/src/executable/haskell/Service/AdListener.hs index 5cf26d1..c393f38 100644 --- a/src/executable/haskell/Service/AdListener.hs +++ b/src/executable/haskell/Service/AdListener.hs @@ -5,7 +5,7 @@ module Service.AdListener import Control.Concurrent (threadDelay) import qualified Data.Text as T import qualified Data.Text.IO as T -import Network.HTTP.Conduit (Manager) +import Network.Wreq.Session (Session) import Prelude hiding (error) import Conf (Conf) @@ -19,17 +19,17 @@ import qualified Service.MailService as MailService import qualified Utils.Time as TimeUtils import qualified View.Ad as Ad -start :: Conf -> Manager -> IO () -start conf manager = do - ads <- fetchAds conf manager +start :: Conf -> Session -> IO () +start conf session = do + ads <- fetchAds conf session let newURLs = map Ad.url ads T.putStrLn "Listening to new ads…" waitListenInterval conf - listenToNewAdsWithViewedURLs conf manager newURLs + listenToNewAdsWithViewedURLs conf session newURLs -listenToNewAdsWithViewedURLs :: Conf -> Manager -> [URL] -> IO () -listenToNewAdsWithViewedURLs conf manager viewedURLs = do - ads <- fetchAds conf manager +listenToNewAdsWithViewedURLs :: Conf -> Session -> [URL] -> IO () +listenToNewAdsWithViewedURLs conf session viewedURLs = do + ads <- fetchAds conf session let (newURLs, newAds) = Ad.getNewAds viewedURLs ads time <- TimeUtils.getCurrentFormattedTime if not (null newAds) @@ -40,13 +40,13 @@ listenToNewAdsWithViewedURLs conf manager viewedURLs = do else return () waitListenInterval conf - listenToNewAdsWithViewedURLs conf manager (viewedURLs ++ newURLs) + listenToNewAdsWithViewedURLs conf session (viewedURLs ++ newURLs) -fetchAds :: Conf -> Manager -> IO [Ad] -fetchAds conf manager = do - leboncoinAds <- FetchAd.leboncoin manager (Conf.leboncoinUrls conf) - ouestFranceAds <- FetchAd.ouestFrance manager (Conf.ouestFranceUrls conf) - seLogerAds <- FetchAd.seLoger manager (Conf.seLogerUrls conf) +fetchAds :: Conf -> Session -> IO [Ad] +fetchAds conf session = do + leboncoinAds <- FetchAd.leboncoin session (Conf.leboncoinUrls conf) + ouestFranceAds <- FetchAd.ouestFrance session (Conf.ouestFranceUrls conf) + seLogerAds <- FetchAd.seLoger session (Conf.seLogerUrls conf) let results = leboncoinAds ++ ouestFranceAds ++ seLogerAds T.putStrLn . T.concat $ [ "Parsed " diff --git a/src/lib/haskell/FetchAd.hs b/src/lib/haskell/FetchAd.hs index 1708fe4..c143d3c 100644 --- a/src/lib/haskell/FetchAd.hs +++ b/src/lib/haskell/FetchAd.hs @@ -5,7 +5,7 @@ module FetchAd ) where import Data.Either (rights) -import Network.HTTP.Conduit (Manager) +import Network.Wreq.Session (Session) import Model.Ad (Ad) import Model.URL (URL) @@ -14,23 +14,23 @@ import qualified Parser.OuestFranceParser as OuestFranceParser import qualified Parser.SeLogerParser as SeLogerParser import qualified Utils.HTTP as HTTP -leboncoin :: Manager -> [URL] -> IO [Ad] -leboncoin manager urls = +leboncoin :: Session -> [URL] -> IO [Ad] +leboncoin session urls = fmap (concat . map LeboncoinParser.parse . rights) . sequence - . map (HTTP.get manager) + . map (HTTP.get session) $ urls -ouestFrance :: Manager -> [URL] -> IO [Ad] -ouestFrance manager urls = +ouestFrance :: Session -> [URL] -> IO [Ad] +ouestFrance session urls = fmap (concat . map OuestFranceParser.parse . rights) . sequence - . map (HTTP.get manager) + . map (HTTP.get session) $ urls -seLoger :: Manager -> [URL] -> IO [Ad] -seLoger manager urls = +seLoger :: Session -> [URL] -> IO [Ad] +seLoger session urls = fmap (concat . map SeLogerParser.parse . rights) . sequence - . map (HTTP.get manager) + . map (HTTP.get session) $ urls diff --git a/src/lib/haskell/Utils/HTTP.hs b/src/lib/haskell/Utils/HTTP.hs index 9bcf5f0..d441f86 100644 --- a/src/lib/haskell/Utils/HTTP.hs +++ b/src/lib/haskell/Utils/HTTP.hs @@ -2,25 +2,25 @@ module Utils.HTTP ( get ) where -import qualified Data.ByteString.Lazy as BS -import Data.Text (Text) -import qualified Data.Text as T -import Data.Text.Encoding as T -import Data.Text.IO as T -import Network.HTTP.Conduit (Manager) -import qualified Network.HTTP.Conduit as H -import qualified Network.HTTP.Simple as HS -import qualified Network.HTTP.Types.Status as Status +import Control.Lens ((^.)) +import qualified Data.ByteString.Lazy as BS +import Data.Text (Text) +import qualified Data.Text as T +import Data.Text.Encoding as T +import qualified Data.Text.IO as T +import qualified Network.Wreq as Wreq +import Network.Wreq.Session (Session) +import qualified Network.Wreq.Session as Session +import qualified Network.Wreq.Types as Types import Model.URL -get :: Manager -> URL -> IO (Either Text Text) -get manager url = do - request <- H.parseRequest (T.unpack url) - - response <- H.httpLbs (HS.setRequestHeaders requestHeaders request) manager - let body = T.decodeUtf8 . BS.toStrict . H.responseBody $ response - let statusCode = Status.statusCode . H.responseStatus $ response +get :: Session -> URL -> IO (Either Text Text) +get session url = do + let options = Wreq.defaults { Types.headers = headers } + response <- Session.getWith options session (T.unpack url) + let body = T.decodeUtf8 . BS.toStrict $ response ^. Wreq.responseBody + let statusCode = response ^. Wreq.responseStatus ^. Wreq.statusCode if statusCode >= 200 && statusCode < 300 then return . Right $ body @@ -36,11 +36,10 @@ get manager url = do return . Left $ body where - requestHeaders = + headers = [ ("User-Agent", "Mozilla/5.0 (X11; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0") , ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") , ("Accept-Language", "en-US,en;fr;q=0.5") - , ("Accept-Encoding", "gzip, deflate, br") , ("Referer", "https://duckduckgo.com/") , ("DNT", "1") , ("Connection", "keep-alive") diff --git a/src/test/haskell/Main.hs b/src/test/haskell/Main.hs index ba6d466..541cc54 100644 --- a/src/test/haskell/Main.hs +++ b/src/test/haskell/Main.hs @@ -1,6 +1,6 @@ import Data.Maybe (catMaybes) import qualified Data.Text.IO as T -import qualified Network.HTTP.Conduit as H +import qualified Network.Wreq.Session as Session import Test.Hspec import qualified Ads @@ -12,7 +12,7 @@ import qualified Parser.LeboncoinParser as LeboncoinParser main :: IO () main = do - manager <- H.newManager H.tlsManagerSettings + session <- Session.newSession hspec $ do describe "LeboncoinParser" $ do @@ -26,7 +26,7 @@ main = do it "should parse ads from remote page" $ do ads <- FetchAd.leboncoin - manager + session ["https://www.leboncoin.fr/annonces/offres/ile_de_france/"] checkAds ads -- cgit v1.2.3