diff options
author | Joris | 2019-09-03 23:25:39 +0200 |
---|---|---|
committer | Joris | 2019-09-03 23:25:39 +0200 |
commit | 223ae6aa0b14c071d5719ada0cc6b43e9199a81b (patch) | |
tree | 6fc822b7b9bde1989e3c040eb52e5f4d52c49923 | |
parent | 5cedcecd6ae31e2485dcab2ddd74c74a4779545d (diff) |
Use wreq instead of http-conduit to maintain cookies in a session
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | Makefile | 13 | ||||
-rw-r--r-- | ad-listener.cabal | 8 | ||||
-rw-r--r-- | shell.nix | 2 | ||||
-rw-r--r-- | src/executable/haskell/Main.hs | 6 | ||||
-rw-r--r-- | src/executable/haskell/Service/AdListener.hs | 28 | ||||
-rw-r--r-- | src/lib/haskell/FetchAd.hs | 20 | ||||
-rw-r--r-- | src/lib/haskell/Utils/HTTP.hs | 35 | ||||
-rw-r--r-- | src/test/haskell/Main.hs | 6 | ||||
-rw-r--r-- | stack.yaml | 8 |
10 files changed, 69 insertions, 59 deletions
@@ -1,4 +1,4 @@ ad-listener.nix -dist-newstyle/ local.conf .ghc.environment.* +.stack-work/ @@ -11,19 +11,22 @@ stop: # Other commands clean: - @cabal new-clean > /dev/null + @stack clean > /dev/null + +install: + @stack setup watch: build @nodemon --watch src --delay 0.2 -e hs,conf --exec 'clear && make build-and-launch' build-and-launch: - @(pkill ad-listener || true) && (nix-shell -p zlib --command "cabal new-run ad-listener" || true) + @(pkill ad-listener || true) && (stack exec ad-listener || true) build: - @nix-shell -p zlib --command "cabal new-build" + @stack build repl: - @cabal new-repl + @stack repl test: - @nix-shell -p zlib --command "cabal new-test" + @stack test diff --git a/ad-listener.cabal b/ad-listener.cabal index 65da10b..fbe094e 100644 --- a/ad-listener.cabal +++ b/ad-listener.cabal @@ -9,7 +9,6 @@ Cabal-version: >= 1.10 Library Hs-source-dirs: src/lib/haskell - Main-is: Main.hs Ghc-options: -Wall -Werror Default-language: Haskell2010 @@ -19,10 +18,11 @@ Library Build-depends: base , bytestring - , http-conduit + , wreq , tagsoup , text , http-types + , lens Exposed-modules: FetchAd @@ -58,7 +58,7 @@ Executable ad-listener , tagsoup , text , time - , http-conduit + , wreq Other-modules: Conf @@ -83,7 +83,7 @@ Test-suite test , hspec , ad-listener , text - , http-conduit + , wreq Other-modules: Ads @@ -2,7 +2,7 @@ with import <nixpkgs> {}; { env = stdenv.mkDerivation { name = "env"; buildInputs = with nodePackages; with haskellPackages; [ - cabal-install + stack nodemon stylish-haskell tmux diff --git a/src/executable/haskell/Main.hs b/src/executable/haskell/Main.hs index d082b94..8c0f37f 100644 --- a/src/executable/haskell/Main.hs +++ b/src/executable/haskell/Main.hs @@ -2,7 +2,7 @@ module Main ( main ) where -import qualified Network.HTTP.Conduit as H +import qualified Network.Wreq.Session as Session import qualified Conf import qualified Service.AdListener as AdListener @@ -10,5 +10,5 @@ import qualified Service.AdListener as AdListener main :: IO () main = do conf <- Conf.parse "application.conf" - manager <- H.newManager H.tlsManagerSettings - AdListener.start conf manager + session <- Session.newSession + AdListener.start conf session diff --git a/src/executable/haskell/Service/AdListener.hs b/src/executable/haskell/Service/AdListener.hs index 5cf26d1..c393f38 100644 --- a/src/executable/haskell/Service/AdListener.hs +++ b/src/executable/haskell/Service/AdListener.hs @@ -5,7 +5,7 @@ module Service.AdListener import Control.Concurrent (threadDelay) import qualified Data.Text as T import qualified Data.Text.IO as T -import Network.HTTP.Conduit (Manager) +import Network.Wreq.Session (Session) import Prelude hiding (error) import Conf (Conf) @@ -19,17 +19,17 @@ import qualified Service.MailService as MailService import qualified Utils.Time as TimeUtils import qualified View.Ad as Ad -start :: Conf -> Manager -> IO () -start conf manager = do - ads <- fetchAds conf manager +start :: Conf -> Session -> IO () +start conf session = do + ads <- fetchAds conf session let newURLs = map Ad.url ads T.putStrLn "Listening to new ads…" waitListenInterval conf - listenToNewAdsWithViewedURLs conf manager newURLs + listenToNewAdsWithViewedURLs conf session newURLs -listenToNewAdsWithViewedURLs :: Conf -> Manager -> [URL] -> IO () -listenToNewAdsWithViewedURLs conf manager viewedURLs = do - ads <- fetchAds conf manager +listenToNewAdsWithViewedURLs :: Conf -> Session -> [URL] -> IO () +listenToNewAdsWithViewedURLs conf session viewedURLs = do + ads <- fetchAds conf session let (newURLs, newAds) = Ad.getNewAds viewedURLs ads time <- TimeUtils.getCurrentFormattedTime if not (null newAds) @@ -40,13 +40,13 @@ listenToNewAdsWithViewedURLs conf manager viewedURLs = do else return () waitListenInterval conf - listenToNewAdsWithViewedURLs conf manager (viewedURLs ++ newURLs) + listenToNewAdsWithViewedURLs conf session (viewedURLs ++ newURLs) -fetchAds :: Conf -> Manager -> IO [Ad] -fetchAds conf manager = do - leboncoinAds <- FetchAd.leboncoin manager (Conf.leboncoinUrls conf) - ouestFranceAds <- FetchAd.ouestFrance manager (Conf.ouestFranceUrls conf) - seLogerAds <- FetchAd.seLoger manager (Conf.seLogerUrls conf) +fetchAds :: Conf -> Session -> IO [Ad] +fetchAds conf session = do + leboncoinAds <- FetchAd.leboncoin session (Conf.leboncoinUrls conf) + ouestFranceAds <- FetchAd.ouestFrance session (Conf.ouestFranceUrls conf) + seLogerAds <- FetchAd.seLoger session (Conf.seLogerUrls conf) let results = leboncoinAds ++ ouestFranceAds ++ seLogerAds T.putStrLn . T.concat $ [ "Parsed " diff --git a/src/lib/haskell/FetchAd.hs b/src/lib/haskell/FetchAd.hs index 1708fe4..c143d3c 100644 --- a/src/lib/haskell/FetchAd.hs +++ b/src/lib/haskell/FetchAd.hs @@ -5,7 +5,7 @@ module FetchAd ) where import Data.Either (rights) -import Network.HTTP.Conduit (Manager) +import Network.Wreq.Session (Session) import Model.Ad (Ad) import Model.URL (URL) @@ -14,23 +14,23 @@ import qualified Parser.OuestFranceParser as OuestFranceParser import qualified Parser.SeLogerParser as SeLogerParser import qualified Utils.HTTP as HTTP -leboncoin :: Manager -> [URL] -> IO [Ad] -leboncoin manager urls = +leboncoin :: Session -> [URL] -> IO [Ad] +leboncoin session urls = fmap (concat . map LeboncoinParser.parse . rights) . sequence - . map (HTTP.get manager) + . map (HTTP.get session) $ urls -ouestFrance :: Manager -> [URL] -> IO [Ad] -ouestFrance manager urls = +ouestFrance :: Session -> [URL] -> IO [Ad] +ouestFrance session urls = fmap (concat . map OuestFranceParser.parse . rights) . sequence - . map (HTTP.get manager) + . map (HTTP.get session) $ urls -seLoger :: Manager -> [URL] -> IO [Ad] -seLoger manager urls = +seLoger :: Session -> [URL] -> IO [Ad] +seLoger session urls = fmap (concat . map SeLogerParser.parse . rights) . sequence - . map (HTTP.get manager) + . map (HTTP.get session) $ urls diff --git a/src/lib/haskell/Utils/HTTP.hs b/src/lib/haskell/Utils/HTTP.hs index 9bcf5f0..d441f86 100644 --- a/src/lib/haskell/Utils/HTTP.hs +++ b/src/lib/haskell/Utils/HTTP.hs @@ -2,25 +2,25 @@ module Utils.HTTP ( get ) where -import qualified Data.ByteString.Lazy as BS -import Data.Text (Text) -import qualified Data.Text as T -import Data.Text.Encoding as T -import Data.Text.IO as T -import Network.HTTP.Conduit (Manager) -import qualified Network.HTTP.Conduit as H -import qualified Network.HTTP.Simple as HS -import qualified Network.HTTP.Types.Status as Status +import Control.Lens ((^.)) +import qualified Data.ByteString.Lazy as BS +import Data.Text (Text) +import qualified Data.Text as T +import Data.Text.Encoding as T +import qualified Data.Text.IO as T +import qualified Network.Wreq as Wreq +import Network.Wreq.Session (Session) +import qualified Network.Wreq.Session as Session +import qualified Network.Wreq.Types as Types import Model.URL -get :: Manager -> URL -> IO (Either Text Text) -get manager url = do - request <- H.parseRequest (T.unpack url) - - response <- H.httpLbs (HS.setRequestHeaders requestHeaders request) manager - let body = T.decodeUtf8 . BS.toStrict . H.responseBody $ response - let statusCode = Status.statusCode . H.responseStatus $ response +get :: Session -> URL -> IO (Either Text Text) +get session url = do + let options = Wreq.defaults { Types.headers = headers } + response <- Session.getWith options session (T.unpack url) + let body = T.decodeUtf8 . BS.toStrict $ response ^. Wreq.responseBody + let statusCode = response ^. Wreq.responseStatus ^. Wreq.statusCode if statusCode >= 200 && statusCode < 300 then return . Right $ body @@ -36,11 +36,10 @@ get manager url = do return . Left $ body where - requestHeaders = + headers = [ ("User-Agent", "Mozilla/5.0 (X11; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0") , ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") , ("Accept-Language", "en-US,en;fr;q=0.5") - , ("Accept-Encoding", "gzip, deflate, br") , ("Referer", "https://duckduckgo.com/") , ("DNT", "1") , ("Connection", "keep-alive") diff --git a/src/test/haskell/Main.hs b/src/test/haskell/Main.hs index ba6d466..541cc54 100644 --- a/src/test/haskell/Main.hs +++ b/src/test/haskell/Main.hs @@ -1,6 +1,6 @@ import Data.Maybe (catMaybes) import qualified Data.Text.IO as T -import qualified Network.HTTP.Conduit as H +import qualified Network.Wreq.Session as Session import Test.Hspec import qualified Ads @@ -12,7 +12,7 @@ import qualified Parser.LeboncoinParser as LeboncoinParser main :: IO () main = do - manager <- H.newManager H.tlsManagerSettings + session <- Session.newSession hspec $ do describe "LeboncoinParser" $ do @@ -26,7 +26,7 @@ main = do it "should parse ads from remote page" $ do ads <- FetchAd.leboncoin - manager + session ["https://www.leboncoin.fr/annonces/offres/ile_de_france/"] checkAds ads diff --git a/stack.yaml b/stack.yaml new file mode 100644 index 0000000..fa3fa4d --- /dev/null +++ b/stack.yaml @@ -0,0 +1,8 @@ +resolver: lts-14.4 + +packages: +- . +- location: + git: https://gitlab.com/guyonvarch/config-manager + commit: c0f5e9c5ad8ac88b05ecff9b035c59480829aff1 + extra-dep: true |