From aafc45eb5eed3839a5210a7d48928d975df6a296 Mon Sep 17 00:00:00 2001 From: Joris Guyonvarch Date: Sat, 11 Apr 2015 20:38:08 +0200 Subject: Handle a configuration file to save the url --- .gitignore | 1 + leboncoin-listener.cabal | 2 ++ src/Ad.hs | 10 +++--- src/AdListener.hs | 79 +++++++++++++++++++++++++++++++++++++++++++++++ src/Config.hs | 72 +++++++++++++++++++++++++++++++++++++++++++ src/Main.hs | 80 ++++++------------------------------------------ src/Model/Detail.hs | 4 +-- src/Model/Resume.hs | 5 +-- src/Model/URL.hs | 4 ++- src/Page.hs | 9 +++--- src/Parser/Detail.hs | 3 +- src/Parser/Resume.hs | 7 +++-- src/Parser/Utils.hs | 9 +++--- src/View/Ad.hs | 11 ++++--- 14 files changed, 200 insertions(+), 96 deletions(-) create mode 100644 src/AdListener.hs create mode 100644 src/Config.hs diff --git a/.gitignore b/.gitignore index cef4194..83a814e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ .cabal-sandbox cabal.sandbox.config dist +conf diff --git a/leboncoin-listener.cabal b/leboncoin-listener.cabal index 224690f..e1ae0af 100644 --- a/leboncoin-listener.cabal +++ b/leboncoin-listener.cabal @@ -8,5 +8,7 @@ executable leboncoin-listener hs-source-dirs: src build-depends: base , text == 1.2.0.4 + , containers == 0.5.6.3 + , directory == 1.2.2.1 , HTTP == 4000.2.19 , tagsoup == 0.13.3 diff --git a/src/Ad.hs b/src/Ad.hs index 5f3a9f1..7bc66fc 100644 --- a/src/Ad.hs +++ b/src/Ad.hs @@ -3,9 +3,11 @@ module Ad , getAds ) where -import Page (getPage) +import Data.Text (Text) import qualified Data.Text as T +import Page (getPage) + import Model.Ad import Model.Resume import Model.Detail @@ -14,15 +16,15 @@ import Model.URL import Parser.Resume import Parser.Detail -getResumes :: URL -> IO (Either T.Text [Resume]) +getResumes :: URL -> IO (Either Text [Resume]) getResumes url = fmap parseResumes <$> getPage url -getAds :: [Resume] -> IO (Either T.Text [Ad]) +getAds :: [Resume] -> IO (Either Text [Ad]) getAds resumes = do xs <- sequence $ map getAd resumes return $ sequence xs -getAd :: Resume -> IO (Either T.Text Ad) +getAd :: Resume -> IO (Either Text Ad) getAd resume = do page <- getPage (url resume) fmap (\page -> Ad { resume = resume, detail = parseDetail page}) <$> getPage (url resume) diff --git a/src/AdListener.hs b/src/AdListener.hs new file mode 100644 index 0000000..1f97e30 --- /dev/null +++ b/src/AdListener.hs @@ -0,0 +1,79 @@ +{-# LANGUAGE OverloadedStrings #-} + +module AdListener + ( listenToNewAds + ) where + +import Data.List (intersperse) +import Data.Text (Text) +import qualified Data.Text as T +import qualified Data.Text.IO as T + +import Control.Concurrent (threadDelay) + +import Ad (getResumes, getAds) + +import Model.Ad +import Model.URL +import Model.Resume + +import View.Ad (renderAds) + +import Page +import Parser.Detail + +import Config (Config) +import qualified Config as C + +listenToNewAds :: Config -> [Ad] -> IO () +listenToNewAds config viewedAds = do + eitherResumes <- getResumes (C.url config) + case eitherResumes of + Left error -> + listenError config viewedAds error + Right resumes -> + listenToNewAdsWithResumes config viewedAds resumes + +listenToNewAdsWithResumes :: Config -> [Ad] -> [Resume] -> IO () +listenToNewAdsWithResumes config viewedAds resumes = + let viewedURLs = getURLs $ map resume viewedAds + newResumes = getNewResumes viewedURLs resumes + in do + eitherNewAds <- getAds newResumes + case eitherNewAds of + Left error -> + listenError config viewedAds error + Right newAds -> + do + if not (null newAds) + then + T.putStrLn (newAdsMessage newAds) + else + return () + waitOneMinute + listenToNewAds config (viewedAds ++ newAds) + +newAdsMessage :: [Ad] -> Text +newAdsMessage newAds = + let newAdsMessage = + T.concat + [ "Got " + , T.pack . show . length $ newAds + , " new ads." + ] + line = T.map (\_ -> '-') newAdsMessage + in T.intercalate + "\n" + [ newAdsMessage + , T.concat [line, "\n"] + , renderAds newAds + ] + +listenError :: Config -> [Ad] -> Text -> IO () +listenError config viewedAds error = do + T.putStrLn error + waitOneMinute + listenToNewAds config viewedAds + +waitOneMinute :: IO () +waitOneMinute = threadDelay (1000 * 1000 * 60) diff --git a/src/Config.hs b/src/Config.hs new file mode 100644 index 0000000..c09f69e --- /dev/null +++ b/src/Config.hs @@ -0,0 +1,72 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Config + ( configUsage + , Config(..) + , getConfig + ) where + +import Data.Maybe (catMaybes) +import Data.Map (Map) +import qualified Data.Map as M +import Data.Text (Text) +import qualified Data.Text as T +import qualified Data.Text.IO as T + +import Control.Monad (guard) + +import System.Directory (doesFileExist) + +import Model.URL + +configUsage :: Text +configUsage = + T.intercalate + "\n" + [ T.concat + [ "Please provide an url for leboncoin in the file named: " + , T.pack configPath + ] + , "url = http://…" + ] + +configPath :: FilePath +configPath = "conf" + +data Config = Config + { url :: URL + } deriving (Eq, Read, Show) + +getConfig :: IO (Maybe Config) +getConfig = do + exists <- doesFileExist configPath + if exists + then + configFromFile <$> T.readFile configPath + else + return Nothing + +configFromFile :: Text -> Maybe Config +configFromFile = + configFromMap + . M.fromList + . catMaybes + . map lineConfig + . filter (not . T.null) + . map T.strip + . T.lines + +configFromMap :: Map Text Text -> Maybe Config +configFromMap map = do + url <- M.lookup "url" map + return $ Config { url = url } + +lineConfig :: Text -> Maybe (Text, Text) +lineConfig line = do + (key, value) <- twoElementsList (map T.strip . T.splitOn "=" $ line) + guard (T.length key > 0) + return (key, value) + +twoElementsList :: [a] -> Maybe (a, a) +twoElementsList [x, y] = Just (x, y) +twoElementsLisst _ = Nothing diff --git a/src/Main.hs b/src/Main.hs index 6208953..ad70402 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -4,78 +4,18 @@ module Main ( main ) where -import Data.List (intersperse) -import qualified Data.Text as T import qualified Data.Text.IO as T -import Control.Concurrent (threadDelay) +import AdListener (listenToNewAds) -import Ad (getResumes, getAds) - -import Model.Ad -import Model.URL -import Model.Resume - -import View.Ad (renderAds) - -import Page -import Parser.Detail - -listenURL :: URL -listenURL = "http://www.leboncoin.fr/annonces/offres/corse/" +import Config (Config) +import qualified Config as C main :: IO () -main = listenToNewAds [] - -listenToNewAds :: [Ad] -> IO () -listenToNewAds viewedAds = do - eitherResumes <- getResumes listenURL - case eitherResumes of - Left error -> - listenError viewedAds error - Right resumes -> - listenToNewAdsWithResumes viewedAds resumes - -listenToNewAdsWithResumes :: [Ad] -> [Resume] -> IO () -listenToNewAdsWithResumes viewedAds resumes = - let viewedURLs = getURLs $ map resume viewedAds - newResumes = getNewResumes viewedURLs resumes - in do - eitherNewAds <- getAds newResumes - case eitherNewAds of - Left error -> - listenError viewedAds error - Right newAds -> - do - if not (null newAds) - then - T.putStrLn (newAdsMessage newAds) - else - return () - waitOneMinute - listenToNewAds (viewedAds ++ newAds) - -newAdsMessage :: [Ad] -> T.Text -newAdsMessage newAds = - let newAdsMessage = - T.concat - [ "Got " - , T.pack . show . length $ newAds - , " new ads." - ] - line = T.map (\_ -> '-') newAdsMessage - in T.intercalate - "\n" - [ newAdsMessage - , T.concat [line, "\n"] - , renderAds newAds - ] - -listenError :: [Ad] -> T.Text -> IO () -listenError viewedAds error = do - T.putStrLn error - waitOneMinute - listenToNewAds viewedAds - -waitOneMinute :: IO () -waitOneMinute = threadDelay (1000 * 1000 * 60) +main = do + maybeConfig <- C.getConfig + case maybeConfig of + Just config -> + listenToNewAds config [] + Nothing -> + T.putStrLn C.configUsage diff --git a/src/Model/Detail.hs b/src/Model/Detail.hs index 684a718..952cb7a 100644 --- a/src/Model/Detail.hs +++ b/src/Model/Detail.hs @@ -2,8 +2,8 @@ module Model.Detail ( Detail(..) ) where -import qualified Data.Text as T +import Data.Text data Detail = Detail - { description :: Maybe T.Text + { description :: Maybe Text } deriving (Eq, Read, Show) diff --git a/src/Model/Resume.hs b/src/Model/Resume.hs index 9d966df..3d307f2 100644 --- a/src/Model/Resume.hs +++ b/src/Model/Resume.hs @@ -5,13 +5,14 @@ module Model.Resume ) where import Data.List ((\\)) +import Data.Text (Text) import qualified Data.Text as T import Model.URL data Resume = Resume - { name :: T.Text - , price :: Maybe T.Text + { name :: Text + , price :: Maybe Text , url :: URL } deriving (Eq, Read, Show) diff --git a/src/Model/URL.hs b/src/Model/URL.hs index 91cf22a..2114113 100644 --- a/src/Model/URL.hs +++ b/src/Model/URL.hs @@ -2,4 +2,6 @@ module Model.URL ( URL ) where -type URL = String +import Data.Text + +type URL = Text diff --git a/src/Page.hs b/src/Page.hs index b048410..da15ce4 100644 --- a/src/Page.hs +++ b/src/Page.hs @@ -4,18 +4,19 @@ module Page import Control.Exception (SomeException, try) +import Data.Text (Text) import qualified Data.Text as T import Network.HTTP (simpleHTTP, getRequest, getResponseBody) import Model.URL -getPage :: URL -> IO (Either T.Text T.Text) +getPage :: URL -> IO (Either Text Text) getPage url = - mapLeft (T.pack . show) <$> (try (unsafeGetPage url) :: IO (Either SomeException T.Text)) + mapLeft (T.pack . show) <$> (try (unsafeGetPage url) :: IO (Either SomeException Text)) -unsafeGetPage :: URL -> IO T.Text -unsafeGetPage url = simpleHTTP (getRequest url) >>= (\x -> T.pack <$> getResponseBody x) +unsafeGetPage :: URL -> IO Text +unsafeGetPage url = simpleHTTP (getRequest (T.unpack url)) >>= (\x -> T.pack <$> getResponseBody x) mapLeft :: (a -> c) -> Either a b -> Either c b mapLeft f (Left l) = Left (f l) diff --git a/src/Parser/Detail.hs b/src/Parser/Detail.hs index b787772..4144964 100644 --- a/src/Parser/Detail.hs +++ b/src/Parser/Detail.hs @@ -2,6 +2,7 @@ module Parser.Detail ( parseDetail ) where +import Data.Text (Text) import qualified Data.Text as T import Text.HTML.TagSoup @@ -10,7 +11,7 @@ import Model.Detail import Parser.Utils -parseDetail :: T.Text -> Detail +parseDetail :: Text -> Detail parseDetail page = let tags = parseTags page descriptionTags = getTagsBetween "