diff options
author | Joris Guyonvarch | 2015-04-18 21:59:22 +0200 |
---|---|---|
committer | Joris Guyonvarch | 2015-04-18 21:59:22 +0200 |
commit | 389d979eb3eaa18beb8a6da9f4a03bdb6acc1722 (patch) | |
tree | 9d475b868ad13162c0ecba42b2138058c2e2e68b /src | |
parent | 86f9991deeb44a6cc81044e61a9ad3ee001c5ced (diff) |
Parsing utag_list from a detail page and showing the keys that are given in the configuration file
Diffstat (limited to 'src')
-rw-r--r-- | src/AdListener.hs | 10 | ||||
-rw-r--r-- | src/Config.hs | 24 | ||||
-rw-r--r-- | src/Main.hs | 12 | ||||
-rw-r--r-- | src/Model/Config.hs | 13 | ||||
-rw-r--r-- | src/Model/Detail.hs | 2 | ||||
-rw-r--r-- | src/Parser/Detail.hs | 40 | ||||
-rw-r--r-- | src/Parser/Utils.hs | 15 | ||||
-rw-r--r-- | src/View/Html/Ad.hs | 38 | ||||
-rw-r--r-- | src/View/Plain/Ad.hs | 53 |
9 files changed, 159 insertions, 48 deletions
diff --git a/src/AdListener.hs b/src/AdListener.hs index 4fc9b20..eee2de4 100644 --- a/src/AdListener.hs +++ b/src/AdListener.hs @@ -65,7 +65,7 @@ listenToNewAdsWithResumes config viewedURLs resumes = time <- getCurrentFormattedTime if not (null newAds) then - let message = P.renderConsoleAds time newAds + let message = P.renderConsoleAds config time newAds in do T.putStrLn message trySendMail config newAds @@ -77,11 +77,11 @@ listenToNewAdsWithResumes config viewedURLs resumes = trySendMail :: Config -> [Ad] -> IO () trySendMail config ads = case C.mailTo config of - Nothing -> + [] -> return () - Just mailTo -> - let (title, plainBody) = P.renderAds ads - htmlBody = H.renderAds ads + mailTo -> + let (title, plainBody) = P.renderAds config ads + htmlBody = H.renderAds config ads in do eitherMailSuccess <- sendMail mailTo title plainBody htmlBody case eitherMailSuccess of diff --git a/src/Config.hs b/src/Config.hs index 76371be..0a421fa 100644 --- a/src/Config.hs +++ b/src/Config.hs @@ -6,7 +6,7 @@ module Config , getConfig ) where -import Data.Maybe (catMaybes, isJust) +import Data.Maybe (catMaybes, isJust, fromMaybe) import Data.Map (Map) import qualified Data.Map as M import Data.Text (Text) @@ -18,6 +18,7 @@ import Control.Monad (guard) import System.Directory (doesFileExist) import Model.URL +import Model.Config import Utils.Text @@ -33,6 +34,7 @@ configUsage = , "" , " - url (required)" , " - mailTo (optional)" + , " - properties (optional)" , "" , " Example:" , "" @@ -41,16 +43,14 @@ configUsage = , "" , " # The mailTo field is an optional list" , " # mailTo = jean.dupont@mail.fr, john.smith@mail.com" + , "" + , " # The properties field is an optional list" + , " # properties = cp, city, surface, ges" ] configPath :: FilePath configPath = "conf" -data Config = Config - { url :: URL - , mailTo :: Maybe [Text] - } deriving (Eq, Read, Show) - getConfig :: IO (Maybe Config) getConfig = do exists <- doesFileExist configPath @@ -74,8 +74,16 @@ configFromFile = configFromMap :: Map Text Text -> Maybe Config configFromMap map = do url <- M.lookup "url" map - let mailTo = fmap T.strip . T.splitOn "," <$> M.lookup "mailTo" map - return $ Config { url = url, mailTo = mailTo } + let config = + Config + { url = url + , mailTo = fieldValues "mailTo" map + , properties = fieldValues "properties" map + } + return config + +fieldValues :: Text -> Map Text Text -> [Text] +fieldValues field map = fromMaybe [] $ fmap T.strip . T.splitOn "," <$> M.lookup field map lineConfig :: Text -> Maybe (Text, Text) lineConfig line = do diff --git a/src/Main.hs b/src/Main.hs index f38646b..82135a2 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -19,3 +19,15 @@ main = do listenToNewAds config Nothing -> T.putStrLn C.configUsage + +-- import Page +-- import Parser.Detail + +-- main :: IO () +-- main = do +-- eitherPage <- getPage "http://www.leboncoin.fr/locations/794858813.htm?ca=18_s" +-- case eitherPage of +-- Right page -> +-- putStrLn (show $ parseDetail page) +-- Left error -> +-- T.putStrLn error diff --git a/src/Model/Config.hs b/src/Model/Config.hs new file mode 100644 index 0000000..42b390e --- /dev/null +++ b/src/Model/Config.hs @@ -0,0 +1,13 @@ +module Model.Config + ( Config(..) + ) where + +import Data.Text + +import Model.URL + +data Config = Config + { url :: URL + , mailTo :: [Text] + , properties :: [Text] + } deriving (Eq, Read, Show) diff --git a/src/Model/Detail.hs b/src/Model/Detail.hs index a170ca6..c0e8d5f 100644 --- a/src/Model/Detail.hs +++ b/src/Model/Detail.hs @@ -3,10 +3,12 @@ module Model.Detail ) where import Data.Text +import Data.Map (Map) import Model.URL data Detail = Detail { description :: Maybe Text , images :: [URL] + , properties :: Map Text Text } deriving (Eq, Read, Show) diff --git a/src/Parser/Detail.hs b/src/Parser/Detail.hs index 3a91ac2..3f424e9 100644 --- a/src/Parser/Detail.hs +++ b/src/Parser/Detail.hs @@ -5,6 +5,11 @@ module Parser.Detail import Data.Text (Text) import qualified Data.Text as T +import Data.Map (Map) +import qualified Data.Map as M + +import Data.Maybe (catMaybes, fromMaybe) + import Text.HTML.TagSoup import Model.Detail @@ -14,9 +19,11 @@ import Parser.Utils parseDetail :: Text -> Detail parseDetail page = let tags = parseTags page - description = parseDescription tags - images = getTagAttributes "<meta itemprop=image>" (T.pack "content") tags - in Detail { description = description, images = images } + in Detail + { description = parseDescription tags + , images = getTagAttributes "<meta itemprop=image>" (T.pack "content") tags + , properties = parseProperties tags + } parseDescription :: [Tag Text] -> Maybe Text parseDescription tags = @@ -27,3 +34,30 @@ parseDescription tags = else let replaceBr = map (\tag -> if tag ~== "<br>" then TagText (T.pack "\n") else tag) in Just . T.strip . renderTags . replaceBr $ descriptionTags + +parseProperties :: [Tag Text] -> Map Text Text +parseProperties tags = + let mbUtagData = getTagTextAfter "<script>" . getTagsAfter "<body>" $ tags + in fromMaybe M.empty (fmap parseUtagData mbUtagData) + +parseUtagData :: Text -> Map Text Text +parseUtagData = + M.fromList + . catMaybes + . fmap parseUtag + . T.splitOn (T.pack ",") + . T.takeWhile (/= '}') + . T.drop 1 + . T.dropWhile (/= '{') + +parseUtag :: Text -> Maybe (Text, Text) +parseUtag utag = + case T.splitOn (T.pack ":") utag of + [x, y] -> Just (T.strip x, removeQuotes y) + _ -> Nothing + +removeQuotes :: Text -> Text +removeQuotes = + T.takeWhile (/= '\"') + . T.dropWhile (== '\"') + . T.strip diff --git a/src/Parser/Utils.hs b/src/Parser/Utils.hs index c03ab03..d72a1ce 100644 --- a/src/Parser/Utils.hs +++ b/src/Parser/Utils.hs @@ -1,5 +1,7 @@ module Parser.Utils - ( getTagsBetween + ( getTagsBefore + , getTagsAfter + , getTagsBetween , getTagAttributes , getTagAttribute , getTagTextAfter @@ -12,11 +14,14 @@ import qualified Data.Text as T import Text.HTML.TagSoup +getTagsBefore :: String -> [Tag Text] -> [Tag Text] +getTagsBefore selector = takeWhile (~/= selector) + +getTagsAfter :: String -> [Tag Text] -> [Tag Text] +getTagsAfter selector = drop 1 . dropWhile (~/= selector) + getTagsBetween :: String -> String -> [Tag Text] -> [Tag Text] -getTagsBetween beginSelector endSelector = - takeWhile (~/= endSelector) - . drop 1 - . dropWhile (~/= beginSelector) +getTagsBetween begin end = getTagsBefore end . getTagsAfter begin getTagAttributes :: String -> Text -> [Tag Text] -> [Text] getTagAttributes selector attribute = diff --git a/src/View/Html/Ad.hs b/src/View/Html/Ad.hs index fce164e..f3d9ece 100644 --- a/src/View/Html/Ad.hs +++ b/src/View/Html/Ad.hs @@ -7,9 +7,11 @@ module View.Html.Ad import Data.Text (Text) import qualified Data.Text as T import Data.Text.Lazy (toStrict) -import Data.Maybe (fromMaybe) +import Data.Maybe (fromMaybe, catMaybes) import Data.String (fromString) import Data.List (intersperse) +import Data.Map (Map) +import qualified Data.Map as M import Text.Blaze.Html import Text.Blaze.Html5 (Html) @@ -28,20 +30,21 @@ import Model.Detail (Detail) import qualified Model.Detail as D import Model.URL +import Model.Config -renderAds :: [Ad] -> Text -renderAds = toStrict . renderHtml . adsHtml +renderAds :: Config -> [Ad] -> Text +renderAds config = toStrict . renderHtml . (adsHtml config) -adsHtml :: [Ad] -> Html -adsHtml ads = H.div (mapM_ adHtml ads) +adsHtml :: Config -> [Ad] -> Html +adsHtml config ads = H.div (mapM_ (adHtml config) ads) -adHtml :: Ad -> Html -adHtml ad = +adHtml :: Config -> Ad -> Html +adHtml config ad = let resume = A.resume ad detail = A.detail ad in do resumeHtml resume - detailHtml detail + detailHtml config detail resumeHtml :: Resume -> Html resumeHtml resume = @@ -55,8 +58,9 @@ resumeHtml resume = H.h1 (toHtml title) linkHtml url -detailHtml :: Detail -> Html -detailHtml detail = do +detailHtml :: Config -> Detail -> Html +detailHtml config detail = do + propertiesHtml (properties config) (D.properties detail) case D.description detail of Just description -> descriptionHtml description @@ -64,6 +68,20 @@ detailHtml detail = do H.div "" mapM_ imageLinkHtml (D.images detail) +propertiesHtml :: [Text] -> Map Text Text -> Html +propertiesHtml keys properties = + H.dl $ + sequence_ $ catMaybes $ map (propertyHtml properties) keys + +propertyHtml :: Map Text Text -> Text -> Maybe Html +propertyHtml properties key = + fmap + (\value -> do + H.dt (toHtml key) + H.dd (toHtml value) + ) + (M.lookup key properties) + descriptionHtml :: Text -> Html descriptionHtml = H.p . sequence_ . intersperse H.br . fmap toHtml . T.lines diff --git a/src/View/Plain/Ad.hs b/src/View/Plain/Ad.hs index 9dc5289..5120226 100644 --- a/src/View/Plain/Ad.hs +++ b/src/View/Plain/Ad.hs @@ -6,25 +6,28 @@ module View.Plain.Ad ) where import Data.List (intersperse) -import Data.Maybe (fromMaybe) +import Data.Maybe (fromMaybe, catMaybes) +import Data.Map (Map) +import qualified Data.Map as M import Data.Text (Text) import qualified Data.Text as T -import Model.Ad (Ad(..)) +import Model.Ad (Ad) import qualified Model.Ad as Ad -import Model.Resume (Resume(..)) +import Model.Resume (Resume) import qualified Model.Resume as Resume -import Model.Detail (Detail(..)) +import Model.Detail (Detail) import qualified Model.Detail as Detail import Model.URL +import Model.Config -renderConsoleAds :: Text -> [Ad] -> Text -renderConsoleAds time ads = - let (title, message) = renderAds ads +renderConsoleAds :: Config -> Text -> [Ad] -> Text +renderConsoleAds config time ads = + let (title, message) = renderAds config ads titleWithTime = T.concat [ "\n[" @@ -41,10 +44,10 @@ renderConsoleAds time ads = , message ] -renderAds :: [Ad] -> (Text, Text) -renderAds ads = +renderAds :: Config -> [Ad] -> (Text, Text) +renderAds config ads = let titleMessage = renderTitle $ length ads - adsMessage = T.intercalate "\n\n" . map renderAd $ ads + adsMessage = T.intercalate "\n\n" . map (renderAd config) $ ads in (titleMessage, adsMessage) renderTitle :: Int -> Text @@ -60,12 +63,12 @@ renderTitle count = , if count > 1 then "s" else "" ] -renderAd :: Ad -> Text -renderAd ad = +renderAd :: Config -> Ad -> Text +renderAd config ad = T.concat [ renderResume (Ad.resume ad) - , "\n\n" - , renderDetail (Ad.detail ad) + , "\n" + , renderDetail config (Ad.detail ad) ] renderResume :: Resume -> Text @@ -75,13 +78,29 @@ renderResume resume = titleLine = T.concat [Resume.name resume, price] in T.intercalate "\n" [titleLine, Resume.url resume] -renderDetail :: Detail -> Text -renderDetail detail = +renderDetail :: Config -> Detail -> Text +renderDetail config detail = T.concat - [ fromMaybe "−" (Detail.description detail) + [ renderProperties (properties config) (Detail.properties detail) + , fromMaybe "−" (Detail.description detail) , renderURLs "\n\nImages:" (Detail.images detail) ] +renderProperties :: [Text] -> Map Text Text -> Text +renderProperties [] properties = "" +renderProperties keys properties = + T.concat + [ "\n" + , T.concat (catMaybes $ map (renderProperty properties) keys) + , "\n" + ] + +renderProperty :: Map Text Text -> Text -> Maybe Text +renderProperty properties key = + fmap + (\value -> T.concat [key, ": ", value, "\n"]) + (M.lookup key properties) + renderURLs :: Text -> [URL] -> Text renderURLs title [] = "" renderURLs title urls = |