aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJoris Guyonvarch2015-04-18 21:59:22 +0200
committerJoris Guyonvarch2015-04-18 21:59:22 +0200
commit389d979eb3eaa18beb8a6da9f4a03bdb6acc1722 (patch)
tree9d475b868ad13162c0ecba42b2138058c2e2e68b /src
parent86f9991deeb44a6cc81044e61a9ad3ee001c5ced (diff)
Parsing utag_list from a detail page and showing the keys that are given in the configuration file
Diffstat (limited to 'src')
-rw-r--r--src/AdListener.hs10
-rw-r--r--src/Config.hs24
-rw-r--r--src/Main.hs12
-rw-r--r--src/Model/Config.hs13
-rw-r--r--src/Model/Detail.hs2
-rw-r--r--src/Parser/Detail.hs40
-rw-r--r--src/Parser/Utils.hs15
-rw-r--r--src/View/Html/Ad.hs38
-rw-r--r--src/View/Plain/Ad.hs53
9 files changed, 159 insertions, 48 deletions
diff --git a/src/AdListener.hs b/src/AdListener.hs
index 4fc9b20..eee2de4 100644
--- a/src/AdListener.hs
+++ b/src/AdListener.hs
@@ -65,7 +65,7 @@ listenToNewAdsWithResumes config viewedURLs resumes =
time <- getCurrentFormattedTime
if not (null newAds)
then
- let message = P.renderConsoleAds time newAds
+ let message = P.renderConsoleAds config time newAds
in do
T.putStrLn message
trySendMail config newAds
@@ -77,11 +77,11 @@ listenToNewAdsWithResumes config viewedURLs resumes =
trySendMail :: Config -> [Ad] -> IO ()
trySendMail config ads =
case C.mailTo config of
- Nothing ->
+ [] ->
return ()
- Just mailTo ->
- let (title, plainBody) = P.renderAds ads
- htmlBody = H.renderAds ads
+ mailTo ->
+ let (title, plainBody) = P.renderAds config ads
+ htmlBody = H.renderAds config ads
in do
eitherMailSuccess <- sendMail mailTo title plainBody htmlBody
case eitherMailSuccess of
diff --git a/src/Config.hs b/src/Config.hs
index 76371be..0a421fa 100644
--- a/src/Config.hs
+++ b/src/Config.hs
@@ -6,7 +6,7 @@ module Config
, getConfig
) where
-import Data.Maybe (catMaybes, isJust)
+import Data.Maybe (catMaybes, isJust, fromMaybe)
import Data.Map (Map)
import qualified Data.Map as M
import Data.Text (Text)
@@ -18,6 +18,7 @@ import Control.Monad (guard)
import System.Directory (doesFileExist)
import Model.URL
+import Model.Config
import Utils.Text
@@ -33,6 +34,7 @@ configUsage =
, ""
, " - url (required)"
, " - mailTo (optional)"
+ , " - properties (optional)"
, ""
, " Example:"
, ""
@@ -41,16 +43,14 @@ configUsage =
, ""
, " # The mailTo field is an optional list"
, " # mailTo = jean.dupont@mail.fr, john.smith@mail.com"
+ , ""
+ , " # The properties field is an optional list"
+ , " # properties = cp, city, surface, ges"
]
configPath :: FilePath
configPath = "conf"
-data Config = Config
- { url :: URL
- , mailTo :: Maybe [Text]
- } deriving (Eq, Read, Show)
-
getConfig :: IO (Maybe Config)
getConfig = do
exists <- doesFileExist configPath
@@ -74,8 +74,16 @@ configFromFile =
configFromMap :: Map Text Text -> Maybe Config
configFromMap map = do
url <- M.lookup "url" map
- let mailTo = fmap T.strip . T.splitOn "," <$> M.lookup "mailTo" map
- return $ Config { url = url, mailTo = mailTo }
+ let config =
+ Config
+ { url = url
+ , mailTo = fieldValues "mailTo" map
+ , properties = fieldValues "properties" map
+ }
+ return config
+
+fieldValues :: Text -> Map Text Text -> [Text]
+fieldValues field map = fromMaybe [] $ fmap T.strip . T.splitOn "," <$> M.lookup field map
lineConfig :: Text -> Maybe (Text, Text)
lineConfig line = do
diff --git a/src/Main.hs b/src/Main.hs
index f38646b..82135a2 100644
--- a/src/Main.hs
+++ b/src/Main.hs
@@ -19,3 +19,15 @@ main = do
listenToNewAds config
Nothing ->
T.putStrLn C.configUsage
+
+-- import Page
+-- import Parser.Detail
+
+-- main :: IO ()
+-- main = do
+-- eitherPage <- getPage "http://www.leboncoin.fr/locations/794858813.htm?ca=18_s"
+-- case eitherPage of
+-- Right page ->
+-- putStrLn (show $ parseDetail page)
+-- Left error ->
+-- T.putStrLn error
diff --git a/src/Model/Config.hs b/src/Model/Config.hs
new file mode 100644
index 0000000..42b390e
--- /dev/null
+++ b/src/Model/Config.hs
@@ -0,0 +1,13 @@
+module Model.Config
+ ( Config(..)
+ ) where
+
+import Data.Text
+
+import Model.URL
+
+data Config = Config
+ { url :: URL
+ , mailTo :: [Text]
+ , properties :: [Text]
+ } deriving (Eq, Read, Show)
diff --git a/src/Model/Detail.hs b/src/Model/Detail.hs
index a170ca6..c0e8d5f 100644
--- a/src/Model/Detail.hs
+++ b/src/Model/Detail.hs
@@ -3,10 +3,12 @@ module Model.Detail
) where
import Data.Text
+import Data.Map (Map)
import Model.URL
data Detail = Detail
{ description :: Maybe Text
, images :: [URL]
+ , properties :: Map Text Text
} deriving (Eq, Read, Show)
diff --git a/src/Parser/Detail.hs b/src/Parser/Detail.hs
index 3a91ac2..3f424e9 100644
--- a/src/Parser/Detail.hs
+++ b/src/Parser/Detail.hs
@@ -5,6 +5,11 @@ module Parser.Detail
import Data.Text (Text)
import qualified Data.Text as T
+import Data.Map (Map)
+import qualified Data.Map as M
+
+import Data.Maybe (catMaybes, fromMaybe)
+
import Text.HTML.TagSoup
import Model.Detail
@@ -14,9 +19,11 @@ import Parser.Utils
parseDetail :: Text -> Detail
parseDetail page =
let tags = parseTags page
- description = parseDescription tags
- images = getTagAttributes "<meta itemprop=image>" (T.pack "content") tags
- in Detail { description = description, images = images }
+ in Detail
+ { description = parseDescription tags
+ , images = getTagAttributes "<meta itemprop=image>" (T.pack "content") tags
+ , properties = parseProperties tags
+ }
parseDescription :: [Tag Text] -> Maybe Text
parseDescription tags =
@@ -27,3 +34,30 @@ parseDescription tags =
else
let replaceBr = map (\tag -> if tag ~== "<br>" then TagText (T.pack "\n") else tag)
in Just . T.strip . renderTags . replaceBr $ descriptionTags
+
+parseProperties :: [Tag Text] -> Map Text Text
+parseProperties tags =
+ let mbUtagData = getTagTextAfter "<script>" . getTagsAfter "<body>" $ tags
+ in fromMaybe M.empty (fmap parseUtagData mbUtagData)
+
+parseUtagData :: Text -> Map Text Text
+parseUtagData =
+ M.fromList
+ . catMaybes
+ . fmap parseUtag
+ . T.splitOn (T.pack ",")
+ . T.takeWhile (/= '}')
+ . T.drop 1
+ . T.dropWhile (/= '{')
+
+parseUtag :: Text -> Maybe (Text, Text)
+parseUtag utag =
+ case T.splitOn (T.pack ":") utag of
+ [x, y] -> Just (T.strip x, removeQuotes y)
+ _ -> Nothing
+
+removeQuotes :: Text -> Text
+removeQuotes =
+ T.takeWhile (/= '\"')
+ . T.dropWhile (== '\"')
+ . T.strip
diff --git a/src/Parser/Utils.hs b/src/Parser/Utils.hs
index c03ab03..d72a1ce 100644
--- a/src/Parser/Utils.hs
+++ b/src/Parser/Utils.hs
@@ -1,5 +1,7 @@
module Parser.Utils
- ( getTagsBetween
+ ( getTagsBefore
+ , getTagsAfter
+ , getTagsBetween
, getTagAttributes
, getTagAttribute
, getTagTextAfter
@@ -12,11 +14,14 @@ import qualified Data.Text as T
import Text.HTML.TagSoup
+getTagsBefore :: String -> [Tag Text] -> [Tag Text]
+getTagsBefore selector = takeWhile (~/= selector)
+
+getTagsAfter :: String -> [Tag Text] -> [Tag Text]
+getTagsAfter selector = drop 1 . dropWhile (~/= selector)
+
getTagsBetween :: String -> String -> [Tag Text] -> [Tag Text]
-getTagsBetween beginSelector endSelector =
- takeWhile (~/= endSelector)
- . drop 1
- . dropWhile (~/= beginSelector)
+getTagsBetween begin end = getTagsBefore end . getTagsAfter begin
getTagAttributes :: String -> Text -> [Tag Text] -> [Text]
getTagAttributes selector attribute =
diff --git a/src/View/Html/Ad.hs b/src/View/Html/Ad.hs
index fce164e..f3d9ece 100644
--- a/src/View/Html/Ad.hs
+++ b/src/View/Html/Ad.hs
@@ -7,9 +7,11 @@ module View.Html.Ad
import Data.Text (Text)
import qualified Data.Text as T
import Data.Text.Lazy (toStrict)
-import Data.Maybe (fromMaybe)
+import Data.Maybe (fromMaybe, catMaybes)
import Data.String (fromString)
import Data.List (intersperse)
+import Data.Map (Map)
+import qualified Data.Map as M
import Text.Blaze.Html
import Text.Blaze.Html5 (Html)
@@ -28,20 +30,21 @@ import Model.Detail (Detail)
import qualified Model.Detail as D
import Model.URL
+import Model.Config
-renderAds :: [Ad] -> Text
-renderAds = toStrict . renderHtml . adsHtml
+renderAds :: Config -> [Ad] -> Text
+renderAds config = toStrict . renderHtml . (adsHtml config)
-adsHtml :: [Ad] -> Html
-adsHtml ads = H.div (mapM_ adHtml ads)
+adsHtml :: Config -> [Ad] -> Html
+adsHtml config ads = H.div (mapM_ (adHtml config) ads)
-adHtml :: Ad -> Html
-adHtml ad =
+adHtml :: Config -> Ad -> Html
+adHtml config ad =
let resume = A.resume ad
detail = A.detail ad
in do
resumeHtml resume
- detailHtml detail
+ detailHtml config detail
resumeHtml :: Resume -> Html
resumeHtml resume =
@@ -55,8 +58,9 @@ resumeHtml resume =
H.h1 (toHtml title)
linkHtml url
-detailHtml :: Detail -> Html
-detailHtml detail = do
+detailHtml :: Config -> Detail -> Html
+detailHtml config detail = do
+ propertiesHtml (properties config) (D.properties detail)
case D.description detail of
Just description ->
descriptionHtml description
@@ -64,6 +68,20 @@ detailHtml detail = do
H.div ""
mapM_ imageLinkHtml (D.images detail)
+propertiesHtml :: [Text] -> Map Text Text -> Html
+propertiesHtml keys properties =
+ H.dl $
+ sequence_ $ catMaybes $ map (propertyHtml properties) keys
+
+propertyHtml :: Map Text Text -> Text -> Maybe Html
+propertyHtml properties key =
+ fmap
+ (\value -> do
+ H.dt (toHtml key)
+ H.dd (toHtml value)
+ )
+ (M.lookup key properties)
+
descriptionHtml :: Text -> Html
descriptionHtml = H.p . sequence_ . intersperse H.br . fmap toHtml . T.lines
diff --git a/src/View/Plain/Ad.hs b/src/View/Plain/Ad.hs
index 9dc5289..5120226 100644
--- a/src/View/Plain/Ad.hs
+++ b/src/View/Plain/Ad.hs
@@ -6,25 +6,28 @@ module View.Plain.Ad
) where
import Data.List (intersperse)
-import Data.Maybe (fromMaybe)
+import Data.Maybe (fromMaybe, catMaybes)
+import Data.Map (Map)
+import qualified Data.Map as M
import Data.Text (Text)
import qualified Data.Text as T
-import Model.Ad (Ad(..))
+import Model.Ad (Ad)
import qualified Model.Ad as Ad
-import Model.Resume (Resume(..))
+import Model.Resume (Resume)
import qualified Model.Resume as Resume
-import Model.Detail (Detail(..))
+import Model.Detail (Detail)
import qualified Model.Detail as Detail
import Model.URL
+import Model.Config
-renderConsoleAds :: Text -> [Ad] -> Text
-renderConsoleAds time ads =
- let (title, message) = renderAds ads
+renderConsoleAds :: Config -> Text -> [Ad] -> Text
+renderConsoleAds config time ads =
+ let (title, message) = renderAds config ads
titleWithTime =
T.concat
[ "\n["
@@ -41,10 +44,10 @@ renderConsoleAds time ads =
, message
]
-renderAds :: [Ad] -> (Text, Text)
-renderAds ads =
+renderAds :: Config -> [Ad] -> (Text, Text)
+renderAds config ads =
let titleMessage = renderTitle $ length ads
- adsMessage = T.intercalate "\n\n" . map renderAd $ ads
+ adsMessage = T.intercalate "\n\n" . map (renderAd config) $ ads
in (titleMessage, adsMessage)
renderTitle :: Int -> Text
@@ -60,12 +63,12 @@ renderTitle count =
, if count > 1 then "s" else ""
]
-renderAd :: Ad -> Text
-renderAd ad =
+renderAd :: Config -> Ad -> Text
+renderAd config ad =
T.concat
[ renderResume (Ad.resume ad)
- , "\n\n"
- , renderDetail (Ad.detail ad)
+ , "\n"
+ , renderDetail config (Ad.detail ad)
]
renderResume :: Resume -> Text
@@ -75,13 +78,29 @@ renderResume resume =
titleLine = T.concat [Resume.name resume, price]
in T.intercalate "\n" [titleLine, Resume.url resume]
-renderDetail :: Detail -> Text
-renderDetail detail =
+renderDetail :: Config -> Detail -> Text
+renderDetail config detail =
T.concat
- [ fromMaybe "−" (Detail.description detail)
+ [ renderProperties (properties config) (Detail.properties detail)
+ , fromMaybe "−" (Detail.description detail)
, renderURLs "\n\nImages:" (Detail.images detail)
]
+renderProperties :: [Text] -> Map Text Text -> Text
+renderProperties [] properties = ""
+renderProperties keys properties =
+ T.concat
+ [ "\n"
+ , T.concat (catMaybes $ map (renderProperty properties) keys)
+ , "\n"
+ ]
+
+renderProperty :: Map Text Text -> Text -> Maybe Text
+renderProperty properties key =
+ fmap
+ (\value -> T.concat [key, ": ", value, "\n"])
+ (M.lookup key properties)
+
renderURLs :: Text -> [URL] -> Text
renderURLs title [] = ""
renderURLs title urls =