aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJoris Guyonvarch2015-04-11 19:20:04 +0200
committerJoris Guyonvarch2015-04-11 19:20:04 +0200
commit4ddd6d1f6df2bab75d42b6d45b816e92e7173529 (patch)
tree09ddfd62fe32e210a87eb15eb86ee07ab9f24623 /src
parent88b7f848da3515d67cfb989b98ad5285a037993e (diff)
Fixing parsing errors, and use Text from now
Diffstat (limited to 'src')
-rw-r--r--src/Ad.hs24
-rw-r--r--src/Main.hs95
-rw-r--r--src/Model/Detail.hs4
-rw-r--r--src/Model/Resume.hs10
-rw-r--r--src/Page.hs11
-rw-r--r--src/Parser/Detail.hs13
-rw-r--r--src/Parser/Resume.hs13
-rw-r--r--src/Parser/Utils.hs31
-rw-r--r--src/View/Ad.hs27
9 files changed, 135 insertions, 93 deletions
diff --git a/src/Ad.hs b/src/Ad.hs
index 6cd1d8a..5f3a9f1 100644
--- a/src/Ad.hs
+++ b/src/Ad.hs
@@ -1,9 +1,10 @@
module Ad
- ( getAds
- , getResumes
+ ( getResumes
+ , getAds
) where
import Page (getPage)
+import qualified Data.Text as T
import Model.Ad
import Model.Resume
@@ -13,24 +14,15 @@ import Model.URL
import Parser.Resume
import Parser.Detail
-getResumes :: URL -> IO (Either String [Resume])
+getResumes :: URL -> IO (Either T.Text [Resume])
getResumes url = fmap parseResumes <$> getPage url
-getAds :: URL -> IO (Either String [Ad])
-getAds url = do
- eitherPage <- getPage url
- case eitherPage of
- Left error ->
- return (Left error)
- Right page ->
- getAdsFromPage page
-
-getAdsFromPage :: String -> IO (Either String [Ad])
-getAdsFromPage page = do
- xs <- sequence $ map getAd (parseResumes page)
+getAds :: [Resume] -> IO (Either T.Text [Ad])
+getAds resumes = do
+ xs <- sequence $ map getAd resumes
return $ sequence xs
-getAd :: Resume -> IO (Either String Ad)
+getAd :: Resume -> IO (Either T.Text Ad)
getAd resume = do
page <- getPage (url resume)
fmap (\page -> Ad { resume = resume, detail = parseDetail page}) <$> getPage (url resume)
diff --git a/src/Main.hs b/src/Main.hs
index f9cd7f0..6208953 100644
--- a/src/Main.hs
+++ b/src/Main.hs
@@ -1,64 +1,81 @@
+{-# LANGUAGE OverloadedStrings #-}
+
module Main
( main
) where
import Data.List (intersperse)
+import qualified Data.Text as T
+import qualified Data.Text.IO as T
import Control.Concurrent (threadDelay)
-import Ad (getResumes)
+import Ad (getResumes, getAds)
import Model.Ad
import Model.URL
-import Model.Resume (getURLs, getNewResumes)
+import Model.Resume
-import View.Ad (renderResume)
+import View.Ad (renderAds)
import Page
import Parser.Detail
-url :: URL
-url = "http://www.leboncoin.fr/annonces/offres/corse/"
+listenURL :: URL
+listenURL = "http://www.leboncoin.fr/annonces/offres/corse/"
main :: IO ()
-main = initListenToNewResumes
+main = listenToNewAds []
-initListenToNewResumes :: IO ()
-initListenToNewResumes = do
- eitherResumes <- getResumes url
+listenToNewAds :: [Ad] -> IO ()
+listenToNewAds viewedAds = do
+ eitherResumes <- getResumes listenURL
case eitherResumes of
- Left error -> do
- putStrLn error
- waitOneMinute
- initListenToNewResumes
+ Left error ->
+ listenError viewedAds error
Right resumes ->
- let viewedURLs = getURLs resumes
- in do
- putStrLn "Initialization complete"
- waitOneMinute
- listenToNewResumes viewedURLs
+ listenToNewAdsWithResumes viewedAds resumes
-listenToNewResumes :: [URL] -> IO ()
-listenToNewResumes viewedURLs = do
- eitherResumes <- getResumes url
- case eitherResumes of
- Left error -> do
- putStrLn error
- waitOneMinute
- listenToNewResumes viewedURLs
- Right resumes ->
- let (newViewdURLs, newResumes) = getNewResumes viewedURLs resumes
- newAdsCount = length newResumes
- in do
- if newAdsCount > 0
- then
- do
- putStrLn ("Got " ++ (show newAdsCount) ++ " new ads.\n")
- putStrLn (concat . intersperse "\n\n" . map renderResume $ newResumes)
- else
- return ()
- waitOneMinute
- listenToNewResumes newViewdURLs
+listenToNewAdsWithResumes :: [Ad] -> [Resume] -> IO ()
+listenToNewAdsWithResumes viewedAds resumes =
+ let viewedURLs = getURLs $ map resume viewedAds
+ newResumes = getNewResumes viewedURLs resumes
+ in do
+ eitherNewAds <- getAds newResumes
+ case eitherNewAds of
+ Left error ->
+ listenError viewedAds error
+ Right newAds ->
+ do
+ if not (null newAds)
+ then
+ T.putStrLn (newAdsMessage newAds)
+ else
+ return ()
+ waitOneMinute
+ listenToNewAds (viewedAds ++ newAds)
+
+newAdsMessage :: [Ad] -> T.Text
+newAdsMessage newAds =
+ let newAdsMessage =
+ T.concat
+ [ "Got "
+ , T.pack . show . length $ newAds
+ , " new ads."
+ ]
+ line = T.map (\_ -> '-') newAdsMessage
+ in T.intercalate
+ "\n"
+ [ newAdsMessage
+ , T.concat [line, "\n"]
+ , renderAds newAds
+ ]
+
+listenError :: [Ad] -> T.Text -> IO ()
+listenError viewedAds error = do
+ T.putStrLn error
+ waitOneMinute
+ listenToNewAds viewedAds
waitOneMinute :: IO ()
waitOneMinute = threadDelay (1000 * 1000 * 60)
diff --git a/src/Model/Detail.hs b/src/Model/Detail.hs
index f00a7eb..684a718 100644
--- a/src/Model/Detail.hs
+++ b/src/Model/Detail.hs
@@ -2,6 +2,8 @@ module Model.Detail
( Detail(..)
) where
+import qualified Data.Text as T
+
data Detail = Detail
- { description :: Maybe String
+ { description :: Maybe T.Text
} deriving (Eq, Read, Show)
diff --git a/src/Model/Resume.hs b/src/Model/Resume.hs
index f4e9cd5..9d966df 100644
--- a/src/Model/Resume.hs
+++ b/src/Model/Resume.hs
@@ -5,20 +5,20 @@ module Model.Resume
) where
import Data.List ((\\))
+import qualified Data.Text as T
import Model.URL
data Resume = Resume
- { name :: String
- , price :: Maybe String
+ { name :: T.Text
+ , price :: Maybe T.Text
, url :: URL
} deriving (Eq, Read, Show)
-getNewResumes :: [URL] -> [Resume] -> ([URL], [Resume])
+getNewResumes :: [URL] -> [Resume] -> [Resume]
getNewResumes viewdURLs resumes =
let newURLs = (getURLs resumes) \\ viewdURLs
- newResumes = filter (\resume -> elem (url resume) newURLs) resumes
- in (viewdURLs ++ newURLs, newResumes)
+ in filter (\resume -> elem (url resume) newURLs) resumes
getURLs :: [Resume] -> [URL]
getURLs = map url
diff --git a/src/Page.hs b/src/Page.hs
index b70db70..b048410 100644
--- a/src/Page.hs
+++ b/src/Page.hs
@@ -4,11 +4,18 @@ module Page
import Control.Exception (SomeException, try)
+import qualified Data.Text as T
+
import Network.HTTP (simpleHTTP, getRequest, getResponseBody)
-getPage :: String -> IO (Either String String)
+import Model.URL
+
+getPage :: URL -> IO (Either T.Text T.Text)
getPage url =
- mapLeft show <$> (try (simpleHTTP (getRequest url) >>= getResponseBody) :: IO (Either SomeException String))
+ mapLeft (T.pack . show) <$> (try (unsafeGetPage url) :: IO (Either SomeException T.Text))
+
+unsafeGetPage :: URL -> IO T.Text
+unsafeGetPage url = simpleHTTP (getRequest url) >>= (\x -> T.pack <$> getResponseBody x)
mapLeft :: (a -> c) -> Either a b -> Either c b
mapLeft f (Left l) = Left (f l)
diff --git a/src/Parser/Detail.hs b/src/Parser/Detail.hs
index ab1b0ca..b787772 100644
--- a/src/Parser/Detail.hs
+++ b/src/Parser/Detail.hs
@@ -2,14 +2,23 @@ module Parser.Detail
( parseDetail
) where
+import qualified Data.Text as T
+
import Text.HTML.TagSoup
import Model.Detail
import Parser.Utils
-parseDetail :: String -> Detail
+parseDetail :: T.Text -> Detail
parseDetail page =
let tags = parseTags page
- description = getTagText "<div class=content>" tags
+ descriptionTags = getTagsBetween "<div itemprop=description>" "</div>" tags
+ description =
+ if null descriptionTags
+ then
+ Nothing
+ else
+ let replaceBr = map (\tag -> if tag ~== "<br>" then TagText (T.pack "\n") else tag)
+ in Just . T.strip . renderTags . replaceBr $ descriptionTags
in Detail { description = description }
diff --git a/src/Parser/Resume.hs b/src/Parser/Resume.hs
index bd73912..6cd4415 100644
--- a/src/Parser/Resume.hs
+++ b/src/Parser/Resume.hs
@@ -3,6 +3,7 @@ module Parser.Resume
) where
import Data.Maybe (catMaybes)
+import qualified Data.Text as T
import Text.HTML.TagSoup
@@ -10,7 +11,7 @@ import Model.Resume
import Parser.Utils
-parseResumes :: String -> [Resume]
+parseResumes :: T.Text -> [Resume]
parseResumes page =
case sections (~== "<div class=list-lbc>") (parseTags page) of
[] ->
@@ -19,9 +20,9 @@ parseResumes page =
let lbcTags = takeWhile (~/= "<div id=alertesCartouche>") sectionTags
in catMaybes . fmap parseResume $ partitions (~== "<a>") lbcTags
-parseResume :: [Tag String] -> Maybe Resume
+parseResume :: [Tag T.Text] -> Maybe Resume
parseResume item = do
- name <- getTagText "<h2 class=title>" item
- let price = getTagText "<div class=price>" item
- url <- getTagAttribute "<a>" "href" item
- return Resume { name = name, price = price, url = url }
+ name <- getTagTextAfter "<h2 class=title>" item
+ let price = getTagTextAfter "<div class=price>" item
+ url <- getTagAttribute "<a>" (T.pack "href") item
+ return Resume { name = name, price = price, url = T.unpack url }
diff --git a/src/Parser/Utils.hs b/src/Parser/Utils.hs
index 4864e00..8527777 100644
--- a/src/Parser/Utils.hs
+++ b/src/Parser/Utils.hs
@@ -1,30 +1,35 @@
module Parser.Utils
- ( getTagAttribute
- , getTagText
+ ( getTagsBetween
+ , getTagAttribute
+ , getTagTextAfter
) where
import Data.List (find, findIndex)
import Data.Maybe (listToMaybe)
+import qualified Data.Text as T
import Text.HTML.TagSoup
-getTagAttribute :: String -> String -> [Tag String] -> Maybe String
-getTagAttribute selector attribute item =
- find (~== selector) item >>= maybeTagAttribute attribute
+getTagsBetween :: String -> String -> [Tag T.Text] -> [Tag T.Text]
+getTagsBetween beginSelector endSelector =
+ takeWhile (~/= endSelector)
+ . drop 1
+ . dropWhile (~/= beginSelector)
-getTagText :: String -> [Tag String] -> Maybe String
-getTagText selector item =
- case findIndex (~== selector) item of
- Just index -> fmap trim $ safeGetAt (index + 1) item >>= maybeTagText
+getTagAttribute :: String -> T.Text -> [Tag T.Text] -> Maybe T.Text
+getTagAttribute selector attribute tags =
+ find (~== selector) tags >>= maybeTagAttribute attribute
+
+getTagTextAfter :: String -> [Tag T.Text] -> Maybe T.Text
+getTagTextAfter selector tags =
+ case findIndex (~== selector) tags of
+ Just index -> fmap T.strip $ safeGetAt (index + 1) tags >>= maybeTagText
Nothing -> Nothing
-maybeTagAttribute :: String -> Tag String -> Maybe String
+maybeTagAttribute :: T.Text -> Tag T.Text -> Maybe T.Text
maybeTagAttribute name (TagOpen _ xs) =
fmap snd . find (\(x, _) -> x == name) $ xs
maybeTagAttribute attribute _ = Nothing
-trim :: String -> String
-trim = unwords . words
-
safeGetAt :: Int -> [a] -> Maybe a
safeGetAt index = listToMaybe . drop index
diff --git a/src/View/Ad.hs b/src/View/Ad.hs
index 020fa91..5e408f3 100644
--- a/src/View/Ad.hs
+++ b/src/View/Ad.hs
@@ -1,10 +1,12 @@
+{-# LANGUAGE OverloadedStrings #-}
+
module View.Ad
- ( renderAd
- , renderResume
+ ( renderAds
) where
import Data.List (intersperse)
import Data.Maybe (fromMaybe)
+import qualified Data.Text as T
import Model.Ad (Ad(..))
import qualified Model.Ad as Ad
@@ -13,17 +15,24 @@ import qualified Model.Resume as Resume
import Model.Detail (Detail(..))
import qualified Model.Detail as Detail
-renderAd :: Ad -> String
+renderAds :: [Ad] -> T.Text
+renderAds = T.intercalate "\n\n" . map renderAd
+
+renderAd :: Ad -> T.Text
renderAd ad =
- (renderResume (Ad.resume ad)) ++ "\n\n" ++ (renderDetail (Ad.detail ad)) ++ "\n"
+ T.concat
+ [ renderResume (Ad.resume ad)
+ , "\n\n"
+ , renderDetail (Ad.detail ad)
+ ]
-renderResume :: Resume -> String
+renderResume :: Resume -> T.Text
renderResume resume =
- let formatPrice price = " - " ++ price
+ let formatPrice price = T.concat [" - ", price]
price = fromMaybe "" . fmap formatPrice . Resume.price $ resume
- titleLine = (Resume.name resume) ++ price
- in concat . intersperse "\n" $ [titleLine, Resume.url resume]
+ titleLine = T.concat [Resume.name resume, price]
+ in T.intercalate "\n" [titleLine, T.pack . Resume.url $ resume]
-renderDetail :: Detail -> String
+renderDetail :: Detail -> T.Text
renderDetail detail =
fromMaybe "−" (Detail.description detail)