From 4ddd6d1f6df2bab75d42b6d45b816e92e7173529 Mon Sep 17 00:00:00 2001 From: Joris Guyonvarch Date: Sat, 11 Apr 2015 19:20:04 +0200 Subject: Fixing parsing errors, and use Text from now --- src/Ad.hs | 24 +++++-------- src/Main.hs | 95 +++++++++++++++++++++++++++++++--------------------- src/Model/Detail.hs | 4 ++- src/Model/Resume.hs | 10 +++--- src/Page.hs | 11 ++++-- src/Parser/Detail.hs | 13 +++++-- src/Parser/Resume.hs | 13 +++---- src/Parser/Utils.hs | 31 ++++++++++------- src/View/Ad.hs | 27 ++++++++++----- 9 files changed, 135 insertions(+), 93 deletions(-) (limited to 'src') diff --git a/src/Ad.hs b/src/Ad.hs index 6cd1d8a..5f3a9f1 100644 --- a/src/Ad.hs +++ b/src/Ad.hs @@ -1,9 +1,10 @@ module Ad - ( getAds - , getResumes + ( getResumes + , getAds ) where import Page (getPage) +import qualified Data.Text as T import Model.Ad import Model.Resume @@ -13,24 +14,15 @@ import Model.URL import Parser.Resume import Parser.Detail -getResumes :: URL -> IO (Either String [Resume]) +getResumes :: URL -> IO (Either T.Text [Resume]) getResumes url = fmap parseResumes <$> getPage url -getAds :: URL -> IO (Either String [Ad]) -getAds url = do - eitherPage <- getPage url - case eitherPage of - Left error -> - return (Left error) - Right page -> - getAdsFromPage page - -getAdsFromPage :: String -> IO (Either String [Ad]) -getAdsFromPage page = do - xs <- sequence $ map getAd (parseResumes page) +getAds :: [Resume] -> IO (Either T.Text [Ad]) +getAds resumes = do + xs <- sequence $ map getAd resumes return $ sequence xs -getAd :: Resume -> IO (Either String Ad) +getAd :: Resume -> IO (Either T.Text Ad) getAd resume = do page <- getPage (url resume) fmap (\page -> Ad { resume = resume, detail = parseDetail page}) <$> getPage (url resume) diff --git a/src/Main.hs b/src/Main.hs index f9cd7f0..6208953 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -1,64 +1,81 @@ +{-# LANGUAGE OverloadedStrings #-} + module Main ( main ) where import Data.List (intersperse) +import qualified Data.Text as T +import qualified Data.Text.IO as T import Control.Concurrent (threadDelay) -import Ad (getResumes) +import Ad (getResumes, getAds) import Model.Ad import Model.URL -import Model.Resume (getURLs, getNewResumes) +import Model.Resume -import View.Ad (renderResume) +import View.Ad (renderAds) import Page import Parser.Detail -url :: URL -url = "http://www.leboncoin.fr/annonces/offres/corse/" +listenURL :: URL +listenURL = "http://www.leboncoin.fr/annonces/offres/corse/" main :: IO () -main = initListenToNewResumes +main = listenToNewAds [] -initListenToNewResumes :: IO () -initListenToNewResumes = do - eitherResumes <- getResumes url +listenToNewAds :: [Ad] -> IO () +listenToNewAds viewedAds = do + eitherResumes <- getResumes listenURL case eitherResumes of - Left error -> do - putStrLn error - waitOneMinute - initListenToNewResumes + Left error -> + listenError viewedAds error Right resumes -> - let viewedURLs = getURLs resumes - in do - putStrLn "Initialization complete" - waitOneMinute - listenToNewResumes viewedURLs + listenToNewAdsWithResumes viewedAds resumes -listenToNewResumes :: [URL] -> IO () -listenToNewResumes viewedURLs = do - eitherResumes <- getResumes url - case eitherResumes of - Left error -> do - putStrLn error - waitOneMinute - listenToNewResumes viewedURLs - Right resumes -> - let (newViewdURLs, newResumes) = getNewResumes viewedURLs resumes - newAdsCount = length newResumes - in do - if newAdsCount > 0 - then - do - putStrLn ("Got " ++ (show newAdsCount) ++ " new ads.\n") - putStrLn (concat . intersperse "\n\n" . map renderResume $ newResumes) - else - return () - waitOneMinute - listenToNewResumes newViewdURLs +listenToNewAdsWithResumes :: [Ad] -> [Resume] -> IO () +listenToNewAdsWithResumes viewedAds resumes = + let viewedURLs = getURLs $ map resume viewedAds + newResumes = getNewResumes viewedURLs resumes + in do + eitherNewAds <- getAds newResumes + case eitherNewAds of + Left error -> + listenError viewedAds error + Right newAds -> + do + if not (null newAds) + then + T.putStrLn (newAdsMessage newAds) + else + return () + waitOneMinute + listenToNewAds (viewedAds ++ newAds) + +newAdsMessage :: [Ad] -> T.Text +newAdsMessage newAds = + let newAdsMessage = + T.concat + [ "Got " + , T.pack . show . length $ newAds + , " new ads." + ] + line = T.map (\_ -> '-') newAdsMessage + in T.intercalate + "\n" + [ newAdsMessage + , T.concat [line, "\n"] + , renderAds newAds + ] + +listenError :: [Ad] -> T.Text -> IO () +listenError viewedAds error = do + T.putStrLn error + waitOneMinute + listenToNewAds viewedAds waitOneMinute :: IO () waitOneMinute = threadDelay (1000 * 1000 * 60) diff --git a/src/Model/Detail.hs b/src/Model/Detail.hs index f00a7eb..684a718 100644 --- a/src/Model/Detail.hs +++ b/src/Model/Detail.hs @@ -2,6 +2,8 @@ module Model.Detail ( Detail(..) ) where +import qualified Data.Text as T + data Detail = Detail - { description :: Maybe String + { description :: Maybe T.Text } deriving (Eq, Read, Show) diff --git a/src/Model/Resume.hs b/src/Model/Resume.hs index f4e9cd5..9d966df 100644 --- a/src/Model/Resume.hs +++ b/src/Model/Resume.hs @@ -5,20 +5,20 @@ module Model.Resume ) where import Data.List ((\\)) +import qualified Data.Text as T import Model.URL data Resume = Resume - { name :: String - , price :: Maybe String + { name :: T.Text + , price :: Maybe T.Text , url :: URL } deriving (Eq, Read, Show) -getNewResumes :: [URL] -> [Resume] -> ([URL], [Resume]) +getNewResumes :: [URL] -> [Resume] -> [Resume] getNewResumes viewdURLs resumes = let newURLs = (getURLs resumes) \\ viewdURLs - newResumes = filter (\resume -> elem (url resume) newURLs) resumes - in (viewdURLs ++ newURLs, newResumes) + in filter (\resume -> elem (url resume) newURLs) resumes getURLs :: [Resume] -> [URL] getURLs = map url diff --git a/src/Page.hs b/src/Page.hs index b70db70..b048410 100644 --- a/src/Page.hs +++ b/src/Page.hs @@ -4,11 +4,18 @@ module Page import Control.Exception (SomeException, try) +import qualified Data.Text as T + import Network.HTTP (simpleHTTP, getRequest, getResponseBody) -getPage :: String -> IO (Either String String) +import Model.URL + +getPage :: URL -> IO (Either T.Text T.Text) getPage url = - mapLeft show <$> (try (simpleHTTP (getRequest url) >>= getResponseBody) :: IO (Either SomeException String)) + mapLeft (T.pack . show) <$> (try (unsafeGetPage url) :: IO (Either SomeException T.Text)) + +unsafeGetPage :: URL -> IO T.Text +unsafeGetPage url = simpleHTTP (getRequest url) >>= (\x -> T.pack <$> getResponseBody x) mapLeft :: (a -> c) -> Either a b -> Either c b mapLeft f (Left l) = Left (f l) diff --git a/src/Parser/Detail.hs b/src/Parser/Detail.hs index ab1b0ca..b787772 100644 --- a/src/Parser/Detail.hs +++ b/src/Parser/Detail.hs @@ -2,14 +2,23 @@ module Parser.Detail ( parseDetail ) where +import qualified Data.Text as T + import Text.HTML.TagSoup import Model.Detail import Parser.Utils -parseDetail :: String -> Detail +parseDetail :: T.Text -> Detail parseDetail page = let tags = parseTags page - description = getTagText "
" tags + descriptionTags = getTagsBetween "
" "
" tags + description = + if null descriptionTags + then + Nothing + else + let replaceBr = map (\tag -> if tag ~== "
" then TagText (T.pack "\n") else tag) + in Just . T.strip . renderTags . replaceBr $ descriptionTags in Detail { description = description } diff --git a/src/Parser/Resume.hs b/src/Parser/Resume.hs index bd73912..6cd4415 100644 --- a/src/Parser/Resume.hs +++ b/src/Parser/Resume.hs @@ -3,6 +3,7 @@ module Parser.Resume ) where import Data.Maybe (catMaybes) +import qualified Data.Text as T import Text.HTML.TagSoup @@ -10,7 +11,7 @@ import Model.Resume import Parser.Utils -parseResumes :: String -> [Resume] +parseResumes :: T.Text -> [Resume] parseResumes page = case sections (~== "
") (parseTags page) of [] -> @@ -19,9 +20,9 @@ parseResumes page = let lbcTags = takeWhile (~/= "
") sectionTags in catMaybes . fmap parseResume $ partitions (~== "") lbcTags -parseResume :: [Tag String] -> Maybe Resume +parseResume :: [Tag T.Text] -> Maybe Resume parseResume item = do - name <- getTagText "

" item - let price = getTagText "
" item - url <- getTagAttribute "" "href" item - return Resume { name = name, price = price, url = url } + name <- getTagTextAfter "

" item + let price = getTagTextAfter "
" item + url <- getTagAttribute "" (T.pack "href") item + return Resume { name = name, price = price, url = T.unpack url } diff --git a/src/Parser/Utils.hs b/src/Parser/Utils.hs index 4864e00..8527777 100644 --- a/src/Parser/Utils.hs +++ b/src/Parser/Utils.hs @@ -1,30 +1,35 @@ module Parser.Utils - ( getTagAttribute - , getTagText + ( getTagsBetween + , getTagAttribute + , getTagTextAfter ) where import Data.List (find, findIndex) import Data.Maybe (listToMaybe) +import qualified Data.Text as T import Text.HTML.TagSoup -getTagAttribute :: String -> String -> [Tag String] -> Maybe String -getTagAttribute selector attribute item = - find (~== selector) item >>= maybeTagAttribute attribute +getTagsBetween :: String -> String -> [Tag T.Text] -> [Tag T.Text] +getTagsBetween beginSelector endSelector = + takeWhile (~/= endSelector) + . drop 1 + . dropWhile (~/= beginSelector) -getTagText :: String -> [Tag String] -> Maybe String -getTagText selector item = - case findIndex (~== selector) item of - Just index -> fmap trim $ safeGetAt (index + 1) item >>= maybeTagText +getTagAttribute :: String -> T.Text -> [Tag T.Text] -> Maybe T.Text +getTagAttribute selector attribute tags = + find (~== selector) tags >>= maybeTagAttribute attribute + +getTagTextAfter :: String -> [Tag T.Text] -> Maybe T.Text +getTagTextAfter selector tags = + case findIndex (~== selector) tags of + Just index -> fmap T.strip $ safeGetAt (index + 1) tags >>= maybeTagText Nothing -> Nothing -maybeTagAttribute :: String -> Tag String -> Maybe String +maybeTagAttribute :: T.Text -> Tag T.Text -> Maybe T.Text maybeTagAttribute name (TagOpen _ xs) = fmap snd . find (\(x, _) -> x == name) $ xs maybeTagAttribute attribute _ = Nothing -trim :: String -> String -trim = unwords . words - safeGetAt :: Int -> [a] -> Maybe a safeGetAt index = listToMaybe . drop index diff --git a/src/View/Ad.hs b/src/View/Ad.hs index 020fa91..5e408f3 100644 --- a/src/View/Ad.hs +++ b/src/View/Ad.hs @@ -1,10 +1,12 @@ +{-# LANGUAGE OverloadedStrings #-} + module View.Ad - ( renderAd - , renderResume + ( renderAds ) where import Data.List (intersperse) import Data.Maybe (fromMaybe) +import qualified Data.Text as T import Model.Ad (Ad(..)) import qualified Model.Ad as Ad @@ -13,17 +15,24 @@ import qualified Model.Resume as Resume import Model.Detail (Detail(..)) import qualified Model.Detail as Detail -renderAd :: Ad -> String +renderAds :: [Ad] -> T.Text +renderAds = T.intercalate "\n\n" . map renderAd + +renderAd :: Ad -> T.Text renderAd ad = - (renderResume (Ad.resume ad)) ++ "\n\n" ++ (renderDetail (Ad.detail ad)) ++ "\n" + T.concat + [ renderResume (Ad.resume ad) + , "\n\n" + , renderDetail (Ad.detail ad) + ] -renderResume :: Resume -> String +renderResume :: Resume -> T.Text renderResume resume = - let formatPrice price = " - " ++ price + let formatPrice price = T.concat [" - ", price] price = fromMaybe "" . fmap formatPrice . Resume.price $ resume - titleLine = (Resume.name resume) ++ price - in concat . intersperse "\n" $ [titleLine, Resume.url resume] + titleLine = T.concat [Resume.name resume, price] + in T.intercalate "\n" [titleLine, T.pack . Resume.url $ resume] -renderDetail :: Detail -> String +renderDetail :: Detail -> T.Text renderDetail detail = fromMaybe "−" (Detail.description detail) -- cgit v1.2.3