From 9f389a05cc883213327b8d17db6d23c3ff8fb4e1 Mon Sep 17 00:00:00 2001 From: Joris Guyonvarch Date: Sat, 11 Apr 2015 14:06:14 +0200 Subject: Set up a listener that diff new ads and show only the new ones --- .gitignore | 1 - gen | 2 +- src/Ad.hs | 11 +++++++---- src/Main.hs | 56 ++++++++++++++++++++++++++++++++++++++++++---------- src/Model/Resume.hs | 17 +++++++++++++++- src/Model/URL.hs | 5 +++++ src/Parser/Detail.hs | 7 ++++--- src/View/Ad.hs | 1 + 8 files changed, 80 insertions(+), 20 deletions(-) create mode 100644 src/Model/URL.hs diff --git a/.gitignore b/.gitignore index 54228d1..cef4194 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ .cabal-sandbox cabal.sandbox.config dist -result.html diff --git a/gen b/gen index 5241d5e..73ad9f6 100755 --- a/gen +++ b/gen @@ -1,2 +1,2 @@ #!/bin/bash -cabal build && ./dist/build/leboncoin/leboncoin +cabal build && ./dist/build/leboncoin-listener/leboncoin-listener diff --git a/src/Ad.hs b/src/Ad.hs index 0fa131c..6cd1d8a 100644 --- a/src/Ad.hs +++ b/src/Ad.hs @@ -1,19 +1,22 @@ module Ad ( getAds + , getResumes ) where -import Text.HTML.TagSoup (parseTags) - import Page (getPage) import Model.Ad import Model.Resume import Model.Detail +import Model.URL import Parser.Resume import Parser.Detail -getAds :: String -> IO (Either String [Ad]) +getResumes :: URL -> IO (Either String [Resume]) +getResumes url = fmap parseResumes <$> getPage url + +getAds :: URL -> IO (Either String [Ad]) getAds url = do eitherPage <- getPage url case eitherPage of @@ -30,4 +33,4 @@ getAdsFromPage page = do getAd :: Resume -> IO (Either String Ad) getAd resume = do page <- getPage (url resume) - fmap (\page -> Ad { resume = resume, detail = parseDetail (parseTags page)}) <$> getPage (url resume) + fmap (\page -> Ad { resume = resume, detail = parseDetail page}) <$> getPage (url resume) diff --git a/src/Main.hs b/src/Main.hs index f352a43..3f2dd37 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -4,24 +4,60 @@ module Main import Data.List (intersperse) -import Ad (getAds) +import Control.Concurrent (threadDelay) + +import Ad (getResumes) import Model.Ad +import Model.URL +import Model.Resume (getURLs, getNewResumes) -import View.Ad (renderAd) +import View.Ad (renderResume) import Page import Parser.Detail -url :: String +url :: URL url = "http://www.leboncoin.fr/annonces/offres/corse/" main :: IO () -main = do - eitherAds <- getAds url - case eitherAds of - Left error -> +main = initListenToNewResumes + +initListenToNewResumes :: IO () +initListenToNewResumes = do + eitherResumes <- getResumes url + case eitherResumes of + Left error -> do putStrLn error - Right ads -> do - writeFile "result.html" (concat . intersperse "\n\n" . map renderAd $ ads) - putStrLn "Done!" + waitOneMinute + initListenToNewResumes + Right resumes -> + let viewedURLs = getURLs resumes + in do + putStrLn "Initialization complete" + waitOneMinute + listenToNewResumes viewedURLs + +listenToNewResumes :: [URL] -> IO () +listenToNewResumes viewedURLs = do + eitherResumes <- getResumes url + case eitherResumes of + Left error -> do + putStrLn error + waitOneMinute + listenToNewResumes viewedURLs + Right resumes -> + let (newViewdURLs, newResumes) = getNewResumes viewedURLs resumes + newAdsCount = length newResumes + in do + if newAdsCount > 0 + then do + putStrLn ("Got " ++ (show newAdsCount) ++ " new ads.\n") + putStrLn (concat . intersperse "\n\n" . map renderResume $ newResumes) + else + return () + waitOneMinute + listenToNewResumes newViewdURLs + +waitOneMinute :: IO () +waitOneMinute = threadDelay (1000 * 1000 * 60) diff --git a/src/Model/Resume.hs b/src/Model/Resume.hs index 1b73baf..f4e9cd5 100644 --- a/src/Model/Resume.hs +++ b/src/Model/Resume.hs @@ -1,9 +1,24 @@ module Model.Resume ( Resume(..) + , getNewResumes + , getURLs ) where +import Data.List ((\\)) + +import Model.URL + data Resume = Resume { name :: String , price :: Maybe String - , url :: String + , url :: URL } deriving (Eq, Read, Show) + +getNewResumes :: [URL] -> [Resume] -> ([URL], [Resume]) +getNewResumes viewdURLs resumes = + let newURLs = (getURLs resumes) \\ viewdURLs + newResumes = filter (\resume -> elem (url resume) newURLs) resumes + in (viewdURLs ++ newURLs, newResumes) + +getURLs :: [Resume] -> [URL] +getURLs = map url diff --git a/src/Model/URL.hs b/src/Model/URL.hs new file mode 100644 index 0000000..91cf22a --- /dev/null +++ b/src/Model/URL.hs @@ -0,0 +1,5 @@ +module Model.URL + ( URL + ) where + +type URL = String diff --git a/src/Parser/Detail.hs b/src/Parser/Detail.hs index 031d740..ab1b0ca 100644 --- a/src/Parser/Detail.hs +++ b/src/Parser/Detail.hs @@ -8,7 +8,8 @@ import Model.Detail import Parser.Utils -parseDetail :: [Tag String] -> Detail -parseDetail tags = - let description = getTagText "
" tags +parseDetail :: String -> Detail +parseDetail page = + let tags = parseTags page + description = getTagText "
" tags in Detail { description = description } diff --git a/src/View/Ad.hs b/src/View/Ad.hs index 445ef3a..020fa91 100644 --- a/src/View/Ad.hs +++ b/src/View/Ad.hs @@ -1,5 +1,6 @@ module View.Ad ( renderAd + , renderResume ) where import Data.List (intersperse) -- cgit v1.2.3