aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJoris Guyonvarch2015-04-11 11:50:48 +0200
committerJoris Guyonvarch2015-04-11 12:03:22 +0200
commit5977e1454d7738ddb086d37b20337e350e380790 (patch)
treee03261144e3d534434242c1dd037c2a4e4db5a9f /src
Fetch first page ads of a given leboncoin url, fetch also the description page of each item.
Diffstat (limited to 'src')
-rw-r--r--src/Ad.hs33
-rw-r--r--src/Main.hs27
-rw-r--r--src/Model/Ad.hs11
-rw-r--r--src/Model/Detail.hs7
-rw-r--r--src/Model/Resume.hs9
-rw-r--r--src/Page.hs15
-rw-r--r--src/Parser/Detail.hs14
-rw-r--r--src/Parser/Resume.hs27
-rw-r--r--src/Parser/Utils.hs30
-rw-r--r--src/Utils/StringFormat.hs6
-rw-r--r--src/View/Ad.hs28
11 files changed, 207 insertions, 0 deletions
diff --git a/src/Ad.hs b/src/Ad.hs
new file mode 100644
index 0000000..0fa131c
--- /dev/null
+++ b/src/Ad.hs
@@ -0,0 +1,33 @@
+module Ad
+ ( getAds
+ ) where
+
+import Text.HTML.TagSoup (parseTags)
+
+import Page (getPage)
+
+import Model.Ad
+import Model.Resume
+import Model.Detail
+
+import Parser.Resume
+import Parser.Detail
+
+getAds :: String -> IO (Either String [Ad])
+getAds url = do
+ eitherPage <- getPage url
+ case eitherPage of
+ Left error ->
+ return (Left error)
+ Right page ->
+ getAdsFromPage page
+
+getAdsFromPage :: String -> IO (Either String [Ad])
+getAdsFromPage page = do
+ xs <- sequence $ map getAd (parseResumes page)
+ return $ sequence xs
+
+getAd :: Resume -> IO (Either String Ad)
+getAd resume = do
+ page <- getPage (url resume)
+ fmap (\page -> Ad { resume = resume, detail = parseDetail (parseTags page)}) <$> getPage (url resume)
diff --git a/src/Main.hs b/src/Main.hs
new file mode 100644
index 0000000..f352a43
--- /dev/null
+++ b/src/Main.hs
@@ -0,0 +1,27 @@
+module Main
+ ( main
+ ) where
+
+import Data.List (intersperse)
+
+import Ad (getAds)
+
+import Model.Ad
+
+import View.Ad (renderAd)
+
+import Page
+import Parser.Detail
+
+url :: String
+url = "http://www.leboncoin.fr/annonces/offres/corse/"
+
+main :: IO ()
+main = do
+ eitherAds <- getAds url
+ case eitherAds of
+ Left error ->
+ putStrLn error
+ Right ads -> do
+ writeFile "result.html" (concat . intersperse "\n\n" . map renderAd $ ads)
+ putStrLn "Done!"
diff --git a/src/Model/Ad.hs b/src/Model/Ad.hs
new file mode 100644
index 0000000..fe650a6
--- /dev/null
+++ b/src/Model/Ad.hs
@@ -0,0 +1,11 @@
+module Model.Ad
+ ( Ad(..)
+ ) where
+
+import Model.Resume
+import Model.Detail
+
+data Ad = Ad
+ { resume :: Resume
+ , detail :: Detail
+ } deriving (Eq, Read, Show)
diff --git a/src/Model/Detail.hs b/src/Model/Detail.hs
new file mode 100644
index 0000000..f00a7eb
--- /dev/null
+++ b/src/Model/Detail.hs
@@ -0,0 +1,7 @@
+module Model.Detail
+ ( Detail(..)
+ ) where
+
+data Detail = Detail
+ { description :: Maybe String
+ } deriving (Eq, Read, Show)
diff --git a/src/Model/Resume.hs b/src/Model/Resume.hs
new file mode 100644
index 0000000..1b73baf
--- /dev/null
+++ b/src/Model/Resume.hs
@@ -0,0 +1,9 @@
+module Model.Resume
+ ( Resume(..)
+ ) where
+
+data Resume = Resume
+ { name :: String
+ , price :: Maybe String
+ , url :: String
+ } deriving (Eq, Read, Show)
diff --git a/src/Page.hs b/src/Page.hs
new file mode 100644
index 0000000..b70db70
--- /dev/null
+++ b/src/Page.hs
@@ -0,0 +1,15 @@
+module Page
+ ( getPage
+ ) where
+
+import Control.Exception (SomeException, try)
+
+import Network.HTTP (simpleHTTP, getRequest, getResponseBody)
+
+getPage :: String -> IO (Either String String)
+getPage url =
+ mapLeft show <$> (try (simpleHTTP (getRequest url) >>= getResponseBody) :: IO (Either SomeException String))
+
+mapLeft :: (a -> c) -> Either a b -> Either c b
+mapLeft f (Left l) = Left (f l)
+mapLeft _ (Right r) = (Right r)
diff --git a/src/Parser/Detail.hs b/src/Parser/Detail.hs
new file mode 100644
index 0000000..031d740
--- /dev/null
+++ b/src/Parser/Detail.hs
@@ -0,0 +1,14 @@
+module Parser.Detail
+ ( parseDetail
+ ) where
+
+import Text.HTML.TagSoup
+
+import Model.Detail
+
+import Parser.Utils
+
+parseDetail :: [Tag String] -> Detail
+parseDetail tags =
+ let description = getTagText "<div class=content>" tags
+ in Detail { description = description }
diff --git a/src/Parser/Resume.hs b/src/Parser/Resume.hs
new file mode 100644
index 0000000..bd73912
--- /dev/null
+++ b/src/Parser/Resume.hs
@@ -0,0 +1,27 @@
+module Parser.Resume
+ ( parseResumes
+ ) where
+
+import Data.Maybe (catMaybes)
+
+import Text.HTML.TagSoup
+
+import Model.Resume
+
+import Parser.Utils
+
+parseResumes :: String -> [Resume]
+parseResumes page =
+ case sections (~== "<div class=list-lbc>") (parseTags page) of
+ [] ->
+ []
+ sectionTags : _ ->
+ let lbcTags = takeWhile (~/= "<div id=alertesCartouche>") sectionTags
+ in catMaybes . fmap parseResume $ partitions (~== "<a>") lbcTags
+
+parseResume :: [Tag String] -> Maybe Resume
+parseResume item = do
+ name <- getTagText "<h2 class=title>" item
+ let price = getTagText "<div class=price>" item
+ url <- getTagAttribute "<a>" "href" item
+ return Resume { name = name, price = price, url = url }
diff --git a/src/Parser/Utils.hs b/src/Parser/Utils.hs
new file mode 100644
index 0000000..4864e00
--- /dev/null
+++ b/src/Parser/Utils.hs
@@ -0,0 +1,30 @@
+module Parser.Utils
+ ( getTagAttribute
+ , getTagText
+ ) where
+
+import Data.List (find, findIndex)
+import Data.Maybe (listToMaybe)
+
+import Text.HTML.TagSoup
+
+getTagAttribute :: String -> String -> [Tag String] -> Maybe String
+getTagAttribute selector attribute item =
+ find (~== selector) item >>= maybeTagAttribute attribute
+
+getTagText :: String -> [Tag String] -> Maybe String
+getTagText selector item =
+ case findIndex (~== selector) item of
+ Just index -> fmap trim $ safeGetAt (index + 1) item >>= maybeTagText
+ Nothing -> Nothing
+
+maybeTagAttribute :: String -> Tag String -> Maybe String
+maybeTagAttribute name (TagOpen _ xs) =
+ fmap snd . find (\(x, _) -> x == name) $ xs
+maybeTagAttribute attribute _ = Nothing
+
+trim :: String -> String
+trim = unwords . words
+
+safeGetAt :: Int -> [a] -> Maybe a
+safeGetAt index = listToMaybe . drop index
diff --git a/src/Utils/StringFormat.hs b/src/Utils/StringFormat.hs
new file mode 100644
index 0000000..757f0e6
--- /dev/null
+++ b/src/Utils/StringFormat.hs
@@ -0,0 +1,6 @@
+module Utils.StringFormat
+ ( formatParagraph
+ ) where
+
+formatParagraph : String -> String
+formatParagraph = id
diff --git a/src/View/Ad.hs b/src/View/Ad.hs
new file mode 100644
index 0000000..445ef3a
--- /dev/null
+++ b/src/View/Ad.hs
@@ -0,0 +1,28 @@
+module View.Ad
+ ( renderAd
+ ) where
+
+import Data.List (intersperse)
+import Data.Maybe (fromMaybe)
+
+import Model.Ad (Ad(..))
+import qualified Model.Ad as Ad
+import Model.Resume (Resume(..))
+import qualified Model.Resume as Resume
+import Model.Detail (Detail(..))
+import qualified Model.Detail as Detail
+
+renderAd :: Ad -> String
+renderAd ad =
+ (renderResume (Ad.resume ad)) ++ "\n\n" ++ (renderDetail (Ad.detail ad)) ++ "\n"
+
+renderResume :: Resume -> String
+renderResume resume =
+ let formatPrice price = " - " ++ price
+ price = fromMaybe "" . fmap formatPrice . Resume.price $ resume
+ titleLine = (Resume.name resume) ++ price
+ in concat . intersperse "\n" $ [titleLine, Resume.url resume]
+
+renderDetail :: Detail -> String
+renderDetail detail =
+ fromMaybe "−" (Detail.description detail)