From 5977e1454d7738ddb086d37b20337e350e380790 Mon Sep 17 00:00:00 2001 From: Joris Guyonvarch Date: Sat, 11 Apr 2015 11:50:48 +0200 Subject: Fetch first page ads of a given leboncoin url, fetch also the description page of each item. --- .gitignore | 4 ++++ gen | 2 ++ leboncoin-listener.cabal | 11 +++++++++++ src/Ad.hs | 33 +++++++++++++++++++++++++++++++++ src/Main.hs | 27 +++++++++++++++++++++++++++ src/Model/Ad.hs | 11 +++++++++++ src/Model/Detail.hs | 7 +++++++ src/Model/Resume.hs | 9 +++++++++ src/Page.hs | 15 +++++++++++++++ src/Parser/Detail.hs | 14 ++++++++++++++ src/Parser/Resume.hs | 27 +++++++++++++++++++++++++++ src/Parser/Utils.hs | 30 ++++++++++++++++++++++++++++++ src/Utils/StringFormat.hs | 6 ++++++ src/View/Ad.hs | 28 ++++++++++++++++++++++++++++ 14 files changed, 224 insertions(+) create mode 100644 .gitignore create mode 100755 gen create mode 100644 leboncoin-listener.cabal create mode 100644 src/Ad.hs create mode 100644 src/Main.hs create mode 100644 src/Model/Ad.hs create mode 100644 src/Model/Detail.hs create mode 100644 src/Model/Resume.hs create mode 100644 src/Page.hs create mode 100644 src/Parser/Detail.hs create mode 100644 src/Parser/Resume.hs create mode 100644 src/Parser/Utils.hs create mode 100644 src/Utils/StringFormat.hs create mode 100644 src/View/Ad.hs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..54228d1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.cabal-sandbox +cabal.sandbox.config +dist +result.html diff --git a/gen b/gen new file mode 100755 index 0000000..5241d5e --- /dev/null +++ b/gen @@ -0,0 +1,2 @@ +#!/bin/bash +cabal build && ./dist/build/leboncoin/leboncoin diff --git a/leboncoin-listener.cabal b/leboncoin-listener.cabal new file mode 100644 index 0000000..b75de6e --- /dev/null +++ b/leboncoin-listener.cabal @@ -0,0 +1,11 @@ +name: leboncoin-listener +version: 0.0.1 +build-type: Simple +cabal-version: >= 1.8 + +executable leboncoin-listener + main-is: Main.hs + hs-source-dirs: src + build-depends: base + , HTTP == 4000.2.19 + , tagsoup == 0.13.3 diff --git a/src/Ad.hs b/src/Ad.hs new file mode 100644 index 0000000..0fa131c --- /dev/null +++ b/src/Ad.hs @@ -0,0 +1,33 @@ +module Ad + ( getAds + ) where + +import Text.HTML.TagSoup (parseTags) + +import Page (getPage) + +import Model.Ad +import Model.Resume +import Model.Detail + +import Parser.Resume +import Parser.Detail + +getAds :: String -> IO (Either String [Ad]) +getAds url = do + eitherPage <- getPage url + case eitherPage of + Left error -> + return (Left error) + Right page -> + getAdsFromPage page + +getAdsFromPage :: String -> IO (Either String [Ad]) +getAdsFromPage page = do + xs <- sequence $ map getAd (parseResumes page) + return $ sequence xs + +getAd :: Resume -> IO (Either String Ad) +getAd resume = do + page <- getPage (url resume) + fmap (\page -> Ad { resume = resume, detail = parseDetail (parseTags page)}) <$> getPage (url resume) diff --git a/src/Main.hs b/src/Main.hs new file mode 100644 index 0000000..f352a43 --- /dev/null +++ b/src/Main.hs @@ -0,0 +1,27 @@ +module Main + ( main + ) where + +import Data.List (intersperse) + +import Ad (getAds) + +import Model.Ad + +import View.Ad (renderAd) + +import Page +import Parser.Detail + +url :: String +url = "http://www.leboncoin.fr/annonces/offres/corse/" + +main :: IO () +main = do + eitherAds <- getAds url + case eitherAds of + Left error -> + putStrLn error + Right ads -> do + writeFile "result.html" (concat . intersperse "\n\n" . map renderAd $ ads) + putStrLn "Done!" diff --git a/src/Model/Ad.hs b/src/Model/Ad.hs new file mode 100644 index 0000000..fe650a6 --- /dev/null +++ b/src/Model/Ad.hs @@ -0,0 +1,11 @@ +module Model.Ad + ( Ad(..) + ) where + +import Model.Resume +import Model.Detail + +data Ad = Ad + { resume :: Resume + , detail :: Detail + } deriving (Eq, Read, Show) diff --git a/src/Model/Detail.hs b/src/Model/Detail.hs new file mode 100644 index 0000000..f00a7eb --- /dev/null +++ b/src/Model/Detail.hs @@ -0,0 +1,7 @@ +module Model.Detail + ( Detail(..) + ) where + +data Detail = Detail + { description :: Maybe String + } deriving (Eq, Read, Show) diff --git a/src/Model/Resume.hs b/src/Model/Resume.hs new file mode 100644 index 0000000..1b73baf --- /dev/null +++ b/src/Model/Resume.hs @@ -0,0 +1,9 @@ +module Model.Resume + ( Resume(..) + ) where + +data Resume = Resume + { name :: String + , price :: Maybe String + , url :: String + } deriving (Eq, Read, Show) diff --git a/src/Page.hs b/src/Page.hs new file mode 100644 index 0000000..b70db70 --- /dev/null +++ b/src/Page.hs @@ -0,0 +1,15 @@ +module Page + ( getPage + ) where + +import Control.Exception (SomeException, try) + +import Network.HTTP (simpleHTTP, getRequest, getResponseBody) + +getPage :: String -> IO (Either String String) +getPage url = + mapLeft show <$> (try (simpleHTTP (getRequest url) >>= getResponseBody) :: IO (Either SomeException String)) + +mapLeft :: (a -> c) -> Either a b -> Either c b +mapLeft f (Left l) = Left (f l) +mapLeft _ (Right r) = (Right r) diff --git a/src/Parser/Detail.hs b/src/Parser/Detail.hs new file mode 100644 index 0000000..031d740 --- /dev/null +++ b/src/Parser/Detail.hs @@ -0,0 +1,14 @@ +module Parser.Detail + ( parseDetail + ) where + +import Text.HTML.TagSoup + +import Model.Detail + +import Parser.Utils + +parseDetail :: [Tag String] -> Detail +parseDetail tags = + let description = getTagText "