From 5977e1454d7738ddb086d37b20337e350e380790 Mon Sep 17 00:00:00 2001 From: Joris Guyonvarch Date: Sat, 11 Apr 2015 11:50:48 +0200 Subject: Fetch first page ads of a given leboncoin url, fetch also the description page of each item. --- src/Parser/Detail.hs | 14 ++++++++++++++ src/Parser/Resume.hs | 27 +++++++++++++++++++++++++++ src/Parser/Utils.hs | 30 ++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+) create mode 100644 src/Parser/Detail.hs create mode 100644 src/Parser/Resume.hs create mode 100644 src/Parser/Utils.hs (limited to 'src/Parser') diff --git a/src/Parser/Detail.hs b/src/Parser/Detail.hs new file mode 100644 index 0000000..031d740 --- /dev/null +++ b/src/Parser/Detail.hs @@ -0,0 +1,14 @@ +module Parser.Detail + ( parseDetail + ) where + +import Text.HTML.TagSoup + +import Model.Detail + +import Parser.Utils + +parseDetail :: [Tag String] -> Detail +parseDetail tags = + let description = getTagText "
" tags + in Detail { description = description } diff --git a/src/Parser/Resume.hs b/src/Parser/Resume.hs new file mode 100644 index 0000000..bd73912 --- /dev/null +++ b/src/Parser/Resume.hs @@ -0,0 +1,27 @@ +module Parser.Resume + ( parseResumes + ) where + +import Data.Maybe (catMaybes) + +import Text.HTML.TagSoup + +import Model.Resume + +import Parser.Utils + +parseResumes :: String -> [Resume] +parseResumes page = + case sections (~== "
") (parseTags page) of + [] -> + [] + sectionTags : _ -> + let lbcTags = takeWhile (~/= "