From 5977e1454d7738ddb086d37b20337e350e380790 Mon Sep 17 00:00:00 2001 From: Joris Guyonvarch Date: Sat, 11 Apr 2015 11:50:48 +0200 Subject: Fetch first page ads of a given leboncoin url, fetch also the description page of each item. --- src/Parser/Resume.hs | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 src/Parser/Resume.hs (limited to 'src/Parser/Resume.hs') diff --git a/src/Parser/Resume.hs b/src/Parser/Resume.hs new file mode 100644 index 0000000..bd73912 --- /dev/null +++ b/src/Parser/Resume.hs @@ -0,0 +1,27 @@ +module Parser.Resume + ( parseResumes + ) where + +import Data.Maybe (catMaybes) + +import Text.HTML.TagSoup + +import Model.Resume + +import Parser.Utils + +parseResumes :: String -> [Resume] +parseResumes page = + case sections (~== "
") (parseTags page) of + [] -> + [] + sectionTags : _ -> + let lbcTags = takeWhile (~/= "
") sectionTags + in catMaybes . fmap parseResume $ partitions (~== "") lbcTags + +parseResume :: [Tag String] -> Maybe Resume +parseResume item = do + name <- getTagText "

" item + let price = getTagText "