diff options
author | Joris | 2016-07-14 11:57:12 +0000 |
---|---|---|
committer | Joris | 2016-07-14 12:00:05 +0000 |
commit | 69e69017b75d1cdaa1fd2aef2818de5111b29735 (patch) | |
tree | 99dba8f67dc1c55b2cc22f33f81c59c7355b337b /src/Parser/Resume.hs | |
parent | 04f9a66c66ca137d9fee6ccca228c41fec960fe0 (diff) | |
download | ad-listener-69e69017b75d1cdaa1fd2aef2818de5111b29735.tar.gz ad-listener-69e69017b75d1cdaa1fd2aef2818de5111b29735.tar.bz2 ad-listener-69e69017b75d1cdaa1fd2aef2818de5111b29735.zip |
Update code and fix parsers
Diffstat (limited to 'src/Parser/Resume.hs')
-rw-r--r-- | src/Parser/Resume.hs | 20 |
1 files changed, 10 insertions, 10 deletions
```diff
diff --git a/src/Parser/Resume.hs b/src/Parser/Resume.hs
index 76faca4..f300ec3 100644
--- a/src/Parser/Resume.hs
+++ b/src/Parser/Resume.hs
@@ -1,5 +1,5 @@
 module Parser.Resume
-  ( parseResumes
+  ( parse
   ) where

 import Data.Maybe (catMaybes)
@@ -8,22 +8,22 @@ import qualified Data.Text as T

 import Text.HTML.TagSoup

-import Model.Resume
+import Model.Resume (Resume(Resume))
 import Parser.Utils

-parseResumes :: Text -> [Resume]
-parseResumes page =
-  case sections (~== "<div class=list-lbc>") (parseTags page) of
+parse :: Text -> [Resume]
+parse page =
+  case dropWhile (not . hasClass (T.pack "section") (T.pack "tabsContent")) (parseTags page) of
     [] -> []
-    sectionTags : _ ->
-      let lbcTags = takeWhile (~/= "<div id=alertesCartouche>") sectionTags
+    sectionTags ->
+      let lbcTags = takeWhile (not . hasClass (T.pack "div") (T.pack "information-immo")) sectionTags
       in catMaybes . fmap parseResume $ partitions (~== "<a>") lbcTags

 parseResume :: [Tag Text] -> Maybe Resume
 parseResume item = do
-  name <- getTagTextAfter "<h2 class=title>" item
-  let price = getTagTextAfter "<div class=price>" item
+  name <- getTagTextAfter "<h2 class=item_title>" item
+  let price = getTagTextAfter "<h3 class=item_price>" item
   url <- getTagAttribute "<a>" (T.pack "href") item
-  return Resume { name = name, price = price, url = url }
+  return (Resume name price (T.concat [T.pack "https:", url]))
```