From 69e69017b75d1cdaa1fd2aef2818de5111b29735 Mon Sep 17 00:00:00 2001 From: Joris Date: Thu, 14 Jul 2016 11:57:12 +0000 Subject: Update code and fix parsers --- src/Parser/Detail.hs | 12 ++++++------ src/Parser/Resume.hs | 20 ++++++++++---------- src/Parser/Utils.hs | 14 ++++++++++++-- 3 files changed, 28 insertions(+), 18 deletions(-) (limited to 'src/Parser') diff --git a/src/Parser/Detail.hs b/src/Parser/Detail.hs index 3f424e9..2418d07 100644 --- a/src/Parser/Detail.hs +++ b/src/Parser/Detail.hs @@ -1,5 +1,5 @@ module Parser.Detail - ( parseDetail + ( parse ) where import Data.Text (Text) @@ -16,18 +16,18 @@ import Model.Detail import Parser.Utils -parseDetail :: Text -> Detail -parseDetail page = +parse :: Text -> Detail +parse page = let tags = parseTags page in Detail { description = parseDescription tags - , images = getTagAttributes "" (T.pack "content") tags + , images = map (\url -> T.concat [T.pack "https:", url]) $ getTagAttributes "" (T.pack "content") tags , properties = parseProperties tags } parseDescription :: [Tag Text] -> Maybe Text parseDescription tags = - let descriptionTags = getTagsBetween "
" "
" tags + let descriptionTags = getTagsBetween "

" "

" tags in if null descriptionTags then Nothing @@ -37,7 +37,7 @@ parseDescription tags = parseProperties :: [Tag Text] -> Map Text Text parseProperties tags = - let mbUtagData = getTagTextAfter "" . getTagsAfter "" $ tags in fromMaybe M.empty (fmap parseUtagData mbUtagData) parseUtagData :: Text -> Map Text Text diff --git a/src/Parser/Resume.hs b/src/Parser/Resume.hs index 76faca4..f300ec3 100644 --- a/src/Parser/Resume.hs +++ b/src/Parser/Resume.hs @@ -1,5 +1,5 @@ module Parser.Resume - ( parseResumes + ( parse ) where import Data.Maybe (catMaybes) @@ -8,22 +8,22 @@ import qualified Data.Text as T import Text.HTML.TagSoup -import Model.Resume +import Model.Resume (Resume(Resume)) import Parser.Utils -parseResumes :: Text -> [Resume] -parseResumes page = - case sections (~== "
") (parseTags page) of +parse :: Text -> [Resume] +parse page = + case dropWhile (not . hasClass (T.pack "section") (T.pack "tabsContent")) (parseTags page) of [] -> [] - sectionTags : _ -> - let lbcTags = takeWhile (~/= "
") sectionTags + sectionTags -> + let lbcTags = takeWhile (not . hasClass (T.pack "div") (T.pack "information-immo")) sectionTags in catMaybes . fmap parseResume $ partitions (~== "") lbcTags parseResume :: [Tag Text] -> Maybe Resume parseResume item = do - name <- getTagTextAfter "

" item - let price = getTagTextAfter "