diff options
author | Joris Guyonvarch | 2015-04-11 19:20:04 +0200 |
---|---|---|
committer | Joris Guyonvarch | 2015-04-11 19:20:04 +0200 |
commit | 4ddd6d1f6df2bab75d42b6d45b816e92e7173529 (patch) | |
tree | 09ddfd62fe32e210a87eb15eb86ee07ab9f24623 /src/Parser | |
parent | 88b7f848da3515d67cfb989b98ad5285a037993e (diff) |
Fixing parsing errors, and use Text from now
Diffstat (limited to 'src/Parser')
-rw-r--r-- | src/Parser/Detail.hs | 13 | ||||
-rw-r--r-- | src/Parser/Resume.hs | 13 | ||||
-rw-r--r-- | src/Parser/Utils.hs | 31 |
3 files changed, 36 insertions, 21 deletions
diff --git a/src/Parser/Detail.hs b/src/Parser/Detail.hs index ab1b0ca..b787772 100644 --- a/src/Parser/Detail.hs +++ b/src/Parser/Detail.hs @@ -2,14 +2,23 @@ module Parser.Detail ( parseDetail ) where +import qualified Data.Text as T + import Text.HTML.TagSoup import Model.Detail import Parser.Utils -parseDetail :: String -> Detail +parseDetail :: T.Text -> Detail parseDetail page = let tags = parseTags page - description = getTagText "<div class=content>" tags + descriptionTags = getTagsBetween "<div itemprop=description>" "</div>" tags + description = + if null descriptionTags + then + Nothing + else + let replaceBr = map (\tag -> if tag ~== "<br>" then TagText (T.pack "\n") else tag) + in Just . T.strip . renderTags . replaceBr $ descriptionTags in Detail { description = description } diff --git a/src/Parser/Resume.hs b/src/Parser/Resume.hs index bd73912..6cd4415 100644 --- a/src/Parser/Resume.hs +++ b/src/Parser/Resume.hs @@ -3,6 +3,7 @@ module Parser.Resume ) where import Data.Maybe (catMaybes) +import qualified Data.Text as T import Text.HTML.TagSoup @@ -10,7 +11,7 @@ import Model.Resume import Parser.Utils -parseResumes :: String -> [Resume] +parseResumes :: T.Text -> [Resume] parseResumes page = case sections (~== "<div class=list-lbc>") (parseTags page) of [] -> @@ -19,9 +20,9 @@ parseResumes page = let lbcTags = takeWhile (~/= "<div id=alertesCartouche>") sectionTags in catMaybes . fmap parseResume $ partitions (~== "<a>") lbcTags -parseResume :: [Tag String] -> Maybe Resume +parseResume :: [Tag T.Text] -> Maybe Resume parseResume item = do - name <- getTagText "<h2 class=title>" item - let price = getTagText "<div class=price>" item - url <- getTagAttribute "<a>" "href" item - return Resume { name = name, price = price, url = url } + name <- getTagTextAfter "<h2 class=title>" item + let price = getTagTextAfter "<div class=price>" item + url <- getTagAttribute "<a>" (T.pack "href") item + return Resume { name = name, price = price, url = T.unpack url } diff --git a/src/Parser/Utils.hs b/src/Parser/Utils.hs index 4864e00..8527777 100644 --- a/src/Parser/Utils.hs +++ b/src/Parser/Utils.hs @@ -1,30 +1,35 @@ module Parser.Utils - ( getTagAttribute - , getTagText + ( getTagsBetween + , getTagAttribute + , getTagTextAfter ) where import Data.List (find, findIndex) import Data.Maybe (listToMaybe) +import qualified Data.Text as T import Text.HTML.TagSoup -getTagAttribute :: String -> String -> [Tag String] -> Maybe String -getTagAttribute selector attribute item = - find (~== selector) item >>= maybeTagAttribute attribute +getTagsBetween :: String -> String -> [Tag T.Text] -> [Tag T.Text] +getTagsBetween beginSelector endSelector = + takeWhile (~/= endSelector) + . drop 1 + . dropWhile (~/= beginSelector) -getTagText :: String -> [Tag String] -> Maybe String -getTagText selector item = - case findIndex (~== selector) item of - Just index -> fmap trim $ safeGetAt (index + 1) item >>= maybeTagText +getTagAttribute :: String -> T.Text -> [Tag T.Text] -> Maybe T.Text +getTagAttribute selector attribute tags = + find (~== selector) tags >>= maybeTagAttribute attribute + +getTagTextAfter :: String -> [Tag T.Text] -> Maybe T.Text +getTagTextAfter selector tags = + case findIndex (~== selector) tags of + Just index -> fmap T.strip $ safeGetAt (index + 1) tags >>= maybeTagText Nothing -> Nothing -maybeTagAttribute :: String -> Tag String -> Maybe String +maybeTagAttribute :: T.Text -> Tag T.Text -> Maybe T.Text maybeTagAttribute name (TagOpen _ xs) = fmap snd . find (\(x, _) -> x == name) $ xs maybeTagAttribute attribute _ = Nothing -trim :: String -> String -trim = unwords . words - safeGetAt :: Int -> [a] -> Maybe a safeGetAt index = listToMaybe . drop index |