aboutsummaryrefslogtreecommitdiff
path: root/src/Parser
diff options
context:
space:
mode:
Diffstat (limited to 'src/Parser')
-rw-r--r--src/Parser/Detail.hs13
-rw-r--r--src/Parser/Resume.hs13
-rw-r--r--src/Parser/Utils.hs31
3 files changed, 36 insertions, 21 deletions
diff --git a/src/Parser/Detail.hs b/src/Parser/Detail.hs
index ab1b0ca..b787772 100644
--- a/src/Parser/Detail.hs
+++ b/src/Parser/Detail.hs
@@ -2,14 +2,23 @@ module Parser.Detail
( parseDetail
) where
+import qualified Data.Text as T
+
import Text.HTML.TagSoup
import Model.Detail
import Parser.Utils
-parseDetail :: String -> Detail
+parseDetail :: T.Text -> Detail
parseDetail page =
let tags = parseTags page
- description = getTagText "<div class=content>" tags
+ descriptionTags = getTagsBetween "<div itemprop=description>" "</div>" tags
+ description =
+ if null descriptionTags
+ then
+ Nothing
+ else
+ let replaceBr = map (\tag -> if tag ~== "<br>" then TagText (T.pack "\n") else tag)
+ in Just . T.strip . renderTags . replaceBr $ descriptionTags
in Detail { description = description }
diff --git a/src/Parser/Resume.hs b/src/Parser/Resume.hs
index bd73912..6cd4415 100644
--- a/src/Parser/Resume.hs
+++ b/src/Parser/Resume.hs
@@ -3,6 +3,7 @@ module Parser.Resume
) where
import Data.Maybe (catMaybes)
+import qualified Data.Text as T
import Text.HTML.TagSoup
@@ -10,7 +11,7 @@ import Model.Resume
import Parser.Utils
-parseResumes :: String -> [Resume]
+parseResumes :: T.Text -> [Resume]
parseResumes page =
case sections (~== "<div class=list-lbc>") (parseTags page) of
[] ->
@@ -19,9 +20,9 @@ parseResumes page =
let lbcTags = takeWhile (~/= "<div id=alertesCartouche>") sectionTags
in catMaybes . fmap parseResume $ partitions (~== "<a>") lbcTags
-parseResume :: [Tag String] -> Maybe Resume
+parseResume :: [Tag T.Text] -> Maybe Resume
parseResume item = do
- name <- getTagText "<h2 class=title>" item
- let price = getTagText "<div class=price>" item
- url <- getTagAttribute "<a>" "href" item
- return Resume { name = name, price = price, url = url }
+ name <- getTagTextAfter "<h2 class=title>" item
+ let price = getTagTextAfter "<div class=price>" item
+ url <- getTagAttribute "<a>" (T.pack "href") item
+ return Resume { name = name, price = price, url = T.unpack url }
diff --git a/src/Parser/Utils.hs b/src/Parser/Utils.hs
index 4864e00..8527777 100644
--- a/src/Parser/Utils.hs
+++ b/src/Parser/Utils.hs
@@ -1,30 +1,35 @@
module Parser.Utils
- ( getTagAttribute
- , getTagText
+ ( getTagsBetween
+ , getTagAttribute
+ , getTagTextAfter
) where
import Data.List (find, findIndex)
import Data.Maybe (listToMaybe)
+import qualified Data.Text as T
import Text.HTML.TagSoup
-getTagAttribute :: String -> String -> [Tag String] -> Maybe String
-getTagAttribute selector attribute item =
- find (~== selector) item >>= maybeTagAttribute attribute
+getTagsBetween :: String -> String -> [Tag T.Text] -> [Tag T.Text]
+getTagsBetween beginSelector endSelector =
+ takeWhile (~/= endSelector)
+ . drop 1
+ . dropWhile (~/= beginSelector)
-getTagText :: String -> [Tag String] -> Maybe String
-getTagText selector item =
- case findIndex (~== selector) item of
- Just index -> fmap trim $ safeGetAt (index + 1) item >>= maybeTagText
+getTagAttribute :: String -> T.Text -> [Tag T.Text] -> Maybe T.Text
+getTagAttribute selector attribute tags =
+ find (~== selector) tags >>= maybeTagAttribute attribute
+
+getTagTextAfter :: String -> [Tag T.Text] -> Maybe T.Text
+getTagTextAfter selector tags =
+ case findIndex (~== selector) tags of
+ Just index -> fmap T.strip $ safeGetAt (index + 1) tags >>= maybeTagText
Nothing -> Nothing
-maybeTagAttribute :: String -> Tag String -> Maybe String
+maybeTagAttribute :: T.Text -> Tag T.Text -> Maybe T.Text
maybeTagAttribute name (TagOpen _ xs) =
fmap snd . find (\(x, _) -> x == name) $ xs
maybeTagAttribute attribute _ = Nothing
-trim :: String -> String
-trim = unwords . words
-
safeGetAt :: Int -> [a] -> Maybe a
safeGetAt index = listToMaybe . drop index