aboutsummaryrefslogtreecommitdiff
path: root/src/Parser
diff options
context:
space:
mode:
authorJoris2016-07-14 11:57:12 +0000
committerJoris2016-07-14 12:00:05 +0000
commit69e69017b75d1cdaa1fd2aef2818de5111b29735 (patch)
tree99dba8f67dc1c55b2cc22f33f81c59c7355b337b /src/Parser
parent04f9a66c66ca137d9fee6ccca228c41fec960fe0 (diff)
Update code and fix parsers
Diffstat (limited to 'src/Parser')
-rw-r--r--src/Parser/Detail.hs12
-rw-r--r--src/Parser/Resume.hs20
-rw-r--r--src/Parser/Utils.hs14
3 files changed, 28 insertions, 18 deletions
diff --git a/src/Parser/Detail.hs b/src/Parser/Detail.hs
index 3f424e9..2418d07 100644
--- a/src/Parser/Detail.hs
+++ b/src/Parser/Detail.hs
@@ -1,5 +1,5 @@
module Parser.Detail
- ( parseDetail
+ ( parse
) where
import Data.Text (Text)
@@ -16,18 +16,18 @@ import Model.Detail
import Parser.Utils
-parseDetail :: Text -> Detail
-parseDetail page =
+parse :: Text -> Detail
+parse page =
let tags = parseTags page
in Detail
{ description = parseDescription tags
- , images = getTagAttributes "<meta itemprop=image>" (T.pack "content") tags
+ , images = map (\url -> T.concat [T.pack "https:", url]) $ getTagAttributes "<meta itemprop=image>" (T.pack "content") tags
, properties = parseProperties tags
}
parseDescription :: [Tag Text] -> Maybe Text
parseDescription tags =
- let descriptionTags = getTagsBetween "<div itemprop=description>" "</div>" tags
+ let descriptionTags = getTagsBetween "<p itemprop=description>" "</p>" tags
in if null descriptionTags
then
Nothing
@@ -37,7 +37,7 @@ parseDescription tags =
parseProperties :: [Tag Text] -> Map Text Text
parseProperties tags =
- let mbUtagData = getTagTextAfter "<script>" . getTagsAfter "<body>" $ tags
+ let mbUtagData = getTagTextAfter "<script>" . getTagsAfter "</script>" . getTagsAfter "<body>" $ tags
in fromMaybe M.empty (fmap parseUtagData mbUtagData)
parseUtagData :: Text -> Map Text Text
diff --git a/src/Parser/Resume.hs b/src/Parser/Resume.hs
index 76faca4..f300ec3 100644
--- a/src/Parser/Resume.hs
+++ b/src/Parser/Resume.hs
@@ -1,5 +1,5 @@
module Parser.Resume
- ( parseResumes
+ ( parse
) where
import Data.Maybe (catMaybes)
@@ -8,22 +8,22 @@ import qualified Data.Text as T
import Text.HTML.TagSoup
-import Model.Resume
+import Model.Resume (Resume(Resume))
import Parser.Utils
-parseResumes :: Text -> [Resume]
-parseResumes page =
- case sections (~== "<div class=list-lbc>") (parseTags page) of
+parse :: Text -> [Resume]
+parse page =
+ case dropWhile (not . hasClass (T.pack "section") (T.pack "tabsContent")) (parseTags page) of
[] ->
[]
- sectionTags : _ ->
- let lbcTags = takeWhile (~/= "<div id=alertesCartouche>") sectionTags
+ sectionTags ->
+ let lbcTags = takeWhile (not . hasClass (T.pack "div") (T.pack "information-immo")) sectionTags
in catMaybes . fmap parseResume $ partitions (~== "<a>") lbcTags
parseResume :: [Tag Text] -> Maybe Resume
parseResume item = do
- name <- getTagTextAfter "<h2 class=title>" item
- let price = getTagTextAfter "<div class=price>" item
+ name <- getTagTextAfter "<h2 class=item_title>" item
+ let price = getTagTextAfter "<h3 class=item_price>" item
url <- getTagAttribute "<a>" (T.pack "href") item
- return Resume { name = name, price = price, url = url }
+ return (Resume name price (T.concat [T.pack "https:", url]))
diff --git a/src/Parser/Utils.hs b/src/Parser/Utils.hs
index d72a1ce..98694bb 100644
--- a/src/Parser/Utils.hs
+++ b/src/Parser/Utils.hs
@@ -5,14 +5,16 @@ module Parser.Utils
, getTagAttributes
, getTagAttribute
, getTagTextAfter
+ , hasClass
) where
import Data.List (find, findIndex)
-import Data.Maybe (listToMaybe, catMaybes)
+import Data.Maybe (listToMaybe, catMaybes, isJust)
import Data.Text (Text)
import qualified Data.Text as T
import Text.HTML.TagSoup
+import Text.HTML.TagSoup.Match (tagOpen)
getTagsBefore :: String -> [Tag Text] -> [Tag Text]
getTagsBefore selector = takeWhile (~/= selector)
@@ -43,7 +45,15 @@ getTagTextAfter selector tags =
maybeTagAttribute :: Text -> Tag Text -> Maybe Text
maybeTagAttribute name (TagOpen _ xs) =
fmap snd . find (\(x, _) -> x == name) $ xs
-maybeTagAttribute attribute _ = Nothing
+maybeTagAttribute _ _ = Nothing
safeGetAt :: Int -> [a] -> Maybe a
safeGetAt index = listToMaybe . drop index
+
+hasClass :: Text -> Text -> Tag Text -> Bool
+hasClass selector className =
+ tagOpen ((==) selector) (isJust . find matchClass)
+ where matchClass (name, values) =
+ ( name == (T.pack "class")
+ && (isJust . find ((==) className) . T.words $ values)
+ )