aboutsummaryrefslogtreecommitdiff
path: root/src/Parser
diff options
context:
space:
mode:
authorJoris2018-06-17 23:24:47 +0200
committerJoris2018-06-18 11:13:55 +0200
commit0a4d3c8f12dc5797a919a00b6bcaf759947687cc (patch)
treebcb89781e22c2314bf0c064ebb37cb7f8a362f5c /src/Parser
parente2a5c7c5c596d057b6fa9c08a8204ce1429cfdc4 (diff)
Add ouest france parser
Diffstat (limited to 'src/Parser')
-rw-r--r--src/Parser/Detail.hs63
-rw-r--r--src/Parser/Resume.hs31
-rw-r--r--src/Parser/Utils.hs59
3 files changed, 0 insertions, 153 deletions
diff --git a/src/Parser/Detail.hs b/src/Parser/Detail.hs
deleted file mode 100644
index 2418d07..0000000
--- a/src/Parser/Detail.hs
+++ /dev/null
@@ -1,63 +0,0 @@
-module Parser.Detail
- ( parse
- ) where
-
-import Data.Text (Text)
-import qualified Data.Text as T
-
-import Data.Map (Map)
-import qualified Data.Map as M
-
-import Data.Maybe (catMaybes, fromMaybe)
-
-import Text.HTML.TagSoup
-
-import Model.Detail
-
-import Parser.Utils
-
-parse :: Text -> Detail
-parse page =
- let tags = parseTags page
- in Detail
- { description = parseDescription tags
- , images = map (\url -> T.concat [T.pack "https:", url]) $ getTagAttributes "<meta itemprop=image>" (T.pack "content") tags
- , properties = parseProperties tags
- }
-
-parseDescription :: [Tag Text] -> Maybe Text
-parseDescription tags =
- let descriptionTags = getTagsBetween "<p itemprop=description>" "</p>" tags
- in if null descriptionTags
- then
- Nothing
- else
- let replaceBr = map (\tag -> if tag ~== "<br>" then TagText (T.pack "\n") else tag)
- in Just . T.strip . renderTags . replaceBr $ descriptionTags
-
-parseProperties :: [Tag Text] -> Map Text Text
-parseProperties tags =
- let mbUtagData = getTagTextAfter "<script>" . getTagsAfter "</script>" . getTagsAfter "<body>" $ tags
- in fromMaybe M.empty (fmap parseUtagData mbUtagData)
-
-parseUtagData :: Text -> Map Text Text
-parseUtagData =
- M.fromList
- . catMaybes
- . fmap parseUtag
- . T.splitOn (T.pack ",")
- . T.takeWhile (/= '}')
- . T.drop 1
- . T.dropWhile (/= '{')
-
-parseUtag :: Text -> Maybe (Text, Text)
-parseUtag utag =
- case T.splitOn (T.pack ":") utag of
- [x, y] -> Just (T.strip x, removeQuotes y)
- _ -> Nothing
-
-removeQuotes :: Text -> Text
-removeQuotes =
- T.takeWhile (/= '\"')
- . T.dropWhile (== '\"')
- . T.strip
diff --git a/src/Parser/Resume.hs b/src/Parser/Resume.hs
deleted file mode 100644
index 8940be7..0000000
--- a/src/Parser/Resume.hs
+++ /dev/null
@@ -1,31 +0,0 @@
-module Parser.Resume
- ( parse
- ) where
-
-import Data.Maybe (catMaybes, isJust)
-import Data.List (find)
-import Data.Text (Text)
-import qualified Data.Text as T
-
-import Text.HTML.TagSoup
-
-import Model.Resume (Resume(Resume))
-
-import Parser.Utils
-
-parse :: Text -> [Resume]
-parse page =
- case dropWhile (not . hasClass (T.pack "section") (T.pack "tabsContent")) (parseTags page) of
- [] ->
- []
- sectionTags ->
- let lbcTags = takeWhile (not . hasClass (T.pack "div") (T.pack "information-immo")) sectionTags
- in catMaybes . fmap parseResume $ partitions (~== "<a>") lbcTags
-
-parseResume :: [Tag Text] -> Maybe Resume
-parseResume item = do
- name <- getTagTextAfter "<h2 class=item_title>" item
- let price = getTagTextAfter "<h3 class=item_price>" item
- url <- getTagAttribute "<a>" (T.pack "href") item
- let isPro = isJust . find (~== "<span class=ispro>") $ item
- return (Resume name price (T.concat [T.pack "https:", url]) isPro)
diff --git a/src/Parser/Utils.hs b/src/Parser/Utils.hs
deleted file mode 100644
index 98694bb..0000000
--- a/src/Parser/Utils.hs
+++ /dev/null
@@ -1,59 +0,0 @@
-module Parser.Utils
- ( getTagsBefore
- , getTagsAfter
- , getTagsBetween
- , getTagAttributes
- , getTagAttribute
- , getTagTextAfter
- , hasClass
- ) where
-
-import Data.List (find, findIndex)
-import Data.Maybe (listToMaybe, catMaybes, isJust)
-import Data.Text (Text)
-import qualified Data.Text as T
-
-import Text.HTML.TagSoup
-import Text.HTML.TagSoup.Match (tagOpen)
-
-getTagsBefore :: String -> [Tag Text] -> [Tag Text]
-getTagsBefore selector = takeWhile (~/= selector)
-
-getTagsAfter :: String -> [Tag Text] -> [Tag Text]
-getTagsAfter selector = drop 1 . dropWhile (~/= selector)
-
-getTagsBetween :: String -> String -> [Tag Text] -> [Tag Text]
-getTagsBetween begin end = getTagsBefore end . getTagsAfter begin
-
-getTagAttributes :: String -> Text -> [Tag Text] -> [Text]
-getTagAttributes selector attribute =
- catMaybes
- . fmap (maybeTagAttribute attribute)
- . filter (~== selector)
-
-getTagAttribute :: String -> Text -> [Tag Text] -> Maybe Text
-getTagAttribute selector attribute =
- listToMaybe
- . getTagAttributes selector attribute
-
-getTagTextAfter :: String -> [Tag Text] -> Maybe Text
-getTagTextAfter selector tags =
- case findIndex (~== selector) tags of
- Just index -> fmap T.strip $ safeGetAt (index + 1) tags >>= maybeTagText
- Nothing -> Nothing
-
-maybeTagAttribute :: Text -> Tag Text -> Maybe Text
-maybeTagAttribute name (TagOpen _ xs) =
- fmap snd . find (\(x, _) -> x == name) $ xs
-maybeTagAttribute _ _ = Nothing
-
-safeGetAt :: Int -> [a] -> Maybe a
-safeGetAt index = listToMaybe . drop index
-
-hasClass :: Text -> Text -> Tag Text -> Bool
-hasClass selector className =
- tagOpen ((==) selector) (isJust . find matchClass)
- where matchClass (name, values) =
- ( name == (T.pack "class")
- && (isJust . find ((==) className) . T.words $ values)
- )