From 0a4d3c8f12dc5797a919a00b6bcaf759947687cc Mon Sep 17 00:00:00 2001
From: Joris
Date: Sun, 17 Jun 2018 23:24:47 +0200
Subject: Add ouest france parser
---
src/Parser/Detail.hs | 63 ----------------------------------------------------
src/Parser/Resume.hs | 31 --------------------------
src/Parser/Utils.hs | 59 ------------------------------------------------
3 files changed, 153 deletions(-)
delete mode 100644 src/Parser/Detail.hs
delete mode 100644 src/Parser/Resume.hs
delete mode 100644 src/Parser/Utils.hs
(limited to 'src/Parser')
diff --git a/src/Parser/Detail.hs b/src/Parser/Detail.hs
deleted file mode 100644
index 2418d07..0000000
--- a/src/Parser/Detail.hs
+++ /dev/null
@@ -1,63 +0,0 @@
-module Parser.Detail
- ( parse
- ) where
-
-import Data.Text (Text)
-import qualified Data.Text as T
-
-import Data.Map (Map)
-import qualified Data.Map as M
-
-import Data.Maybe (catMaybes, fromMaybe)
-
-import Text.HTML.TagSoup
-
-import Model.Detail
-
-import Parser.Utils
-
-parse :: Text -> Detail
-parse page =
- let tags = parseTags page
- in Detail
- { description = parseDescription tags
- , images = map (\url -> T.concat [T.pack "https:", url]) $ getTagAttributes "" (T.pack "content") tags
- , properties = parseProperties tags
- }
-
-parseDescription :: [Tag Text] -> Maybe Text
-parseDescription tags =
- let descriptionTags = getTagsBetween "
" "
" tags
- in if null descriptionTags
- then
- Nothing
- else
- let replaceBr = map (\tag -> if tag ~== "
" then TagText (T.pack "\n") else tag)
- in Just . T.strip . renderTags . replaceBr $ descriptionTags
-
-parseProperties :: [Tag Text] -> Map Text Text
-parseProperties tags =
- let mbUtagData = getTagTextAfter "" . getTagsAfter "" $ tags
- in fromMaybe M.empty (fmap parseUtagData mbUtagData)
-
-parseUtagData :: Text -> Map Text Text
-parseUtagData =
- M.fromList
- . catMaybes
- . fmap parseUtag
- . T.splitOn (T.pack ",")
- . T.takeWhile (/= '}')
- . T.drop 1
- . T.dropWhile (/= '{')
-
-parseUtag :: Text -> Maybe (Text, Text)
-parseUtag utag =
- case T.splitOn (T.pack ":") utag of
- [x, y] -> Just (T.strip x, removeQuotes y)
- _ -> Nothing
-
-removeQuotes :: Text -> Text
-removeQuotes =
- T.takeWhile (/= '\"')
- . T.dropWhile (== '\"')
- . T.strip
diff --git a/src/Parser/Resume.hs b/src/Parser/Resume.hs
deleted file mode 100644
index 8940be7..0000000
--- a/src/Parser/Resume.hs
+++ /dev/null
@@ -1,31 +0,0 @@
-module Parser.Resume
- ( parse
- ) where
-
-import Data.Maybe (catMaybes, isJust)
-import Data.List (find)
-import Data.Text (Text)
-import qualified Data.Text as T
-
-import Text.HTML.TagSoup
-
-import Model.Resume (Resume(Resume))
-
-import Parser.Utils
-
-parse :: Text -> [Resume]
-parse page =
- case dropWhile (not . hasClass (T.pack "section") (T.pack "tabsContent")) (parseTags page) of
- [] ->
- []
- sectionTags ->
- let lbcTags = takeWhile (not . hasClass (T.pack "div") (T.pack "information-immo")) sectionTags
- in catMaybes . fmap parseResume $ partitions (~== "") lbcTags
-
-parseResume :: [Tag Text] -> Maybe Resume
-parseResume item = do
- name <- getTagTextAfter "