about | summary | refs | log | tree | commit | diff
path: root/src/Parser/Detail.hs
diff options
context:
space:
mode:
author Joris 2018-06-17 23:24:47 +0200
committer Joris 2018-06-18 11:13:55 +0200
commit 0a4d3c8f12dc5797a919a00b6bcaf759947687cc (patch)
tree bcb89781e22c2314bf0c064ebb37cb7f8a362f5c /src/Parser/Detail.hs
parent e2a5c7c5c596d057b6fa9c08a8204ce1429cfdc4 (diff)
download ad-listener-0a4d3c8f12dc5797a919a00b6bcaf759947687cc.tar.gz
ad-listener-0a4d3c8f12dc5797a919a00b6bcaf759947687cc.tar.bz2
ad-listener-0a4d3c8f12dc5797a919a00b6bcaf759947687cc.zip
Add ouest france parser
Diffstat (limited to 'src/Parser/Detail.hs')
-rw-r--r-- src/Parser/Detail.hs 63
1 files changed, 0 insertions, 63 deletions
diff --git a/src/Parser/Detail.hs b/src/Parser/Detail.hs
deleted file mode 100644
index 2418d07..0000000
--- a/src/Parser/Detail.hs
+++ /dev/null
@@ -1,63 +0,0 @@
-module Parser.Detail
- ( parse
- ) where
-
-import Data.Text (Text)
-import qualified Data.Text as T
-
-import Data.Map (Map)
-import qualified Data.Map as M
-
-import Data.Maybe (catMaybes, fromMaybe)
-
-import Text.HTML.TagSoup
-
-import Model.Detail
-
-import Parser.Utils
-
--- | Parse the HTML source of a detail page into a 'Detail' record.
---
--- The page is tokenised once with 'parseTags' and the resulting tag list is
--- shared by the three field parsers. Image URLs are whatever
--- 'getTagAttributes' yields for the @content@ attribute of
--- @<meta itemprop=image>@, each prefixed with @https:@ (presumably because
--- the page emits protocol-relative URLs -- TODO confirm against a real page).
-parse :: Text -> Detail
-parse page =
-  Detail
-    { description = parseDescription tags
-    , images = map withScheme imageUrls
-    , properties = parseProperties tags
-    }
-  where
-    -- Tokenise once; every field parser below works on this list.
-    tags = parseTags page
-    imageUrls = getTagAttributes "<meta itemprop=image>" (T.pack "content") tags
-    -- Prepend the scheme to each extracted URL.
-    withScheme url = T.concat [T.pack "https:", url]
-
--- | Extract the description text from the page's tags.
---
--- The candidate tags are those 'getTagsBetween' returns for
--- @<p itemprop=description>@ and @</p>@. When that list is empty the result
--- is 'Nothing'. Otherwise every tag matching @<br>@ is replaced by a text
--- node holding a newline, the tags are rendered back to text with
--- 'renderTags', and surrounding whitespace is stripped.
-parseDescription :: [Tag Text] -> Maybe Text
-parseDescription tags =
-  case getTagsBetween "<p itemprop=description>" "</p>" tags of
-    [] -> Nothing
-    descriptionTags ->
-      Just (T.strip (renderTags (map brToNewline descriptionTags)))
-  where
-    -- Turn line-break tags into literal newlines so they survive rendering
-    -- as plain text.
-    brToNewline tag
-      | tag ~== "<br>" = TagText (T.pack "\n")
-      | otherwise = tag
-
--- | Collect the page's key/value properties from an inline script.
---
--- The tag list is narrowed with @getTagsAfter "<body>"@ and then
--- @getTagsAfter "</script>"@, and @getTagTextAfter "<script>"@ is asked for
--- the text to parse. NOTE(review): this presumably selects the second inline
--- script of the body -- confirm against the helpers in "Parser.Utils".
--- When no text is found the result is an empty map; otherwise the text is
--- handed to 'parseUtagData'.
-parseProperties :: [Tag Text] -> Map Text Text
-parseProperties tags =
-  fromMaybe M.empty (parseUtagData <$> scriptText)
-  where
-    afterBody = getTagsAfter "<body>" tags
-    afterFirstScript = getTagsAfter "</script>" afterBody
-    scriptText = getTagTextAfter "<script>" afterFirstScript
-
--- | Turn the text of a script containing a @{ key: value, ... }@ literal
--- into a map.
---
--- Only the text after the first @{@ and before the next @}@ is considered.
--- It is split on every comma, each piece is given to 'parseUtag', and pieces
--- that 'parseUtag' rejects are dropped. When a key occurs more than once the
--- last occurrence wins (behaviour of 'M.fromList').
---
--- This is a deliberately simple scan: nested braces and commas inside
--- values are not understood.
-parseUtagData :: Text -> Map Text Text
-parseUtagData script =
-  M.fromList (catMaybes (map parseUtag entries))
-  where
-    -- Skip up to and including the first '{', then stop at the first '}'.
-    objectBody = T.takeWhile (/= '}') (T.drop 1 (T.dropWhile (/= '{') script))
-    entries = T.splitOn (T.pack ",") objectBody
-
--- | Parse a single @key: value@ entry.
---
--- The entry is split at its /first/ colon. The key is the text before it
--- with surrounding whitespace stripped; the value is everything after it,
--- cleaned up by 'removeQuotes'. Returns 'Nothing' when the entry contains no
--- colon at all.
---
--- Splitting only at the first colon keeps values that themselves contain
--- colons (URLs, times, ...); splitting on every colon and requiring exactly
--- two parts would silently discard such entries.
-parseUtag :: Text -> Maybe (Text, Text)
-parseUtag utag
-  | T.null rest = Nothing
-  | otherwise = Just (T.strip key, removeQuotes (T.drop 1 rest))
-  where
-    -- 'T.breakOn' leaves the separator at the head of @rest@, so an empty
-    -- @rest@ means no colon was found; 'T.drop' 1 removes the colon itself.
-    (key, rest) = T.breakOn (T.pack ":") utag
-
--- | Strip the quoting around a value.
---
--- Surrounding whitespace is removed first, then any run of leading
--- double-quote characters is dropped, and the result is cut at the next
--- double quote (so anything after an inner quote is discarded).
-removeQuotes :: Text -> Text
-removeQuotes raw =
-  let trimmed = T.strip raw
-      withoutOpening = T.dropWhile isQuote trimmed
-  in  T.takeWhile (not . isQuote) withoutOpening
-  where
-    isQuote c = c == '\"'