diff options
author | Joris Guyonvarch | 2015-04-18 21:59:22 +0200 |
---|---|---|
committer | Joris Guyonvarch | 2015-04-18 21:59:22 +0200 |
commit | 389d979eb3eaa18beb8a6da9f4a03bdb6acc1722 (patch) | |
tree | 9d475b868ad13162c0ecba42b2138058c2e2e68b /src/Parser | |
parent | 86f9991deeb44a6cc81044e61a9ad3ee001c5ced (diff) |
Parsing utag_list from a detail page and showing the keys that are given in the configuration file
Diffstat (limited to 'src/Parser')
-rw-r--r-- | src/Parser/Detail.hs | 40 | ||||
-rw-r--r-- | src/Parser/Utils.hs | 15 |
2 files changed, 47 insertions, 8 deletions
diff --git a/src/Parser/Detail.hs b/src/Parser/Detail.hs index 3a91ac2..3f424e9 100644 --- a/src/Parser/Detail.hs +++ b/src/Parser/Detail.hs @@ -5,6 +5,11 @@ module Parser.Detail import Data.Text (Text) import qualified Data.Text as T +import Data.Map (Map) +import qualified Data.Map as M + +import Data.Maybe (catMaybes, fromMaybe) + import Text.HTML.TagSoup import Model.Detail @@ -14,9 +19,11 @@ import Parser.Utils parseDetail :: Text -> Detail parseDetail page = let tags = parseTags page - description = parseDescription tags - images = getTagAttributes "<meta itemprop=image>" (T.pack "content") tags - in Detail { description = description, images = images } + in Detail + { description = parseDescription tags + , images = getTagAttributes "<meta itemprop=image>" (T.pack "content") tags + , properties = parseProperties tags + } parseDescription :: [Tag Text] -> Maybe Text parseDescription tags = @@ -27,3 +34,30 @@ parseDescription tags = else let replaceBr = map (\tag -> if tag ~== "<br>" then TagText (T.pack "\n") else tag) in Just . T.strip . renderTags . replaceBr $ descriptionTags + +parseProperties :: [Tag Text] -> Map Text Text +parseProperties tags = + let mbUtagData = getTagTextAfter "<script>" . getTagsAfter "<body>" $ tags + in fromMaybe M.empty (fmap parseUtagData mbUtagData) + +parseUtagData :: Text -> Map Text Text +parseUtagData = + M.fromList + . catMaybes + . fmap parseUtag + . T.splitOn (T.pack ",") + . T.takeWhile (/= '}') + . T.drop 1 + . T.dropWhile (/= '{') + +parseUtag :: Text -> Maybe (Text, Text) +parseUtag utag = + case T.splitOn (T.pack ":") utag of + [x, y] -> Just (T.strip x, removeQuotes y) + _ -> Nothing + +removeQuotes :: Text -> Text +removeQuotes = + T.takeWhile (/= '\"') + . T.dropWhile (== '\"') + . T.strip diff --git a/src/Parser/Utils.hs b/src/Parser/Utils.hs index c03ab03..d72a1ce 100644 --- a/src/Parser/Utils.hs +++ b/src/Parser/Utils.hs @@ -1,5 +1,7 @@ module Parser.Utils - ( getTagsBetween + ( getTagsBefore + , getTagsAfter + , getTagsBetween , getTagAttributes , getTagAttribute , getTagTextAfter @@ -12,11 +14,14 @@ import qualified Data.Text as T import Text.HTML.TagSoup +getTagsBefore :: String -> [Tag Text] -> [Tag Text] +getTagsBefore selector = takeWhile (~/= selector) + +getTagsAfter :: String -> [Tag Text] -> [Tag Text] +getTagsAfter selector = drop 1 . dropWhile (~/= selector) + getTagsBetween :: String -> String -> [Tag Text] -> [Tag Text] -getTagsBetween beginSelector endSelector = - takeWhile (~/= endSelector) - . drop 1 - . dropWhile (~/= beginSelector) +getTagsBetween begin end = getTagsBefore end . getTagsAfter begin getTagAttributes :: String -> Text -> [Tag Text] -> [Text] getTagAttributes selector attribute = |