aboutsummaryrefslogtreecommitdiff
path: root/src/Parser
diff options
context:
space:
mode:
Diffstat (limited to 'src/Parser')
-rw-r--r--src/Parser/Detail.hs40
-rw-r--r--src/Parser/Utils.hs15
2 files changed, 47 insertions, 8 deletions
diff --git a/src/Parser/Detail.hs b/src/Parser/Detail.hs
index 3a91ac2..3f424e9 100644
--- a/src/Parser/Detail.hs
+++ b/src/Parser/Detail.hs
@@ -5,6 +5,11 @@ module Parser.Detail
import Data.Text (Text)
import qualified Data.Text as T
+import Data.Map (Map)
+import qualified Data.Map as M
+
+import Data.Maybe (catMaybes, fromMaybe)
+
import Text.HTML.TagSoup
import Model.Detail
@@ -14,9 +19,11 @@ import Parser.Utils
parseDetail :: Text -> Detail
parseDetail page =
let tags = parseTags page
- description = parseDescription tags
- images = getTagAttributes "<meta itemprop=image>" (T.pack "content") tags
- in Detail { description = description, images = images }
+ in Detail
+ { description = parseDescription tags
+ , images = getTagAttributes "<meta itemprop=image>" (T.pack "content") tags
+ , properties = parseProperties tags
+ }
parseDescription :: [Tag Text] -> Maybe Text
parseDescription tags =
@@ -27,3 +34,30 @@ parseDescription tags =
else
let replaceBr = map (\tag -> if tag ~== "<br>" then TagText (T.pack "\n") else tag)
in Just . T.strip . renderTags . replaceBr $ descriptionTags
+
+parseProperties :: [Tag Text] -> Map Text Text
+parseProperties tags =
+ let mbUtagData = getTagTextAfter "<script>" . getTagsAfter "<body>" $ tags
+ in fromMaybe M.empty (fmap parseUtagData mbUtagData)
+
+parseUtagData :: Text -> Map Text Text
+parseUtagData =
+ M.fromList
+ . catMaybes
+ . fmap parseUtag
+ . T.splitOn (T.pack ",")
+ . T.takeWhile (/= '}')
+ . T.drop 1
+ . T.dropWhile (/= '{')
+
+parseUtag :: Text -> Maybe (Text, Text)
+parseUtag utag =
+ case T.splitOn (T.pack ":") utag of
+ [x, y] -> Just (T.strip x, removeQuotes y)
+ _ -> Nothing
+
+removeQuotes :: Text -> Text
+removeQuotes =
+ T.takeWhile (/= '\"')
+ . T.dropWhile (== '\"')
+ . T.strip
diff --git a/src/Parser/Utils.hs b/src/Parser/Utils.hs
index c03ab03..d72a1ce 100644
--- a/src/Parser/Utils.hs
+++ b/src/Parser/Utils.hs
@@ -1,5 +1,7 @@
module Parser.Utils
- ( getTagsBetween
+ ( getTagsBefore
+ , getTagsAfter
+ , getTagsBetween
, getTagAttributes
, getTagAttribute
, getTagTextAfter
@@ -12,11 +14,14 @@ import qualified Data.Text as T
import Text.HTML.TagSoup
+getTagsBefore :: String -> [Tag Text] -> [Tag Text]
+getTagsBefore selector = takeWhile (~/= selector)
+
+getTagsAfter :: String -> [Tag Text] -> [Tag Text]
+getTagsAfter selector = drop 1 . dropWhile (~/= selector)
+
getTagsBetween :: String -> String -> [Tag Text] -> [Tag Text]
-getTagsBetween beginSelector endSelector =
- takeWhile (~/= endSelector)
- . drop 1
- . dropWhile (~/= beginSelector)
+getTagsBetween begin end = getTagsBefore end . getTagsAfter begin
getTagAttributes :: String -> Text -> [Tag Text] -> [Text]
getTagAttributes selector attribute =