aboutsummaryrefslogtreecommitdiff
path: root/src/Parser/Detail.hs
blob: 2418d0750b368f9c4d598912b201761f83ca062f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
module Parser.Detail
  ( parse
  ) where

import Data.Text (Text)
import qualified Data.Text as T

import Data.Map (Map)
import qualified Data.Map as M

import Data.Maybe (catMaybes, fromMaybe)

import Text.HTML.TagSoup

import Model.Detail

import Parser.Utils

-- | Build a 'Detail' record from the raw HTML text of a page.
--
-- The page is tokenised once with 'parseTags' and the resulting tag list is
-- shared by all three fields:
--
-- * @description@ comes from 'parseDescription';
-- * @images@ are the values 'getTagAttributes' yields for the @content@
--   attribute of @\<meta itemprop=image\>@, each prefixed with @https:@
--   (presumably because the page uses protocol-relative URLs -- confirm);
-- * @properties@ comes from 'parseProperties'.
parse :: Text -> Detail
parse page =
  Detail
    { description = parseDescription tags
    , images = map withScheme imageUrls
    , properties = parseProperties tags
    }
  where
    tags = parseTags page
    imageUrls = getTagAttributes "<meta itemprop=image>" (T.pack "content") tags
    -- Prepend the URL scheme to every image reference.
    withScheme = T.append (T.pack "https:")

-- | Extract the description text from a page's tag stream.
--
-- Looks at the tags 'getTagsBetween' returns for
-- @\<p itemprop=description\>@ .. @\</p\>@. When that list is empty the result
-- is 'Nothing'. Otherwise every tag matching @\<br\>@ is replaced by a newline
-- text node, the tags are rendered back to text with 'renderTags', and the
-- result is whitespace-stripped.
parseDescription :: [Tag Text] -> Maybe Text
parseDescription tags =
  case getTagsBetween "<p itemprop=description>" "</p>" tags of
    [] -> Nothing
    body -> Just (T.strip (renderTags (map brToNewline body)))
  where
    -- Turn a line-break tag into a literal newline; leave other tags alone.
    brToNewline tag
      | tag ~== "<br>" = TagText (T.pack "\n")
      | otherwise = tag

-- | Collect the page's key/value properties from an inline script.
--
-- Narrows the tag stream with 'getTagsAfter' on @\<body\>@, then on
-- @\</script\>@, and asks 'getTagTextAfter' for the text following
-- @\<script\>@ (presumably the contents of the second script element in the
-- body -- confirm against "Parser.Utils"). If no such text is found the map is
-- empty; otherwise the text is handed to 'parseUtagData'.
parseProperties :: [Tag Text] -> Map Text Text
parseProperties tags =
  maybe M.empty parseUtagData utagScript
  where
    afterBody = getTagsAfter "<body>" tags
    afterFirstScript = getTagsAfter "</script>" afterBody
    utagScript = getTagTextAfter "<script>" afterFirstScript

-- | Parse the first @{ ... }@ object literal found in a script's text into a
-- map.
--
-- Everything up to and including the first @{@ is discarded, the remainder is
-- cut at the first @}@, and what is left is split on commas. Each piece is
-- given to 'parseUtag'; pieces it rejects are skipped. Because the split is
-- purely textual, nested braces or commas inside values are not understood.
parseUtagData :: Text -> Map Text Text
parseUtagData script =
  M.fromList [entry | Just entry <- map parseUtag fields]
  where
    -- Text strictly between the first '{' and the next '}'.
    objectBody = T.takeWhile (/= '}') (T.drop 1 (T.dropWhile (/= '{') script))
    fields = T.splitOn (T.pack ",") objectBody

-- | Parse a single @key: value@ fragment into a key/value pair.
--
-- The fragment is split at its /first/ colon only, so a value that itself
-- contains colons (a URL, a time of day, ...) is kept intact instead of the
-- whole fragment being discarded. The key is whitespace-stripped and the value
-- is cleaned up with 'removeQuotes'.
--
-- Returns 'Nothing' when the fragment contains no colon at all.
parseUtag :: Text -> Maybe (Text, Text)
parseUtag utag =
  case T.breakOn (T.pack ":") utag of
    -- 'T.breakOn' leaves the second component empty when there is no match.
    (_, rest) | T.null rest -> Nothing
    -- @rest@ still starts with the separator; drop it before cleaning up.
    (key, rest) -> Just (T.strip key, removeQuotes (T.drop 1 rest))

-- | Pull the bare value out of a possibly double-quoted piece of text.
--
-- Surrounding whitespace is stripped, any run of leading double quotes is
-- dropped, and the result is everything up to (but not including) the next
-- double quote. Text after that quote is discarded.
removeQuotes :: Text -> Text
removeQuotes raw = T.takeWhile notQuote afterOpening
  where
    notQuote = (/= '"')
    afterOpening = T.dropWhile (== '"') (T.strip raw)