aboutsummaryrefslogtreecommitdiff
path: root/src/Parser/Utils.hs
blob: 98694bb350e95099f56b567ad6a212ede639f023 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
module Parser.Utils
  ( getTagsBefore
  , getTagsAfter
  , getTagsBetween
  , getTagAttributes
  , getTagAttribute
  , getTagTextAfter
  , hasClass
  ) where

import Data.List (find, findIndex)
import Data.Maybe (listToMaybe, catMaybes, isJust)
import Data.Text (Text)
import qualified Data.Text as T

import Text.HTML.TagSoup
import Text.HTML.TagSoup.Match (tagOpen)

-- | Return the leading run of @tags@ that precedes the first tag matching
-- @selector@ (tags are compared against the selector with TagSoup's '~/=').
--
-- The matching tag itself is not part of the result. When no tag matches,
-- the whole input list is returned.
getTagsBefore :: String -> [Tag Text] -> [Tag Text]
getTagsBefore selector tags = fst (span (~/= selector) tags)

-- | Return every tag that follows the first tag matching @selector@.
--
-- The matching tag itself is dropped. The result is empty when no tag
-- matches or when the match is the last element of the list.
getTagsAfter :: String -> [Tag Text] -> [Tag Text]
getTagsAfter selector tags =
  case dropWhile (~/= selector) tags of
    [] -> []
    _ : rest -> rest

-- | Return the tags strictly between the first tag matching @begin@ and the
-- next tag (after it) matching @end@.
--
-- Neither delimiter is included. If @begin@ never matches the result is
-- empty; if @end@ never matches after @begin@, everything after @begin@ is
-- returned.
getTagsBetween :: String -> String -> [Tag Text] -> [Tag Text]
getTagsBetween begin end tags = getTagsBefore end afterBegin
  where
    -- Everything that comes after the opening delimiter.
    afterBegin = getTagsAfter begin tags

-- | Collect the value of @attribute@ from every tag matching @selector@,
-- in document order.
--
-- Matching tags that do not carry the attribute contribute nothing to the
-- result.
getTagAttributes :: String -> Text -> [Tag Text] -> [Text]
getTagAttributes selector attribute tags =
  catMaybes
    [ maybeTagAttribute attribute tag
    | tag <- tags
    , tag ~== selector
    ]

-- | Return the first value produced by 'getTagAttributes' for the given
-- @selector@ and @attribute@, or 'Nothing' when there is none.
getTagAttribute :: String -> Text -> [Tag Text] -> Maybe Text
getTagAttribute selector attribute tags =
  listToMaybe (getTagAttributes selector attribute tags)

-- | Return the whitespace-stripped text of the tag that immediately follows
-- the first tag matching @selector@.
--
-- Yields 'Nothing' when no tag matches, when the match is the last tag, or
-- when 'maybeTagText' yields nothing for the following tag.
getTagTextAfter :: String -> [Tag Text] -> Maybe Text
getTagTextAfter selector tags = do
  index <- findIndex (~== selector) tags
  -- Only the tag directly after the match is inspected.
  next <- safeGetAt (index + 1) tags
  T.strip <$> maybeTagText next

-- | Look up the attribute called @name@ on an opening tag.
--
-- Returns the value of the first attribute whose name equals @name@, or
-- 'Nothing' if the tag has no such attribute or is not a 'TagOpen'.
maybeTagAttribute :: Text -> Tag Text -> Maybe Text
maybeTagAttribute name tag =
  case tag of
    TagOpen _ attributes -> lookup name attributes
    _ -> Nothing

-- | Total replacement for list indexing: return the element at the
-- zero-based @index@, or 'Nothing' when the index is out of range.
--
-- Negative indices are rejected explicitly. 'drop' treats a negative count
-- as zero, so without the guard a negative index would silently return the
-- head of the list.
safeGetAt :: Int -> [a] -> Maybe a
safeGetAt index xs
  | index < 0 = Nothing
  | otherwise = listToMaybe (drop index xs)

-- | Test whether a tag is an opening tag named @selector@ that lists
-- @className@ among the whitespace-separated entries of a @class@ attribute.
--
-- The tag name and class name are compared for exact equality.
hasClass :: Text -> Text -> Tag Text -> Bool
hasClass selector className = tagOpen (== selector) carriesClass
  where
    -- True when at least one attribute is a matching @class@ attribute.
    carriesClass = isJust . find isWantedClass
    isWantedClass (name, values) =
      name == T.pack "class" && className `elem` T.words values