aboutsummaryrefslogtreecommitdiff
path: root/src/PerfumeParser.hs
blob: 86416380a3a65379cdcfa446fd9f0c059ecd818a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
{-# LANGUAGE OverloadedStrings #-}

module PerfumeParser
  ( parsePerfumes
  ) where

import Data.Text (Text)
import qualified Data.Text as T
import Text.HTML.TagSoup
import Data.List (find)

import Model.URL
import Model.Perfume

parsePerfumes :: Text -> [Perfume]
parsePerfumes page = getPerfumes . getTagsInside "tbody" . keepOnlyOne "tbody" $ parseTags page

keepOnlyOne :: String -> [Tag Text] -> [Tag Text]
keepOnlyOne tagName tags =
  let count = length . filter (~== ("<" ++ tagName ++ ">")) $ tags
  in  if count > 1
        then
          keepOnlyOne tagName (drop 1 . dropWhile (~/= ("<" ++ tagName ++ ">")) $ tags)
        else
          tags

getTagsInside :: String -> [Tag Text] -> [Tag Text]
getTagsInside selector =
  takeWhile (~/= ("</" ++ selector ++ ">"))
  . dropWhile (~/= ("<" ++ selector ++ ">"))

getPerfumes :: [Tag Text] -> [Perfume]
getPerfumes (TagOpen "a" attributes : TagText name : xs) =
  case find ((==) "href" . fst) attributes of
    Just (_, url) -> Perfume name (T.concat [site, "/", url]) : getPerfumes xs
    Nothing -> getPerfumes xs
getPerfumes (_:xs) = getPerfumes xs
getPerfumes [] = []