aboutsummaryrefslogtreecommitdiff
path: root/src/PerfumeParser.hs
blob: 1b200d9b274643fba340ecdce913e3f700a0e693 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
{-# LANGUAGE OverloadedStrings #-}

module PerfumeParser
  ( parsePerfumes
  ) where

import Data.Text (Text)
import qualified Data.Text as T
import Text.HTML.TagSoup
import Data.List (find)

import Model.URL
import Model.Perfume

parsePerfumes :: Text -> [Perfume]
parsePerfumes page = getPerfumes . getSecondTagsInside "tbody" $ parseTags page

getSecondTagsInside :: String -> [Tag Text] -> [Tag Text]
getSecondTagsInside selector =
  takeWhile (~/= ("</" ++ selector ++ ">"))
  . dropWhile (~/= ("<" ++ selector ++ ">"))
  . drop 1
  . dropWhile (~/= ("<" ++ selector ++ ">"))

getPerfumes :: [Tag Text] -> [Perfume]
getPerfumes (TagOpen "a" attributes : TagText name : xs) =
  case find ((==) "href" . fst) attributes of
    Just (_, url) -> Perfume name (T.concat [site, "/", url]) : getPerfumes xs
    Nothing -> getPerfumes xs
getPerfumes (_:xs) = getPerfumes xs
getPerfumes [] = []