blob: 86416380a3a65379cdcfa446fd9f0c059ecd818a (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
|
{-# LANGUAGE OverloadedStrings #-}
module PerfumeParser
( parsePerfumes
) where
import Data.Text (Text)
import qualified Data.Text as T
import Text.HTML.TagSoup
import Data.List (find)
import Model.URL
import Model.Perfume
parsePerfumes :: Text -> [Perfume]
parsePerfumes page = getPerfumes . getTagsInside "tbody" . keepOnlyOne "tbody" $ parseTags page
keepOnlyOne :: String -> [Tag Text] -> [Tag Text]
keepOnlyOne tagName tags =
let count = length . filter (~== ("<" ++ tagName ++ ">")) $ tags
in if count > 1
then
keepOnlyOne tagName (drop 1 . dropWhile (~/= ("<" ++ tagName ++ ">")) $ tags)
else
tags
getTagsInside :: String -> [Tag Text] -> [Tag Text]
getTagsInside selector =
takeWhile (~/= ("</" ++ selector ++ ">"))
. dropWhile (~/= ("<" ++ selector ++ ">"))
getPerfumes :: [Tag Text] -> [Perfume]
getPerfumes (TagOpen "a" attributes : TagText name : xs) =
case find ((==) "href" . fst) attributes of
Just (_, url) -> Perfume name (T.concat [site, "/", url]) : getPerfumes xs
Nothing -> getPerfumes xs
getPerfumes (_:xs) = getPerfumes xs
getPerfumes [] = []
|