{-# LANGUAGE OverloadedStrings #-}

-- | Extraction of 'Perfume' entries from an HTML page.
module PerfumeParser
  ( parsePerfumes
  ) where

import Data.Text (Text)
import qualified Data.Text as T
import Text.HTML.TagSoup
import Data.List (find)

import Model.URL
import Model.Perfume

-- | Parse an HTML page and return one 'Perfume' per link found inside the
-- second @\<tbody\>@ element of the page.
--
-- Returns an empty list when the page contains fewer than two @\<tbody\>@
-- opening tags.
parsePerfumes :: Text -> [Perfume]
parsePerfumes page = getPerfumes secondBody
  where
    secondBody = getSecondTagsInside "tbody" (parseTags page)

-- | Return the tags of the second element named @selector@: everything from
-- its opening tag (inclusive) up to, but not including, the first closing
-- tag with the same name that follows it.
--
-- The selector is a bare tag name such as @"tbody"@; it is wrapped in angle
-- brackets here to build the TagSoup patterns.
getSecondTagsInside :: String -> [Tag Text] -> [Tag Text]
getSecondTagsInside selector =
    takeWhile (~/= closeTag)   -- stop at the matching closing tag
  . dropWhile (~/= openTag)    -- advance to the second opening tag
  . drop 1                     -- step past the first opening tag itself
  . dropWhile (~/= openTag)    -- advance to the first opening tag
  where
    openTag, closeTag :: String
    openTag  = "<" ++ selector ++ ">"
    -- The closing-tag pattern must be a string that parses to exactly one
    -- tag; an empty string is rejected by TagSoup's 'TagRep' instance.
    closeTag = "</" ++ selector ++ ">"

-- | Collect a 'Perfume' for every @\<a\>@ opening tag that is immediately
-- followed by a text tag and carries an @href@ attribute.
--
-- The text becomes the perfume name and the URL is @site@ joined to the
-- @href@ value with a @\/@. Anchors without an @href@ are skipped.
-- NOTE(review): @site@ is not defined in this module; presumably it is
-- exported by "Model.URL" — confirm.
getPerfumes :: [Tag Text] -> [Perfume]
getPerfumes (TagOpen "a" attributes : TagText name : xs) =
  case find ((==) "href" . fst) attributes of
    Just (_, url) -> Perfume name (T.concat [site, "/", url]) : getPerfumes xs
    Nothing       -> getPerfumes xs
getPerfumes (_ : xs) = getPerfumes xs
getPerfumes []       = []