{-# LANGUAGE OverloadedStrings #-} module PerfumeParser ( parsePerfumes ) where import Data.Text (Text) import qualified Data.Text as T import Text.HTML.TagSoup import Data.List (find) import Model.URL import Model.Perfume parsePerfumes :: Text -> [Perfume] parsePerfumes page = getPerfumes . getTagsInside "tbody" . keepOnlyOne "tbody" $ parseTags page keepOnlyOne :: String -> [Tag Text] -> [Tag Text] keepOnlyOne tagName tags = let count = length . filter (~== ("<" ++ tagName ++ ">")) $ tags in if count > 1 then keepOnlyOne tagName (drop 1 . dropWhile (~/= ("<" ++ tagName ++ ">")) $ tags) else tags getTagsInside :: String -> [Tag Text] -> [Tag Text] getTagsInside selector = takeWhile (~/= ("")) . dropWhile (~/= ("<" ++ selector ++ ">")) getPerfumes :: [Tag Text] -> [Perfume] getPerfumes (TagOpen "a" attributes : TagText name : xs) = case find ((==) "href" . fst) attributes of Just (_, url) -> Perfume name (T.concat [site, "/", url]) : getPerfumes xs Nothing -> getPerfumes xs getPerfumes (_:xs) = getPerfumes xs getPerfumes [] = []