diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/PerfumeParser.hs | 17 |
1 file changed, 12 insertions, 5 deletions
diff --git a/src/PerfumeParser.hs b/src/PerfumeParser.hs
index 1b200d9..8641638 100644
--- a/src/PerfumeParser.hs
+++ b/src/PerfumeParser.hs
@@ -13,14 +13,21 @@ import Model.URL
 import Model.Perfume
 
 parsePerfumes :: Text -> [Perfume]
-parsePerfumes page = getPerfumes . getSecondTagsInside "tbody" $ parseTags page
+parsePerfumes page = getPerfumes . getTagsInside "tbody" . keepOnlyOne "tbody" $ parseTags page
 
-getSecondTagsInside :: String -> [Tag Text] -> [Tag Text]
-getSecondTagsInside selector =
+keepOnlyOne :: String -> [Tag Text] -> [Tag Text]
+keepOnlyOne tagName tags =
+    let count = length . filter (~== ("<" ++ tagName ++ ">")) $ tags
+    in if count > 1
+        then
+            keepOnlyOne tagName (drop 1 . dropWhile (~/= ("<" ++ tagName ++ ">")) $ tags)
+        else
+            tags
+
+getTagsInside :: String -> [Tag Text] -> [Tag Text]
+getTagsInside selector =
     takeWhile (~/= ("</" ++ selector ++ ">"))
     . dropWhile (~/= ("<" ++ selector ++ ">"))
-    . drop 1
-    . dropWhile (~/= ("<" ++ selector ++ ">"))
 
 getPerfumes :: [Tag Text] -> [Perfume]
 getPerfumes (TagOpen "a" attributes : TagText name : xs) =