From c1c9b1f6233a7316c8df8fa50efba14fa5b21660 Mon Sep 17 00:00:00 2001
From: Joris
Date: Mon, 31 Aug 2015 23:14:40 +0200
Subject: Parsing perfumes in the last tbody of the page

---
 src/PerfumeParser.hs | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/src/PerfumeParser.hs b/src/PerfumeParser.hs
index 1b200d9..8641638 100644
--- a/src/PerfumeParser.hs
+++ b/src/PerfumeParser.hs
@@ -13,14 +13,21 @@ import Model.URL
 import Model.Perfume
 
 parsePerfumes :: Text -> [Perfume]
-parsePerfumes page = getPerfumes . getSecondTagsInside "tbody" $ parseTags page
+parsePerfumes page = getPerfumes . getTagsInside "tbody" . keepOnlyOne "tbody" $ parseTags page
 
-getSecondTagsInside :: String -> [Tag Text] -> [Tag Text]
-getSecondTagsInside selector =
+keepOnlyOne :: String -> [Tag Text] -> [Tag Text]
+keepOnlyOne tagName tags =
+  let count = length . filter (~== ("<" ++ tagName ++ ">")) $ tags
+  in  if count > 1
+        then
+          keepOnlyOne tagName (drop 1 . dropWhile (~/= ("<" ++ tagName ++ ">")) $ tags)
+        else
+          tags
+
+getTagsInside :: String -> [Tag Text] -> [Tag Text]
+getTagsInside selector =
   takeWhile (~/= ("</" ++ selector ++ ">"))
   . dropWhile (~/= ("<" ++ selector ++ ">"))
-  . drop 1
-  . dropWhile (~/= ("<" ++ selector ++ ">"))
 
 getPerfumes :: [Tag Text] -> [Perfume]
 getPerfumes (TagOpen "a" attributes : TagText name : xs) =
-- 
cgit v1.2.3