diff options
author | Joris | 2015-08-31 23:14:40 +0200 |
---|---|---|
committer | Joris | 2015-08-31 23:14:40 +0200 |
commit | c1c9b1f6233a7316c8df8fa50efba14fa5b21660 (patch) | |
tree | fafd5d57afdc70936186e2fa291289fa936c7eac | |
parent | 1783b28d8630119f96b49d8ab8efa62975cfd13f (diff) |
Parsing perfumes in the last tbody of the page
-rw-r--r-- | src/PerfumeParser.hs | 17 |
1 files changed, 12 insertions, 5 deletions
```diff
diff --git a/src/PerfumeParser.hs b/src/PerfumeParser.hs
index 1b200d9..8641638 100644
--- a/src/PerfumeParser.hs
+++ b/src/PerfumeParser.hs
@@ -13,14 +13,21 @@ import Model.URL
 import Model.Perfume
 
 parsePerfumes :: Text -> [Perfume]
-parsePerfumes page = getPerfumes . getSecondTagsInside "tbody" $ parseTags page
+parsePerfumes page = getPerfumes . getTagsInside "tbody" . keepOnlyOne "tbody" $ parseTags page
 
-getSecondTagsInside :: String -> [Tag Text] -> [Tag Text]
-getSecondTagsInside selector =
+keepOnlyOne :: String -> [Tag Text] -> [Tag Text]
+keepOnlyOne tagName tags =
+  let count = length . filter (~== ("<" ++ tagName ++ ">")) $ tags
+  in if count > 1
+    then
+      keepOnlyOne tagName (drop 1 . dropWhile (~/= ("<" ++ tagName ++ ">")) $ tags)
+    else
+      tags
+
+getTagsInside :: String -> [Tag Text] -> [Tag Text]
+getTagsInside selector =
   takeWhile (~/= ("</" ++ selector ++ ">"))
     . dropWhile (~/= ("<" ++ selector ++ ">"))
-    . drop 1
-    . dropWhile (~/= ("<" ++ selector ++ ">"))
 
 getPerfumes :: [Tag Text] -> [Perfume]
 getPerfumes (TagOpen "a" attributes : TagText name : xs) =
```