about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Joris 2015-08-31 23:14:40 +0200
committer Joris 2015-08-31 23:14:40 +0200
commit c1c9b1f6233a7316c8df8fa50efba14fa5b21660 (patch)
tree fafd5d57afdc70936186e2fa291289fa936c7eac /src
parent 1783b28d8630119f96b49d8ab8efa62975cfd13f (diff)
Parsing perfumes in the last tbody of the page
Diffstat (limited to 'src')
-rw-r--r-- src/PerfumeParser.hs 17
1 files changed, 12 insertions, 5 deletions
diff --git a/src/PerfumeParser.hs b/src/PerfumeParser.hs
index 1b200d9..8641638 100644
--- a/src/PerfumeParser.hs
+++ b/src/PerfumeParser.hs
@@ -13,14 +13,21 @@ import Model.URL
import Model.Perfume
parsePerfumes :: Text -> [Perfume]
-parsePerfumes page = getPerfumes . getSecondTagsInside "tbody" $ parseTags page
+parsePerfumes page = getPerfumes . getTagsInside "tbody" . keepOnlyOne "tbody" $ parseTags page
-getSecondTagsInside :: String -> [Tag Text] -> [Tag Text]
-getSecondTagsInside selector =
+keepOnlyOne :: String -> [Tag Text] -> [Tag Text]
+keepOnlyOne tagName tags =
+ let count = length . filter (~== ("<" ++ tagName ++ ">")) $ tags
+ in if count > 1
+ then
+ keepOnlyOne tagName (drop 1 . dropWhile (~/= ("<" ++ tagName ++ ">")) $ tags)
+ else
+ tags
+
+getTagsInside :: String -> [Tag Text] -> [Tag Text]
+getTagsInside selector =
takeWhile (~/= ("</" ++ selector ++ ">"))
. dropWhile (~/= ("<" ++ selector ++ ">"))
- . drop 1
- . dropWhile (~/= ("<" ++ selector ++ ">"))
getPerfumes :: [Tag Text] -> [Perfume]
getPerfumes (TagOpen "a" attributes : TagText name : xs) =