{- |
Module      : Parser.Utils
Description : TagSoup helpers to read an attribute or a text from a tag list.

Recovered from the patch that created @src/Parser/Utils.hs@:

  * commit 5977e1454d7738ddb086d37b20337e350e380790
  * author Joris Guyonvarch, Sat, 11 Apr 2015 11:50:48 +0200
  * subject: Fetch first page ads of a given leboncoin url, fetch also the
    description page of each item.
-}
module Parser.Utils
  ( getTagAttribute
  , getTagText
  ) where

import Data.List (find, findIndex)
import Data.Maybe (listToMaybe)

import Text.HTML.TagSoup

-- | Value of @attribute@ on the first tag of @item@ that matches @selector@.
--
-- Only the first matching tag is inspected: when that tag does not carry
-- the attribute the result is 'Nothing', even if a later matching tag does.
-- The result is also 'Nothing' when no tag matches.
--
-- NOTE(review): @selector@ is handed to TagSoup's '~==' as a 'String', so
-- it is presumably expected to be a single tag such as @\"\<a class=x\>\"@;
-- confirm against the TagSoup documentation.
getTagAttribute :: String -> String -> [Tag String] -> Maybe String
getTagAttribute selector attribute item =
  find (~== selector) item >>= maybeTagAttribute attribute

-- | Whitespace-normalised text of the tag that immediately follows the
-- first tag of @item@ matching @selector@.
--
-- The result is 'Nothing' when no tag matches, when the match is the last
-- tag of the list, or when 'maybeTagText' rejects the following tag.
getTagText :: String -> [Tag String] -> Maybe String
getTagText selector item =
  findIndex (~== selector) item >>= \index ->
    -- The text is looked up one position after the matched tag.
    fmap trim (safeGetAt (index + 1) item >>= maybeTagText)

-- | Value of the attribute called @name@ on an opening tag.
--
-- Any tag other than a 'TagOpen' yields 'Nothing'. When the attribute
-- appears several times, the first occurrence wins.
maybeTagAttribute :: String -> Tag String -> Maybe String
maybeTagAttribute name (TagOpen _ attributes) = lookup name attributes
maybeTagAttribute _ _ = Nothing

-- | Strip leading and trailing whitespace, and collapse every interior run
-- of whitespace into a single space.
trim :: String -> String
trim = unwords . words

-- | Element at the given zero-based position, or 'Nothing' when the
-- position is negative or past the end of the list.
safeGetAt :: Int -> [a] -> Maybe a
safeGetAt index xs
  -- 'drop' ignores negative counts, which would silently return the head.
  | index < 0 = Nothing
  | otherwise = listToMaybe (drop index xs)