diff options
Diffstat (limited to 'src/parser')
-rw-r--r-- | src/parser/haskell/Parser/LeboncoinParser.hs | 8 | ||||
-rw-r--r-- | src/parser/haskell/Parser/OuestFranceParser.hs | 8 | ||||
-rw-r--r-- | src/parser/haskell/Parser/Utils.hs | 20 |
3 files changed, 17 insertions, 19 deletions
diff --git a/src/parser/haskell/Parser/LeboncoinParser.hs b/src/parser/haskell/Parser/LeboncoinParser.hs index 48eb80f..77213cb 100644 --- a/src/parser/haskell/Parser/LeboncoinParser.hs +++ b/src/parser/haskell/Parser/LeboncoinParser.hs @@ -12,13 +12,13 @@ import Parser.Utils parse :: Text -> [Ad] parse page = - catMaybes . fmap parseAd $ partitions (~== "<a>") tags + catMaybes . fmap parseAd $ partitions (~== (T.unpack "<a>")) tags where tags = getTagsBetween "<li itemtype=http://schema.org/Offer>" "<div class=information-immo_content>" (parseTags page) parseAd :: [Tag Text] -> Maybe Ad parseAd tags = do name <- getTagTextAfter "<h2 class=item_title>" tags - location <- getTagAttribute "<meta itemprop=address>" (T.pack "content") tags + location <- getTagAttribute "<meta itemprop=address>" "content" tags let price = getTagTextAfter "<h3 class=item_price>" tags - url <- getTagAttribute "<a>" (T.pack "href") tags - return (Ad name location price (T.concat [T.pack "https:", url])) + url <- getTagAttribute "<a>" "href" tags + return (Ad name location price (T.concat ["https:", url])) diff --git a/src/parser/haskell/Parser/OuestFranceParser.hs b/src/parser/haskell/Parser/OuestFranceParser.hs index a7b6360..f46ed03 100644 --- a/src/parser/haskell/Parser/OuestFranceParser.hs +++ b/src/parser/haskell/Parser/OuestFranceParser.hs @@ -5,16 +5,14 @@ module Parser.OuestFranceParser import Data.Maybe (catMaybes) import Data.Text (Text) import qualified Data.Text as T - import Text.HTML.TagSoup import Model.Ad (Ad (Ad)) - import Parser.Utils parse :: Text -> [Ad] parse page = - catMaybes . fmap parseAd $ partitions (~== "<a>") tags + catMaybes . fmap parseAd $ partitions (~== (T.unpack "<a>")) tags where tags = getTagsBetween "<div id=listAnnonces>" "<div id=interactions>" (parseTags page) parseAd :: [Tag Text] -> Maybe Ad @@ -22,6 +20,6 @@ parseAd tags = do name <- getTagTextAfter "<span class=annTitre>" tags location <- getTagTextAfter "<span class=annVille>" tags let price = getTagTextAfter "<span class=annPrix>" tags - let startUrl = T.pack "https://www.ouestfrance-immo.com/" - url <- getTagAttribute "<a>" (T.pack "href") tags + let startUrl = "https://www.ouestfrance-immo.com/" + url <- getTagAttribute "<a>" "href" tags return (Ad name location price (T.concat [startUrl, url])) diff --git a/src/parser/haskell/Parser/Utils.hs b/src/parser/haskell/Parser/Utils.hs index 7c433c6..461808d 100644 --- a/src/parser/haskell/Parser/Utils.hs +++ b/src/parser/haskell/Parser/Utils.hs @@ -14,29 +14,29 @@ import qualified Data.Text as T import Text.HTML.TagSoup -getTagsBefore :: String -> [Tag Text] -> [Tag Text] -getTagsBefore selector = takeWhile (~/= selector) +getTagsBefore :: Text -> [Tag Text] -> [Tag Text] +getTagsBefore selector = takeWhile (~/= (T.unpack selector)) -getTagsAfter :: String -> [Tag Text] -> [Tag Text] -getTagsAfter selector = drop 1 . dropWhile (~/= selector) +getTagsAfter :: Text -> [Tag Text] -> [Tag Text] +getTagsAfter selector = drop 1 . dropWhile (~/= (T.unpack selector)) -getTagsBetween :: String -> String -> [Tag Text] -> [Tag Text] +getTagsBetween :: Text -> Text -> [Tag Text] -> [Tag Text] getTagsBetween begin end = getTagsBefore end . getTagsAfter begin -getTagAttributes :: String -> Text -> [Tag Text] -> [Text] +getTagAttributes :: Text -> Text -> [Tag Text] -> [Text] getTagAttributes selector attribute = catMaybes . fmap (maybeTagAttribute attribute) - . filter (~== selector) + . filter (~== (T.unpack selector)) -getTagAttribute :: String -> Text -> [Tag Text] -> Maybe Text +getTagAttribute :: Text -> Text -> [Tag Text] -> Maybe Text getTagAttribute selector attribute = listToMaybe . getTagAttributes selector attribute -getTagTextAfter :: String -> [Tag Text] -> Maybe Text +getTagTextAfter :: Text -> [Tag Text] -> Maybe Text getTagTextAfter selector tags = - case findIndex (~== selector) tags of + case findIndex (~== (T.unpack selector)) tags of Just index -> fmap T.strip $ safeGetAt (index + 1) tags >>= maybeTagText Nothing -> Nothing |