diff options
author | Joris | 2019-09-03 21:01:53 +0200 |
---|---|---|
committer | Joris | 2019-09-03 21:04:40 +0200 |
commit | 5cedcecd6ae31e2485dcab2ddd74c74a4779545d (patch) | |
tree | bb54b8f1bbf1f5baaf94c28e4443fb17321d5fc7 /src/lib/haskell/Parser | |
parent | 8d14cb80170a8a15a0ced13bc7fe5cd16b908974 (diff) |
Make LBC work
Use request headers to simulate a normal browser
Diffstat (limited to 'src/lib/haskell/Parser')
-rw-r--r-- | src/lib/haskell/Parser/LeboncoinParser.hs | 17 |
1 file changed, 11 insertions, 6 deletions
```diff
diff --git a/src/lib/haskell/Parser/LeboncoinParser.hs b/src/lib/haskell/Parser/LeboncoinParser.hs
index 77213cb..99d8116 100644
--- a/src/lib/haskell/Parser/LeboncoinParser.hs
+++ b/src/lib/haskell/Parser/LeboncoinParser.hs
@@ -11,14 +11,19 @@ import Model.Ad (Ad (Ad))
 import Parser.Utils
 
 parse :: Text -> [Ad]
-parse page =
-  catMaybes . fmap parseAd $ partitions (~== (T.unpack "<a>")) tags
-  where tags = getTagsBetween "<li itemtype=http://schema.org/Offer>" "<div class=information-immo_content>" (parseTags page)
+parse =
+  catMaybes
+  . fmap parseAd
+  . partitions (~== (T.unpack "<li>"))
+  . parseTags
 
 parseAd :: [Tag Text] -> Maybe Ad
 parseAd tags = do
-  name <- getTagTextAfter "<h2 class=item_title>" tags
-  location <- getTagAttribute "<meta itemprop=address>" "content" tags
-  let price = getTagTextAfter "<h3 class=item_price>" tags
+  name <- getTagTextAfter "<span data-qa-id=aditem_title>" tags
+  location <- getTagTextAfter "<p data-qa-id=aditem_location>" tags
+  let price =
+        case getTagsBetween "<span itemprop=priceCurrency>" "</span>" tags of
+          [] -> Nothing
+          xs -> Just $ innerText xs
   url <- getTagAttribute "<a>" "href" tags
   return (Ad name location price (T.concat ["https:", url]))
```