aboutsummaryrefslogtreecommitdiff
path: root/src/lib/haskell/Parser
diff options
context:
space:
mode:
author Joris 2019-09-03 21:01:53 +0200
committer Joris 2019-09-03 21:04:40 +0200
commit 5cedcecd6ae31e2485dcab2ddd74c74a4779545d (patch)
tree bb54b8f1bbf1f5baaf94c28e4443fb17321d5fc7 /src/lib/haskell/Parser
parent 8d14cb80170a8a15a0ced13bc7fe5cd16b908974 (diff)
Make LBC work
Use request headers to simulate a normal browser
Diffstat (limited to 'src/lib/haskell/Parser')
-rw-r--r-- src/lib/haskell/Parser/LeboncoinParser.hs 17
1 file changed, 11 insertions, 6 deletions
diff --git a/src/lib/haskell/Parser/LeboncoinParser.hs b/src/lib/haskell/Parser/LeboncoinParser.hs
index 77213cb..99d8116 100644
--- a/src/lib/haskell/Parser/LeboncoinParser.hs
+++ b/src/lib/haskell/Parser/LeboncoinParser.hs
@@ -11,14 +11,19 @@ import Model.Ad (Ad (Ad))
import Parser.Utils
parse :: Text -> [Ad]
-parse page =
- catMaybes . fmap parseAd $ partitions (~== (T.unpack "<a>")) tags
- where tags = getTagsBetween "<li itemtype=http://schema.org/Offer>" "<div class=information-immo_content>" (parseTags page)
+parse =
+ catMaybes
+ . fmap parseAd
+ . partitions (~== (T.unpack "<li>"))
+ . parseTags
parseAd :: [Tag Text] -> Maybe Ad
parseAd tags = do
- name <- getTagTextAfter "<h2 class=item_title>" tags
- location <- getTagAttribute "<meta itemprop=address>" "content" tags
- let price = getTagTextAfter "<h3 class=item_price>" tags
+ name <- getTagTextAfter "<span data-qa-id=aditem_title>" tags
+ location <- getTagTextAfter "<p data-qa-id=aditem_location>" tags
+ let price =
+ case getTagsBetween "<span itemprop=priceCurrency>" "</span>" tags of
+ [] -> Nothing
+ xs -> Just $ innerText xs
url <- getTagAttribute "<a>" "href" tags
return (Ad name location price (T.concat ["https:", url]))