diff options
author | Joris | 2018-06-19 22:49:16 +0200 |
---|---|---|
committer | Joris | 2018-06-19 22:49:16 +0200 |
commit | 149a0470b73781022e584aaeaa7ce871d6f4173b (patch) | |
tree | c1cc762e105ae19e7f3daaa3e9279a467dbaa3dc /src/lib/haskell/Parser/LeboncoinParser.hs | |
parent | 5d921c9a2b0a7a8f1a1bb5642cbefa516cbbe4cc (diff) | |
download | ad-listener-149a0470b73781022e584aaeaa7ce871d6f4173b.tar.gz ad-listener-149a0470b73781022e584aaeaa7ce871d6f4173b.tar.bz2 ad-listener-149a0470b73781022e584aaeaa7ce871d6f4173b.zip |
Add automatic tests on remote pages
Diffstat (limited to 'src/lib/haskell/Parser/LeboncoinParser.hs')
-rw-r--r-- | src/lib/haskell/Parser/LeboncoinParser.hs | 24 |
1 files changed, 24 insertions, 0 deletions
-- Reconstructed from the one-line diff of the new file
-- src/lib/haskell/Parser/LeboncoinParser.hs (mode 100644, blob 77213cb).

-- | Extraction of 'Ad' values from the HTML of a Leboncoin listing page.
module Parser.LeboncoinParser
  ( parse
  ) where

import Data.Maybe (mapMaybe)
import Data.Text (Text)
import qualified Data.Text as T
import Text.HTML.TagSoup

import Model.Ad (Ad (Ad))
import Parser.Utils

-- | Parse the HTML of a listing page into the ads it contains.
--
-- The page is tokenised with 'parseTags' and narrowed down with
-- 'getTagsBetween', using the schema.org Offer list item and the
-- information-immo_content div as the two delimiting patterns. The remaining
-- tags are split into chunks that each start at an anchor opening tag, and
-- every chunk is handed to 'parseAd'. Chunks that do not yield an ad are
-- dropped, so the result may be empty.
parse :: Text -> [Ad]
parse page =
  -- 'T.unpack' pins the tag pattern to 'String' for '~=='.
  mapMaybe parseAd $ partitions (~== (T.unpack "<a>")) tags
  where
    tags =
      getTagsBetween
        "<li itemtype=http://schema.org/Offer>"
        "<div class=information-immo_content>"
        (parseTags page)

-- | Build an 'Ad' from the tags of a single listing entry.
--
-- The title, the address and the link are mandatory: the result is 'Nothing'
-- as soon as one of those lookups fails. The price lookup is not unwrapped;
-- its result is passed to 'Ad' as-is, so a missing price does not discard the
-- ad.
parseAd :: [Tag Text] -> Maybe Ad
parseAd tags = do
  name <- getTagTextAfter "<h2 class=item_title>" tags
  location <- getTagAttribute "<meta itemprop=address>" "content" tags
  url <- getTagAttribute "<a>" "href" tags
  let price = getTagTextAfter "<h3 class=item_price>" tags
  -- NOTE(review): prefixing "https:" presumably relies on the href being
  -- protocol-relative ("//host/path") -- confirm against the live markup.
  pure (Ad name location price (T.concat ["https:", url]))