diff options
author | Joris | 2018-06-18 23:15:12 +0200 |
---|---|---|
committer | Joris | 2018-06-18 23:15:12 +0200 |
commit | 318292532a877c308403c45795d229fad659d900 (patch) | |
tree | 16e4695cc316a28bff4f0a9ae42e779ecce82a31 /src/parser/haskell | |
parent | 96bbdbbe9b22b3c3e96998cc18a3b68c9db66da9 (diff) |
Add seLoger parser
Diffstat (limited to 'src/parser/haskell')
-rw-r--r-- | src/parser/haskell/Parser/SeLogerParser.hs | 24 |
1 files changed, 24 insertions, 0 deletions
-- Source path: src/parser/haskell/Parser/SeLogerParser.hs

-- | Extraction of ads from the HTML of a SeLoger results page.
module Parser.SeLogerParser
  ( parse
  ) where

import Data.Maybe (mapMaybe)
import Data.Text (Text)
import qualified Data.Text as T
import Text.HTML.TagSoup

import Model.Ad (Ad (Ad))
import Parser.Utils

-- | Parse the HTML source of a results page into the ads it contains.
--
-- Only the tags selected by 'getTagsBetween' for the
-- @\<section class=liste_resultat\>@ and
-- @\<div class=bottomAnchorWrapper\>@ markers are considered. That tag list
-- is cut into one chunk per @\<div class=c-pa-info\>@ opening tag, and each
-- chunk is handed to 'parseAd'. Chunks for which 'parseAd' yields 'Nothing'
-- are dropped from the result.
parse :: Text -> [Ad]
parse page =
  -- 'T.unpack' turns the literal into a 'String', which is what '~==' is
  -- given as its tag pattern in this module.
  mapMaybe parseAd (partitions (~== T.unpack "<div class=c-pa-info>") resultTags)
  where
    resultTags =
      getTagsBetween
        "<section class=liste_resultat>"
        "<div class=bottomAnchorWrapper>"
        (parseTags page)

-- | Build an 'Ad' from the tags of a single result chunk.
--
-- The name, the location and the link are mandatory: if any of the three
-- lookups fails, the whole result is 'Nothing'. The price lookup is passed
-- to 'Ad' as is (without being unwrapped), so a missing price does not
-- discard the ad.
parseAd :: [Tag Text] -> Maybe Ad
parseAd tags = do
  name <- getTagTextAfter "<a>" tags
  location <- getTagTextAfter "<div class=c-pa-city>" tags
  url <- getTagAttribute "<a>" "href" tags
  let price = getTagTextAfter "<span class=c-pa-cprice>" tags
  -- NOTE(review): the href is prefixed with "https:", presumably because the
  -- page emits scheme-relative links ("//host/path") - confirm against real
  -- markup.
  pure (Ad name location price (T.concat ["https:", url]))