aboutsummaryrefslogtreecommitdiff
path: root/src/parser/haskell/Parser
diff options
context:
space:
mode:
author Joris 2018-06-17 23:24:47 +0200
committer Joris 2018-06-18 11:13:55 +0200
commit 0a4d3c8f12dc5797a919a00b6bcaf759947687cc (patch)
tree bcb89781e22c2314bf0c064ebb37cb7f8a362f5c /src/parser/haskell/Parser
parent e2a5c7c5c596d057b6fa9c08a8204ce1429cfdc4 (diff)
Add ouest france parser
Diffstat (limited to 'src/parser/haskell/Parser')
-rw-r--r-- src/parser/haskell/Parser/LeboncoinParser.hs 24
-rw-r--r-- src/parser/haskell/Parser/OuestFranceParser.hs 27
-rw-r--r-- src/parser/haskell/Parser/Utils.hs 49
3 files changed, 100 insertions, 0 deletions
diff --git a/src/parser/haskell/Parser/LeboncoinParser.hs b/src/parser/haskell/Parser/LeboncoinParser.hs
new file mode 100644
index 0000000..48eb80f
--- /dev/null
+++ b/src/parser/haskell/Parser/LeboncoinParser.hs
@@ -0,0 +1,24 @@
+module Parser.LeboncoinParser
+ ( parse
+ ) where
+
+import Data.Maybe (catMaybes)
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.HTML.TagSoup
+
+import Model.Ad (Ad (Ad))
+import Parser.Utils
+
+-- | Extract every ad that can be fully parsed from the HTML of a listing page.
+parse :: Text -> [Ad]
+parse = catMaybes . map parseAd . partitions (~== "<a>") . listing . parseTags
+  where listing = getTagsBetween "<li itemtype=http://schema.org/Offer>" "<div class=information-immo_content>"
+
+-- | Build an 'Ad' from one listing's tags; 'Nothing' unless title, address and link are all present.
+parseAd :: [Tag Text] -> Maybe Ad
+parseAd tags = Ad <$> name <*> location <*> pure price <*> fmap (T.append (T.pack "https:")) link
+  where name = getTagTextAfter "<h2 class=item_title>" tags
+        location = getTagAttribute "<meta itemprop=address>" (T.pack "content") tags
+        price = getTagTextAfter "<h3 class=item_price>" tags
+        link = getTagAttribute "<a>" (T.pack "href") tags
diff --git a/src/parser/haskell/Parser/OuestFranceParser.hs b/src/parser/haskell/Parser/OuestFranceParser.hs
new file mode 100644
index 0000000..a7b6360
--- /dev/null
+++ b/src/parser/haskell/Parser/OuestFranceParser.hs
@@ -0,0 +1,27 @@
+module Parser.OuestFranceParser
+ ( parse
+ ) where
+
+import Data.Maybe (catMaybes)
+import Data.Text (Text)
+import qualified Data.Text as T
+
+import Text.HTML.TagSoup
+
+import Model.Ad (Ad (Ad))
+
+import Parser.Utils
+
+-- | Extract every ad that can be fully parsed from the HTML of a listing page.
+parse :: Text -> [Ad]
+parse = catMaybes . map parseAd . partitions (~== "<a>") . listing . parseTags
+  where listing = getTagsBetween "<div id=listAnnonces>" "<div id=interactions>"
+
+-- | Build an 'Ad' from one listing's tags; 'Nothing' unless title, location and link are all present.
+parseAd :: [Tag Text] -> Maybe Ad
+parseAd tags = Ad <$> name <*> location <*> pure price <*> fmap (T.append startUrl) link
+  where name = getTagTextAfter "<span class=annTitre>" tags
+        location = getTagTextAfter "<span class=annVille>" tags
+        price = getTagTextAfter "<span class=annPrix>" tags
+        startUrl = T.pack "https://www.ouestfrance-immo.com/"
+        link = getTagAttribute "<a>" (T.pack "href") tags
diff --git a/src/parser/haskell/Parser/Utils.hs b/src/parser/haskell/Parser/Utils.hs
new file mode 100644
index 0000000..7c433c6
--- /dev/null
+++ b/src/parser/haskell/Parser/Utils.hs
@@ -0,0 +1,49 @@
+module Parser.Utils
+ ( getTagsBefore
+ , getTagsAfter
+ , getTagsBetween
+ , getTagAttributes
+ , getTagAttribute
+ , getTagTextAfter
+ ) where
+
+import Data.List (find, findIndex)
+import Data.Maybe (catMaybes, listToMaybe)
+import Data.Text (Text)
+import qualified Data.Text as T
+
+import Text.HTML.TagSoup
+
+getTagsBefore :: String -> [Tag Text] -> [Tag Text] -- ^ tags preceding the first match of the selector
+getTagsBefore selector tags = fst (break (~== selector) tags)
+
+getTagsAfter :: String -> [Tag Text] -> [Tag Text] -- ^ tags following the first match (match itself excluded)
+getTagsAfter selector tags = drop 1 (snd (break (~== selector) tags))
+
+getTagsBetween :: String -> String -> [Tag Text] -> [Tag Text] -- ^ tags after the first @begin@ match, up to the next @end@ match
+getTagsBetween begin end tags = getTagsBefore end (getTagsAfter begin tags)
+
+-- | Values of @attribute@ on every tag matching @selector@, in list order (tags lacking it are skipped).
+getTagAttributes :: String -> Text -> [Tag Text] -> [Text]
+getTagAttributes selector attribute tags =
+  [ value | tag <- tags, tag ~== selector
+          , Just value <- [maybeTagAttribute attribute tag] ]
+
+-- | Value of @attribute@ on the first tag matching @selector@ that carries it, if any.
+getTagAttribute :: String -> Text -> [Tag Text] -> Maybe Text
+getTagAttribute selector attribute tags =
+  listToMaybe (getTagAttributes selector attribute tags)
+
+-- | Stripped text of the tag right after the first match of @selector@; 'Nothing' if no match or no text there.
+getTagTextAfter :: String -> [Tag Text] -> Maybe Text
+getTagTextAfter selector tags = do
+  index <- findIndex (~== selector) tags
+  T.strip <$> (safeGetAt (index + 1) tags >>= maybeTagText)
+
+-- | Value of the attribute @name@ on an opening tag; 'Nothing' for other tags or when it is absent.
+maybeTagAttribute :: Text -> Tag Text -> Maybe Text
+maybeTagAttribute name (TagOpen _ attributes) = lookup name attributes
+maybeTagAttribute _ _ = Nothing
+
+safeGetAt :: Int -> [a] -> Maybe a -- ^ element at the 0-based index; 'Nothing' if it is negative or past the end
+safeGetAt index xs = if index < 0 then Nothing else listToMaybe (drop index xs)