diff options
| | | |
|---|---|---|
| author | Joris | 2018-06-19 22:49:16 +0200 |
| committer | Joris | 2018-06-19 22:49:16 +0200 |
| commit | 149a0470b73781022e584aaeaa7ce871d6f4173b (patch) | |
| tree | c1cc762e105ae19e7f3daaa3e9279a467dbaa3dc /src/parser | |
| parent | 5d921c9a2b0a7a8f1a1bb5642cbefa516cbbe4cc (diff) | |
Add automatic tests on remote pages
Diffstat (limited to 'src/parser')
-rw-r--r-- | src/parser/haskell/Model/Ad.hs | 22 | ||||
-rw-r--r-- | src/parser/haskell/Model/URL.hs | 7 | ||||
-rw-r--r-- | src/parser/haskell/Parser/LeboncoinParser.hs | 24 | ||||
-rw-r--r-- | src/parser/haskell/Parser/OuestFranceParser.hs | 25 | ||||
-rw-r--r-- | src/parser/haskell/Parser/SeLogerParser.hs | 24 | ||||
-rw-r--r-- | src/parser/haskell/Parser/Utils.hs | 48 |
6 files changed, 0 insertions, 150 deletions
diff --git a/src/parser/haskell/Model/Ad.hs b/src/parser/haskell/Model/Ad.hs deleted file mode 100644 index 06906eb..0000000 --- a/src/parser/haskell/Model/Ad.hs +++ /dev/null @@ -1,22 +0,0 @@ -module Model.Ad - ( Ad(..) - , getNewAds - ) where - -import Data.List ((\\)) -import Data.Text (Text) - -import Model.URL (URL) - -data Ad = Ad - { name :: Text - , location :: Text - , price :: Maybe Text - , url :: URL - } deriving (Eq, Read, Show) - -getNewAds :: [URL] -> [Ad] -> ([URL], [Ad]) -getNewAds viewdURLs ads = - let newURLs = (map url ads) \\ viewdURLs - newAds = filter (\ad -> elem (url ad) newURLs) ads - in (newURLs, newAds) diff --git a/src/parser/haskell/Model/URL.hs b/src/parser/haskell/Model/URL.hs deleted file mode 100644 index 2114113..0000000 --- a/src/parser/haskell/Model/URL.hs +++ /dev/null @@ -1,7 +0,0 @@ -module Model.URL - ( URL - ) where - -import Data.Text - -type URL = Text diff --git a/src/parser/haskell/Parser/LeboncoinParser.hs b/src/parser/haskell/Parser/LeboncoinParser.hs deleted file mode 100644 index 77213cb..0000000 --- a/src/parser/haskell/Parser/LeboncoinParser.hs +++ /dev/null @@ -1,24 +0,0 @@ -module Parser.LeboncoinParser - ( parse - ) where - -import Data.Maybe (catMaybes) -import Data.Text (Text) -import qualified Data.Text as T -import Text.HTML.TagSoup - -import Model.Ad (Ad (Ad)) -import Parser.Utils - -parse :: Text -> [Ad] -parse page = - catMaybes . 
fmap parseAd $ partitions (~== (T.unpack "<a>")) tags - where tags = getTagsBetween "<li itemtype=http://schema.org/Offer>" "<div class=information-immo_content>" (parseTags page) - -parseAd :: [Tag Text] -> Maybe Ad -parseAd tags = do - name <- getTagTextAfter "<h2 class=item_title>" tags - location <- getTagAttribute "<meta itemprop=address>" "content" tags - let price = getTagTextAfter "<h3 class=item_price>" tags - url <- getTagAttribute "<a>" "href" tags - return (Ad name location price (T.concat ["https:", url])) diff --git a/src/parser/haskell/Parser/OuestFranceParser.hs b/src/parser/haskell/Parser/OuestFranceParser.hs deleted file mode 100644 index f46ed03..0000000 --- a/src/parser/haskell/Parser/OuestFranceParser.hs +++ /dev/null @@ -1,25 +0,0 @@ -module Parser.OuestFranceParser - ( parse - ) where - -import Data.Maybe (catMaybes) -import Data.Text (Text) -import qualified Data.Text as T -import Text.HTML.TagSoup - -import Model.Ad (Ad (Ad)) -import Parser.Utils - -parse :: Text -> [Ad] -parse page = - catMaybes . 
fmap parseAd $ partitions (~== (T.unpack "<a>")) tags - where tags = getTagsBetween "<div id=listAnnonces>" "<div id=interactions>" (parseTags page) - -parseAd :: [Tag Text] -> Maybe Ad -parseAd tags = do - name <- getTagTextAfter "<span class=annTitre>" tags - location <- getTagTextAfter "<span class=annVille>" tags - let price = getTagTextAfter "<span class=annPrix>" tags - let startUrl = "https://www.ouestfrance-immo.com/" - url <- getTagAttribute "<a>" "href" tags - return (Ad name location price (T.concat [startUrl, url])) diff --git a/src/parser/haskell/Parser/SeLogerParser.hs b/src/parser/haskell/Parser/SeLogerParser.hs deleted file mode 100644 index b073862..0000000 --- a/src/parser/haskell/Parser/SeLogerParser.hs +++ /dev/null @@ -1,24 +0,0 @@ -module Parser.SeLogerParser - ( parse - ) where - -import Data.Maybe (catMaybes) -import Data.Text (Text) -import qualified Data.Text as T -import Text.HTML.TagSoup - -import Model.Ad (Ad (Ad)) -import Parser.Utils - -parse :: Text -> [Ad] -parse page = - catMaybes . 
fmap parseAd $ partitions (~== (T.unpack "<div class=c-pa-info>")) tags - where tags = getTagsBetween "<section class=liste_resultat>" "<div class=bottomAnchorWrapper>" (parseTags page) - -parseAd :: [Tag Text] -> Maybe Ad -parseAd tags = do - name <- getTagTextAfter "<a>" tags - location <- getTagTextAfter "<div class=c-pa-city>" tags - let price = getTagTextAfter "<span class=c-pa-cprice>" tags - url <- getTagAttribute "<a>" "href" tags - return (Ad name location price url) diff --git a/src/parser/haskell/Parser/Utils.hs b/src/parser/haskell/Parser/Utils.hs deleted file mode 100644 index 4768327..0000000 --- a/src/parser/haskell/Parser/Utils.hs +++ /dev/null @@ -1,48 +0,0 @@ -module Parser.Utils - ( getTagsBefore - , getTagsAfter - , getTagsBetween - , getTagAttributes - , getTagAttribute - , getTagTextAfter - ) where - -import Data.List (find, findIndex) -import Data.Maybe (catMaybes, listToMaybe) -import Data.Text (Text) -import qualified Data.Text as T -import Text.HTML.TagSoup - -getTagsBefore :: Text -> [Tag Text] -> [Tag Text] -getTagsBefore selector = takeWhile (~/= (T.unpack selector)) - -getTagsAfter :: Text -> [Tag Text] -> [Tag Text] -getTagsAfter selector = drop 1 . dropWhile (~/= (T.unpack selector)) - -getTagsBetween :: Text -> Text -> [Tag Text] -> [Tag Text] -getTagsBetween begin end = getTagsBefore end . getTagsAfter begin - -getTagAttributes :: Text -> Text -> [Tag Text] -> [Text] -getTagAttributes selector attribute = - catMaybes - . fmap (maybeTagAttribute attribute) - . filter (~== (T.unpack selector)) - -getTagAttribute :: Text -> Text -> [Tag Text] -> Maybe Text -getTagAttribute selector attribute = - listToMaybe - . 
getTagAttributes selector attribute - -getTagTextAfter :: Text -> [Tag Text] -> Maybe Text -getTagTextAfter selector tags = - case findIndex (~== (T.unpack selector)) tags of - Just index -> fmap T.strip $ safeGetAt (index + 1) tags >>= maybeTagText - Nothing -> Nothing - -maybeTagAttribute :: Text -> Tag Text -> Maybe Text -maybeTagAttribute name (TagOpen _ xs) = - fmap snd . find (\(x, _) -> x == name) $ xs -maybeTagAttribute _ _ = Nothing - -safeGetAt :: Int -> [a] -> Maybe a -safeGetAt index = listToMaybe . drop index |