diff options
author | Joris | 2019-03-10 19:15:45 +0100 |
---|---|---|
committer | Joris | 2019-03-10 19:15:45 +0100 |
commit | 8e03a571033a5d02d2287a2b1d1edd8b57aa2462 (patch) | |
tree | 03d6965bb65322dbac03edf2d6a30ee856c197f6 /src/Scrapper.hs | |
parent | c41f16ed474376ad8a61a75d8b7f9ef543f359b4 (diff) |
Scrap result page and make a CSV of it
Diffstat (limited to 'src/Scrapper.hs')
-rw-r--r-- | src/Scrapper.hs | 75 |
1 files changed, 75 insertions, 0 deletions
diff --git a/src/Scrapper.hs b/src/Scrapper.hs new file mode 100644 index 0000000..456a455 --- /dev/null +++ b/src/Scrapper.hs @@ -0,0 +1,75 @@ +module Scrapper (hikingsUrls, hiking, Hiking(..)) where + +import qualified Data.List as List +import qualified Data.Maybe as Maybe +import Data.Text (Text) +import qualified Data.Text as T +import Text.HTML.TagSoup (Tag (TagOpen, TagText), (~/=), (~==)) +import qualified Text.HTML.TagSoup as S + +import Hiking (Hiking (..)) + +hikingsUrls :: Text -> [Text] +hikingsUrls = + Maybe.catMaybes + . map sectionUrl + . S.partitions (~== ("<div class=rando-title-sansDetail>" :: String)) + . S.parseTags + where + sectionUrl = + fmap (T.strip . S.fromAttrib "href") + . Maybe.listToMaybe + . dropWhile (~/= ("<a>" :: String)) + +hiking :: Text -> Hiking +hiking page = + Hiking name description lat lng url duration distance elevation difficulty + where + tags = S.parseTags page + + name = textInside "<h1 itemprop=name>" "</h1" tags + + description = textInside "<p>" "</p>" . dropWhile (~/= ("<h1 itemprop=name>" :: String)) $ tags + + (lat, lng) = latAndLng (info "Départ :") + + url = S.fromAttrib "href" . head . dropWhile (~/= ("<link>" :: String)) $ tags + + duration = info "Durée moyenne:" + + distance = info "Distance :" + + elevation = T.concat [ "+", info "Dénivelé positif :", " -", info "Dénivelé négatif :" ] + + difficulty = + case info "Difficulté :" of + "Facile" -> "green" + "Moyenne" -> "blue" + "Difficile" -> "red" + _ -> "gray" + + info title = nextTagText . drop 1 . dropWhile (not . tagTextContains title) $ tags + + tagTextContains title (TagText t) = title == T.strip t + tagTextContains _ _ = False + + +latAndLng :: Text -> (Text, Text) +latAndLng t = + let [ns, lat, _, oe, lng] = T.words t + a = T.concat [ if ns == "S" then "-" else "", T.dropEnd 1 lat ] + b = T.concat [ if oe == "O" then "-" else "", T.dropEnd 1 lng ] + in (a, b) + +textInside :: Text -> Text -> [Tag Text] -> Text +textInside from to = + T.unwords + . map (T.strip) + . T.lines + . T.strip + . S.innerText + . takeWhile (~/= T.unpack to) + . dropWhile (~/= T.unpack from) + +nextTagText :: [Tag Text] -> Text +nextTagText = T.strip . S.innerText . take 1 . drop 1 |