From 8e03a571033a5d02d2287a2b1d1edd8b57aa2462 Mon Sep 17 00:00:00 2001 From: Joris Date: Sun, 10 Mar 2019 19:15:45 +0100 Subject: Scrap result page and make a CSV of it --- src/Scrapper.hs | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 src/Scrapper.hs (limited to 'src/Scrapper.hs') diff --git a/src/Scrapper.hs b/src/Scrapper.hs new file mode 100644 index 0000000..456a455 --- /dev/null +++ b/src/Scrapper.hs @@ -0,0 +1,75 @@ +module Scrapper (hikingsUrls, hiking, Hiking(..)) where + +import qualified Data.List as List +import qualified Data.Maybe as Maybe +import Data.Text (Text) +import qualified Data.Text as T +import Text.HTML.TagSoup (Tag (TagOpen, TagText), (~/=), (~==)) +import qualified Text.HTML.TagSoup as S + +import Hiking (Hiking (..)) + +hikingsUrls :: Text -> [Text] +hikingsUrls = + Maybe.catMaybes + . map sectionUrl + . S.partitions (~== ("
" :: String)) + . S.parseTags + where + sectionUrl = + fmap (T.strip . S.fromAttrib "href") + . Maybe.listToMaybe + . dropWhile (~/= ("" :: String)) + +hiking :: Text -> Hiking +hiking page = + Hiking name description lat lng url duration distance elevation difficulty + where + tags = S.parseTags page + + name = textInside "

" "" "

" . dropWhile (~/= ("

" :: String)) $ tags + + (lat, lng) = latAndLng (info "Départ :") + + url = S.fromAttrib "href" . head . dropWhile (~/= ("" :: String)) $ tags + + duration = info "Durée moyenne:" + + distance = info "Distance :" + + elevation = T.concat [ "+", info "Dénivelé positif :", " -", info "Dénivelé négatif :" ] + + difficulty = + case info "Difficulté :" of + "Facile" -> "green" + "Moyenne" -> "blue" + "Difficile" -> "red" + _ -> "gray" + + info title = nextTagText . drop 1 . dropWhile (not . tagTextContains title) $ tags + + tagTextContains title (TagText t) = title == T.strip t + tagTextContains _ _ = False + + +latAndLng :: Text -> (Text, Text) +latAndLng t = + let [ns, lat, _, oe, lng] = T.words t + a = T.concat [ if ns == "S" then "-" else "", T.dropEnd 1 lat ] + b = T.concat [ if oe == "O" then "-" else "", T.dropEnd 1 lng ] + in (a, b) + +textInside :: Text -> Text -> [Tag Text] -> Text +textInside from to = + T.unwords + . map (T.strip) + . T.lines + . T.strip + . S.innerText + . takeWhile (~/= T.unpack to) + . dropWhile (~/= T.unpack from) + +nextTagText :: [Tag Text] -> Text +nextTagText = T.strip . S.innerText . take 1 . drop 1 -- cgit v1.2.3