From 8e03a571033a5d02d2287a2b1d1edd8b57aa2462 Mon Sep 17 00:00:00 2001
From: Joris
Date: Sun, 10 Mar 2019 19:15:45 +0100
Subject: Scrap result page and make a CSV of it
---
src/Scrapper.hs | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 75 insertions(+)
create mode 100644 src/Scrapper.hs
(limited to 'src/Scrapper.hs')
diff --git a/src/Scrapper.hs b/src/Scrapper.hs
new file mode 100644
index 0000000..456a455
--- /dev/null
+++ b/src/Scrapper.hs
@@ -0,0 +1,75 @@
+module Scrapper (hikingsUrls, hiking, Hiking(..)) where
+
+import qualified Data.List as List
+import qualified Data.Maybe as Maybe
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.HTML.TagSoup (Tag (TagOpen, TagText), (~/=), (~==))
+import qualified Text.HTML.TagSoup as S
+
+import Hiking (Hiking (..))
+
+hikingsUrls :: Text -> [Text]
+hikingsUrls =
+ Maybe.catMaybes
+ . map sectionUrl
+ . S.partitions (~== ("
" :: String))
+ . S.parseTags
+ where
+ sectionUrl =
+ fmap (T.strip . S.fromAttrib "href")
+ . Maybe.listToMaybe
+ . dropWhile (~/= ("
" :: String))
+
+hiking :: Text -> Hiking
+hiking page =
+ Hiking name description lat lng url duration distance elevation difficulty
+ where
+ tags = S.parseTags page
+
+ name = textInside "" "
" "" . dropWhile (~/= ("" :: String)) $ tags
+
+ (lat, lng) = latAndLng (info "Départ :")
+
+ url = S.fromAttrib "href" . head . dropWhile (~/= ("" :: String)) $ tags
+
+ duration = info "Durée moyenne:"
+
+ distance = info "Distance :"
+
+ elevation = T.concat [ "+", info "Dénivelé positif :", " -", info "Dénivelé négatif :" ]
+
+ difficulty =
+ case info "Difficulté :" of
+ "Facile" -> "green"
+ "Moyenne" -> "blue"
+ "Difficile" -> "red"
+ _ -> "gray"
+
+ info title = nextTagText . drop 1 . dropWhile (not . tagTextContains title) $ tags
+
+ tagTextContains title (TagText t) = title == T.strip t
+ tagTextContains _ _ = False
+
+
+latAndLng :: Text -> (Text, Text)
+latAndLng t =
+ let [ns, lat, _, oe, lng] = T.words t
+ a = T.concat [ if ns == "S" then "-" else "", T.dropEnd 1 lat ]
+ b = T.concat [ if oe == "O" then "-" else "", T.dropEnd 1 lng ]
+ in (a, b)
+
+textInside :: Text -> Text -> [Tag Text] -> Text
+textInside from to =
+ T.unwords
+ . map (T.strip)
+ . T.lines
+ . T.strip
+ . S.innerText
+ . takeWhile (~/= T.unpack to)
+ . dropWhile (~/= T.unpack from)
+
+nextTagText :: [Tag Text] -> Text
+nextTagText = T.strip . S.innerText . take 1 . drop 1
--
cgit v1.2.3