aboutsummaryrefslogtreecommitdiff
path: root/src/Scrapper.hs
diff options
context:
space:
mode:
authorJoris2019-03-10 19:15:45 +0100
committerJoris2019-03-10 19:15:45 +0100
commit8e03a571033a5d02d2287a2b1d1edd8b57aa2462 (patch)
tree03d6965bb65322dbac03edf2d6a30ee856c197f6 /src/Scrapper.hs
parentc41f16ed474376ad8a61a75d8b7f9ef543f359b4 (diff)
Scrap result page and make a CSV of it
Diffstat (limited to 'src/Scrapper.hs')
-rw-r--r--src/Scrapper.hs75
1 files changed, 75 insertions, 0 deletions
diff --git a/src/Scrapper.hs b/src/Scrapper.hs
new file mode 100644
index 0000000..456a455
--- /dev/null
+++ b/src/Scrapper.hs
@@ -0,0 +1,75 @@
+module Scrapper (hikingsUrls, hiking, Hiking(..)) where
+
+import qualified Data.List as List
+import qualified Data.Maybe as Maybe
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.HTML.TagSoup (Tag (TagOpen, TagText), (~/=), (~==))
+import qualified Text.HTML.TagSoup as S
+
+import Hiking (Hiking (..))
+
+hikingsUrls :: Text -> [Text]
+hikingsUrls =
+ Maybe.catMaybes
+ . map sectionUrl
+ . S.partitions (~== ("<div class=rando-title-sansDetail>" :: String))
+ . S.parseTags
+ where
+ sectionUrl =
+ fmap (T.strip . S.fromAttrib "href")
+ . Maybe.listToMaybe
+ . dropWhile (~/= ("<a>" :: String))
+
+hiking :: Text -> Hiking
+hiking page =
+ Hiking name description lat lng url duration distance elevation difficulty
+ where
+ tags = S.parseTags page
+
+ name = textInside "<h1 itemprop=name>" "</h1" tags
+
+ description = textInside "<p>" "</p>" . dropWhile (~/= ("<h1 itemprop=name>" :: String)) $ tags
+
+ (lat, lng) = latAndLng (info "Départ :")
+
+ url = S.fromAttrib "href" . head . dropWhile (~/= ("<link>" :: String)) $ tags
+
+ duration = info "Durée moyenne:"
+
+ distance = info "Distance :"
+
+ elevation = T.concat [ "+", info "Dénivelé positif :", " -", info "Dénivelé négatif :" ]
+
+ difficulty =
+ case info "Difficulté :" of
+ "Facile" -> "green"
+ "Moyenne" -> "blue"
+ "Difficile" -> "red"
+ _ -> "gray"
+
+ info title = nextTagText . drop 1 . dropWhile (not . tagTextContains title) $ tags
+
+ tagTextContains title (TagText t) = title == T.strip t
+ tagTextContains _ _ = False
+
+
+latAndLng :: Text -> (Text, Text)
+latAndLng t =
+ let [ns, lat, _, oe, lng] = T.words t
+ a = T.concat [ if ns == "S" then "-" else "", T.dropEnd 1 lat ]
+ b = T.concat [ if oe == "O" then "-" else "", T.dropEnd 1 lng ]
+ in (a, b)
+
+textInside :: Text -> Text -> [Tag Text] -> Text
+textInside from to =
+ T.unwords
+ . map (T.strip)
+ . T.lines
+ . T.strip
+ . S.innerText
+ . takeWhile (~/= T.unpack to)
+ . dropWhile (~/= T.unpack from)
+
+nextTagText :: [Tag Text] -> Text
+nextTagText = T.strip . S.innerText . take 1 . drop 1