1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
|
module Scrapper (hikingsUrls, hiking, Hiking(..)) where
import qualified Data.List as List
import qualified Data.Maybe as Maybe
import Data.Text (Text)
import qualified Data.Text as T
import Text.HTML.TagSoup (Tag (TagOpen, TagText), (~/=), (~==))
import qualified Text.HTML.TagSoup as S
import Hiking (Hiking (..))
hikingsUrls :: Text -> [Text]
hikingsUrls =
Maybe.catMaybes
. map sectionUrl
. S.partitions (~== ("<div class=rando-title-sansDetail>" :: String))
. S.parseTags
where
sectionUrl =
fmap (T.strip . S.fromAttrib "href")
. Maybe.listToMaybe
. dropWhile (~/= ("<a>" :: String))
hiking :: Text -> Hiking
hiking page =
Hiking name description lat lng url duration distance elevation difficulty
where
tags = S.parseTags page
name = textInside "<h1 itemprop=name>" "</h1" tags
description = textInside "<p>" "</p>" . dropWhile (~/= ("<h1 itemprop=name>" :: String)) $ tags
(lat, lng) = latAndLng (info "Départ :")
url = S.fromAttrib "href" . head . dropWhile (~/= ("<link>" :: String)) $ tags
duration = info "Durée moyenne:"
distance = info "Distance :"
elevation = T.concat [ "+", info "Dénivelé positif :", " -", info "Dénivelé négatif :" ]
difficulty =
case info "Difficulté :" of
"Facile" -> "green"
"Moyenne" -> "blue"
"Difficile" -> "red"
_ -> "gray"
info title = nextTagText . drop 1 . dropWhile (not . tagTextContains title) $ tags
tagTextContains title (TagText t) = title == T.strip t
tagTextContains _ _ = False
latAndLng :: Text -> (Text, Text)
latAndLng t =
let [ns, lat, _, oe, lng] = T.words t
a = T.concat [ if ns == "S" then "-" else "", T.dropEnd 1 lat ]
b = T.concat [ if oe == "O" then "-" else "", T.dropEnd 1 lng ]
in (a, b)
textInside :: Text -> Text -> [Tag Text] -> Text
textInside from to =
T.unwords
. map (T.strip)
. T.lines
. T.strip
. S.innerText
. takeWhile (~/= T.unpack to)
. dropWhile (~/= T.unpack from)
nextTagText :: [Tag Text] -> Text
nextTagText = T.strip . S.innerText . take 1 . drop 1
|