aboutsummaryrefslogtreecommitdiff
path: root/src/Scrapper.hs
blob: 456a455396eb4a63fb2ae884a8535d316933796c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
module Scrapper (hikingsUrls, hiking, Hiking(..)) where

import qualified Data.List         as List
import qualified Data.Maybe        as Maybe
import           Data.Text         (Text)
import qualified Data.Text         as T
import           Text.HTML.TagSoup (Tag (TagOpen, TagText), (~/=), (~==))
import qualified Text.HTML.TagSoup as S

import           Hiking            (Hiking (..))

hikingsUrls :: Text -> [Text]
hikingsUrls =
  Maybe.catMaybes
  . map sectionUrl
  . S.partitions (~== ("<div class=rando-title-sansDetail>" :: String))
  . S.parseTags
  where
    sectionUrl =
      fmap (T.strip . S.fromAttrib "href")
      . Maybe.listToMaybe
      . dropWhile (~/= ("<a>" :: String))

hiking :: Text -> Hiking
hiking page =
  Hiking name description lat lng url duration distance elevation difficulty
  where
    tags = S.parseTags page

    name = textInside "<h1 itemprop=name>" "</h1" tags

    description = textInside "<p>" "</p>" . dropWhile (~/= ("<h1 itemprop=name>" :: String)) $ tags

    (lat, lng) = latAndLng (info "Départ :")

    url = S.fromAttrib "href" . head . dropWhile (~/= ("<link>" :: String)) $ tags

    duration = info "Durée moyenne:"

    distance = info "Distance :"

    elevation = T.concat [ "+", info "Dénivelé positif :", " -", info "Dénivelé négatif :" ]

    difficulty =
      case info "Difficulté :" of
        "Facile"    -> "green"
        "Moyenne"   -> "blue"
        "Difficile" -> "red"
        _           -> "gray"

    info title = nextTagText . drop 1 . dropWhile (not . tagTextContains title) $ tags

    tagTextContains title (TagText t) = title == T.strip t
    tagTextContains _ _               = False


latAndLng :: Text -> (Text, Text)
latAndLng t =
  let [ns, lat, _, oe, lng] = T.words t
      a = T.concat [ if ns == "S" then "-" else "", T.dropEnd 1 lat ]
      b = T.concat [ if oe == "O" then "-" else "", T.dropEnd 1 lng ]
  in  (a, b)

textInside :: Text -> Text -> [Tag Text] -> Text
textInside from to =
  T.unwords
  . map (T.strip)
  . T.lines
  . T.strip
  . S.innerText
  . takeWhile (~/= T.unpack to)
  . dropWhile (~/= T.unpack from)

nextTagText :: [Tag Text] -> Text
nextTagText = T.strip . S.innerText . take 1 . drop 1