From aafc45eb5eed3839a5210a7d48928d975df6a296 Mon Sep 17 00:00:00 2001 From: Joris Guyonvarch Date: Sat, 11 Apr 2015 20:38:08 +0200 Subject: Handle a configuration file to save the url --- src/Parser/Detail.hs | 3 ++- src/Parser/Resume.hs | 7 ++++--- src/Parser/Utils.hs | 9 +++++---- 3 files changed, 11 insertions(+), 8 deletions(-) (limited to 'src/Parser') diff --git a/src/Parser/Detail.hs b/src/Parser/Detail.hs index b787772..4144964 100644 --- a/src/Parser/Detail.hs +++ b/src/Parser/Detail.hs @@ -2,6 +2,7 @@ module Parser.Detail ( parseDetail ) where +import Data.Text (Text) import qualified Data.Text as T import Text.HTML.TagSoup @@ -10,7 +11,7 @@ import Model.Detail import Parser.Utils -parseDetail :: T.Text -> Detail +parseDetail :: Text -> Detail parseDetail page = let tags = parseTags page descriptionTags = getTagsBetween "
" "
" tags diff --git a/src/Parser/Resume.hs b/src/Parser/Resume.hs index 6cd4415..76faca4 100644 --- a/src/Parser/Resume.hs +++ b/src/Parser/Resume.hs @@ -3,6 +3,7 @@ module Parser.Resume ) where import Data.Maybe (catMaybes) +import Data.Text (Text) import qualified Data.Text as T import Text.HTML.TagSoup @@ -11,7 +12,7 @@ import Model.Resume import Parser.Utils -parseResumes :: T.Text -> [Resume] +parseResumes :: Text -> [Resume] parseResumes page = case sections (~== "
") (parseTags page) of [] -> @@ -20,9 +21,9 @@ parseResumes page = let lbcTags = takeWhile (~/= "
") sectionTags in catMaybes . fmap parseResume $ partitions (~== "") lbcTags -parseResume :: [Tag T.Text] -> Maybe Resume +parseResume :: [Tag Text] -> Maybe Resume parseResume item = do name <- getTagTextAfter "

" item let price = getTagTextAfter "
" item url <- getTagAttribute "" (T.pack "href") item - return Resume { name = name, price = price, url = T.unpack url } + return Resume { name = name, price = price, url = url } diff --git a/src/Parser/Utils.hs b/src/Parser/Utils.hs index 8527777..16fe3d2 100644 --- a/src/Parser/Utils.hs +++ b/src/Parser/Utils.hs @@ -6,27 +6,28 @@ module Parser.Utils import Data.List (find, findIndex) import Data.Maybe (listToMaybe) +import Data.Text (Text) import qualified Data.Text as T import Text.HTML.TagSoup -getTagsBetween :: String -> String -> [Tag T.Text] -> [Tag T.Text] +getTagsBetween :: String -> String -> [Tag Text] -> [Tag Text] getTagsBetween beginSelector endSelector = takeWhile (~/= endSelector) . drop 1 . dropWhile (~/= beginSelector) -getTagAttribute :: String -> T.Text -> [Tag T.Text] -> Maybe T.Text +getTagAttribute :: String -> Text -> [Tag Text] -> Maybe Text getTagAttribute selector attribute tags = find (~== selector) tags >>= maybeTagAttribute attribute -getTagTextAfter :: String -> [Tag T.Text] -> Maybe T.Text +getTagTextAfter :: String -> [Tag Text] -> Maybe Text getTagTextAfter selector tags = case findIndex (~== selector) tags of Just index -> fmap T.strip $ safeGetAt (index + 1) tags >>= maybeTagText Nothing -> Nothing -maybeTagAttribute :: T.Text -> Tag T.Text -> Maybe T.Text +maybeTagAttribute :: Text -> Tag Text -> Maybe Text maybeTagAttribute name (TagOpen _ xs) = fmap snd . find (\(x, _) -> x == name) $ xs maybeTagAttribute attribute _ = Nothing -- cgit v1.2.3