From 5cedcecd6ae31e2485dcab2ddd74c74a4779545d Mon Sep 17 00:00:00 2001 From: Joris Date: Tue, 3 Sep 2019 21:01:53 +0200 Subject: Make LBC to work Use request headers to simulate a normal browser --- src/lib/haskell/Utils/HTTP.hs | 52 ++++++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 13 deletions(-) (limited to 'src/lib/haskell/Utils/HTTP.hs') diff --git a/src/lib/haskell/Utils/HTTP.hs b/src/lib/haskell/Utils/HTTP.hs index 87635ce..9bcf5f0 100644 --- a/src/lib/haskell/Utils/HTTP.hs +++ b/src/lib/haskell/Utils/HTTP.hs @@ -2,21 +2,47 @@ module Utils.HTTP ( get ) where -import Control.Exception (SomeException, try) -import Data.ByteString (ByteString) -import qualified Data.ByteString.Lazy as BS -import Data.Text (Text) -import qualified Data.Text as T -import Network.HTTP.Conduit +import qualified Data.ByteString.Lazy as BS +import Data.Text (Text) +import qualified Data.Text as T +import Data.Text.Encoding as T +import Data.Text.IO as T +import Network.HTTP.Conduit (Manager) +import qualified Network.HTTP.Conduit as H +import qualified Network.HTTP.Simple as HS +import qualified Network.HTTP.Types.Status as Status import Model.URL -get :: (ByteString -> Text) -> URL -> IO (Either Text Text) -get decode url = mapLeft (T.pack . show) <$> (try (unsafeGetPage decode url) :: IO (Either SomeException Text)) +get :: Manager -> URL -> IO (Either Text Text) +get manager url = do + request <- H.parseRequest (T.unpack url) -unsafeGetPage :: (ByteString -> Text) -> URL -> IO Text -unsafeGetPage decode url = (decode . BS.toStrict) <$> simpleHttp (T.unpack url) + response <- H.httpLbs (HS.setRequestHeaders requestHeaders request) manager + let body = T.decodeUtf8 . BS.toStrict . H.responseBody $ response + let statusCode = Status.statusCode . H.responseStatus $ response -mapLeft :: (a -> c) -> Either a b -> Either c b -mapLeft f (Left l) = Left (f l) -mapLeft _ (Right r) = (Right r) + if statusCode >= 200 && statusCode < 300 then + return . Right $ body + else do + T.putStrLn . T.concat $ + [ "Got status " + , T.pack . show $ statusCode + , " while fetching " + , url + , ":\n" + , body + ] + return . Left $ body + + where + requestHeaders = + [ ("User-Agent", "Mozilla/5.0 (X11; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0") + , ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") + , ("Accept-Language", "en-US,en;fr;q=0.5") + , ("Accept-Encoding", "gzip, deflate, br") + , ("Referer", "https://duckduckgo.com/") + , ("DNT", "1") + , ("Connection", "keep-alive") + , ("Upgrade-Insecure-Requests", "1") + ] -- cgit v1.2.3