Commit 06eedee4 authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Experiment with request logging

parent fe4e032c
cabal-version: 1.12
-- This file has been generated from package.yaml by hpack version 0.35.0.
-- This file has been generated from package.yaml by hpack version 0.35.2.
--
-- see: https://github.com/sol/hpack
--
-- hash: d5f6b9788433a3873efd55684dfd55f4d85380f24209a6a8eb8ba80c82a330f8
-- hash: 8e1f14ffe78d88b3a3011e489d4b883c7c81039249d2334d78d7dbcc155816f6
name: crawlerHAL
version: 0.1.0.0
......@@ -53,6 +53,7 @@ library
, http-client
, http-client-tls
, lens
, mtl
, neat-interpolation
, optparse-applicative
, scientific
......@@ -89,6 +90,7 @@ executable crawlerHAL-exe
, http-client
, http-client-tls
, lens
, mtl
, neat-interpolation
, optparse-applicative
, scientific
......@@ -126,6 +128,7 @@ test-suite halCrawler-test
, http-client
, http-client-tls
, lens
, mtl
, neat-interpolation
, optparse-applicative
, scientific
......
......@@ -30,6 +30,7 @@ dependencies:
- http-client-tls
- lens
- neat-interpolation
- mtl
- optparse-applicative
- scientific
- servant
......
......@@ -4,24 +4,21 @@ module HAL where
import Conduit
import Control.Monad
import Control.Monad.Reader
import Data.Aeson
import Data.Default (def)
import Data.Maybe (fromMaybe)
import Data.Either (fromRight)
import Data.Maybe (fromMaybe)
import Data.Text
import Network.HTTP.Client (newManager)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import Servant.Client (BaseUrl(..), Scheme(..), ClientM, ClientError, runClientM, mkClientEnv)
import HAL.Client
import HAL.Doc.Corpus
import HAL.Doc.Struct
import Network.HTTP.Client (newManager, Request)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import Prelude
import Servant.API
import Data.Aeson
import Servant.Client (BaseUrl(..), Scheme(..), ClientM, ClientError, runClientM, mkClientEnv, ClientEnv (makeClientRequest))
import System.IO.Unsafe (unsafePerformIO)
data HalCrawlerOptions
= HalCrawlerOptions
......@@ -64,23 +61,22 @@ getMetadataWithOptionsC opts@HalCrawlerOptions{..} q mb_offset mb_limit = do
numResults = limit - offset
numPages = numResults `div` _hco_batchSize + 1
debugLog :: String -> IO ()
debugLog msg = when _hco_debugLogs $ putStrLn msg
getPage :: Text -> Int -> Int -> IO [Corpus]
getPage q start pageNum = do
let offset = start + pageNum * _hco_batchSize
debugLog $ "[getMetadataWithC] getPage: " <> show offset
eRes <- runHalAPIClient $ search (Just requestedFields) [q] Nothing (Just offset) (Just _hco_batchSize)
debugLog opts $ "[getMetadataWithC] getPage: " <> show offset
eRes <- runHalAPIClient opts $ search (Just requestedFields) [q] Nothing (Just offset) (Just _hco_batchSize)
pure $ case eRes of
Left _ -> []
Right (Response { _docs }) -> _docs
printDoc :: Corpus -> IO Corpus
printDoc c@(Corpus { _corpus_docid, _corpus_title }) = do
debugLog $ show _corpus_title
debugLog opts $ show _corpus_title
pure c
debugLog :: HalCrawlerOptions -> String -> IO ()
debugLog HalCrawlerOptions{..} msg = when _hco_debugLogs $ putStrLn msg
getMetadataWith :: Text
-- ^ The query, as a text.
......@@ -91,7 +87,7 @@ getMetadataWith :: Text
-> IO (Either ClientError (Response Corpus))
getMetadataWith q start rows = do
manager' <- newManager tlsManagerSettings
runHalAPIClient $ search (Just requestedFields) [q] Nothing start rows
runHalAPIClient defaultHalOptions $ search (Just requestedFields) [q] Nothing start rows
getMetadataWithC :: Text
-- ^ The textual query
......@@ -107,7 +103,7 @@ countResults :: Text -> IO (Either ClientError Int)
countResults q = do
manager' <- newManager tlsManagerSettings
-- First, estimate the total number of documents
eRes <- runHalAPIClient $ search (Just requestedFields) [q] Nothing (Just 0) (Just 1) :: IO (Either ClientError (Response Corpus))
eRes <- runHalAPIClient defaultHalOptions $ search (Just requestedFields) [q] Nothing (Just 0) (Just 1) :: IO (Either ClientError (Response Corpus))
pure $ _numFound <$> eRes
requestedFields :: Text
......@@ -116,19 +112,31 @@ requestedFields = "docid,title_s,abstract_s,submittedDate_s,source_s,authFullNam
structFields :: Text
structFields = "docid,label_s,parentDocid_i"
runHalAPIClient :: (FromJSON doc, ToHttpApiData doc) =>
ClientM (Response doc) -> IO (Either ClientError (Response doc))
runHalAPIClient cmd = do
requestLog :: HalCrawlerOptions -> Request -> Request
requestLog opts rq = unsafePerformIO $ do
debugLog opts $ "[HAL.makeClientRequestLog] " <> show rq
pure rq
{-# NOINLINE requestLog #-}
runHalAPIClient :: (FromJSON doc, ToHttpApiData doc)
=> HalCrawlerOptions
-> ClientM (Response doc)
-> IO (Either ClientError (Response doc))
runHalAPIClient opts cmd = do
manager' <- newManager tlsManagerSettings
runClientM cmd (mkClientEnv manager' $ BaseUrl Https "api.archives-ouvertes.fr" 443 "")
runClientM cmd' (mkClientEnv manager' $ BaseUrl Https "api.archives-ouvertes.fr" 443 "")
where
cmd' = local (\r -> r {
makeClientRequest = \bUrl servantRq -> requestLog opts ((makeClientRequest r) bUrl servantRq)
}) cmd
runStructureRequest :: Maybe Text -> IO (Either ClientError (Response Struct))
runStructureRequest rq =
runHalAPIClient $ structure (Just structFields) rq (Just 10000)
runHalAPIClient defaultHalOptions $ structure (Just structFields) rq (Just 10000)
runSearchRequest :: [Text] -> IO (Either ClientError (Response Corpus))
runSearchRequest rq =
runHalAPIClient $ search (Just requestedFields) rq Nothing Nothing Nothing
runHalAPIClient defaultHalOptions $ search (Just requestedFields) rq Nothing Nothing Nothing
generateRequestByStructID :: Text -> [Text] -> Text
generateRequestByStructID rq struct_ids =
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment