Commit aa1b7e0f authored by Przemyslaw Kaminski

[conduit] fixes to conduit get so that num of results is returned

parent a51cb46f
......@@ -4,7 +4,7 @@ cabal-version: 1.12
--
-- see: https://github.com/sol/hpack
--
-- hash: 509e772465870fbf771b5c61338c6ae265f9eb1cd69ff040abb0b8421acb20f2
-- hash: bed5d2249aa329640cc507ae0eacab8bc26361c6b612d02152a2a0a58f57cdf0
name: crawlerHAL
version: 0.1.0.0
......@@ -25,6 +25,7 @@ source-repository head
library
exposed-modules:
ConduitTest
HAL
HAL.Client
HAL.Doc
......
......@@ -26,23 +26,25 @@ getMetadataWith q start rows = do
manager' <- newManager tlsManagerSettings
runHalAPIClient $ search (Just requestedFields) [q] Nothing start rows
-- NOTE(review): this span is a diff rendering without +/- markers or
-- indentation. Where a signature or binding appears twice, the first
-- occurrence appears to be the pre-change version and the second the
-- post-change version -- confirm against the repository.
--
-- Runs a one-row search for the query to read the response's _numFound,
-- then builds a conduit that walks page indices and fetches each page via
-- getPage. The second signature additionally returns a result count
-- (Just numResults) alongside the conduit.
getMetadataRecursively :: Text -> Maybe Int -> Maybe Integer -> IO (Either ClientError (ConduitT () Corpus IO ()))
getMetadataRecursively :: Text -> Maybe Int -> Maybe Integer -> IO (Either ClientError (Maybe Integer, ConduitT () Corpus IO ()))
getMetadataRecursively q start rows = do
-- manager' is not referenced again in the visible lines of this function.
manager' <- newManager tlsManagerSettings
-- First, estimate the total number of documents
-- (the probe request asks for start 0 and a single row).
eRes <- runHalAPIClient $ search (Just requestedFields) [q] Nothing (Just 0) (Just 1)
-- A client error in eRes is passed through unchanged; get' is applied only
-- to a successful response.
pure $ get' q start rows <$> eRes
where
-- First get' variant: returns only the conduit, with the paging values bound
-- by let inside a do block.
get' :: Text -> Maybe Int -> Maybe Integer -> Response Corpus -> ConduitT () Corpus IO ()
get' q start rows (Response { _numFound }) = do
let start' = fromMaybe 0 start
let rows' = fromMaybe _numFound rows
let numResults = rows' - (fromIntegral start')
let numPages = numResults `div` (fromIntegral batchSize) + 1
yieldMany [0..]
-- Second get' variant: returns the pair (Just numResults, conduit), with the
-- paging values moved into a where clause.
get' :: Text -> Maybe Int -> Maybe Integer -> Response Corpus -> (Maybe Integer, ConduitT () Corpus IO ())
get' q start rows (Response { _numFound }) =
( Just numResults
, yieldMany [0..]
-- Keep only the first numPages indices of the infinite [0..] source.
.| takeC (fromInteger numPages)
-- Each page index is turned into that page's documents by getPage.
.| concatMapMC (getPage q start')
-- .| mapMC printDoc
.| concatMapMC (getPage q start'))
where
-- start defaults to 0 when not supplied.
start' = fromMaybe 0 start
-- rows defaults to the _numFound field of the probe response.
rows' = fromMaybe _numFound rows
-- This is negative whenever start' exceeds rows'.
-- NOTE(review): confirm callers never pass such a combination.
numResults = rows' - (fromIntegral start')
-- Integer division plus one: when numResults is an exact multiple of
-- batchSize this still counts one extra page index.
-- NOTE(review): confirm whether that trailing page request is intended.
numPages = numResults `div` (fromIntegral batchSize) + 1
getPage :: Text -> Int -> Int -> IO [Corpus]
getPage q start pageNum = do
let offset = start + pageNum * batchSize
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment