Commit aa1b7e0f authored by Przemyslaw Kaminski

[conduit] fixes to conduit get so that num of results is returned

parent a51cb46f
...@@ -4,7 +4,7 @@ cabal-version: 1.12 ...@@ -4,7 +4,7 @@ cabal-version: 1.12
-- --
-- see: https://github.com/sol/hpack -- see: https://github.com/sol/hpack
-- --
-- hash: 509e772465870fbf771b5c61338c6ae265f9eb1cd69ff040abb0b8421acb20f2 -- hash: bed5d2249aa329640cc507ae0eacab8bc26361c6b612d02152a2a0a58f57cdf0
name: crawlerHAL name: crawlerHAL
version: 0.1.0.0 version: 0.1.0.0
...@@ -25,6 +25,7 @@ source-repository head ...@@ -25,6 +25,7 @@ source-repository head
library library
exposed-modules: exposed-modules:
ConduitTest
HAL HAL
HAL.Client HAL.Client
HAL.Doc HAL.Doc
......
...@@ -26,23 +26,25 @@ getMetadataWith q start rows = do ...@@ -26,23 +26,25 @@ getMetadataWith q start rows = do
manager' <- newManager tlsManagerSettings manager' <- newManager tlsManagerSettings
runHalAPIClient $ search (Just requestedFields) [q] Nothing start rows runHalAPIClient $ search (Just requestedFields) [q] Nothing start rows
getMetadataRecursively :: Text -> Maybe Int -> Maybe Integer -> IO (Either ClientError (ConduitT () Corpus IO ())) getMetadataRecursively :: Text -> Maybe Int -> Maybe Integer -> IO (Either ClientError (Maybe Integer, ConduitT () Corpus IO ()))
getMetadataRecursively q start rows = do getMetadataRecursively q start rows = do
manager' <- newManager tlsManagerSettings manager' <- newManager tlsManagerSettings
-- First, estimate the total number of documents -- First, estimate the total number of documents
eRes <- runHalAPIClient $ search (Just requestedFields) [q] Nothing (Just 0) (Just 1) eRes <- runHalAPIClient $ search (Just requestedFields) [q] Nothing (Just 0) (Just 1)
pure $ get' q start rows <$> eRes pure $ get' q start rows <$> eRes
where where
get' :: Text -> Maybe Int -> Maybe Integer -> Response Corpus -> ConduitT () Corpus IO () get' :: Text -> Maybe Int -> Maybe Integer -> Response Corpus -> (Maybe Integer, ConduitT () Corpus IO ())
get' q start rows (Response { _numFound }) = do get' q start rows (Response { _numFound }) =
let start' = fromMaybe 0 start ( Just numResults
let rows' = fromMaybe _numFound rows , yieldMany [0..]
let numResults = rows' - (fromIntegral start')
let numPages = numResults `div` (fromIntegral batchSize) + 1
yieldMany [0..]
.| takeC (fromInteger numPages) .| takeC (fromInteger numPages)
.| concatMapMC (getPage q start') .| concatMapMC (getPage q start'))
-- .| mapMC printDoc where
start' = fromMaybe 0 start
rows' = fromMaybe _numFound rows
numResults = rows' - (fromIntegral start')
numPages = numResults `div` (fromIntegral batchSize) + 1
getPage :: Text -> Int -> Int -> IO [Corpus] getPage :: Text -> Int -> Int -> IO [Corpus]
getPage q start pageNum = do getPage q start pageNum = do
let offset = start + pageNum * batchSize let offset = start + pageNum * batchSize
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment