Commit 36424845 authored by Przemyslaw Kaminski

Fix incorrect count report

For small counts, this reported the wrong number of documents (the batch size
instead of the small count).
parent 9a434702
...@@ -30,18 +30,18 @@ getMetadataWithC :: Text -> Maybe Int -> Maybe Integer -> IO (Either ClientError ...@@ -30,18 +30,18 @@ getMetadataWithC :: Text -> Maybe Int -> Maybe Integer -> IO (Either ClientError
getMetadataWithC q start rows = do getMetadataWithC q start rows = do
manager' <- newManager tlsManagerSettings manager' <- newManager tlsManagerSettings
-- First, estimate the total number of documents -- First, estimate the total number of documents
eRes <- runHalAPIClient $ search (Just requestedFields) [q] Nothing (Just 0) (Just 1) eCount <- countResults q
pure $ get' q start rows <$> eRes pure $ get' q start rows <$> eCount
where where
get' :: Text -> Maybe Int -> Maybe Integer -> Response Corpus -> (Maybe Integer, ConduitT () Corpus IO ()) get' :: Text -> Maybe Int -> Maybe Integer -> Integer -> (Maybe Integer, ConduitT () Corpus IO ())
get' q start rows (Response { _numFound }) = get' q start rows numFound =
( Just numResults ( Just numResults
, yieldMany [0..] , yieldMany [0..]
.| takeC (fromInteger numPages) .| takeC (fromInteger numPages)
.| concatMapMC (getPage q start')) .| concatMapMC (getPage q start'))
where where
start' = fromMaybe 0 start start' = fromMaybe 0 start
rows' = fromMaybe _numFound rows rows' = min numFound $ fromMaybe numFound rows
numResults = rows' - (fromIntegral start') numResults = rows' - (fromIntegral start')
numPages = numResults `div` (fromIntegral batchSize) + 1 numPages = numResults `div` (fromIntegral batchSize) + 1
...@@ -61,11 +61,8 @@ countResults :: Text -> IO (Either ClientError Integer) ...@@ -61,11 +61,8 @@ countResults :: Text -> IO (Either ClientError Integer)
countResults q = do countResults q = do
manager' <- newManager tlsManagerSettings manager' <- newManager tlsManagerSettings
-- First, estimate the total number of documents -- First, estimate the total number of documents
eRes <- runHalAPIClient $ search (Just requestedFields) [q] Nothing (Just 0) (Just 1) eRes <- runHalAPIClient $ search (Just requestedFields) [q] Nothing (Just 0) (Just 1) :: IO (Either ClientError (Response Corpus))
pure $ count' <$> eRes pure $ _numFound <$> eRes
where
count' :: Response Corpus -> Integer
count' (Response { _numFound }) = _numFound
requestedFields :: Text requestedFields :: Text
requestedFields = "docid,title_s,abstract_s,submittedDate_s,source_s,authFullName_s,authOrganism_s" requestedFields = "docid,title_s,abstract_s,submittedDate_s,source_s,authFullName_s,authOrganism_s"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment