Commit e6836bb7 authored by Przemyslaw Kaminski

add test case, implement docs count query

parent 3bf77f28
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes #-}
module Main where module Main where
import NeatInterpolation (text)
import Network.HTTP.Client (newManager) import Network.HTTP.Client (newManager)
import Network.HTTP.Client.TLS (tlsManagerSettings) import Network.HTTP.Client.TLS (tlsManagerSettings)
import Servant.Client import Servant.Client
...@@ -13,6 +15,32 @@ import HAL ...@@ -13,6 +15,32 @@ import HAL
import Tree import Tree
import qualified Data.Text as T import qualified Data.Text as T
-- | Sample HAL search query used as a test case.
--
-- The query is a conjunction of three clauses: a @language_t@ filter, a
-- disjunction of @producedDateY_i@ values, and a disjunction of
-- @structId_i@ values.
--
-- NOTE(review): the year list skips 2019 and 2020 -- confirm this gap is
-- intentional for the test case.
yearReq :: T.Text
yearReq = [text|
(language_t:en)
AND (producedDateY_i:2015
OR producedDateY_i:2016
OR producedDateY_i:2017
OR producedDateY_i:2018
OR producedDateY_i:2021
OR producedDateY_i:2022)
AND (structId_i:302102
OR structId_i:469216
OR structId_i:6279
OR structId_i:224096
OR structId_i:144103
OR structId_i:497330
OR structId_i:1076752
OR structId_i:84538
OR structId_i:301262
OR structId_i:481355
OR structId_i:29212
OR structId_i:301442
OR structId_i:542824
OR structId_i:300362
OR structId_i:1048346
OR structId_i:352124)|]
imt :: [T.Text] imt :: [T.Text]
imt = [ imt = [
"224096" "224096"
......
...@@ -4,7 +4,7 @@ cabal-version: 1.12 ...@@ -4,7 +4,7 @@ cabal-version: 1.12
-- --
-- see: https://github.com/sol/hpack -- see: https://github.com/sol/hpack
-- --
-- hash: 509e772465870fbf771b5c61338c6ae265f9eb1cd69ff040abb0b8421acb20f2 -- hash: 69a4bea2da13bc040b9bc7cb24f4f1ec8fcdfbd22873422aba7b7bb962810628
name: crawlerHAL name: crawlerHAL
version: 0.1.0.0 version: 0.1.0.0
...@@ -25,6 +25,7 @@ source-repository head ...@@ -25,6 +25,7 @@ source-repository head
library library
exposed-modules: exposed-modules:
ConduitTest
HAL HAL
HAL.Client HAL.Client
HAL.Doc HAL.Doc
...@@ -53,6 +54,7 @@ library ...@@ -53,6 +54,7 @@ library
, http-client , http-client
, http-client-tls , http-client-tls
, lens , lens
, neat-interpolation
, scientific , scientific
, servant , servant
, servant-client , servant-client
...@@ -87,6 +89,7 @@ executable crawlerHAL-exe ...@@ -87,6 +89,7 @@ executable crawlerHAL-exe
, http-client , http-client
, http-client-tls , http-client-tls
, lens , lens
, neat-interpolation
, scientific , scientific
, servant , servant
, servant-client , servant-client
...@@ -122,6 +125,7 @@ test-suite halCrawler-test ...@@ -122,6 +125,7 @@ test-suite halCrawler-test
, http-client , http-client
, http-client-tls , http-client-tls
, lens , lens
, neat-interpolation
, scientific , scientific
, servant , servant
, servant-client , servant-client
......
...@@ -29,6 +29,7 @@ dependencies: ...@@ -29,6 +29,7 @@ dependencies:
- http-client - http-client
- http-client-tls - http-client-tls
- lens - lens
- neat-interpolation
- scientific - scientific
- servant - servant
- servant-client - servant-client
......
...@@ -57,6 +57,16 @@ getMetadataWithC q start rows = do ...@@ -57,6 +57,16 @@ getMetadataWithC q start rows = do
print $ show _corpus_title print $ show _corpus_title
pure c pure c
-- | Ask the HAL API how many documents match the query @q@.
--
-- Issues a single 'search' request through 'runHalAPIClient' and returns
-- the @_numFound@ field of the response, or the 'ClientError' when the
-- request fails.
countResults :: Text -> IO (Either ClientError Integer)
countResults q = do
  -- Only the hit count is read from the response. The trailing
  -- @(Just 0) (Just 1)@ arguments are presumably the start offset and the
  -- row limit, keeping the payload minimal -- TODO confirm against 'search'.
  eRes <- runHalAPIClient $ search (Just requestedFields) [q] Nothing (Just 0) (Just 1)
  pure $ count' <$> eRes
  where
    -- Project the total hit count out of the response envelope.
    count' :: Response Corpus -> Integer
    count' (Response { _numFound }) = _numFound
requestedFields :: Text requestedFields :: Text
requestedFields = "docid,title_s,abstract_s,submittedDate_s,source_s,authFullName_s,authOrganism_s" requestedFields = "docid,title_s,abstract_s,submittedDate_s,source_s,authFullName_s,authOrganism_s"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment