Commit ac1bec26 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

Bring back docid parser, better app cmdline

parent 397fa34f
......@@ -3,6 +3,7 @@
module Main where
import Conduit
import NeatInterpolation (text)
import Network.HTTP.Client (newManager)
import Network.HTTP.Client.TLS (tlsManagerSettings)
......@@ -12,6 +13,7 @@ import Servant.Client
import HAL (getMetadataWith)
import HAL.Client
import HAL.Doc
import HAL.Doc.Corpus (Corpus(..))
import HAL
import Tree
import qualified Data.Text as T
......@@ -20,16 +22,27 @@ import qualified Data.Text as T
data CountParams = CountParams
{ cp_query :: T.Text }
data Command = Count CountParams
data FetchParams = FetchParams
{ fp_query :: T.Text }
data Command =
Count CountParams
| Fetch FetchParams
countParams :: Parser Command
countParams = Count <$>
(CountParams
<$> strArgument (metavar "query"))
fetchParams :: Parser Command
fetchParams = Fetch <$>
(FetchParams
<$> strArgument (metavar "query"))
params :: Parser Command
params = subparser
(command "count" (info countParams (progDesc "Count number of docs for a given query")))
(command "count" (info countParams (progDesc "Count number of docs for a given query"))
<> command "fetch" (info fetchParams (progDesc "Fetch docs for a given query")))
opts :: ParserInfo Command
opts = info (params <**> helper)
......@@ -50,9 +63,19 @@ run (Count (CountParams { cp_query })) = do
case res of
Left err -> print err
Right (cnt, _docsC) -> print $ show cnt
run (Fetch (FetchParams { fp_query })) = do
res <- getMetadataWithC (fp_query) (Just 0) Nothing
case res of
Left err -> print err
Right (_cnt, docsC) -> do
_ <- runConduit $
docsC
.| mapM_C (\(Corpus { _corpus_docid }) -> print $ "docid: " <> show _corpus_docid)
.| sinkList
pure ()
-- data
yearReq = [text|
(language_t:en)
AND (producedDateY_i:2018
......
......@@ -48,6 +48,7 @@ getMetadataWithC q start rows = do
getPage :: Text -> Int -> Int -> IO [Corpus]
getPage q start pageNum = do
let offset = start + pageNum * batchSize
putStrLn $ "[getMetadataWithC] getPage: " <> show offset
eRes <- runHalAPIClient $ search (Just requestedFields) [q] Nothing (Just offset) (Just $ fromIntegral batchSize)
pure $ case eRes of
Left _ -> []
......
......@@ -7,7 +7,7 @@ import GHC.Generics
import Control.Applicative ((<|>))
import qualified Control.Lens as L
import Data.Aeson
import Data.Aeson.Types (Parser, explicitParseField)
import Data.Aeson.Types (Parser)
import Data.Default
import Data.Text (pack, unpack, Text)
import Text.Read (readMaybe)
......@@ -33,7 +33,7 @@ instance Default Corpus where
instance FromJSON Corpus where
parseJSON (Object o) =
Corpus
<$> (explicitParseField docidParser o "docid")
<$> o .: "docid"
<*> (o .: "title_s" <|> return [])
<*> (o .: "abstract_s" <|> return [])
<*> (o .:? "submittedDate_s")
......@@ -42,12 +42,5 @@ instance FromJSON Corpus where
<*> (o .: "authOrganism_s" <|> return [])
<*> (o .: "structId_i" <|> return [])
docidParser :: Value -> Parser Int
docidParser n@(Number _) = parseJSON n
docidParser (String i) = case (readMaybe $ unpack i :: Maybe Int) of
Nothing -> fail $ "cannot parse int for docid"
Just i -> pure i
docidParser v = fail $ "cannot parse docid: " <> show v
instance ToHttpApiData Corpus where
toUrlPiece _ = "docid,title_s,abstract_s,submittedDate_s,source_s,authFullName_s,authOrganism_s,structId_i"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment