Commit ac1bec26 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

Bring back docid parser, better app cmdline

parent 397fa34f
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
module Main where module Main where
import Conduit
import NeatInterpolation (text) import NeatInterpolation (text)
import Network.HTTP.Client (newManager) import Network.HTTP.Client (newManager)
import Network.HTTP.Client.TLS (tlsManagerSettings) import Network.HTTP.Client.TLS (tlsManagerSettings)
...@@ -12,6 +13,7 @@ import Servant.Client ...@@ -12,6 +13,7 @@ import Servant.Client
import HAL (getMetadataWith) import HAL (getMetadataWith)
import HAL.Client import HAL.Client
import HAL.Doc import HAL.Doc
import HAL.Doc.Corpus (Corpus(..))
import HAL import HAL
import Tree import Tree
import qualified Data.Text as T import qualified Data.Text as T
...@@ -20,16 +22,27 @@ import qualified Data.Text as T ...@@ -20,16 +22,27 @@ import qualified Data.Text as T
data CountParams = CountParams data CountParams = CountParams
{ cp_query :: T.Text } { cp_query :: T.Text }
data Command = Count CountParams data FetchParams = FetchParams
{ fp_query :: T.Text }
data Command =
Count CountParams
| Fetch FetchParams
countParams :: Parser Command countParams :: Parser Command
countParams = Count <$> countParams = Count <$>
(CountParams (CountParams
<$> strArgument (metavar "query")) <$> strArgument (metavar "query"))
fetchParams :: Parser Command
fetchParams = Fetch <$>
(FetchParams
<$> strArgument (metavar "query"))
params :: Parser Command params :: Parser Command
params = subparser params = subparser
(command "count" (info countParams (progDesc "Count number of docs for a given query"))) (command "count" (info countParams (progDesc "Count number of docs for a given query"))
<> command "fetch" (info fetchParams (progDesc "Fetch docs for a given query")))
opts :: ParserInfo Command opts :: ParserInfo Command
opts = info (params <**> helper) opts = info (params <**> helper)
...@@ -50,9 +63,19 @@ run (Count (CountParams { cp_query })) = do ...@@ -50,9 +63,19 @@ run (Count (CountParams { cp_query })) = do
case res of case res of
Left err -> print err Left err -> print err
Right (cnt, _docsC) -> print $ show cnt Right (cnt, _docsC) -> print $ show cnt
run (Fetch (FetchParams { fp_query })) = do
res <- getMetadataWithC (fp_query) (Just 0) Nothing
case res of
Left err -> print err
Right (_cnt, docsC) -> do
_ <- runConduit $
docsC
.| mapM_C (\(Corpus { _corpus_docid }) -> print $ "docid: " <> show _corpus_docid)
.| sinkList
pure ()
-- data
yearReq = [text| yearReq = [text|
(language_t:en) (language_t:en)
AND (producedDateY_i:2018 AND (producedDateY_i:2018
......
...@@ -48,6 +48,7 @@ getMetadataWithC q start rows = do ...@@ -48,6 +48,7 @@ getMetadataWithC q start rows = do
getPage :: Text -> Int -> Int -> IO [Corpus] getPage :: Text -> Int -> Int -> IO [Corpus]
getPage q start pageNum = do getPage q start pageNum = do
let offset = start + pageNum * batchSize let offset = start + pageNum * batchSize
putStrLn $ "[getMetadataWithC] getPage: " <> show offset
eRes <- runHalAPIClient $ search (Just requestedFields) [q] Nothing (Just offset) (Just $ fromIntegral batchSize) eRes <- runHalAPIClient $ search (Just requestedFields) [q] Nothing (Just offset) (Just $ fromIntegral batchSize)
pure $ case eRes of pure $ case eRes of
Left _ -> [] Left _ -> []
......
...@@ -7,7 +7,7 @@ import GHC.Generics ...@@ -7,7 +7,7 @@ import GHC.Generics
import Control.Applicative ((<|>)) import Control.Applicative ((<|>))
import qualified Control.Lens as L import qualified Control.Lens as L
import Data.Aeson import Data.Aeson
import Data.Aeson.Types (Parser, explicitParseField) import Data.Aeson.Types (Parser)
import Data.Default import Data.Default
import Data.Text (pack, unpack, Text) import Data.Text (pack, unpack, Text)
import Text.Read (readMaybe) import Text.Read (readMaybe)
...@@ -33,7 +33,7 @@ instance Default Corpus where ...@@ -33,7 +33,7 @@ instance Default Corpus where
instance FromJSON Corpus where instance FromJSON Corpus where
parseJSON (Object o) = parseJSON (Object o) =
Corpus Corpus
<$> (explicitParseField docidParser o "docid") <$> o .: "docid"
<*> (o .: "title_s" <|> return []) <*> (o .: "title_s" <|> return [])
<*> (o .: "abstract_s" <|> return []) <*> (o .: "abstract_s" <|> return [])
<*> (o .:? "submittedDate_s") <*> (o .:? "submittedDate_s")
...@@ -42,12 +42,5 @@ instance FromJSON Corpus where ...@@ -42,12 +42,5 @@ instance FromJSON Corpus where
<*> (o .: "authOrganism_s" <|> return []) <*> (o .: "authOrganism_s" <|> return [])
<*> (o .: "structId_i" <|> return []) <*> (o .: "structId_i" <|> return [])
docidParser :: Value -> Parser Int
docidParser n@(Number _) = parseJSON n
docidParser (String i) = case (readMaybe $ unpack i :: Maybe Int) of
Nothing -> fail $ "cannot parse int for docid"
Just i -> pure i
docidParser v = fail $ "cannot parse docid: " <> show v
instance ToHttpApiData Corpus where instance ToHttpApiData Corpus where
toUrlPiece _ = "docid,title_s,abstract_s,submittedDate_s,source_s,authFullName_s,authOrganism_s,structId_i" toUrlPiece _ = "docid,title_s,abstract_s,submittedDate_s,source_s,authFullName_s,authOrganism_s,structId_i"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment