Commit 63e6bb6c authored by Przemyslaw Kaminski

Fix parsing of docid

Turns out sometimes docid can be a string.
parent 36424845
......@@ -44,7 +44,7 @@ getMetadataWithC q start rows = do
rows' = min numFound $ fromMaybe numFound rows
numResults = rows' - (fromIntegral start')
numPages = numResults `div` (fromIntegral batchSize) + 1
getPage :: Text -> Int -> Int -> IO [Corpus]
getPage q start pageNum = do
let offset = start + pageNum * batchSize
......@@ -63,7 +63,7 @@ countResults q = do
-- First, estimate the total number of documents
eRes <- runHalAPIClient $ search (Just requestedFields) [q] Nothing (Just 0) (Just 1) :: IO (Either ClientError (Response Corpus))
pure $ _numFound <$> eRes
-- | Comma-separated list of document field names; passed as the first
-- argument (wrapped in 'Just') to @search@ when counting results.
--
-- NOTE(review): the field list rendered by the @ToHttpApiData Corpus@
-- instance additionally contains @structId_i@ -- confirm whether it should
-- be requested here as well.
requestedFields :: Text
requestedFields = "docid,title_s,abstract_s,submittedDate_s,source_s,authFullName_s,authOrganism_s"
......
......@@ -27,7 +27,7 @@ type Search doc = "search"
:> QueryParams "fq" Text
-- pretty much clear, (Asc || Desc) + field you want to sort by
:> QueryParam "sort" SortField
-- permit to start a the x result
-- permit to start at the x result
:> QueryParam "start" Int
-- use rows to make the request return only the first x results
:> QueryParam "rows" Integer
......
......@@ -4,11 +4,13 @@ module HAL.Doc.Corpus where
import GHC.Generics
import Data.Aeson
import Data.Default
import Data.Text (pack, Text)
import Control.Applicative ((<|>))
import qualified Control.Lens as L
import Data.Aeson
import Data.Aeson.Types (Parser, explicitParseField)
import Data.Default
import Data.Text (pack, unpack, Text)
import Text.Read (readMaybe)
import Servant.API (ToHttpApiData(..))
......@@ -29,15 +31,23 @@ instance Default Corpus where
def = Corpus def def def def def def def def
-- | Decode a 'Corpus' from a JSON object.
--
-- * @docid@ is decoded with 'docidParser', so it may arrive either as a JSON
--   number or as a string containing an integer.
-- * @submittedDate_s@ and @source_s@ are optional ('.:?').
-- * The list-valued fields fall back to @[]@ whenever the key is missing or
--   its value does not decode (best-effort, via '<|>').
--
-- Any JSON value that is not an object is rejected with a regular parser
-- failure ('withObject') instead of an incomplete-pattern runtime error.
instance FromJSON Corpus where
  parseJSON = withObject "Corpus" $ \o ->
    Corpus
      <$> explicitParseField docidParser o "docid"
      <*> (o .: "title_s" <|> return [])
      <*> (o .: "abstract_s" <|> return [])
      <*> (o .:? "submittedDate_s")
      <*> (o .:? "source_s")
      <*> (o .: "authFullName_s" <|> return [])
      <*> (o .: "authOrganism_s" <|> return [])
      <*> (o .: "structId_i" <|> return [])
-- | Parse a @docid@ value that may be either a JSON number or a JSON string
-- containing an integer.
--
-- * A number is delegated to the 'Int' 'FromJSON' instance.
-- * A string is read with 'readMaybe'; if it does not read as an 'Int' the
--   parser fails and the message includes the offending text.
-- * Any other JSON value fails, showing the rejected value.
docidParser :: Value -> Parser Int
docidParser n@(Number _) = parseJSON n
docidParser (String txt) =
  -- The result type ('Parser' 'Int') fixes what 'readMaybe' reads, so no
  -- annotation is needed; the raw text is reported to ease debugging.
  maybe
    (fail $ "cannot parse int for docid: " <> show txt)
    pure
    (readMaybe (unpack txt))
docidParser v = fail $ "cannot parse docid: " <> show v
-- | URL rendering of a 'Corpus': the argument is ignored and a fixed,
-- comma-separated list of field names is always produced.
-- NOTE(review): presumably used to tell the API which fields to return --
-- confirm against the call sites.
instance ToHttpApiData Corpus where
toUrlPiece _ = "docid,title_s,abstract_s,submittedDate_s,source_s,authFullName_s,authOrganism_s,structId_i"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment