Commit 63e6bb6c authored by Przemyslaw Kaminski

Fix parsing of docid

Turns out sometimes docid can be a string.
parent 36424845
...@@ -44,7 +44,7 @@ getMetadataWithC q start rows = do ...@@ -44,7 +44,7 @@ getMetadataWithC q start rows = do
rows' = min numFound $ fromMaybe numFound rows rows' = min numFound $ fromMaybe numFound rows
numResults = rows' - (fromIntegral start') numResults = rows' - (fromIntegral start')
numPages = numResults `div` (fromIntegral batchSize) + 1 numPages = numResults `div` (fromIntegral batchSize) + 1
getPage :: Text -> Int -> Int -> IO [Corpus] getPage :: Text -> Int -> Int -> IO [Corpus]
getPage q start pageNum = do getPage q start pageNum = do
let offset = start + pageNum * batchSize let offset = start + pageNum * batchSize
...@@ -63,7 +63,7 @@ countResults q = do ...@@ -63,7 +63,7 @@ countResults q = do
-- First, estimate the total number of documents -- First, estimate the total number of documents
eRes <- runHalAPIClient $ search (Just requestedFields) [q] Nothing (Just 0) (Just 1) :: IO (Either ClientError (Response Corpus)) eRes <- runHalAPIClient $ search (Just requestedFields) [q] Nothing (Just 0) (Just 1) :: IO (Either ClientError (Response Corpus))
pure $ _numFound <$> eRes pure $ _numFound <$> eRes
requestedFields :: Text requestedFields :: Text
requestedFields = "docid,title_s,abstract_s,submittedDate_s,source_s,authFullName_s,authOrganism_s" requestedFields = "docid,title_s,abstract_s,submittedDate_s,source_s,authFullName_s,authOrganism_s"
......
...@@ -27,7 +27,7 @@ type Search doc = "search" ...@@ -27,7 +27,7 @@ type Search doc = "search"
:> QueryParams "fq" Text :> QueryParams "fq" Text
-- pretty much clear, (Asc || Desc) + field you want to sort by -- pretty much clear, (Asc || Desc) + field you want to sort by
:> QueryParam "sort" SortField :> QueryParam "sort" SortField
-- permit to start a the x result -- permit to start at the x result
:> QueryParam "start" Int :> QueryParam "start" Int
-- use rows to make the request only return the x number of result -- use rows to make the request only return the x number of result
:> QueryParam "rows" Integer :> QueryParam "rows" Integer
......
...@@ -4,11 +4,13 @@ module HAL.Doc.Corpus where ...@@ -4,11 +4,13 @@ module HAL.Doc.Corpus where
import GHC.Generics import GHC.Generics
import Data.Aeson
import Data.Default
import Data.Text (pack, Text)
import Control.Applicative ((<|>)) import Control.Applicative ((<|>))
import qualified Control.Lens as L import qualified Control.Lens as L
import Data.Aeson
import Data.Aeson.Types (Parser, explicitParseField)
import Data.Default
import Data.Text (pack, unpack, Text)
import Text.Read (readMaybe)
import Servant.API (ToHttpApiData(..)) import Servant.API (ToHttpApiData(..))
...@@ -29,15 +31,23 @@ instance Default Corpus where ...@@ -29,15 +31,23 @@ instance Default Corpus where
def = Corpus def def def def def def def def def = Corpus def def def def def def def def
instance FromJSON Corpus where instance FromJSON Corpus where
parseJSON (Object o) = Corpus <$> parseJSON (Object o) =
(o .: "docid") Corpus
<*> (o .: "title_s" <|> return []) <$> (explicitParseField docidParser o "docid")
<*> (o .: "abstract_s" <|> return []) <*> (o .: "title_s" <|> return [])
<*> (o .:? "submittedDate_s") <*> (o .: "abstract_s" <|> return [])
<*> (o .:? "source_s") <*> (o .:? "submittedDate_s")
<*> (o .: "authFullName_s" <|> return []) <*> (o .:? "source_s")
<*> (o .: "authOrganism_s" <|> return []) <*> (o .: "authFullName_s" <|> return [])
<*> (o .: "structId_i" <|> return []) <*> (o .: "authOrganism_s" <|> return [])
<*> (o .: "structId_i" <|> return [])
-- | Parse the HAL @docid@ field.
--
-- The field is accepted either as a JSON number or as a JSON string holding
-- an integer literal. Any other JSON value, a string that is not an integer,
-- or a string whose value does not fit in an 'Int' makes the parser fail with
-- a message that includes the offending value.
docidParser :: Value -> Parser Int
docidParser n@(Number _) = parseJSON n
docidParser (String s) =
  -- Read as 'Integer' first and range-check explicitly: reading straight into
  -- 'Int' silently wraps on out-of-range input instead of failing.
  case readMaybe (unpack s) :: Maybe Integer of
    Just i
      | i >= toInteger (minBound :: Int) && i <= toInteger (maxBound :: Int) ->
          pure (fromInteger i)
    _ -> fail $ "cannot parse int for docid: " <> show s
docidParser v = fail $ "cannot parse docid: " <> show v
instance ToHttpApiData Corpus where instance ToHttpApiData Corpus where
toUrlPiece _ = "docid,title_s,abstract_s,submittedDate_s,source_s,authFullName_s,authOrganism_s,structId_i" toUrlPiece _ = "docid,title_s,abstract_s,submittedDate_s,source_s,authFullName_s,authOrganism_s,structId_i"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment