Commit d7571b77 authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Send PUBMED the raw query from the frontend

parent 78bc52e0
......@@ -19,14 +19,16 @@ module Gargantext.Core.Text.Corpus.API
) where
import Conduit
import Control.Monad.Except
import Data.Bifunctor
import Data.Either (Either(..))
import Data.Maybe
import qualified Data.Text as T
import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs)
import Gargantext.Core (Lang(..), toISO639)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude
import Servant.Client (ClientError)
import qualified Data.Text as T
import qualified Gargantext.Core.Text.Corpus.API.Arxiv as Arxiv
import qualified Gargantext.Core.Text.Corpus.API.Hal as HAL
import qualified Gargantext.Core.Text.Corpus.API.Isidore as ISIDORE
......@@ -35,7 +37,6 @@ import qualified Gargantext.Core.Text.Corpus.API.OpenAlex as OpenAlex
import qualified Gargantext.Core.Text.Corpus.API.Pubmed as PUBMED
import qualified Gargantext.Core.Text.Corpus.Query as Corpus
import qualified PUBMED.Types as PUBMED
import Servant.Client (ClientError)
data GetCorpusError
= -- | We couldn't parse the user input query into something meaningful.
......@@ -53,18 +54,23 @@ get :: ExternalAPIs
-- -> IO [HyperdataDocument]
-> IO (Either GetCorpusError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get externalAPI la q mPubmedAPIKey limit = do
case Corpus.parseQuery q of
Left err -> pure $ Left $ InvalidInputQuery q (T.pack err)
Right corpusQuery -> case externalAPI of
OpenAlex -> first ExternalAPIError <$>
OpenAlex.get (fromMaybe "" Nothing {- email -}) q (toISO639 la) limit
PubMed -> first ExternalAPIError <$>
PUBMED.get (fromMaybe "" mPubmedAPIKey) corpusQuery limit
--docs <- PUBMED.get q default_limit -- EN only by default
--pure (Just $ fromIntegral $ length docs, yieldMany docs)
Arxiv -> Right <$> Arxiv.get la corpusQuery limit
HAL -> first ExternalAPIError <$> HAL.getC (toISO639 la) (Corpus.getRawQuery q) (Corpus.getLimit <$> limit)
IsTex -> do docs <- ISTEX.get la (Corpus.getRawQuery q) (Corpus.getLimit <$> limit)
-- For PUBMED, HAL, IsTex, Isidore and OpenAlex, we want to send the query as-it.
-- For Arxiv we parse the query into a structured boolean query we submit over.
case externalAPI of
PubMed ->
first ExternalAPIError <$> PUBMED.get (fromMaybe "" mPubmedAPIKey) q limit
OpenAlex ->
first ExternalAPIError <$> OpenAlex.get (fromMaybe "" Nothing {- email -}) q (toISO639 la) limit
Arxiv -> runExceptT $ do
corpusQuery <- ExceptT (pure parse_query)
ExceptT $ fmap Right (Arxiv.get la corpusQuery limit)
HAL ->
first ExternalAPIError <$> HAL.getC (toISO639 la) (Corpus.getRawQuery q) (Corpus.getLimit <$> limit)
IsTex -> do
docs <- ISTEX.get la (Corpus.getRawQuery q) (Corpus.getLimit <$> limit)
pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs)
Isidore -> do docs <- ISIDORE.get la (Corpus.getLimit <$> limit) (Just $ Corpus.getRawQuery q) Nothing
Isidore -> do
docs <- ISIDORE.get la (Corpus.getLimit <$> limit) (Just $ Corpus.getRawQuery q) Nothing
pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs)
where
parse_query = first (InvalidInputQuery q . T.pack) $ Corpus.parseQuery q
......@@ -45,6 +45,15 @@ import PUBMED.Types (Config(..))
-- | A pubmed query.
-- See: https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch
-- The documentation for PUBMED says:
-- Values for query keys may also be provided in term if they are preceeded by a
-- '#' (%23 in the URL). While only one query_key parameter can be provided to ESearch,
-- any number of query keys can be combined in term. Also, if query keys are provided in term,
-- they can be combined with OR or NOT in addition to AND.
-- Example:
-- esearch.fcgi?db=pubmed&term=%231+AND+asthma&WebEnv=<webenv string>&usehistory=y
--
-- Therefore, we can pretty-print our 'Query' back into something that PubMed could understand.
newtype ESearch = ESearch { _ESearch :: [EscapeItem] }
deriving stock (Show, Eq)
deriving newtype (Semigroup, Monoid)
......@@ -87,21 +96,14 @@ convertQuery q = ESearch (interpretQuery q transformAST)
-> [QN "NOT+", QE (TE.encodeUtf8 term)]
get :: Text
-> Corpus.Query
-> Corpus.RawQuery
-> Maybe Limit
-> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get apiKey q l = do
-- The documentation for PUBMED says:
-- Values for query keys may also be provided in term if they are preceeded by a
-- '#' (%23 in the URL). While only one query_key parameter can be provided to ESearch,
-- any number of query keys can be combined in term. Also, if query keys are provided in term,
-- they can be combined with OR or NOT in addition to AND.
-- Example:
-- esearch.fcgi?db=pubmed&term=%231+AND+asthma&WebEnv=<webenv string>&usehistory=y
--
-- Therefore, we can pretty-print our 'Query' back into something that PubMed could understand.
-- NOTE(adinapoli): For now we do not interpret the PUBMED query into something
-- more structured, like an 'ESearch' term, but we could, in the future.
eRes <- runReaderT PubMed.getMetadataWithC (Config { apiKey = Just apiKey
, query = getESearch $ convertQuery q
, query = getRawQuery q
, perPage = Just 200
, mWebEnv = Nothing })
let takeLimit = case l of
......
......@@ -226,7 +226,7 @@ testPubMedCovid_01 getPubmedKey = do
case mb_key of
Nothing -> pure ()
Just k -> withValidQuery "\"Covid\"" $ \query -> do
res <- Pubmed.get (_PubmedApiKey k) query (Just 1)
res <- Pubmed.get (_PubmedApiKey k) (renderQuery query) (Just 1)
case res of
Left err -> fail (show err)
Right (_, cnd) -> do
......@@ -241,7 +241,7 @@ testPubMedCovid_02 getPubmedKey = do
case mb_key of
Nothing -> pure ()
Just k -> withValidQuery "\"Covid\" AND \"Alzheimer\"" $ \query -> do
res <- Pubmed.get (_PubmedApiKey k) query (Just 1)
res <- Pubmed.get (_PubmedApiKey k) (renderQuery query) (Just 1)
case res of
Left err -> fail (show err)
Right (_, cnd) -> do
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment