Commit d7571b77 authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Send PUBMED the raw query from the frontend

parent 78bc52e0
...@@ -19,14 +19,16 @@ module Gargantext.Core.Text.Corpus.API ...@@ -19,14 +19,16 @@ module Gargantext.Core.Text.Corpus.API
) where ) where
import Conduit import Conduit
import Control.Monad.Except
import Data.Bifunctor import Data.Bifunctor
import Data.Either (Either(..)) import Data.Either (Either(..))
import Data.Maybe import Data.Maybe
import qualified Data.Text as T
import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs) import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs)
import Gargantext.Core (Lang(..), toISO639) import Gargantext.Core (Lang(..), toISO639)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude import Gargantext.Prelude
import Servant.Client (ClientError)
import qualified Data.Text as T
import qualified Gargantext.Core.Text.Corpus.API.Arxiv as Arxiv import qualified Gargantext.Core.Text.Corpus.API.Arxiv as Arxiv
import qualified Gargantext.Core.Text.Corpus.API.Hal as HAL import qualified Gargantext.Core.Text.Corpus.API.Hal as HAL
import qualified Gargantext.Core.Text.Corpus.API.Isidore as ISIDORE import qualified Gargantext.Core.Text.Corpus.API.Isidore as ISIDORE
...@@ -35,7 +37,6 @@ import qualified Gargantext.Core.Text.Corpus.API.OpenAlex as OpenAlex ...@@ -35,7 +37,6 @@ import qualified Gargantext.Core.Text.Corpus.API.OpenAlex as OpenAlex
import qualified Gargantext.Core.Text.Corpus.API.Pubmed as PUBMED import qualified Gargantext.Core.Text.Corpus.API.Pubmed as PUBMED
import qualified Gargantext.Core.Text.Corpus.Query as Corpus import qualified Gargantext.Core.Text.Corpus.Query as Corpus
import qualified PUBMED.Types as PUBMED import qualified PUBMED.Types as PUBMED
import Servant.Client (ClientError)
data GetCorpusError data GetCorpusError
= -- | We couldn't parse the user input query into something meaningful. = -- | We couldn't parse the user input query into something meaningful.
...@@ -53,18 +54,23 @@ get :: ExternalAPIs ...@@ -53,18 +54,23 @@ get :: ExternalAPIs
-- -> IO [HyperdataDocument] -- -> IO [HyperdataDocument]
-> IO (Either GetCorpusError (Maybe Integer, ConduitT () HyperdataDocument IO ())) -> IO (Either GetCorpusError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get externalAPI la q mPubmedAPIKey limit = do get externalAPI la q mPubmedAPIKey limit = do
case Corpus.parseQuery q of -- For PUBMED, HAL, IsTex, Isidore and OpenAlex, we want to send the query as-it.
Left err -> pure $ Left $ InvalidInputQuery q (T.pack err) -- For Arxiv we parse the query into a structured boolean query we submit over.
Right corpusQuery -> case externalAPI of case externalAPI of
OpenAlex -> first ExternalAPIError <$> PubMed ->
OpenAlex.get (fromMaybe "" Nothing {- email -}) q (toISO639 la) limit first ExternalAPIError <$> PUBMED.get (fromMaybe "" mPubmedAPIKey) q limit
PubMed -> first ExternalAPIError <$> OpenAlex ->
PUBMED.get (fromMaybe "" mPubmedAPIKey) corpusQuery limit first ExternalAPIError <$> OpenAlex.get (fromMaybe "" Nothing {- email -}) q (toISO639 la) limit
--docs <- PUBMED.get q default_limit -- EN only by default Arxiv -> runExceptT $ do
--pure (Just $ fromIntegral $ length docs, yieldMany docs) corpusQuery <- ExceptT (pure parse_query)
Arxiv -> Right <$> Arxiv.get la corpusQuery limit ExceptT $ fmap Right (Arxiv.get la corpusQuery limit)
HAL -> first ExternalAPIError <$> HAL.getC (toISO639 la) (Corpus.getRawQuery q) (Corpus.getLimit <$> limit) HAL ->
IsTex -> do docs <- ISTEX.get la (Corpus.getRawQuery q) (Corpus.getLimit <$> limit) first ExternalAPIError <$> HAL.getC (toISO639 la) (Corpus.getRawQuery q) (Corpus.getLimit <$> limit)
IsTex -> do
docs <- ISTEX.get la (Corpus.getRawQuery q) (Corpus.getLimit <$> limit)
pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs) pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs)
Isidore -> do docs <- ISIDORE.get la (Corpus.getLimit <$> limit) (Just $ Corpus.getRawQuery q) Nothing Isidore -> do
docs <- ISIDORE.get la (Corpus.getLimit <$> limit) (Just $ Corpus.getRawQuery q) Nothing
pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs) pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs)
where
parse_query = first (InvalidInputQuery q . T.pack) $ Corpus.parseQuery q
...@@ -45,6 +45,15 @@ import PUBMED.Types (Config(..)) ...@@ -45,6 +45,15 @@ import PUBMED.Types (Config(..))
-- | A pubmed query. -- | A pubmed query.
-- See: https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch -- See: https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch
-- The documentation for PUBMED says:
-- Values for query keys may also be provided in term if they are preceeded by a
-- '#' (%23 in the URL). While only one query_key parameter can be provided to ESearch,
-- any number of query keys can be combined in term. Also, if query keys are provided in term,
-- they can be combined with OR or NOT in addition to AND.
-- Example:
-- esearch.fcgi?db=pubmed&term=%231+AND+asthma&WebEnv=<webenv string>&usehistory=y
--
-- Therefore, we can pretty-print our 'Query' back into something that PubMed could understand.
newtype ESearch = ESearch { _ESearch :: [EscapeItem] } newtype ESearch = ESearch { _ESearch :: [EscapeItem] }
deriving stock (Show, Eq) deriving stock (Show, Eq)
deriving newtype (Semigroup, Monoid) deriving newtype (Semigroup, Monoid)
...@@ -87,21 +96,14 @@ convertQuery q = ESearch (interpretQuery q transformAST) ...@@ -87,21 +96,14 @@ convertQuery q = ESearch (interpretQuery q transformAST)
-> [QN "NOT+", QE (TE.encodeUtf8 term)] -> [QN "NOT+", QE (TE.encodeUtf8 term)]
get :: Text get :: Text
-> Corpus.Query -> Corpus.RawQuery
-> Maybe Limit -> Maybe Limit
-> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ())) -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get apiKey q l = do get apiKey q l = do
-- The documentation for PUBMED says: -- NOTE(adinapoli): For now we do not interpret the PUBMED query into something
-- Values for query keys may also be provided in term if they are preceeded by a -- more structured, like an 'ESearch' term, but we could, in the future.
-- '#' (%23 in the URL). While only one query_key parameter can be provided to ESearch,
-- any number of query keys can be combined in term. Also, if query keys are provided in term,
-- they can be combined with OR or NOT in addition to AND.
-- Example:
-- esearch.fcgi?db=pubmed&term=%231+AND+asthma&WebEnv=<webenv string>&usehistory=y
--
-- Therefore, we can pretty-print our 'Query' back into something that PubMed could understand.
eRes <- runReaderT PubMed.getMetadataWithC (Config { apiKey = Just apiKey eRes <- runReaderT PubMed.getMetadataWithC (Config { apiKey = Just apiKey
, query = getESearch $ convertQuery q , query = getRawQuery q
, perPage = Just 200 , perPage = Just 200
, mWebEnv = Nothing }) , mWebEnv = Nothing })
let takeLimit = case l of let takeLimit = case l of
......
...@@ -226,7 +226,7 @@ testPubMedCovid_01 getPubmedKey = do ...@@ -226,7 +226,7 @@ testPubMedCovid_01 getPubmedKey = do
case mb_key of case mb_key of
Nothing -> pure () Nothing -> pure ()
Just k -> withValidQuery "\"Covid\"" $ \query -> do Just k -> withValidQuery "\"Covid\"" $ \query -> do
res <- Pubmed.get (_PubmedApiKey k) query (Just 1) res <- Pubmed.get (_PubmedApiKey k) (renderQuery query) (Just 1)
case res of case res of
Left err -> fail (show err) Left err -> fail (show err)
Right (_, cnd) -> do Right (_, cnd) -> do
...@@ -241,7 +241,7 @@ testPubMedCovid_02 getPubmedKey = do ...@@ -241,7 +241,7 @@ testPubMedCovid_02 getPubmedKey = do
case mb_key of case mb_key of
Nothing -> pure () Nothing -> pure ()
Just k -> withValidQuery "\"Covid\" AND \"Alzheimer\"" $ \query -> do Just k -> withValidQuery "\"Covid\" AND \"Alzheimer\"" $ \query -> do
res <- Pubmed.get (_PubmedApiKey k) query (Just 1) res <- Pubmed.get (_PubmedApiKey k) (renderQuery query) (Just 1)
case res of case res of
Left err -> fail (show err) Left err -> fail (show err)
Right (_, cnd) -> do Right (_, cnd) -> do
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment