[ngrams] refactor PubMed DB type (to include Maybe APIKey)

parent baa2491f
Pipeline #7189 failed with stages
in 41 minutes and 48 seconds
......@@ -25,6 +25,7 @@ import Gargantext.Core.Types (TODO(..))
import Gargantext.Core.Utils.Aeson (jsonOptions)
import Gargantext.Core.Utils.Prefix (dropPrefixT)
import Gargantext.Prelude
import PUBMED.Types qualified as PUBMED
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
......@@ -32,13 +33,13 @@ import Test.QuickCheck.Arbitrary
-- | Main Types
-- TODO IsidoreAuth
data ExternalAPIs = OpenAlex
| PubMed
| PubMed (Maybe PUBMED.APIKey)
| Arxiv
| HAL
| IsTex
| Isidore
| EPO
deriving (Show, Eq, Generic, Enum, Bounded)
deriving (Show, Eq, Generic)
-- | Main Instances
......@@ -46,11 +47,18 @@ instance FromJSON ExternalAPIs
instance ToJSON ExternalAPIs
externalAPIs :: [ExternalAPIs]
externalAPIs = [minBound .. maxBound]
externalAPIs =
[ OpenAlex
, PubMed Nothing
, Arxiv
, HAL
, IsTex
, Isidore
, EPO ]
instance Arbitrary ExternalAPIs
where
arbitrary = arbitraryBoundedEnum
arbitrary = elements externalAPIs
instance ToSchema ExternalAPIs where
declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
......
......@@ -159,7 +159,6 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
, _wq_datafield = datafield
, _wq_lang = l
, _wq_flowListWith = flw
, _wq_pubmedAPIKey = mPubmedAPIKey
, .. }) maybeLimit jobHandle = do
-- TODO ...
$(logLocM) DEBUG $ "[addToCorpusWithQuery] (cid, dbs) " <> show (cid, dbs)
......@@ -194,7 +193,7 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
let db = database2origin dbs
-- mPubmedAPIKey <- getUserPubmedAPIKey user
-- printDebug "[addToCorpusWithQuery] mPubmedAPIKey" mPubmedAPIKey
eTxt <- getDataText db (Multi l) q mPubmedAPIKey mEPOAuthKey maybeLimit
eTxt <- getDataText db (Multi l) q mEPOAuthKey maybeLimit
-- printDebug "[G.A.N.C.New] lTxts" lTxts
case eTxt of
......
......@@ -22,30 +22,31 @@ import Gargantext.API.Admin.Orchestrator.Types qualified as Types
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Action.Flow.Types (DataOrigin(..))
import Gargantext.Prelude
import PUBMED.Types qualified as PUBMED
data Database = Empty
| OpenAlex
| PubMed
| PubMed (Maybe PUBMED.APIKey)
| Arxiv
| HAL
| IsTex
| Isidore
| EPO
deriving (Eq, Show, Generic, Enum, Bounded)
deriving (Eq, Show, Generic)
deriveJSON (unPrefix "") ''Database
instance ToSchema Database where
declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
database2origin :: Database -> DataOrigin
database2origin Empty = InternalOrigin Types.IsTex
database2origin OpenAlex = ExternalOrigin Types.OpenAlex
database2origin PubMed = ExternalOrigin Types.PubMed
database2origin Arxiv = ExternalOrigin Types.Arxiv
database2origin HAL = ExternalOrigin Types.HAL
database2origin IsTex = ExternalOrigin Types.IsTex
database2origin Isidore = ExternalOrigin Types.Isidore
database2origin EPO = ExternalOrigin Types.EPO
database2origin Empty = InternalOrigin Types.IsTex
database2origin OpenAlex = ExternalOrigin Types.OpenAlex
database2origin (PubMed k) = ExternalOrigin (Types.PubMed k)
database2origin Arxiv = ExternalOrigin Types.Arxiv
database2origin HAL = ExternalOrigin Types.HAL
database2origin IsTex = ExternalOrigin Types.IsTex
database2origin Isidore = ExternalOrigin Types.Isidore
database2origin EPO = ExternalOrigin Types.EPO
------------------------------------------------------------------------
data Datafield = Gargantext
......
......@@ -36,7 +36,6 @@ import Gargantext.Core.Text.Corpus.Query qualified as Corpus
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Prelude hiding (get)
import Gargantext.Utils.Jobs.Error
import PUBMED.Types qualified as PUBMED
import Servant.Client (ClientError)
data GetCorpusError
......@@ -60,17 +59,16 @@ get :: ExternalAPIs
-- If the provider doesn't support the search filtered by language, or if the language
-- is not important, the frontend will simply send 'EN' to the backend.
-> Corpus.RawQuery
-> Maybe PUBMED.APIKey
-> Maybe EPO.AuthKey
-> Text
-> Maybe Corpus.Limit
-- -> IO [HyperdataDocument]
-> IO (Either GetCorpusError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get externalAPI lang q mPubmedAPIKey epoAuthKey epoAPIUrl limit = do
get externalAPI lang q epoAuthKey epoAPIUrl limit = do
-- For PUBMED, HAL, IsTex, Isidore and OpenAlex, we want to send the query as-it.
-- For Arxiv we parse the query into a structured boolean query we submit over.
case externalAPI of
PubMed ->
PubMed mPubmedAPIKey ->
first (ExternalAPIError externalAPI) <$> PUBMED.get (fromMaybe "" mPubmedAPIKey) q limit
OpenAlex ->
first (ExternalAPIError externalAPI) <$> OpenAlex.get (fromMaybe "" Nothing {- email -}) q (Just $ toISO639 lang) limit
......
......@@ -20,7 +20,7 @@ Portability : POSIX
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
......@@ -34,9 +34,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
, flowCorpus
, flowCorpusUser
, flowAnnuaire
, insertMasterDocs
, buildSocialList
, saveDocNgramsWith
, addDocumentsToHyperCorpus
, reIndexWith
......@@ -109,7 +107,6 @@ import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude hiding (to)
import Gargantext.System.Logging ( logLocM, LogLevel(DEBUG), MonadLogger )
import Gargantext.Utils.Jobs.Monad ( JobHandle, MonadJobStatus(..) )
import PUBMED.Types qualified as PUBMED
------------------------------------------------------------------------
-- Imports for upgrade function
......@@ -129,20 +126,20 @@ printDataText (DataNew (maybeInt, conduitData)) = do
res <- C.runConduit (conduitData .| CL.consume)
putText $ show (maybeInt, res)
-- TODO use the split parameter in config file
getDataText :: (HasNodeError err)
=> DataOrigin
-> TermType Lang
-> API.RawQuery
-> Maybe PUBMED.APIKey
-> Maybe EPO.AuthKey
-> Maybe API.Limit
-> DBCmdWithEnv env err (Either API.GetCorpusError DataText)
getDataText (ExternalOrigin api) la q mPubmedAPIKey mAuthKey li = do
getDataText (ExternalOrigin api) la q mAuthKey li = do
cfg <- view hasConfig
eRes <- liftBase $ API.get api (_tt_lang la) q mPubmedAPIKey mAuthKey (_ac_epo_api_url $ _gc_apis cfg) li
eRes <- liftBase $ API.get api (_tt_lang la) q mAuthKey (_ac_epo_api_url $ _gc_apis cfg) li
pure $ DataNew <$> eRes
getDataText (InternalOrigin _) la q _ _ _li = do
getDataText (InternalOrigin _) la q _ _li = do
(_masterUserId, _masterRootId, cId) <- getOrMkRootWithCorpus MkCorpusUserMaster (Nothing :: Maybe HyperdataCorpus)
ids <- map fst <$> searchDocInDatabase cId (stem (_tt_lang la) GargPorterAlgorithm $ API.getRawQuery q)
pure $ Right $ DataOld ids
......@@ -154,7 +151,7 @@ getDataText_Debug :: (HasNodeError err)
-> Maybe API.Limit
-> DBCmdWithEnv env err ()
getDataText_Debug a l q li = do
result <- getDataText a l q Nothing Nothing li
result <- getDataText a l q Nothing li
case result of
Left err -> liftBase $ putText $ show err
Right res -> liftBase $ printDataText res
......@@ -443,6 +440,7 @@ insertMasterDocs ncs c lang hs = do
-- _cooc <- insertDefaultNode NodeListCooc lId masterUserId
pure $ map contextId2NodeId ids'
saveDocNgramsWith :: (IsDBCmd env err m)
=> ListId
-> HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment