[ngrams] refactor PubMed DB type (to include Maybe APIKey)

parent baa2491f
Pipeline #7189 failed with stages
in 41 minutes and 48 seconds
...@@ -25,6 +25,7 @@ import Gargantext.Core.Types (TODO(..)) ...@@ -25,6 +25,7 @@ import Gargantext.Core.Types (TODO(..))
import Gargantext.Core.Utils.Aeson (jsonOptions) import Gargantext.Core.Utils.Aeson (jsonOptions)
import Gargantext.Core.Utils.Prefix (dropPrefixT) import Gargantext.Core.Utils.Prefix (dropPrefixT)
import Gargantext.Prelude import Gargantext.Prelude
import PUBMED.Types qualified as PUBMED
import Test.QuickCheck (elements) import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary import Test.QuickCheck.Arbitrary
...@@ -32,13 +33,13 @@ import Test.QuickCheck.Arbitrary ...@@ -32,13 +33,13 @@ import Test.QuickCheck.Arbitrary
-- | Main Types -- | Main Types
-- TODO IsidoreAuth -- TODO IsidoreAuth
data ExternalAPIs = OpenAlex data ExternalAPIs = OpenAlex
| PubMed | PubMed (Maybe PUBMED.APIKey)
| Arxiv | Arxiv
| HAL | HAL
| IsTex | IsTex
| Isidore | Isidore
| EPO | EPO
deriving (Show, Eq, Generic, Enum, Bounded) deriving (Show, Eq, Generic)
-- | Main Instances -- | Main Instances
...@@ -46,11 +47,18 @@ instance FromJSON ExternalAPIs ...@@ -46,11 +47,18 @@ instance FromJSON ExternalAPIs
instance ToJSON ExternalAPIs instance ToJSON ExternalAPIs
externalAPIs :: [ExternalAPIs] externalAPIs :: [ExternalAPIs]
externalAPIs = [minBound .. maxBound] externalAPIs =
[ OpenAlex
, PubMed Nothing
, Arxiv
, HAL
, IsTex
, Isidore
, EPO ]
instance Arbitrary ExternalAPIs instance Arbitrary ExternalAPIs
where where
arbitrary = arbitraryBoundedEnum arbitrary = elements externalAPIs
instance ToSchema ExternalAPIs where instance ToSchema ExternalAPIs where
declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
......
...@@ -159,7 +159,6 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q ...@@ -159,7 +159,6 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
, _wq_datafield = datafield , _wq_datafield = datafield
, _wq_lang = l , _wq_lang = l
, _wq_flowListWith = flw , _wq_flowListWith = flw
, _wq_pubmedAPIKey = mPubmedAPIKey
, .. }) maybeLimit jobHandle = do , .. }) maybeLimit jobHandle = do
-- TODO ... -- TODO ...
$(logLocM) DEBUG $ "[addToCorpusWithQuery] (cid, dbs) " <> show (cid, dbs) $(logLocM) DEBUG $ "[addToCorpusWithQuery] (cid, dbs) " <> show (cid, dbs)
...@@ -194,7 +193,7 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q ...@@ -194,7 +193,7 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
let db = database2origin dbs let db = database2origin dbs
-- mPubmedAPIKey <- getUserPubmedAPIKey user -- mPubmedAPIKey <- getUserPubmedAPIKey user
-- printDebug "[addToCorpusWithQuery] mPubmedAPIKey" mPubmedAPIKey -- printDebug "[addToCorpusWithQuery] mPubmedAPIKey" mPubmedAPIKey
eTxt <- getDataText db (Multi l) q mPubmedAPIKey mEPOAuthKey maybeLimit eTxt <- getDataText db (Multi l) q mEPOAuthKey maybeLimit
-- printDebug "[G.A.N.C.New] lTxts" lTxts -- printDebug "[G.A.N.C.New] lTxts" lTxts
case eTxt of case eTxt of
......
...@@ -22,30 +22,31 @@ import Gargantext.API.Admin.Orchestrator.Types qualified as Types ...@@ -22,30 +22,31 @@ import Gargantext.API.Admin.Orchestrator.Types qualified as Types
import Gargantext.Core.Utils.Prefix (unPrefix) import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Action.Flow.Types (DataOrigin(..)) import Gargantext.Database.Action.Flow.Types (DataOrigin(..))
import Gargantext.Prelude import Gargantext.Prelude
import PUBMED.Types qualified as PUBMED
data Database = Empty data Database = Empty
| OpenAlex | OpenAlex
| PubMed | PubMed (Maybe PUBMED.APIKey)
| Arxiv | Arxiv
| HAL | HAL
| IsTex | IsTex
| Isidore | Isidore
| EPO | EPO
deriving (Eq, Show, Generic, Enum, Bounded) deriving (Eq, Show, Generic)
deriveJSON (unPrefix "") ''Database deriveJSON (unPrefix "") ''Database
instance ToSchema Database where instance ToSchema Database where
declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
database2origin :: Database -> DataOrigin database2origin :: Database -> DataOrigin
database2origin Empty = InternalOrigin Types.IsTex database2origin Empty = InternalOrigin Types.IsTex
database2origin OpenAlex = ExternalOrigin Types.OpenAlex database2origin OpenAlex = ExternalOrigin Types.OpenAlex
database2origin PubMed = ExternalOrigin Types.PubMed database2origin (PubMed k) = ExternalOrigin (Types.PubMed k)
database2origin Arxiv = ExternalOrigin Types.Arxiv database2origin Arxiv = ExternalOrigin Types.Arxiv
database2origin HAL = ExternalOrigin Types.HAL database2origin HAL = ExternalOrigin Types.HAL
database2origin IsTex = ExternalOrigin Types.IsTex database2origin IsTex = ExternalOrigin Types.IsTex
database2origin Isidore = ExternalOrigin Types.Isidore database2origin Isidore = ExternalOrigin Types.Isidore
database2origin EPO = ExternalOrigin Types.EPO database2origin EPO = ExternalOrigin Types.EPO
------------------------------------------------------------------------ ------------------------------------------------------------------------
data Datafield = Gargantext data Datafield = Gargantext
......
...@@ -36,7 +36,6 @@ import Gargantext.Core.Text.Corpus.Query qualified as Corpus ...@@ -36,7 +36,6 @@ import Gargantext.Core.Text.Corpus.Query qualified as Corpus
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Prelude hiding (get) import Gargantext.Prelude hiding (get)
import Gargantext.Utils.Jobs.Error import Gargantext.Utils.Jobs.Error
import PUBMED.Types qualified as PUBMED
import Servant.Client (ClientError) import Servant.Client (ClientError)
data GetCorpusError data GetCorpusError
...@@ -60,17 +59,16 @@ get :: ExternalAPIs ...@@ -60,17 +59,16 @@ get :: ExternalAPIs
-- If the provider doesn't support the search filtered by language, or if the language -- If the provider doesn't support the search filtered by language, or if the language
-- is not important, the frontend will simply send 'EN' to the backend. -- is not important, the frontend will simply send 'EN' to the backend.
-> Corpus.RawQuery -> Corpus.RawQuery
-> Maybe PUBMED.APIKey
-> Maybe EPO.AuthKey -> Maybe EPO.AuthKey
-> Text -> Text
-> Maybe Corpus.Limit -> Maybe Corpus.Limit
-- -> IO [HyperdataDocument] -- -> IO [HyperdataDocument]
-> IO (Either GetCorpusError (Maybe Integer, ConduitT () HyperdataDocument IO ())) -> IO (Either GetCorpusError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get externalAPI lang q mPubmedAPIKey epoAuthKey epoAPIUrl limit = do get externalAPI lang q epoAuthKey epoAPIUrl limit = do
-- For PUBMED, HAL, IsTex, Isidore and OpenAlex, we want to send the query as-it. -- For PUBMED, HAL, IsTex, Isidore and OpenAlex, we want to send the query as-it.
-- For Arxiv we parse the query into a structured boolean query we submit over. -- For Arxiv we parse the query into a structured boolean query we submit over.
case externalAPI of case externalAPI of
PubMed -> PubMed mPubmedAPIKey ->
first (ExternalAPIError externalAPI) <$> PUBMED.get (fromMaybe "" mPubmedAPIKey) q limit first (ExternalAPIError externalAPI) <$> PUBMED.get (fromMaybe "" mPubmedAPIKey) q limit
OpenAlex -> OpenAlex ->
first (ExternalAPIError externalAPI) <$> OpenAlex.get (fromMaybe "" Nothing {- email -}) q (Just $ toISO639 lang) limit first (ExternalAPIError externalAPI) <$> OpenAlex.get (fromMaybe "" Nothing {- email -}) q (Just $ toISO639 lang) limit
......
...@@ -20,7 +20,7 @@ Portability : POSIX ...@@ -20,7 +20,7 @@ Portability : POSIX
{-# LANGUAGE InstanceSigs #-} {-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-} {-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeOperators #-}
module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list) module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
...@@ -34,9 +34,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list) ...@@ -34,9 +34,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
, flowCorpus , flowCorpus
, flowCorpusUser , flowCorpusUser
, flowAnnuaire , flowAnnuaire
, insertMasterDocs
, buildSocialList , buildSocialList
, saveDocNgramsWith
, addDocumentsToHyperCorpus , addDocumentsToHyperCorpus
, reIndexWith , reIndexWith
...@@ -109,7 +107,6 @@ import Gargantext.Database.Schema.Node (node_hyperdata) ...@@ -109,7 +107,6 @@ import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude hiding (to) import Gargantext.Prelude hiding (to)
import Gargantext.System.Logging ( logLocM, LogLevel(DEBUG), MonadLogger ) import Gargantext.System.Logging ( logLocM, LogLevel(DEBUG), MonadLogger )
import Gargantext.Utils.Jobs.Monad ( JobHandle, MonadJobStatus(..) ) import Gargantext.Utils.Jobs.Monad ( JobHandle, MonadJobStatus(..) )
import PUBMED.Types qualified as PUBMED
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- Imports for upgrade function -- Imports for upgrade function
...@@ -129,20 +126,20 @@ printDataText (DataNew (maybeInt, conduitData)) = do ...@@ -129,20 +126,20 @@ printDataText (DataNew (maybeInt, conduitData)) = do
res <- C.runConduit (conduitData .| CL.consume) res <- C.runConduit (conduitData .| CL.consume)
putText $ show (maybeInt, res) putText $ show (maybeInt, res)
-- TODO use the split parameter in config file -- TODO use the split parameter in config file
getDataText :: (HasNodeError err) getDataText :: (HasNodeError err)
=> DataOrigin => DataOrigin
-> TermType Lang -> TermType Lang
-> API.RawQuery -> API.RawQuery
-> Maybe PUBMED.APIKey
-> Maybe EPO.AuthKey -> Maybe EPO.AuthKey
-> Maybe API.Limit -> Maybe API.Limit
-> DBCmdWithEnv env err (Either API.GetCorpusError DataText) -> DBCmdWithEnv env err (Either API.GetCorpusError DataText)
getDataText (ExternalOrigin api) la q mPubmedAPIKey mAuthKey li = do getDataText (ExternalOrigin api) la q mAuthKey li = do
cfg <- view hasConfig cfg <- view hasConfig
eRes <- liftBase $ API.get api (_tt_lang la) q mPubmedAPIKey mAuthKey (_ac_epo_api_url $ _gc_apis cfg) li eRes <- liftBase $ API.get api (_tt_lang la) q mAuthKey (_ac_epo_api_url $ _gc_apis cfg) li
pure $ DataNew <$> eRes pure $ DataNew <$> eRes
getDataText (InternalOrigin _) la q _ _ _li = do getDataText (InternalOrigin _) la q _ _li = do
(_masterUserId, _masterRootId, cId) <- getOrMkRootWithCorpus MkCorpusUserMaster (Nothing :: Maybe HyperdataCorpus) (_masterUserId, _masterRootId, cId) <- getOrMkRootWithCorpus MkCorpusUserMaster (Nothing :: Maybe HyperdataCorpus)
ids <- map fst <$> searchDocInDatabase cId (stem (_tt_lang la) GargPorterAlgorithm $ API.getRawQuery q) ids <- map fst <$> searchDocInDatabase cId (stem (_tt_lang la) GargPorterAlgorithm $ API.getRawQuery q)
pure $ Right $ DataOld ids pure $ Right $ DataOld ids
...@@ -154,7 +151,7 @@ getDataText_Debug :: (HasNodeError err) ...@@ -154,7 +151,7 @@ getDataText_Debug :: (HasNodeError err)
-> Maybe API.Limit -> Maybe API.Limit
-> DBCmdWithEnv env err () -> DBCmdWithEnv env err ()
getDataText_Debug a l q li = do getDataText_Debug a l q li = do
result <- getDataText a l q Nothing Nothing li result <- getDataText a l q Nothing li
case result of case result of
Left err -> liftBase $ putText $ show err Left err -> liftBase $ putText $ show err
Right res -> liftBase $ printDataText res Right res -> liftBase $ printDataText res
...@@ -443,6 +440,7 @@ insertMasterDocs ncs c lang hs = do ...@@ -443,6 +440,7 @@ insertMasterDocs ncs c lang hs = do
-- _cooc <- insertDefaultNode NodeListCooc lId masterUserId -- _cooc <- insertDefaultNode NodeListCooc lId masterUserId
pure $ map contextId2NodeId ids' pure $ map contextId2NodeId ids'
saveDocNgramsWith :: (IsDBCmd env err m) saveDocNgramsWith :: (IsDBCmd env err m)
=> ListId => ListId
-> HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId (TermsWeight, TermsCount))) -> HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment