[epo] some work toward getting EPO into gargantext

parent e30d8cb7
Pipeline #4530 failed with stages
in 26 seconds
......@@ -10,7 +10,7 @@ STORE_DIR="${1:-$DEFAULT_STORE}"
# changes, you have to make sure to update the `expected_cabal_project_hash` with the
# `sha256sum` result calculated on the `cabal.project`. This ensures the `cabal.project`
# stays deterministic so that CI cache can kick in.
expected_cabal_project_hash="2754bf61cc7a2aa7b29345ffe34dc1e90a06426f00fc39da9f793cd828be4e15"
expected_cabal_project_hash="720a064535707fc28b8c7b67b1560698d13610a4c1f8a79176b4c5bd40514979"
cabal --store-dir=$STORE_DIR v2-update 'hackage.haskell.org,2023-06-24T21:28:46Z'
......
......@@ -148,6 +148,11 @@ source-repository-package
location: https://github.com/rspeer/wikiparsec.git
tag: 9637a82344bb70f7fa8f02e75db3c081ccd434ce
source-repository-package
type: git
location: ssh://git@gitlab.iscpif.fr:20022/gargantext/crawlers/epo.git
tag: ac9f20b36e8659267d7525fe2c74c7286a0350cb
allow-older: *
allow-newer: *
......
......@@ -62,6 +62,7 @@ library
Gargantext.Core.Text.Context
Gargantext.Core.Text.Corpus.API
Gargantext.Core.Text.Corpus.API.Arxiv
Gargantext.Core.Text.Corpus.API.EPO
Gargantext.Core.Text.Corpus.API.Pubmed
Gargantext.Core.Text.Corpus.API.OpenAlex
Gargantext.Core.Text.Corpus.Query
......@@ -403,11 +404,6 @@ library
, conduit-extra ^>= 1.3.5
, containers ^>= 0.6.5.1
, contravariant ^>= 1.5.5
, crawlerArxiv
, crawlerHAL
, crawlerISTEX
, crawlerIsidore
, crawlerPubMed
, cron ^>= 0.7.0
, cryptohash ^>= 0.11.9
, data-time-segment ^>= 0.1.0.0
......@@ -465,7 +461,6 @@ library
, natural-transformation ^>= 0.4
, network-uri ^>= 2.6.4.1
, opaleye ^>= 0.9.6.1
, openalex
, pandoc ^>= 2.14.0.3
, parallel ^>= 3.2.2.0
, parsec ^>= 3.1.14.0
......@@ -551,6 +546,15 @@ library
, yaml ^>= 0.11.8.0
, zip ^>= 1.7.2
, zlib ^>= 0.6.2.3
-- crawlers
, crawlerArxiv
, crawlerHAL
, crawlerISTEX
, crawlerIsidore
, crawlerPubMed
, epo
, openalex
default-language: Haskell2010
executable gargantext-admin
......
......@@ -40,6 +40,7 @@ data ExternalAPIs = OpenAlex
| HAL
| IsTex
| Isidore
| EPO
deriving (Show, Eq, Generic, Enum, Bounded)
......
......@@ -22,19 +22,21 @@ import Conduit
import Data.Bifunctor
import Data.Either (Either(..))
import Data.Maybe
import qualified Data.Text as T
import Data.Text qualified as T
import EPO qualified as EPO
import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs)
import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude
import qualified Gargantext.Core.Text.Corpus.API.Arxiv as Arxiv
import qualified Gargantext.Core.Text.Corpus.API.Hal as HAL
import qualified Gargantext.Core.Text.Corpus.API.Isidore as ISIDORE
import qualified Gargantext.Core.Text.Corpus.API.Istex as ISTEX
import qualified Gargantext.Core.Text.Corpus.API.OpenAlex as OpenAlex
import qualified Gargantext.Core.Text.Corpus.API.Pubmed as PUBMED
import qualified Gargantext.Core.Text.Corpus.Query as Corpus
import qualified PUBMED.Types as PUBMED
import Gargantext.Core.Text.Corpus.API.Arxiv qualified as Arxiv
import Gargantext.Core.Text.Corpus.API.EPO qualified as EPOAPI
import Gargantext.Core.Text.Corpus.API.Hal qualified as HAL
import Gargantext.Core.Text.Corpus.API.Isidore qualified as ISIDORE
import Gargantext.Core.Text.Corpus.API.Istex qualified as ISTEX
import Gargantext.Core.Text.Corpus.API.OpenAlex qualified as OpenAlex
import Gargantext.Core.Text.Corpus.API.Pubmed qualified as PUBMED
import Gargantext.Core.Text.Corpus.Query qualified as Corpus
import PUBMED.Types qualified as PUBMED
import Servant.Client (ClientError)
data GetCorpusError
......@@ -49,10 +51,11 @@ get :: ExternalAPIs
-> Lang
-> Corpus.RawQuery
-> Maybe PUBMED.APIKey
-> Maybe EPO.AuthKey
-> Maybe Corpus.Limit
-- -> IO [HyperdataDocument]
-> IO (Either GetCorpusError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get externalAPI la q mPubmedAPIKey limit = do
get externalAPI la q mPubmedAPIKey mEPOAuthKey limit = do
case Corpus.parseQuery q of
Left err -> pure $ Left $ InvalidInputQuery q (T.pack err)
Right corpusQuery -> case externalAPI of
......@@ -68,3 +71,5 @@ get externalAPI la q mPubmedAPIKey limit = do
pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs)
Isidore -> do docs <- ISIDORE.get la (Corpus.getLimit <$> limit) (Just $ Corpus.getRawQuery q) Nothing
pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs)
EPO -> first ExternalAPIError <$>
EPOAPI.get mEPOAuthKey q la limit
......@@ -10,13 +10,13 @@ Portability : POSIX
module Gargantext.Core.Text.Corpus.API.OpenAlex where
import Conduit
import qualified Data.Text as T
import Data.Text qualified as T
import Gargantext.Core (Lang, toISO639Lang)
import Gargantext.Core.Text.Corpus.Query as Corpus
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Protolude
import qualified OpenAlex as OA
import qualified OpenAlex.Types as OA
import OpenAlex qualified as OA
import OpenAlex.Types qualified as OA
import Servant.Client (ClientError)
......
......@@ -61,6 +61,8 @@ extra-deps:
commit: 2d7e5753cbbce248b860b571a0e9885415c846f7
- git: https://gitlab.iscpif.fr/gargantext/crawlers/openalex.git
commit: dab07cb89e8ab8eaaff8619f5e21d944d9c526ab
- git: ssh://git@gitlab.iscpif.fr:20022/gargantext/crawlers/epo.git
commit: ac9f20b36e8659267d7525fe2c74c7286a0350cb
# NP libs
- git: https://github.com/alpmestan/servant-job.git
commit: b4182487cfe479777c11ca19f3c0d47840b376f6
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment