[epo] some work toward getting EPO into gargantext

parent e30d8cb7
Pipeline #4530 failed with stages
in 26 seconds
...@@ -10,7 +10,7 @@ STORE_DIR="${1:-$DEFAULT_STORE}" ...@@ -10,7 +10,7 @@ STORE_DIR="${1:-$DEFAULT_STORE}"
# changes, you have to make sure to update the `expected_cabal_project_hash` with the # changes, you have to make sure to update the `expected_cabal_project_hash` with the
# `sha256sum` result calculated on the `cabal.project`. This ensures the `cabal.project` # `sha256sum` result calculated on the `cabal.project`. This ensures the `cabal.project`
# stays deterministic so that CI cache can kick in. # stays deterministic so that CI cache can kick in.
expected_cabal_project_hash="2754bf61cc7a2aa7b29345ffe34dc1e90a06426f00fc39da9f793cd828be4e15" expected_cabal_project_hash="720a064535707fc28b8c7b67b1560698d13610a4c1f8a79176b4c5bd40514979"
cabal --store-dir=$STORE_DIR v2-update 'hackage.haskell.org,2023-06-24T21:28:46Z' cabal --store-dir=$STORE_DIR v2-update 'hackage.haskell.org,2023-06-24T21:28:46Z'
......
...@@ -148,6 +148,11 @@ source-repository-package ...@@ -148,6 +148,11 @@ source-repository-package
location: https://github.com/rspeer/wikiparsec.git location: https://github.com/rspeer/wikiparsec.git
tag: 9637a82344bb70f7fa8f02e75db3c081ccd434ce tag: 9637a82344bb70f7fa8f02e75db3c081ccd434ce
source-repository-package
type: git
location: ssh://git@gitlab.iscpif.fr:20022/gargantext/crawlers/epo.git
tag: ac9f20b36e8659267d7525fe2c74c7286a0350cb
allow-older: * allow-older: *
allow-newer: * allow-newer: *
......
...@@ -62,6 +62,7 @@ library ...@@ -62,6 +62,7 @@ library
Gargantext.Core.Text.Context Gargantext.Core.Text.Context
Gargantext.Core.Text.Corpus.API Gargantext.Core.Text.Corpus.API
Gargantext.Core.Text.Corpus.API.Arxiv Gargantext.Core.Text.Corpus.API.Arxiv
Gargantext.Core.Text.Corpus.API.EPO
Gargantext.Core.Text.Corpus.API.Pubmed Gargantext.Core.Text.Corpus.API.Pubmed
Gargantext.Core.Text.Corpus.API.OpenAlex Gargantext.Core.Text.Corpus.API.OpenAlex
Gargantext.Core.Text.Corpus.Query Gargantext.Core.Text.Corpus.Query
...@@ -403,11 +404,6 @@ library ...@@ -403,11 +404,6 @@ library
, conduit-extra ^>= 1.3.5 , conduit-extra ^>= 1.3.5
, containers ^>= 0.6.5.1 , containers ^>= 0.6.5.1
, contravariant ^>= 1.5.5 , contravariant ^>= 1.5.5
, crawlerArxiv
, crawlerHAL
, crawlerISTEX
, crawlerIsidore
, crawlerPubMed
, cron ^>= 0.7.0 , cron ^>= 0.7.0
, cryptohash ^>= 0.11.9 , cryptohash ^>= 0.11.9
, data-time-segment ^>= 0.1.0.0 , data-time-segment ^>= 0.1.0.0
...@@ -465,7 +461,6 @@ library ...@@ -465,7 +461,6 @@ library
, natural-transformation ^>= 0.4 , natural-transformation ^>= 0.4
, network-uri ^>= 2.6.4.1 , network-uri ^>= 2.6.4.1
, opaleye ^>= 0.9.6.1 , opaleye ^>= 0.9.6.1
, openalex
, pandoc ^>= 2.14.0.3 , pandoc ^>= 2.14.0.3
, parallel ^>= 3.2.2.0 , parallel ^>= 3.2.2.0
, parsec ^>= 3.1.14.0 , parsec ^>= 3.1.14.0
...@@ -551,6 +546,15 @@ library ...@@ -551,6 +546,15 @@ library
, yaml ^>= 0.11.8.0 , yaml ^>= 0.11.8.0
, zip ^>= 1.7.2 , zip ^>= 1.7.2
, zlib ^>= 0.6.2.3 , zlib ^>= 0.6.2.3
-- crawlers
, crawlerArxiv
, crawlerHAL
, crawlerISTEX
, crawlerIsidore
, crawlerPubMed
, epo
, openalex
default-language: Haskell2010 default-language: Haskell2010
executable gargantext-admin executable gargantext-admin
......
...@@ -40,6 +40,7 @@ data ExternalAPIs = OpenAlex ...@@ -40,6 +40,7 @@ data ExternalAPIs = OpenAlex
| HAL | HAL
| IsTex | IsTex
| Isidore | Isidore
| EPO
deriving (Show, Eq, Generic, Enum, Bounded) deriving (Show, Eq, Generic, Enum, Bounded)
......
...@@ -22,19 +22,21 @@ import Conduit ...@@ -22,19 +22,21 @@ import Conduit
import Data.Bifunctor import Data.Bifunctor
import Data.Either (Either(..)) import Data.Either (Either(..))
import Data.Maybe import Data.Maybe
import qualified Data.Text as T import Data.Text qualified as T
import EPO qualified as EPO
import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs) import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs)
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude import Gargantext.Prelude
import qualified Gargantext.Core.Text.Corpus.API.Arxiv as Arxiv import Gargantext.Core.Text.Corpus.API.Arxiv qualified as Arxiv
import qualified Gargantext.Core.Text.Corpus.API.Hal as HAL import Gargantext.Core.Text.Corpus.API.EPO qualified as EPOAPI
import qualified Gargantext.Core.Text.Corpus.API.Isidore as ISIDORE import Gargantext.Core.Text.Corpus.API.Hal qualified as HAL
import qualified Gargantext.Core.Text.Corpus.API.Istex as ISTEX import Gargantext.Core.Text.Corpus.API.Isidore qualified as ISIDORE
import qualified Gargantext.Core.Text.Corpus.API.OpenAlex as OpenAlex import Gargantext.Core.Text.Corpus.API.Istex qualified as ISTEX
import qualified Gargantext.Core.Text.Corpus.API.Pubmed as PUBMED import Gargantext.Core.Text.Corpus.API.OpenAlex qualified as OpenAlex
import qualified Gargantext.Core.Text.Corpus.Query as Corpus import Gargantext.Core.Text.Corpus.API.Pubmed qualified as PUBMED
import qualified PUBMED.Types as PUBMED import Gargantext.Core.Text.Corpus.Query qualified as Corpus
import PUBMED.Types qualified as PUBMED
import Servant.Client (ClientError) import Servant.Client (ClientError)
data GetCorpusError data GetCorpusError
...@@ -49,10 +51,11 @@ get :: ExternalAPIs ...@@ -49,10 +51,11 @@ get :: ExternalAPIs
-> Lang -> Lang
-> Corpus.RawQuery -> Corpus.RawQuery
-> Maybe PUBMED.APIKey -> Maybe PUBMED.APIKey
-> Maybe EPO.AuthKey
-> Maybe Corpus.Limit -> Maybe Corpus.Limit
-- -> IO [HyperdataDocument] -- -> IO [HyperdataDocument]
-> IO (Either GetCorpusError (Maybe Integer, ConduitT () HyperdataDocument IO ())) -> IO (Either GetCorpusError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get externalAPI la q mPubmedAPIKey limit = do get externalAPI la q mPubmedAPIKey mEPOAuthKey limit = do
case Corpus.parseQuery q of case Corpus.parseQuery q of
Left err -> pure $ Left $ InvalidInputQuery q (T.pack err) Left err -> pure $ Left $ InvalidInputQuery q (T.pack err)
Right corpusQuery -> case externalAPI of Right corpusQuery -> case externalAPI of
...@@ -68,3 +71,5 @@ get externalAPI la q mPubmedAPIKey limit = do ...@@ -68,3 +71,5 @@ get externalAPI la q mPubmedAPIKey limit = do
pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs) pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs)
Isidore -> do docs <- ISIDORE.get la (Corpus.getLimit <$> limit) (Just $ Corpus.getRawQuery q) Nothing Isidore -> do docs <- ISIDORE.get la (Corpus.getLimit <$> limit) (Just $ Corpus.getRawQuery q) Nothing
pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs) pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs)
EPO -> first ExternalAPIError <$>
EPOAPI.get mEPOAuthKey q la limit
...@@ -10,13 +10,13 @@ Portability : POSIX ...@@ -10,13 +10,13 @@ Portability : POSIX
module Gargantext.Core.Text.Corpus.API.OpenAlex where module Gargantext.Core.Text.Corpus.API.OpenAlex where
import Conduit import Conduit
import qualified Data.Text as T import Data.Text qualified as T
import Gargantext.Core (Lang, toISO639Lang) import Gargantext.Core (Lang, toISO639Lang)
import Gargantext.Core.Text.Corpus.Query as Corpus import Gargantext.Core.Text.Corpus.Query as Corpus
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Protolude import Protolude
import qualified OpenAlex as OA import OpenAlex qualified as OA
import qualified OpenAlex.Types as OA import OpenAlex.Types qualified as OA
import Servant.Client (ClientError) import Servant.Client (ClientError)
......
...@@ -61,6 +61,8 @@ extra-deps: ...@@ -61,6 +61,8 @@ extra-deps:
commit: 2d7e5753cbbce248b860b571a0e9885415c846f7 commit: 2d7e5753cbbce248b860b571a0e9885415c846f7
- git: https://gitlab.iscpif.fr/gargantext/crawlers/openalex.git - git: https://gitlab.iscpif.fr/gargantext/crawlers/openalex.git
commit: dab07cb89e8ab8eaaff8619f5e21d944d9c526ab commit: dab07cb89e8ab8eaaff8619f5e21d944d9c526ab
- git: ssh://git@gitlab.iscpif.fr:20022/gargantext/crawlers/epo.git
commit: ac9f20b36e8659267d7525fe2c74c7286a0350cb
# NP libs # NP libs
- git: https://github.com/alpmestan/servant-job.git - git: https://github.com/alpmestan/servant-job.git
commit: b4182487cfe479777c11ca19f3c0d47840b376f6 commit: b4182487cfe479777c11ca19f3c0d47840b376f6
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment