Commit adc4c653 authored by Alexandre Delanoë

Merge remote-tracking branch 'origin/dev-openalex' into dev

parents 364a44c2 a522479e
Pipeline #4315 canceled with stages
...@@ -143,6 +143,11 @@ source-repository-package ...@@ -143,6 +143,11 @@ source-repository-package
location: https://github.com/rspeer/wikiparsec.git location: https://github.com/rspeer/wikiparsec.git
tag: 9637a82344bb70f7fa8f02e75db3c081ccd434ce tag: 9637a82344bb70f7fa8f02e75db3c081ccd434ce
source-repository-package
type: git
location: https://gitlab.iscpif.fr/gargantext/crawlers/openalex.git
tag: 5aac73a7be1b86dcc312936c000e5e5444144491
allow-older: * allow-older: *
allow-newer: * allow-newer: *
......
cabal-version: 1.12 cabal-version: 1.12
-- This file has been generated from package.yaml by hpack version 0.34.6. -- This file has been generated from package.yaml by hpack version 0.35.1.
-- --
-- see: https://github.com/sol/hpack -- see: https://github.com/sol/hpack
...@@ -59,11 +59,11 @@ library ...@@ -59,11 +59,11 @@ library
Gargantext.Core.Text.Corpus.API Gargantext.Core.Text.Corpus.API
Gargantext.Core.Text.Corpus.API.Arxiv Gargantext.Core.Text.Corpus.API.Arxiv
Gargantext.Core.Text.Corpus.API.Pubmed Gargantext.Core.Text.Corpus.API.Pubmed
Gargantext.Core.Text.Corpus.API.OpenAlex
Gargantext.Core.Text.Corpus.Query Gargantext.Core.Text.Corpus.Query
Gargantext.Core.Text.Corpus.Parsers Gargantext.Core.Text.Corpus.Parsers
Gargantext.Core.Text.Corpus.Parsers.CSV Gargantext.Core.Text.Corpus.Parsers.CSV
Gargantext.Core.Text.Corpus.Parsers.Date.Parsec Gargantext.Core.Text.Corpus.Parsers.Date.Parsec
Gargantext.Core.Text.Corpus.Parsers.JSON
Gargantext.Core.Text.List.Formats.CSV Gargantext.Core.Text.List.Formats.CSV
Gargantext.Core.Text.Metrics Gargantext.Core.Text.Metrics
Gargantext.Core.Text.Metrics.CharByChar Gargantext.Core.Text.Metrics.CharByChar
...@@ -202,6 +202,7 @@ library ...@@ -202,6 +202,7 @@ library
Gargantext.Core.Text.Corpus.Parsers.GrandDebat Gargantext.Core.Text.Corpus.Parsers.GrandDebat
Gargantext.Core.Text.Corpus.Parsers.Iramuteq Gargantext.Core.Text.Corpus.Parsers.Iramuteq
Gargantext.Core.Text.Corpus.Parsers.Isidore Gargantext.Core.Text.Corpus.Parsers.Isidore
Gargantext.Core.Text.Corpus.Parsers.JSON
Gargantext.Core.Text.Corpus.Parsers.Json2Csv Gargantext.Core.Text.Corpus.Parsers.Json2Csv
Gargantext.Core.Text.Corpus.Parsers.RIS Gargantext.Core.Text.Corpus.Parsers.RIS
Gargantext.Core.Text.Corpus.Parsers.RIS.Presse Gargantext.Core.Text.Corpus.Parsers.RIS.Presse
...@@ -457,6 +458,7 @@ library ...@@ -457,6 +458,7 @@ library
, natural-transformation , natural-transformation
, network-uri , network-uri
, opaleye , opaleye
, openalex
, pandoc , pandoc
, parallel , parallel
, parsec , parsec
......
...@@ -88,11 +88,11 @@ library: ...@@ -88,11 +88,11 @@ library:
- Gargantext.Core.Text.Corpus.API - Gargantext.Core.Text.Corpus.API
- Gargantext.Core.Text.Corpus.API.Arxiv - Gargantext.Core.Text.Corpus.API.Arxiv
- Gargantext.Core.Text.Corpus.API.Pubmed - Gargantext.Core.Text.Corpus.API.Pubmed
- Gargantext.Core.Text.Corpus.API.OpenAlex
- Gargantext.Core.Text.Corpus.Query - Gargantext.Core.Text.Corpus.Query
- Gargantext.Core.Text.Corpus.Parsers - Gargantext.Core.Text.Corpus.Parsers
- Gargantext.Core.Text.Corpus.Parsers.CSV - Gargantext.Core.Text.Corpus.Parsers.CSV
- Gargantext.Core.Text.Corpus.Parsers.Date.Parsec - Gargantext.Core.Text.Corpus.Parsers.Date.Parsec
- Gargantext.Core.Text.Corpus.Parsers.JSON
- Gargantext.Core.Text.List.Formats.CSV - Gargantext.Core.Text.List.Formats.CSV
- Gargantext.Core.Text.Metrics - Gargantext.Core.Text.Metrics
- Gargantext.Core.Text.Metrics.CharByChar - Gargantext.Core.Text.Metrics.CharByChar
...@@ -245,6 +245,7 @@ library: ...@@ -245,6 +245,7 @@ library:
- natural-transformation - natural-transformation
- network-uri - network-uri
- opaleye - opaleye
- openalex
- pandoc - pandoc
- parallel - parallel
- parsec - parsec
......
...@@ -34,7 +34,8 @@ instance Arbitrary a => Arbitrary (JobOutput a) where ...@@ -34,7 +34,8 @@ instance Arbitrary a => Arbitrary (JobOutput a) where
-- | Main Types -- | Main Types
-- TODO IsidoreAuth -- TODO IsidoreAuth
data ExternalAPIs = PubMed data ExternalAPIs = OpenAlex
| PubMed
| Arxiv | Arxiv
| HAL | HAL
| IsTex | IsTex
......
...@@ -19,6 +19,7 @@ import Gargantext.Core.Utils.Prefix (unPrefix) ...@@ -19,6 +19,7 @@ import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Action.Flow (DataOrigin(..)) import Gargantext.Database.Action.Flow (DataOrigin(..))
data Database = Empty data Database = Empty
| OpenAlex
| PubMed | PubMed
| Arxiv | Arxiv
| HAL | HAL
...@@ -34,12 +35,13 @@ instance ToSchema Database where ...@@ -34,12 +35,13 @@ instance ToSchema Database where
declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
database2origin :: Database -> DataOrigin database2origin :: Database -> DataOrigin
database2origin Empty = InternalOrigin Types.IsTex database2origin Empty = InternalOrigin Types.IsTex
database2origin PubMed = ExternalOrigin Types.PubMed database2origin OpenAlex = ExternalOrigin Types.OpenAlex
database2origin Arxiv = ExternalOrigin Types.Arxiv database2origin PubMed = ExternalOrigin Types.PubMed
database2origin HAL = ExternalOrigin Types.HAL database2origin Arxiv = ExternalOrigin Types.Arxiv
database2origin IsTex = ExternalOrigin Types.IsTex database2origin HAL = ExternalOrigin Types.HAL
database2origin Isidore = ExternalOrigin Types.Isidore database2origin IsTex = ExternalOrigin Types.IsTex
database2origin Isidore = ExternalOrigin Types.Isidore
------------------------------------------------------------------------ ------------------------------------------------------------------------
data Datafield = Gargantext data Datafield = Gargantext
......
...@@ -31,6 +31,7 @@ import qualified Gargantext.Core.Text.Corpus.API.Arxiv as Arxiv ...@@ -31,6 +31,7 @@ import qualified Gargantext.Core.Text.Corpus.API.Arxiv as Arxiv
import qualified Gargantext.Core.Text.Corpus.API.Hal as HAL import qualified Gargantext.Core.Text.Corpus.API.Hal as HAL
import qualified Gargantext.Core.Text.Corpus.API.Isidore as ISIDORE import qualified Gargantext.Core.Text.Corpus.API.Isidore as ISIDORE
import qualified Gargantext.Core.Text.Corpus.API.Istex as ISTEX import qualified Gargantext.Core.Text.Corpus.API.Istex as ISTEX
import qualified Gargantext.Core.Text.Corpus.API.OpenAlex as OpenAlex
import qualified Gargantext.Core.Text.Corpus.API.Pubmed as PUBMED import qualified Gargantext.Core.Text.Corpus.API.Pubmed as PUBMED
import qualified Gargantext.Core.Text.Corpus.Query as Corpus import qualified Gargantext.Core.Text.Corpus.Query as Corpus
import qualified PUBMED.Types as PUBMED import qualified PUBMED.Types as PUBMED
...@@ -55,6 +56,8 @@ get externalAPI la q mPubmedAPIKey limit = do ...@@ -55,6 +56,8 @@ get externalAPI la q mPubmedAPIKey limit = do
case Corpus.parseQuery q of case Corpus.parseQuery q of
Left err -> pure $ Left $ InvalidInputQuery q (T.pack err) Left err -> pure $ Left $ InvalidInputQuery q (T.pack err)
Right corpusQuery -> case externalAPI of Right corpusQuery -> case externalAPI of
OpenAlex -> first ExternalAPIError <$>
OpenAlex.get (fromMaybe "" Nothing {- email -}) q limit
PubMed -> first ExternalAPIError <$> PubMed -> first ExternalAPIError <$>
PUBMED.get (fromMaybe "" mPubmedAPIKey) corpusQuery limit PUBMED.get (fromMaybe "" mPubmedAPIKey) corpusQuery limit
--docs <- PUBMED.get q default_limit -- EN only by default --docs <- PUBMED.get q default_limit -- EN only by default
......
{-|
Module : Gargantext.Core.Text.Corpus.API.OpenAlex
Description : OpenAlex API connection
Copyright : (c) CNRS, 2023
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Text.Corpus.API.OpenAlex where
import Conduit
import qualified Data.Text as T
import Gargantext.Core.Text.Corpus.Query as Corpus
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Protolude
import qualified OpenAlex as OA
import qualified OpenAlex.Types as OA
import Servant.Client (ClientError)
-- | Search OpenAlex works matching a raw query and stream them as
-- 'HyperdataDocument's.
--
-- Arguments, in order:
--
-- * a contact e-mail: accepted for interface compatibility, currently unused
--   by this function;
-- * the raw (unparsed) query, whose text is handed to 'OA.fetchWorksC';
-- * an optional cap on the number of documents taken from the stream.
--   When no limit is given, at most 1000 documents are taken.
--
-- On success the result pairs the 'Maybe Integer' returned by
-- 'OA.fetchWorksC' (passed through untouched) with a conduit of converted
-- documents; a client failure is returned as 'Left'.
get :: Text
    -> Corpus.RawQuery
    -> Maybe Limit
    -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get _email q mLimit = do
  eRes <- OA.fetchWorksC Nothing $ Just $ Corpus.getRawQuery q
  pure $ (\(len, docsC) -> (len, docsC .| takeC maxDocs .| mapC toDoc)) <$> eRes
  where
    -- Honour the caller's limit instead of always truncating at 1000;
    -- 1000 remains the fallback when the caller does not specify one.
    maxDocs :: Int
    maxDocs = maybe 1000 Corpus.getLimit mLimit
-- | Convert an OpenAlex 'OA.Work' into a 'HyperdataDocument'.
--
-- The work identifier is used for both unique-id fields. Authors and
-- institutions are flattened into comma-separated text. The publication
-- month, day, hour, minute and second are not filled in yet (TODO).
toDoc :: OA.Work -> HyperdataDocument
toDoc (OA.Work { .. } ) =
  HyperdataDocument
    { _hd_bdd                = Just "OpenAlex"
    , _hd_doi                = doi
    , _hd_url                = url
    , _hd_uniqId             = Just id
    , _hd_uniqIdBdd          = Just id
    , _hd_page               = pageNumber biblio
    , _hd_title              = title
    , _hd_authors            = authorNames authorships
    , _hd_institutes         = institutionNames authorships
    , _hd_source             = sourceName
    , _hd_abstract           = Just abstract_reconstructed
    , _hd_publication_date   = Just (show publication_date)
    , _hd_publication_year   = Just publication_year
    , _hd_publication_month  = Nothing -- TODO
    , _hd_publication_day    = Nothing -- TODO
    , _hd_publication_hour   = Nothing -- TODO
    , _hd_publication_minute = Nothing -- TODO
    , _hd_publication_second = Nothing -- TODO
    , _hd_language_iso2      = language
    }
  where
    -- First page of the bibliographic record, if present and readable as Int.
    pageNumber :: OA.Biblio -> Maybe Int
    pageNumber OA.Biblio { first_page } = first_page >>= readMaybe . T.unpack

    -- Display names of all authors, joined with ", "; Nothing when there are none.
    authorNames :: [OA.Authorship] -> Maybe Text
    authorNames ships
      | null ships = Nothing
      | otherwise  = Just . T.intercalate ", " $ map authorName ships
      where
        authorName :: OA.Authorship -> Text
        authorName OA.Authorship { author = OA.DehydratedAuthor { display_name = dn } } = dn

    -- One entry per authorship, joined with ", ". Within an entry every ", "
    -- is rewritten to " - ", so that commas only separate authorships.
    institutionNames :: [OA.Authorship] -> Maybe Text
    institutionNames ships
      | null ships = Nothing
      | otherwise  = Just . T.intercalate ", " $ map (T.replace ", " " - " . perAuthorship) ships
      where
        perAuthorship OA.Authorship { institutions } =
          T.intercalate ", " (map institutionName institutions)
        institutionName :: OA.DehydratedInstitution -> Text
        institutionName OA.DehydratedInstitution { display_name = dn } = dn

    -- Display name of the primary location's source, when both are present.
    sourceName :: Maybe Text
    sourceName = primary_location >>= locationSource
      where
        locationSource OA.Location { source = s } = fmap sourceDisplayName s
        sourceDisplayName OA.DehydratedSource { display_name = dn } = dn
...@@ -59,6 +59,8 @@ extra-deps: ...@@ -59,6 +59,8 @@ extra-deps:
commit: 3db385e767d2100d8abe900833c6e7de3ac55e1b commit: 3db385e767d2100d8abe900833c6e7de3ac55e1b
- git: https://gitlab.iscpif.fr/gargantext/crawlers/arxiv-api.git - git: https://gitlab.iscpif.fr/gargantext/crawlers/arxiv-api.git
commit: 2d7e5753cbbce248b860b571a0e9885415c846f7 commit: 2d7e5753cbbce248b860b571a0e9885415c846f7
- git: https://gitlab.iscpif.fr/gargantext/crawlers/openalex.git
commit: 723a641f108e11e0b57ecbf22d279aca81317c0a
# NP libs # NP libs
- git: https://github.com/alpmestan/servant-job.git - git: https://github.com/alpmestan/servant-job.git
commit: b4182487cfe479777c11ca19f3c0d47840b376f6 commit: b4182487cfe479777c11ca19f3c0d47840b376f6
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment