Commit b454c5eb authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge remote-tracking branch 'origin/121-dev-arxiv' into dev-merge

parents 75afdbb5 b691ddf2
...@@ -107,7 +107,6 @@ library ...@@ -107,7 +107,6 @@ library
Gargantext.API.Flow Gargantext.API.Flow
Gargantext.API.GraphQL Gargantext.API.GraphQL
Gargantext.API.GraphQL.AsyncTask Gargantext.API.GraphQL.AsyncTask
Gargantext.API.GraphQL.Contact
Gargantext.API.GraphQL.IMT Gargantext.API.GraphQL.IMT
Gargantext.API.GraphQL.Node Gargantext.API.GraphQL.Node
Gargantext.API.GraphQL.TreeFirstLevel Gargantext.API.GraphQL.TreeFirstLevel
...@@ -161,6 +160,7 @@ library ...@@ -161,6 +160,7 @@ library
Gargantext.Core.Methods.Matrix.Accelerate.Utils Gargantext.Core.Methods.Matrix.Accelerate.Utils
Gargantext.Core.Statistics Gargantext.Core.Statistics
Gargantext.Core.Text.Convert Gargantext.Core.Text.Convert
Gargantext.Core.Text.Corpus.API.Arxiv
Gargantext.Core.Text.Corpus.API.Hal Gargantext.Core.Text.Corpus.API.Hal
Gargantext.Core.Text.Corpus.API.Isidore Gargantext.Core.Text.Corpus.API.Isidore
Gargantext.Core.Text.Corpus.API.Istex Gargantext.Core.Text.Corpus.API.Istex
...@@ -342,6 +342,7 @@ library ...@@ -342,6 +342,7 @@ library
, aeson-lens , aeson-lens
, aeson-pretty , aeson-pretty
, array , array
, arxiv
, async , async
, attoparsec , attoparsec
, auto-update , auto-update
...@@ -360,6 +361,7 @@ library ...@@ -360,6 +361,7 @@ library
, conduit-extra , conduit-extra
, containers , containers
, contravariant , contravariant
, crawlerArxiv
, crawlerHAL , crawlerHAL
, crawlerISTEX , crawlerISTEX
, crawlerIsidore , crawlerIsidore
......
...@@ -132,6 +132,7 @@ library: ...@@ -132,6 +132,7 @@ library:
- aeson-lens - aeson-lens
- aeson-pretty - aeson-pretty
- array - array
- arxiv
- async - async
- attoparsec - attoparsec
- auto-update - auto-update
...@@ -150,6 +151,7 @@ library: ...@@ -150,6 +151,7 @@ library:
- conduit-extra - conduit-extra
- containers - containers
- contravariant - contravariant
- crawlerArxiv
- crawlerHAL - crawlerHAL
- crawlerISTEX - crawlerISTEX
- crawlerIsidore - crawlerIsidore
......
...@@ -36,6 +36,7 @@ instance Arbitrary a => Arbitrary (JobOutput a) where ...@@ -36,6 +36,7 @@ instance Arbitrary a => Arbitrary (JobOutput a) where
-- TODO IsidoreAuth -- TODO IsidoreAuth
data ExternalAPIs = All data ExternalAPIs = All
| PubMed | PubMed
| Arxiv
| HAL | HAL
| IsTex | IsTex
| Isidore | Isidore
......
...@@ -22,6 +22,7 @@ import Gargantext.Database.Action.Flow (DataOrigin(..)) ...@@ -22,6 +22,7 @@ import Gargantext.Database.Action.Flow (DataOrigin(..))
data Database = Empty data Database = Empty
| PubMed | PubMed
| Arxiv
| HAL | HAL
| IsTex | IsTex
| Isidore | Isidore
...@@ -33,6 +34,7 @@ instance ToSchema Database ...@@ -33,6 +34,7 @@ instance ToSchema Database
database2origin :: Database -> DataOrigin database2origin :: Database -> DataOrigin
database2origin Empty = InternalOrigin T.IsTex database2origin Empty = InternalOrigin T.IsTex
database2origin PubMed = ExternalOrigin T.PubMed database2origin PubMed = ExternalOrigin T.PubMed
database2origin Arxiv = ExternalOrigin T.Arxiv
database2origin HAL = ExternalOrigin T.HAL database2origin HAL = ExternalOrigin T.HAL
database2origin IsTex = ExternalOrigin T.IsTex database2origin IsTex = ExternalOrigin T.IsTex
database2origin Isidore = ExternalOrigin T.Isidore database2origin Isidore = ExternalOrigin T.Isidore
......
...@@ -25,6 +25,7 @@ import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs) ...@@ -25,6 +25,7 @@ import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs)
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude import Gargantext.Prelude
import qualified Gargantext.Core.Text.Corpus.API.Arxiv as Arxiv
import qualified Gargantext.Core.Text.Corpus.API.Hal as HAL import qualified Gargantext.Core.Text.Corpus.API.Hal as HAL
import qualified Gargantext.Core.Text.Corpus.API.Isidore as ISIDORE import qualified Gargantext.Core.Text.Corpus.API.Isidore as ISIDORE
import qualified Gargantext.Core.Text.Corpus.API.Istex as ISTEX import qualified Gargantext.Core.Text.Corpus.API.Istex as ISTEX
...@@ -41,6 +42,7 @@ get :: ExternalAPIs ...@@ -41,6 +42,7 @@ get :: ExternalAPIs
get PubMed _la q limit = PUBMED.get q limit get PubMed _la q limit = PUBMED.get q limit
--docs <- PUBMED.get q default_limit -- EN only by default --docs <- PUBMED.get q default_limit -- EN only by default
--pure (Just $ fromIntegral $ length docs, yieldMany docs) --pure (Just $ fromIntegral $ length docs, yieldMany docs)
get Arxiv la q limit = Arxiv.get la q (fromIntegral <$> limit)
get HAL la q limit = HAL.getC la q limit get HAL la q limit = HAL.getC la q limit
get IsTex la q limit = do get IsTex la q limit = do
docs <- ISTEX.get la q limit docs <- ISTEX.get la q limit
......
{-|
Module : Gargantext.Core.Text.Corpus.API.Arxiv
Description : Arxiv API connection
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans -fno-warn-unused-top-binds #-}
module Gargantext.Core.Text.Corpus.API.Arxiv
where
import Conduit
import Data.Either (Either(..))
import Data.Maybe
import Data.Text (Text)
import qualified Data.Text as Text
import Servant.Client (ClientError)
import Gargantext.Prelude
import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import qualified Arxiv as Arxiv
import qualified Network.Api.Arxiv as Ax
-- | Search query text; 'get' unpacks it to a 'String' before handing it to
-- the Arxiv client.
type Query = Text
-- | Result limit, re-exported from the @Arxiv@ client module so callers of
-- this module do not need to import it themselves.
type Limit = Arxiv.Limit
-- | Run a search against the Arxiv API and stream the hits as documents.
--
-- The query text is passed to 'Arxiv.apiSimpleC' as a single search term,
-- together with the optional limit. The count reported by the client is
-- returned (converted with 'fromIntegral') next to a conduit that maps every
-- raw result through 'toDoc', tagging it with the given language.
--
-- The value is always wrapped in 'Right': this function never builds a
-- 'ClientError' itself.
--
-- TODO put default Arxiv query in gargantext.ini
-- (original note: "by default: 10K docs" -- the default is not visible in
-- this module; confirm against the Arxiv client)
get :: Lang
    -> Query
    -> Maybe Limit
    -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get lang query mLimit = do
  (total, results) <- Arxiv.apiSimpleC mLimit [Text.unpack query]
  let docs = results .| mapC (toDoc lang)
  pure $ Right (Just (fromIntegral total), docs)
-- | Convert one Arxiv search result into a 'HyperdataDocument'.
--
-- String fields of the result (doi, url, id, title, journal, abstract,
-- publication date) are packed to 'Text' and always wrapped in 'Just'.
-- Author names and affiliations are each joined with @", "@; both are
-- 'Nothing' when the result has no authors. Only the publication year is
-- filled in among the date components. The language passed in is stored via
-- its 'show' rendering.
toDoc :: Lang -> Arxiv.Result -> HyperdataDocument
toDoc lang (Arxiv.Result { abstract
                         , authors = writers
                         --, categories
                         , doi
                         , id = arxivId
                         , journal
                         --, primaryCategory
                         , publication_date
                         , title
                         --, total
                         , url
                         , year
                         }
           ) =
  HyperdataDocument
    { _hd_bdd                = Just "Arxiv"
    , _hd_doi                = packed doi
    , _hd_url                = packed url
    , _hd_uniqId             = packed arxivId
    , _hd_uniqIdBdd          = Nothing
    , _hd_page               = Nothing
    , _hd_title              = packed title
    , _hd_authors            = authorNames
    , _hd_institutes         = affiliations
    , _hd_source             = packed journal
    , _hd_abstract           = packed abstract
    , _hd_publication_date   = packed publication_date
    , _hd_publication_year   = fromIntegral <$> year
    , _hd_publication_month  = Nothing -- TODO parse publication_date
    , _hd_publication_day    = Nothing
    , _hd_publication_hour   = Nothing
    , _hd_publication_minute = Nothing
    , _hd_publication_second = Nothing
    , _hd_language_iso2      = Just (Text.pack (show lang))
    }
  where
    -- Pack a raw string field and mark it as present.
    packed str = Just (Text.pack str)

    -- Render every author with the given function and join with ", ";
    -- an empty author list yields Nothing rather than an empty text.
    joinOver :: (Ax.Author -> Text) -> [Ax.Author] -> Maybe Text
    joinOver _      []     = Nothing
    joinOver render people = Just (Text.intercalate ", " (map render people))

    authorNames :: Maybe Text
    authorNames = joinOver (Text.pack . Ax.auName) writers

    -- Commas inside a single affiliation are rewritten to " - " so they
    -- cannot be confused with the ", " separator between affiliations.
    -- NOTE(review): an author whose affiliation string is empty still
    -- contributes an (empty) entry to the joined text -- confirm whether
    -- the Arxiv client can return such authors.
    affiliations :: Maybe Text
    affiliations = joinOver (Text.replace ", " " - " . Text.pack . Ax.auFil) writers
resolver: resolver:
url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/18.yaml url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/28.yaml
flags: {} flags: {}
extra-package-dbs: [] extra-package-dbs: []
skip-ghc-check: true skip-ghc-check: true
...@@ -76,6 +76,10 @@ extra-deps: ...@@ -76,6 +76,10 @@ extra-deps:
commit: 3bf77f28d3dc71d2e8349cbf422a34cf4c23cd11 commit: 3bf77f28d3dc71d2e8349cbf422a34cf4c23cd11
- git: https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git - git: https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git
commit: 3db385e767d2100d8abe900833c6e7de3ac55e1b commit: 3db385e767d2100d8abe900833c6e7de3ac55e1b
#- git: https://gitlab.iscpif.fr/gargantext/crawlers/arxiv-api.git
- git: https://gitlab.iscpif.fr/cgenie/arxiv-api.git
commit: f3e517cc40d92e282c5245b23d253d2ca3f802e5
- arxiv-0.0.3@sha256:02de1114091d11f1f3ab401d104d125ad4301260806feb7f63b3dcefc7db88cf,1588
# NP libs # NP libs
#- git: https://github.com/np/servant-job.git # waiting for PR #- git: https://github.com/np/servant-job.git # waiting for PR
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment