Commit 2d431a63 authored by Przemyslaw Kaminski

[arxiv] simple crawler endpoint

parent dea27613
cabal-version: '0.0.5.8.4' cabal-version: 1.12
-- This file has been generated from package.yaml by hpack version 0.34.4. -- This file has been generated from package.yaml by hpack version 0.34.4.
-- --
-- see: https://github.com/sol/hpack -- see: https://github.com/sol/hpack
name: gargantext name: gargantext
version: '0.0.5.8.4' version: 0.0.5.8.4
synopsis: Search, map, share synopsis: Search, map, share
description: Please see README.md description: Please see README.md
category: Data category: Data
...@@ -107,7 +107,6 @@ library ...@@ -107,7 +107,6 @@ library
Gargantext.API.Flow Gargantext.API.Flow
Gargantext.API.GraphQL Gargantext.API.GraphQL
Gargantext.API.GraphQL.AsyncTask Gargantext.API.GraphQL.AsyncTask
Gargantext.API.GraphQL.Contact
Gargantext.API.GraphQL.IMT Gargantext.API.GraphQL.IMT
Gargantext.API.GraphQL.Node Gargantext.API.GraphQL.Node
Gargantext.API.GraphQL.TreeFirstLevel Gargantext.API.GraphQL.TreeFirstLevel
...@@ -161,6 +160,7 @@ library ...@@ -161,6 +160,7 @@ library
Gargantext.Core.Methods.Matrix.Accelerate.Utils Gargantext.Core.Methods.Matrix.Accelerate.Utils
Gargantext.Core.Statistics Gargantext.Core.Statistics
Gargantext.Core.Text.Convert Gargantext.Core.Text.Convert
Gargantext.Core.Text.Corpus.API.Arxiv
Gargantext.Core.Text.Corpus.API.Hal Gargantext.Core.Text.Corpus.API.Hal
Gargantext.Core.Text.Corpus.API.Isidore Gargantext.Core.Text.Corpus.API.Isidore
Gargantext.Core.Text.Corpus.API.Istex Gargantext.Core.Text.Corpus.API.Istex
...@@ -360,6 +360,7 @@ library ...@@ -360,6 +360,7 @@ library
, conduit-extra , conduit-extra
, containers , containers
, contravariant , contravariant
, crawlerArxiv
, crawlerHAL , crawlerHAL
, crawlerISTEX , crawlerISTEX
, crawlerIsidore , crawlerIsidore
......
...@@ -150,6 +150,7 @@ library: ...@@ -150,6 +150,7 @@ library:
- conduit-extra - conduit-extra
- containers - containers
- contravariant - contravariant
- crawlerArxiv
- crawlerHAL - crawlerHAL
- crawlerISTEX - crawlerISTEX
- crawlerIsidore - crawlerIsidore
......
...@@ -36,6 +36,7 @@ instance Arbitrary a => Arbitrary (JobOutput a) where ...@@ -36,6 +36,7 @@ instance Arbitrary a => Arbitrary (JobOutput a) where
-- TODO IsidoreAuth -- TODO IsidoreAuth
data ExternalAPIs = All data ExternalAPIs = All
| PubMed | PubMed
| Arxiv
| HAL | HAL
| IsTex | IsTex
| Isidore | Isidore
......
...@@ -22,6 +22,7 @@ import Gargantext.Database.Action.Flow (DataOrigin(..)) ...@@ -22,6 +22,7 @@ import Gargantext.Database.Action.Flow (DataOrigin(..))
data Database = Empty data Database = Empty
| PubMed | PubMed
| Arxiv
| HAL | HAL
| IsTex | IsTex
| Isidore | Isidore
...@@ -33,6 +34,7 @@ instance ToSchema Database ...@@ -33,6 +34,7 @@ instance ToSchema Database
database2origin :: Database -> DataOrigin database2origin :: Database -> DataOrigin
database2origin Empty = InternalOrigin T.IsTex database2origin Empty = InternalOrigin T.IsTex
database2origin PubMed = ExternalOrigin T.PubMed database2origin PubMed = ExternalOrigin T.PubMed
database2origin Arxiv = ExternalOrigin T.Arxiv
database2origin HAL = ExternalOrigin T.HAL database2origin HAL = ExternalOrigin T.HAL
database2origin IsTex = ExternalOrigin T.IsTex database2origin IsTex = ExternalOrigin T.IsTex
database2origin Isidore = ExternalOrigin T.Isidore database2origin Isidore = ExternalOrigin T.Isidore
......
...@@ -25,6 +25,7 @@ import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs) ...@@ -25,6 +25,7 @@ import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs)
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude import Gargantext.Prelude
import qualified Gargantext.Core.Text.Corpus.API.Arxiv as Arxiv
import qualified Gargantext.Core.Text.Corpus.API.Hal as HAL import qualified Gargantext.Core.Text.Corpus.API.Hal as HAL
import qualified Gargantext.Core.Text.Corpus.API.Isidore as ISIDORE import qualified Gargantext.Core.Text.Corpus.API.Isidore as ISIDORE
import qualified Gargantext.Core.Text.Corpus.API.Istex as ISTEX import qualified Gargantext.Core.Text.Corpus.API.Istex as ISTEX
...@@ -41,6 +42,7 @@ get :: ExternalAPIs ...@@ -41,6 +42,7 @@ get :: ExternalAPIs
get PubMed _la q limit = PUBMED.get q limit get PubMed _la q limit = PUBMED.get q limit
--docs <- PUBMED.get q default_limit -- EN only by default --docs <- PUBMED.get q default_limit -- EN only by default
--pure (Just $ fromIntegral $ length docs, yieldMany docs) --pure (Just $ fromIntegral $ length docs, yieldMany docs)
get Arxiv la q limit = Arxiv.get la q (fromIntegral <$> limit)
get HAL la q limit = HAL.getC la q limit get HAL la q limit = HAL.getC la q limit
get IsTex la q limit = do get IsTex la q limit = do
docs <- ISTEX.get la q limit docs <- ISTEX.get la q limit
......
{-|
Module : Gargantext.Core.Text.Corpus.API.Arxiv
Description : Arxiv API connection
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans -fno-warn-unused-top-binds #-}
module Gargantext.Core.Text.Corpus.API.Arxiv
where
import Conduit
import Data.Either (Either(..))
import Data.Maybe
import Data.Text (Text)
--import qualified Data.Text as Text
import Servant.Client (ClientError)
import Gargantext.Prelude
import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
--import qualified Arxiv.Wrapper as Arxiv
-- | Query text accepted by 'get' (a plain 'Text' alias).
type Query = Text
-- | Limit accepted by 'get' as @Maybe Limit@ (a plain 'Int' alias).
-- Presumably the maximum number of documents to fetch; the current 'get'
-- ignores it -- TODO confirm once the crawler is wired in.
type Limit = Int
-- | Arxiv corpus query entry point (currently a stub).
--
-- The language, query and optional limit arguments are all ignored: the
-- action always succeeds with 'Right', carrying 'Nothing' in the first
-- component (presumably the total document count) and an empty document
-- stream in the second.
--
-- TODO put the default query in gargantext.ini (note carried over from the
-- PubMed module); no default limit (e.g. 10K docs) is applied yet.
get :: Lang -> Query -> Maybe Limit -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get _lang _query _limit = pure (Right (Nothing, yieldMany []))
resolver: resolver:
url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/18.yaml url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/28.yaml
flags: {} flags: {}
extra-package-dbs: [] extra-package-dbs: []
skip-ghc-check: true skip-ghc-check: true
...@@ -76,6 +76,10 @@ extra-deps: ...@@ -76,6 +76,10 @@ extra-deps:
commit: 3bf77f28d3dc71d2e8349cbf422a34cf4c23cd11 commit: 3bf77f28d3dc71d2e8349cbf422a34cf4c23cd11
- git: https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git - git: https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git
commit: 3db385e767d2100d8abe900833c6e7de3ac55e1b commit: 3db385e767d2100d8abe900833c6e7de3ac55e1b
#- git: https://gitlab.iscpif.fr/gargantext/crawlers/arxiv-api.git
- git: https://gitlab.iscpif.fr/cgenie/arxiv-api.git
commit: 84e9efb798b2937ea360b6f36d5931997987d5b4
- arxiv-0.0.3@sha256:02de1114091d11f1f3ab401d104d125ad4301260806feb7f63b3dcefc7db88cf,1588
# NP libs # NP libs
#- git: https://github.com/np/servant-job.git # waiting for PR #- git: https://github.com/np/servant-job.git # waiting for PR
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment