Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Christian Merten
haskell-gargantext
Commits
2d431a63
Commit
2d431a63
authored
Apr 07, 2022
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[arxiv] simple crawler endpoint
parent
dea27613
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
52 additions
and
4 deletions
+52
-4
gargantext.cabal
gargantext.cabal
+4
-3
package.yaml
package.yaml
+1
-0
Types.hs
src/Gargantext/API/Admin/Orchestrator/Types.hs
+1
-0
Types.hs
src/Gargantext/API/Node/Corpus/Types.hs
+2
-0
API.hs
src/Gargantext/Core/Text/Corpus/API.hs
+2
-0
Arxiv.hs
src/Gargantext/Core/Text/Corpus/API/Arxiv.hs
+37
-0
stack.yaml
stack.yaml
+5
-1
No files found.
gargantext.cabal
View file @
2d431a63
cabal-version:
'0.0.5.8.4'
cabal-version:
1.12
-- This file has been generated from package.yaml by hpack version 0.34.4.
--
-- see: https://github.com/sol/hpack
name: gargantext
version:
'0.0.5.8.4'
version:
0.0.5.8.4
synopsis: Search, map, share
description: Please see README.md
category: Data
...
...
@@ -107,7 +107,6 @@ library
Gargantext.API.Flow
Gargantext.API.GraphQL
Gargantext.API.GraphQL.AsyncTask
Gargantext.API.GraphQL.Contact
Gargantext.API.GraphQL.IMT
Gargantext.API.GraphQL.Node
Gargantext.API.GraphQL.TreeFirstLevel
...
...
@@ -161,6 +160,7 @@ library
Gargantext.Core.Methods.Matrix.Accelerate.Utils
Gargantext.Core.Statistics
Gargantext.Core.Text.Convert
Gargantext.Core.Text.Corpus.API.Arxiv
Gargantext.Core.Text.Corpus.API.Hal
Gargantext.Core.Text.Corpus.API.Isidore
Gargantext.Core.Text.Corpus.API.Istex
...
...
@@ -360,6 +360,7 @@ library
, conduit-extra
, containers
, contravariant
, crawlerArxiv
, crawlerHAL
, crawlerISTEX
, crawlerIsidore
...
...
package.yaml
View file @
2d431a63
...
...
@@ -150,6 +150,7 @@ library:
-
conduit-extra
-
containers
-
contravariant
-
crawlerArxiv
-
crawlerHAL
-
crawlerISTEX
-
crawlerIsidore
...
...
src/Gargantext/API/Admin/Orchestrator/Types.hs
View file @
2d431a63
...
...
@@ -36,6 +36,7 @@ instance Arbitrary a => Arbitrary (JobOutput a) where
-- TODO IsidoreAuth
data
ExternalAPIs
=
All
|
PubMed
|
Arxiv
|
HAL
|
IsTex
|
Isidore
...
...
src/Gargantext/API/Node/Corpus/Types.hs
View file @
2d431a63
...
...
@@ -22,6 +22,7 @@ import Gargantext.Database.Action.Flow (DataOrigin(..))
data
Database
=
Empty
|
PubMed
|
Arxiv
|
HAL
|
IsTex
|
Isidore
...
...
@@ -33,6 +34,7 @@ instance ToSchema Database
database2origin
::
Database
->
DataOrigin
database2origin
Empty
=
InternalOrigin
T
.
IsTex
database2origin
PubMed
=
ExternalOrigin
T
.
PubMed
database2origin
Arxiv
=
ExternalOrigin
T
.
Arxiv
database2origin
HAL
=
ExternalOrigin
T
.
HAL
database2origin
IsTex
=
ExternalOrigin
T
.
IsTex
database2origin
Isidore
=
ExternalOrigin
T
.
Isidore
...
...
src/Gargantext/Core/Text/Corpus/API.hs
View file @
2d431a63
...
...
@@ -25,6 +25,7 @@ import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs)
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Prelude
import
qualified
Gargantext.Core.Text.Corpus.API.Arxiv
as
Arxiv
import
qualified
Gargantext.Core.Text.Corpus.API.Hal
as
HAL
import
qualified
Gargantext.Core.Text.Corpus.API.Isidore
as
ISIDORE
import
qualified
Gargantext.Core.Text.Corpus.API.Istex
as
ISTEX
...
...
@@ -41,6 +42,7 @@ get :: ExternalAPIs
get
PubMed
_la
q
limit
=
PUBMED
.
get
q
limit
--docs <- PUBMED.get q default_limit -- EN only by default
--pure (Just $ fromIntegral $ length docs, yieldMany docs)
get
Arxiv
la
q
limit
=
Arxiv
.
get
la
q
(
fromIntegral
<$>
limit
)
get
HAL
la
q
limit
=
HAL
.
getC
la
q
limit
get
IsTex
la
q
limit
=
do
docs
<-
ISTEX
.
get
la
q
limit
...
...
src/Gargantext/Core/Text/Corpus/API/Arxiv.hs
0 → 100644
View file @
2d431a63
{-|
Module : Gargantext.Core.Text.Corpus.API.Arxiv
Description : Arxiv API connection
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans -fno-warn-unused-top-binds #-}
module
Gargantext.Core.Text.Corpus.API.Arxiv
where
import
Conduit
import
Data.Either
(
Either
(
..
))
import
Data.Maybe
import
Data.Text
(
Text
)
--import qualified Data.Text as Text
import
Servant.Client
(
ClientError
)
import
Gargantext.Prelude
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
--import qualified Arxiv.Wrapper as Arxiv
-- | Query argument accepted by 'get'.
type Query = Text

-- | Limit argument accepted by 'get' (presumably a maximum number of
-- documents — confirm against the caller).
type Limit = Int

-- | Placeholder Arxiv endpoint.
--
-- All three arguments are currently ignored: the action always succeeds with
-- a 'Right' holding 'Nothing' as the first component and a conduit that
-- yields no 'HyperdataDocument' at all.
--
-- TODO(review): presumably this should delegate to the Arxiv crawler (see the
-- commented-out @Arxiv.Wrapper@ import) and take its default limit from
-- gargantext.ini — confirm the intended behaviour before relying on it.
get :: Lang
    -> Query
    -> Maybe Limit
    -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get _lang _query _limit = pure (Right (Nothing, yieldMany []))
stack.yaml
View file @
2d431a63
resolver
:
url
:
https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/
1
8.yaml
url
:
https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/
2
8.yaml
flags
:
{}
extra-package-dbs
:
[]
skip-ghc-check
:
true
...
...
@@ -76,6 +76,10 @@ extra-deps:
commit
:
3bf77f28d3dc71d2e8349cbf422a34cf4c23cd11
-
git
:
https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git
commit
:
3db385e767d2100d8abe900833c6e7de3ac55e1b
#- git: https://gitlab.iscpif.fr/gargantext/crawlers/arxiv-api.git
-
git
:
https://gitlab.iscpif.fr/cgenie/arxiv-api.git
commit
:
84e9efb798b2937ea360b6f36d5931997987d5b4
-
arxiv-0.0.3@sha256:02de1114091d11f1f3ab401d104d125ad4301260806feb7f63b3dcefc7db88cf,1588
# NP libs
#- git
:
https://github.com/np/servant-job.git
# waiting for PR
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment