Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Christian Merten
haskell-gargantext
Commits
2d431a63
Commit
2d431a63
authored
Apr 07, 2022
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[arxiv] simple crawler endpoint
parent
dea27613
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
52 additions
and
4 deletions
+52
-4
gargantext.cabal
gargantext.cabal
+4
-3
package.yaml
package.yaml
+1
-0
Types.hs
src/Gargantext/API/Admin/Orchestrator/Types.hs
+1
-0
Types.hs
src/Gargantext/API/Node/Corpus/Types.hs
+2
-0
API.hs
src/Gargantext/Core/Text/Corpus/API.hs
+2
-0
Arxiv.hs
src/Gargantext/Core/Text/Corpus/API/Arxiv.hs
+37
-0
stack.yaml
stack.yaml
+5
-1
No files found.
gargantext.cabal
View file @
2d431a63
cabal-version:
'0.0.5.8.4'
cabal-version:
1.12
-- This file has been generated from package.yaml by hpack version 0.34.4.
-- This file has been generated from package.yaml by hpack version 0.34.4.
--
--
-- see: https://github.com/sol/hpack
-- see: https://github.com/sol/hpack
name: gargantext
name: gargantext
version:
'0.0.5.8.4'
version:
0.0.5.8.4
synopsis: Search, map, share
synopsis: Search, map, share
description: Please see README.md
description: Please see README.md
category: Data
category: Data
...
@@ -107,7 +107,6 @@ library
...
@@ -107,7 +107,6 @@ library
Gargantext.API.Flow
Gargantext.API.Flow
Gargantext.API.GraphQL
Gargantext.API.GraphQL
Gargantext.API.GraphQL.AsyncTask
Gargantext.API.GraphQL.AsyncTask
Gargantext.API.GraphQL.Contact
Gargantext.API.GraphQL.IMT
Gargantext.API.GraphQL.IMT
Gargantext.API.GraphQL.Node
Gargantext.API.GraphQL.Node
Gargantext.API.GraphQL.TreeFirstLevel
Gargantext.API.GraphQL.TreeFirstLevel
...
@@ -161,6 +160,7 @@ library
...
@@ -161,6 +160,7 @@ library
Gargantext.Core.Methods.Matrix.Accelerate.Utils
Gargantext.Core.Methods.Matrix.Accelerate.Utils
Gargantext.Core.Statistics
Gargantext.Core.Statistics
Gargantext.Core.Text.Convert
Gargantext.Core.Text.Convert
Gargantext.Core.Text.Corpus.API.Arxiv
Gargantext.Core.Text.Corpus.API.Hal
Gargantext.Core.Text.Corpus.API.Hal
Gargantext.Core.Text.Corpus.API.Isidore
Gargantext.Core.Text.Corpus.API.Isidore
Gargantext.Core.Text.Corpus.API.Istex
Gargantext.Core.Text.Corpus.API.Istex
...
@@ -360,6 +360,7 @@ library
...
@@ -360,6 +360,7 @@ library
, conduit-extra
, conduit-extra
, containers
, containers
, contravariant
, contravariant
, crawlerArxiv
, crawlerHAL
, crawlerHAL
, crawlerISTEX
, crawlerISTEX
, crawlerIsidore
, crawlerIsidore
...
...
package.yaml
View file @
2d431a63
...
@@ -150,6 +150,7 @@ library:
...
@@ -150,6 +150,7 @@ library:
-
conduit-extra
-
conduit-extra
-
containers
-
containers
-
contravariant
-
contravariant
-
crawlerArxiv
-
crawlerHAL
-
crawlerHAL
-
crawlerISTEX
-
crawlerISTEX
-
crawlerIsidore
-
crawlerIsidore
...
...
src/Gargantext/API/Admin/Orchestrator/Types.hs
View file @
2d431a63
...
@@ -36,6 +36,7 @@ instance Arbitrary a => Arbitrary (JobOutput a) where
...
@@ -36,6 +36,7 @@ instance Arbitrary a => Arbitrary (JobOutput a) where
-- TODO IsidoreAuth
-- TODO IsidoreAuth
data
ExternalAPIs
=
All
data
ExternalAPIs
=
All
|
PubMed
|
PubMed
|
Arxiv
|
HAL
|
HAL
|
IsTex
|
IsTex
|
Isidore
|
Isidore
...
...
src/Gargantext/API/Node/Corpus/Types.hs
View file @
2d431a63
...
@@ -22,6 +22,7 @@ import Gargantext.Database.Action.Flow (DataOrigin(..))
...
@@ -22,6 +22,7 @@ import Gargantext.Database.Action.Flow (DataOrigin(..))
data
Database
=
Empty
data
Database
=
Empty
|
PubMed
|
PubMed
|
Arxiv
|
HAL
|
HAL
|
IsTex
|
IsTex
|
Isidore
|
Isidore
...
@@ -33,6 +34,7 @@ instance ToSchema Database
...
@@ -33,6 +34,7 @@ instance ToSchema Database
database2origin
::
Database
->
DataOrigin
database2origin
::
Database
->
DataOrigin
database2origin
Empty
=
InternalOrigin
T
.
IsTex
database2origin
Empty
=
InternalOrigin
T
.
IsTex
database2origin
PubMed
=
ExternalOrigin
T
.
PubMed
database2origin
PubMed
=
ExternalOrigin
T
.
PubMed
database2origin
Arxiv
=
ExternalOrigin
T
.
Arxiv
database2origin
HAL
=
ExternalOrigin
T
.
HAL
database2origin
HAL
=
ExternalOrigin
T
.
HAL
database2origin
IsTex
=
ExternalOrigin
T
.
IsTex
database2origin
IsTex
=
ExternalOrigin
T
.
IsTex
database2origin
Isidore
=
ExternalOrigin
T
.
Isidore
database2origin
Isidore
=
ExternalOrigin
T
.
Isidore
...
...
src/Gargantext/Core/Text/Corpus/API.hs
View file @
2d431a63
...
@@ -25,6 +25,7 @@ import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs)
...
@@ -25,6 +25,7 @@ import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs)
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Prelude
import
Gargantext.Prelude
import
qualified
Gargantext.Core.Text.Corpus.API.Arxiv
as
Arxiv
import
qualified
Gargantext.Core.Text.Corpus.API.Hal
as
HAL
import
qualified
Gargantext.Core.Text.Corpus.API.Hal
as
HAL
import
qualified
Gargantext.Core.Text.Corpus.API.Isidore
as
ISIDORE
import
qualified
Gargantext.Core.Text.Corpus.API.Isidore
as
ISIDORE
import
qualified
Gargantext.Core.Text.Corpus.API.Istex
as
ISTEX
import
qualified
Gargantext.Core.Text.Corpus.API.Istex
as
ISTEX
...
@@ -41,6 +42,7 @@ get :: ExternalAPIs
...
@@ -41,6 +42,7 @@ get :: ExternalAPIs
get
PubMed
_la
q
limit
=
PUBMED
.
get
q
limit
get
PubMed
_la
q
limit
=
PUBMED
.
get
q
limit
--docs <- PUBMED.get q default_limit -- EN only by default
--docs <- PUBMED.get q default_limit -- EN only by default
--pure (Just $ fromIntegral $ length docs, yieldMany docs)
--pure (Just $ fromIntegral $ length docs, yieldMany docs)
get
Arxiv
la
q
limit
=
Arxiv
.
get
la
q
(
fromIntegral
<$>
limit
)
get
HAL
la
q
limit
=
HAL
.
getC
la
q
limit
get
HAL
la
q
limit
=
HAL
.
getC
la
q
limit
get
IsTex
la
q
limit
=
do
get
IsTex
la
q
limit
=
do
docs
<-
ISTEX
.
get
la
q
limit
docs
<-
ISTEX
.
get
la
q
limit
...
...
src/Gargantext/Core/Text/Corpus/API/Arxiv.hs
0 → 100644
View file @
2d431a63
{-|
Module : Gargantext.Core.Text.Corpus.API.Arxiv
Description : Arxiv API connection
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans -fno-warn-unused-top-binds #-}
module
Gargantext.Core.Text.Corpus.API.Arxiv
where
import
Conduit
import
Data.Either
(
Either
(
..
))
import
Data.Maybe
import
Data.Text
(
Text
)
--import qualified Data.Text as Text
import
Servant.Client
(
ClientError
)
import
Gargantext.Prelude
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
--import qualified Arxiv.Wrapper as Arxiv
-- | Query passed as the second argument of 'get'; a plain alias for 'Text'.
type
Query
=
Text
-- | Limit passed (wrapped in 'Maybe') as the third argument of 'get'; a plain
-- alias for 'Int'. Presumably the maximum number of documents to fetch --
-- TODO confirm, the current 'get' ignores it.
type
Limit
=
Int
-- | Placeholder Arxiv endpoint.
--
-- All three arguments (language, query, optional limit) are currently
-- ignored. The action always succeeds with 'Right', reporting an unknown
-- total ('Nothing') together with a conduit that yields no documents.
--
-- TODO: the original note here asked to put a default pubmed query in
-- gargantext.ini and mentioned "by default: 10K docs"; neither is
-- implemented by this stub.
get :: Lang
    -> Query
    -> Maybe Limit
    -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get _ _ _ = pure (Right emptyResult)
  where
    -- No document count is known and the stream is empty.
    emptyResult = (Nothing, yieldMany [])
stack.yaml
View file @
2d431a63
resolver
:
resolver
:
url
:
https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/
1
8.yaml
url
:
https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/
2
8.yaml
flags
:
{}
flags
:
{}
extra-package-dbs
:
[]
extra-package-dbs
:
[]
skip-ghc-check
:
true
skip-ghc-check
:
true
...
@@ -76,6 +76,10 @@ extra-deps:
...
@@ -76,6 +76,10 @@ extra-deps:
commit
:
3bf77f28d3dc71d2e8349cbf422a34cf4c23cd11
commit
:
3bf77f28d3dc71d2e8349cbf422a34cf4c23cd11
-
git
:
https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git
-
git
:
https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git
commit
:
3db385e767d2100d8abe900833c6e7de3ac55e1b
commit
:
3db385e767d2100d8abe900833c6e7de3ac55e1b
#- git: https://gitlab.iscpif.fr/gargantext/crawlers/arxiv-api.git
-
git
:
https://gitlab.iscpif.fr/cgenie/arxiv-api.git
commit
:
84e9efb798b2937ea360b6f36d5931997987d5b4
-
arxiv-0.0.3@sha256:02de1114091d11f1f3ab401d104d125ad4301260806feb7f63b3dcefc7db88cf,1588
# NP libs
# NP libs
#- git
:
https://github.com/np/servant-job.git
# waiting for PR
#- git
:
https://github.com/np/servant-job.git
# waiting for PR
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment