Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
I
istex
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
1
Issues
1
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
crawlers
istex
Commits
fad7b60b
Commit
fad7b60b
authored
Dec 17, 2021
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[API] scroll implementation, first draft
parent
bb8919de
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
90 additions
and
25 deletions
+90
-25
README.md
README.md
+5
-0
Main.hs
app/Main.hs
+0
-2
crawlerISTEX.cabal
crawlerISTEX.cabal
+26
-2
package.yaml
package.yaml
+9
-0
ISTEX.hs
src/ISTEX.hs
+8
-2
Client.hs
src/ISTEX/Client.hs
+41
-19
stack.yaml
stack.yaml
+1
-0
No files found.
README.md
View file @
fad7b60b
...
...
@@ -33,3 +33,8 @@ main = do
print
$
take
5
val
```
https://api.istex.fr/document/?output=author,title,abstract,publicationDate,refBibs&scroll=1m&q=coffee
FatalError {fatalErrorMessage = "ConnectionError (HttpExceptionRequest Request {
\n
host =
\"
api.istex.fr
\"\n
port = 443
\n
secure = True
\n
requestHeaders =
[
(\"Accept\",\"application/json;charset=utf-8,application/json\")
]
\n
path =
\"
document
\"\n
queryString =
\"
?output=author,title,abstract,publicationDate,refBibs&scroll=1m&q=coffee
\"\n
method =
\"
GET
\"\n
proxy = Nothing
\n
rawBody = False
\n
redirectCount = 10
\n
responseTimeout = ResponseTimeoutDefault
\n
requestVersion = HTTP/1.1
\n
}
\n
ConnectionTimeout)"}
app/Main.hs
View file @
fad7b60b
{-# LANGUAGE OverloadedStrings #-}
module
Main
where
import
ISTEX
...
...
crawlerISTEX.cabal
View file @
fad7b60b
cabal-version: 1.12
-- This file has been generated from package.yaml by hpack version 0.3
1.1
.
-- This file has been generated from package.yaml by hpack version 0.3
4.4
.
--
-- see: https://github.com/sol/hpack
--
-- hash:
1b6f6ece1304eb1586d6ae93969d55e01cea6c4a9bae285b84794dd960541b1b
-- hash:
940816b6d9cd2940801674b8216a13ce8c7bb42b5951821a262fbc6086def8cc
name: crawlerISTEX
version: 0.1.0.0
...
...
@@ -33,9 +33,17 @@ library
Paths_crawlerISTEX
hs-source-dirs:
src
default-extensions:
DataKinds
DeriveGeneric
NamedFieldPuns
OverloadedStrings
RecordWildCards
TypeOperators
build-depends:
aeson
, base >=4.7 && <5
, ghc
, http-client
, http-client-tls
, lens
...
...
@@ -50,11 +58,19 @@ executable crawlerISTEX-exe
Paths_crawlerISTEX
hs-source-dirs:
app
default-extensions:
DataKinds
DeriveGeneric
NamedFieldPuns
OverloadedStrings
RecordWildCards
TypeOperators
ghc-options: -threaded -rtsopts -with-rtsopts=-N
build-depends:
aeson
, base >=4.7 && <5
, crawlerISTEX
, ghc
, http-client
, http-client-tls
, lens
...
...
@@ -70,11 +86,19 @@ test-suite crawlerISTEX-test
Paths_crawlerISTEX
hs-source-dirs:
test
default-extensions:
DataKinds
DeriveGeneric
NamedFieldPuns
OverloadedStrings
RecordWildCards
TypeOperators
ghc-options: -threaded -rtsopts -with-rtsopts=-N
build-depends:
aeson
, base >=4.7 && <5
, crawlerISTEX
, ghc
, http-client
, http-client-tls
, lens
...
...
package.yaml
View file @
fad7b60b
...
...
@@ -22,6 +22,7 @@ description: Please see the README on GitHub at <https://github.com/Muda
dependencies
:
-
aeson
-
base >= 4.7 && < 5
-
ghc
# Panic
-
text
-
lens
-
servant
...
...
@@ -29,6 +30,14 @@ dependencies:
-
http-client
-
http-client-tls
default-extensions
:
-
DataKinds
-
DeriveGeneric
-
NamedFieldPuns
-
OverloadedStrings
-
RecordWildCards
-
TypeOperators
library
:
source-dirs
:
src
...
...
src/ISTEX.hs
View file @
fad7b60b
{-# LANGUAGE OverloadedStrings #-}
module
ISTEX
where
import
ISTEX.Client
...
...
@@ -22,6 +20,14 @@ getMetadataWith q n = do
n
(
Just
q
)
-- | Query the scroll endpoint for documents matching a search string.
--
-- The first argument is sent as the @q@ query parameter and the second,
-- when present, as the @scroll@ parameter. The @output@ parameter is always
-- the fixed field list below. The request is executed through
-- 'runIstexAPIClient', so the result is either a 'ClientError' or the decoded
-- 'Documents'.
getMetadataScrollWith :: Text -> Maybe Text -> IO (Either ClientError Documents)
getMetadataScrollWith q scroll =
  runIstexAPIClient (searchScroll (Just outputFields) scroll (Just q))
  where
    -- Comma-separated list of fields requested for every hit.
    outputFields :: Text
    outputFields = "author,title,abstract,publicationDate,refBibs"
-- | A search query string; a plain alias for 'Text'.
type Query = Text
runTest
::
Query
->
Maybe
Int
->
IO
()
...
...
src/ISTEX/Client.hs
View file @
fad7b60b
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TemplateHaskell #-}
module
ISTEX.Client
where
import
GHC.Generics
import
Data.Aeson
import
Control.Applicative
((
<|>
))
import
Data.Aeson
import
GHC.Generics
import
Panic
(
panic
)
import
Text.Read
(
readEither
)
import
Servant.API
import
Servant.Client
...
...
@@ -34,12 +32,23 @@ data Source = Source
}
deriving
(
Show
,
Generic
)
-- Template Haskell splice deriving lenses for the fields of 'Source'
-- (presumably via Control.Lens imported qualified as L -- confirm against
-- the import list, which is not fully visible here).
L.makeLenses ''Source
-- | Convert an optional textual publication date into an optional 'Int'.
--
-- 'Nothing' is passed through unchanged. A present value is read as an 'Int'
-- with 'readEither'. If reading fails, the function does not return: it calls
-- 'panic' with a message containing the offending text and the reader's
-- error.
parsePubDate :: Maybe T.Text -> Maybe Int
parsePubDate mDate = case mDate of
  Nothing  -> Nothing
  Just raw -> either (abort raw) Just (readEither (T.unpack raw) :: Either String Int)
  where
    -- Abort with a diagnostic naming the unparsable text and the read error.
    abort raw reason =
      panic . T.unpack $
        "[parseJSON] Cannot parse publicationDate (Int): "
          <> raw
          <> "("
          <> T.pack reason
          <> ")"
-- | Decode a 'Source' from a JSON object.
--
-- Fields read:
--
-- * @title@ is optional.
-- * @author@ is required.
-- * @publicationDate@ is optional and is converted with 'parsePubDate',
--   which panics if the value is present but is not a valid 'Int'.
--
-- 'withObject' is used so that a non-object JSON value produces an ordinary
-- parser failure instead of a pattern-match exception. There is a single
-- equation: the earlier duplicate @parseJSON (Object o)@ clause shadowed this
-- one and used the partial 'read'.
instance FromJSON Source where
  parseJSON = withObject "Source" $ \o -> do
    _source_title   <- o .:? "title"
    _source_authors <- o .:  "author"
    mPubDate        <- o .:? "publicationDate"
    -- Remaining fields are filled from the bindings above (RecordWildCards).
    pure $ Source { _source_publicationDate = parsePubDate mPubDate, .. }
data
Document
=
Document
{
_document_id
::
T
.
Text
...
...
@@ -52,14 +61,16 @@ data Document = Document
-- Template Haskell splice deriving lenses for the fields of 'Document'
-- (presumably via Control.Lens imported qualified as L -- confirm against
-- the import list, which is not fully visible here).
L.makeLenses ''Document
-- | Decode a 'Document' from a JSON object.
--
-- Fields read:
--
-- * @id@ is required.
-- * @title@ and @abstract@ are optional.
-- * @author@ and @refBibs@ fall back to the empty list when the lookup
--   fails.
-- * @publicationDate@ is optional and is converted with 'parsePubDate',
--   which panics if the value is present but is not a valid 'Int'.
--
-- 'withObject' is used so that a non-object JSON value produces an ordinary
-- parser failure instead of a pattern-match exception. There is a single
-- equation: the earlier duplicate @parseJSON (Object o)@ clause shadowed this
-- one and used the partial 'read'.
instance FromJSON Document where
  parseJSON = withObject "Document" $ \o -> do
    _document_id       <- o .:  "id"
    _document_title    <- o .:? "title"
    _document_authors  <- o .:  "author" <|> pure []
    _document_abstract <- o .:? "abstract"
    _document_sources  <- o .:  "refBibs" <|> pure []
    mPubDate           <- o .:? "publicationDate"
    -- Remaining fields are filled from the bindings above (RecordWildCards).
    pure $ Document { _document_publicationDate = parsePubDate mPubDate, .. }
data
Documents
=
Documents
{
_documents_total
::
Int
,
_documents_hits
::
[
Document
]
...
...
@@ -82,3 +93,14 @@ istexProxy = Proxy
-- | Client function derived from 'istexProxy'.
--
-- NOTE(review): the API type behind 'istexProxy' is outside this view, so the
-- meaning of the three optional arguments cannot be confirmed here; check
-- them against that type.
search
  :: Maybe T.Text
  -> Maybe Int
  -> Maybe T.Text
  -> ClientM Documents
search = client istexProxy
-- | Servant API type for the scroll search: a GET request with three optional
-- textual query parameters (@output@, @scroll@, @q@) whose JSON response is
-- decoded as 'Documents'.
type SearchScroll =
     QueryParam "output" T.Text
  :> QueryParam "scroll" T.Text
  :> QueryParam "q"      T.Text
  :> Get '[JSON] Documents
-- | Proxy value carrying the 'SearchScroll' API type, used to derive its
-- client function.
istexProxyScroll :: Proxy SearchScroll
istexProxyScroll = Proxy
-- | Client function for the 'SearchScroll' API.
--
-- The arguments follow the order of the query parameters in 'SearchScroll':
-- @output@, then @scroll@, then @q@. Each is omitted from the request when
-- 'Nothing'.
searchScroll
  :: Maybe T.Text
  -> Maybe T.Text
  -> Maybe T.Text
  -> ClientM Documents
searchScroll = client istexProxyScroll
stack.yaml
View file @
fad7b60b
...
...
@@ -38,6 +38,7 @@ packages:
# using the same syntax as the packages field.
# (e.g., acme-missiles-0.3)
# extra-deps: []
extra-deps
:
[]
# Override default flag values for local packages and extra-deps
# flags: {}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment