Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
O
openalex
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
2
Issues
2
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
crawlers
openalex
Commits
8d3a0ab8
Verified
Commit
8d3a0ab8
authored
Jun 28, 2023
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Conduit implemented for works, fixes for types
parent
5aac73a7
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
90 additions
and
63 deletions
+90
-63
Main.hs
app/Main.hs
+20
-2
openalex.cabal
openalex.cabal
+1
-0
OpenAlex.hs
src/OpenAlex.hs
+27
-21
Types.hs
src/OpenAlex/Types.hs
+42
-40
No files found.
app/Main.hs
View file @
8d3a0ab8
module
Main
where
import
Conduit
import
Options.Applicative.Simple
import
Protolude
import
qualified
OpenAlex
as
OA
import
qualified
OpenAlex.Types
as
OA
main
::
IO
()
main
=
do
...
...
@@ -20,13 +22,16 @@ main = do
"Fetch OpenAlex works (https://docs.openalex.org/api-entities/works/work-object)"
(
const
fetchWorks
)
(
pure
()
)
addCommand
"works-c"
"Fetch OpenAlex works (https://docs.openalex.org/api-entities/works/work-object) with conduit"
(
const
fetchWorksC
)
(
pure
()
)
runCmd
()
fetchConcepts
::
()
->
IO
()
fetchConcepts
_
=
do
-- ec <- OA.fetchConcepts (Just 1) (Just 1) Nothing
ec
<-
OA
.
fetchConcepts
(
Just
1
)
(
Just
1
)
(
Just
"*"
)
case
ec
of
Left
err
->
putText
$
"error: "
<>
show
err
...
...
@@ -35,9 +40,22 @@ fetchConcepts _ = do
fetchWorks
::
()
->
IO
()
fetchWorks
_
=
do
-- ec <- OA.fetchConcepts (Just 1) (Just 1) Nothing
ew
<-
OA
.
fetchWorks
(
Just
1
)
(
Just
1
)
(
Just
"*"
)
case
ew
of
Left
err
->
putText
$
"error: "
<>
show
err
Right
w
->
do
putText
$
show
w
fetchWorksC
::
()
->
IO
()
fetchWorksC
_
=
do
eWorksC
<-
OA
.
fetchWorksC
Nothing
case
eWorksC
of
Left
err
->
putText
$
"error: "
<>
show
err
Right
(
Just
count
,
c
)
->
do
putText
$
"Count: "
<>
show
count
_
<-
runConduit
$
c
.|
takeC
1000
.|
mapM_C
(
\
(
OA
.
Work
{
..
})
->
do
liftIO
$
putText
$
show
id
)
pure
()
openalex.cabal
View file @
8d3a0ab8
...
...
@@ -81,6 +81,7 @@ executable openalex-main
-- LANGUAGE extensions used by modules in this package.
-- other-extensions:
build-depends: base ^>=4.14.3.0 && < 5
, conduit >= 1.3.5 && < 1.4
, optparse-simple >= 0.1.1.4 && < 0.2
, protolude >= 0.3.3 && < 0.4
...
...
src/OpenAlex.hs
View file @
8d3a0ab8
...
...
@@ -16,20 +16,18 @@ module OpenAlex
-- , fetchConcepts'
,
fetchConcepts
,
fetchWorks
,
fetchWorksC
)
where
-- import Data.Aeson
-- import qualified Data.ByteString.Lazy as BSL
import
Conduit
import
qualified
Data.Text
as
T
-- import qualified Data.Text.Encoding as TE
-- import Network.HTTP.Client (httpLbs, newManager, parseRequest, requestHeaders, responseBody)
import
Network.HTTP.Client
(
newManager
,
requestHeaders
)
import
Network.HTTP.Client.TLS
(
tlsManagerSettings
)
import
Protolude
import
Protolude
hiding
(
yield
)
import
OpenAlex.Client
import
OpenAlex.ServantClientLogging
import
OpenAlex.Types
(
ListOf
(
..
),
Page
,
PerPage
,
Cursor
,
Concept
,
Work
)
import
OpenAlex.Types
(
ListOf
(
..
),
Meta
(
..
),
Page
,
PerPage
,
Cursor
,
Concept
,
Work
)
import
Servant.Client
(
BaseUrl
(
..
),
ClientEnv
(
..
),
ClientError
,
Scheme
(
Https
),
defaultMakeClientRequest
,
mkClientEnv
,
runClientM
)
defaultClientEnv
::
IO
ClientEnv
...
...
@@ -53,18 +51,26 @@ fetchWorks mPage mPerPage mCursor = do
env
<-
defaultClientEnv
runClientM
(
works
mPage
mPerPage
mCursor
)
env
-- fetchConcepts' :: IO (Either Text (ListOf Concept))
-- fetchConcepts' = do
-- manager <- newManager tlsManagerSettings
-- req' <- parseRequest "https://api.openalex.org/concepts?page=1&per_page=10"
-- -- let setHeaders r =
-- -- filter (\(h, _) -> h /= "Accept") $
-- -- requestHeaders r -- <> [("Content-Type", "application/json")]
-- let setHeaders = const [("User-Agent", "v.http")]
-- let req = req' { requestHeaders = setHeaders req' }
-- putText $ show req
-- resBS <- httpLbs req manager
-- putText $ TE.decodeUtf8 $ BSL.toStrict $ responseBody resBS
-- pure $ case eitherDecode (responseBody resBS) of
-- Left err -> Left $ T.pack err
-- Right r -> Right $ r
fetchWorksC
::
Maybe
Cursor
->
IO
(
Either
ClientError
(
Maybe
Integer
,
ConduitT
()
Work
IO
()
))
fetchWorksC
Nothing
=
do
fetchWorksC
(
Just
"*"
)
fetchWorksC
mCursor
=
do
env
<-
defaultClientEnv
-- NOTE: per-page max is 200
eRes
<-
runClientM
(
works
(
Just
1
)
(
Just
1
)
Nothing
)
env
case
eRes
of
Left
err
->
pure
$
Left
err
Right
ListOf
{
meta
=
Meta
{
count
}}
->
do
pure
$
Right
(
Just
$
fromIntegral
count
,
producer
env
mCursor
)
where
producer
::
ClientEnv
->
Maybe
Cursor
->
ConduitT
()
Work
IO
()
producer
env
mCursor'
=
do
eRes
<-
liftIO
$
runClientM
(
works
Nothing
(
Just
20
)
mCursor'
)
env
liftIO
$
putText
$
"Conduit fetching page with cursor "
<>
show
mCursor'
case
eRes
of
Left
err
->
panic
$
"error: "
<>
show
err
Right
(
ListOf
{
results
,
meta
=
meta
@
(
Meta
{
next_cursor
})
})
->
do
liftIO
$
putText
$
"Meta: "
<>
show
meta
yieldMany
results
producer
env
next_cursor
src/OpenAlex/Types.hs
View file @
8d3a0ab8
...
...
@@ -194,11 +194,11 @@ instance FromJSON SummaryStats where
-- | https://docs.openalex.org/api-entities/works/work-object
data
Work
=
Work
{
abstract_inverted_index
::
Ma
p
Text
[
Int
]
-- TODO
{
abstract_inverted_index
::
Ma
ybe
(
Map
Text
[
Int
])
-- TODO
,
authorships
::
[
Authorship
]
,
apc_list
::
APCList
,
apc_paid
::
APCPaid
,
best_oa_location
::
Location
,
apc_list
::
Maybe
APCList
,
apc_paid
::
Maybe
APCPaid
,
best_oa_location
::
Maybe
Location
,
biblio
::
Biblio
,
cited_by_api_url
::
Text
,
cited_by_count
::
Count
...
...
@@ -207,25 +207,25 @@ data Work = Work
,
corresponding_institution_ids
::
[
OpenAlexID
]
,
counts_by_year
::
[
CountByYear
]
,
created_date
::
CreatedDate
,
display_name
::
Text
,
doi
::
DOI
,
display_name
::
Maybe
Text
,
doi
::
Maybe
DOI
,
grants
::
[
Grant
]
,
id
::
OpenAlexID
,
ids
::
Map
Text
ExternalID
-- TODO ExternalDB
,
is_paratext
::
Bool
,
is_retracted
::
Bool
,
language
::
Text
,
language
::
Maybe
Text
,
locations
::
[
Location
]
,
locations_count
::
Count
,
mesh
::
[
MeSH
]
,
ngrams_url
::
URL
,
open_access
::
OpenAccess
,
primary_location
::
Location
,
primary_location
::
Maybe
Location
,
publication_date
::
CreatedDate
,
publication_year
::
Year
,
referenced_works
::
[
OpenAlexID
]
,
related_works
::
[
OpenAlexID
]
,
title
::
Text
,
title
::
Maybe
Text
,
type_
::
Text
,
updated_date
::
UpdatedDate
,
is_oa
::
Maybe
Bool
...
...
@@ -248,25 +248,25 @@ instance FromJSON Work where
corresponding_institution_ids
<-
v
.:
"corresponding_institution_ids"
counts_by_year
<-
v
.:
"counts_by_year"
created_date
<-
v
.:
"created_date"
display_name
<-
v
.:
"display_name"
doi
<-
v
.:
"doi"
display_name
<-
v
.:
?
"display_name"
doi
<-
v
.:
?
"doi"
grants
<-
v
.:
"grants"
id
<-
v
.:
"id"
ids
<-
v
.:
"ids"
is_paratext
<-
v
.:
"is_paratext"
is_retracted
<-
v
.:
"is_retracted"
language
<-
v
.:
"language"
language
<-
v
.:
?
"language"
locations
<-
v
.:
"locations"
locations_count
<-
v
.:
"locations_count"
mesh
<-
v
.:
"mesh"
ngrams_url
<-
v
.:
"ngrams_url"
open_access
<-
v
.:
"open_access"
primary_location
<-
v
.:
"primary_location"
primary_location
<-
v
.:
?
"primary_location"
publication_date
<-
v
.:
"publication_date"
publication_year
<-
v
.:
"publication_year"
referenced_works
<-
v
.:
"referenced_works"
related_works
<-
v
.:
"related_works"
title
<-
v
.:
"title"
title
<-
v
.:
?
"title"
type_
<-
v
.:
"type"
updated_date
<-
v
.:
"updated_date"
is_oa
<-
v
.:?
"is_oa"
...
...
@@ -299,10 +299,10 @@ data Authorship = Authorship
}
deriving
(
Generic
,
Show
,
FromJSON
)
data
Biblio
=
Biblio
{
volume
::
Text
,
issue
::
Text
,
first_page
::
Text
,
last_page
::
Text
{
volume
::
Maybe
Text
,
issue
::
Maybe
Text
,
first_page
::
Maybe
Text
,
last_page
::
Maybe
Text
}
deriving
(
Generic
,
Show
,
FromJSON
)
data
DehydratedAuthor
=
DehydratedAuthor
...
...
@@ -312,33 +312,33 @@ data DehydratedAuthor = DehydratedAuthor
}
deriving
(
Generic
,
Show
,
FromJSON
)
data
DehydratedInstitution
=
DehydratedInstitution
{
id
::
OpenAlexID
{
id
::
Maybe
OpenAlexID
,
display_name
::
Text
,
ror
::
Text
,
country_code
::
Text
,
type_
::
Text
,
ror
::
Maybe
Text
,
country_code
::
Maybe
Text
,
type_
::
Maybe
Text
}
deriving
(
Generic
,
Show
)
instance
FromJSON
DehydratedInstitution
where
parseJSON
(
Object
v
)
=
do
id
<-
v
.:
"id"
id
<-
v
.:
?
"id"
display_name
<-
v
.:
"display_name"
ror
<-
v
.:
"ror"
country_code
<-
v
.:
"country_code"
type_
<-
v
.:
"type"
ror
<-
v
.:
?
"ror"
country_code
<-
v
.:
?
"country_code"
type_
<-
v
.:
?
"type"
pure
$
DehydratedInstitution
{
..
}
parseJSON
_
=
fail
"Don't know how to parse a dehydrated institution from a non-object"
data
Grant
=
Grant
{
funder
::
OpenAlexID
,
funder_display_name
::
Text
,
award_id
::
Text
,
award_id
::
Maybe
Text
}
deriving
(
Generic
,
Show
,
FromJSON
)
data
Location
=
Location
{
is_oa
::
Bool
,
landing_page_url
::
URL
,
license
::
Text
,
source
::
DehydratedSource
,
landing_page_url
::
Maybe
URL
,
license
::
Maybe
Text
,
source
::
Maybe
DehydratedSource
,
pdf_url
::
Maybe
URL
,
version
::
Maybe
Text
}
deriving
(
Generic
,
Show
,
FromJSON
)
...
...
@@ -348,7 +348,7 @@ data MeSH = MeSH
{
descriptor_ui
::
Text
,
descriptor_name
::
Text
,
qualifier_ui
::
Text
,
qualifier_name
::
Text
,
qualifier_name
::
Maybe
Text
,
is_major_topic
::
Bool
}
deriving
(
Generic
,
Show
,
FromJSON
)
...
...
@@ -357,30 +357,32 @@ data OpenAccess = OpenAccess
{
any_repository_has_fulltext
::
Bool
,
is_oa
::
Bool
,
oa_status
::
OAStatus
,
oa_url
::
URL
,
oa_url
::
Maybe
URL
}
deriving
(
Generic
,
Show
,
FromJSON
)
-- | https://docs.openalex.org/api-entities/sources/source-object#the-dehydratedsource-object
data
DehydratedSource
=
DehydratedSource
{
display_name
::
Text
,
host_organization
::
Text
,
host_organization
::
Maybe
Text
,
host_organization_lineage
::
[
OpenAlexID
]
,
host_organization_name
::
Text
,
host_organization_name
::
Maybe
Text
,
id
::
OpenAlexID
,
is_in_doaj
::
Bool
,
issn
::
[
ISSN
]
,
issn_l
::
ISSNL
,
issn_l
::
Maybe
ISSNL
,
type_
::
Text
}
deriving
(
Generic
,
Show
)
instance
FromJSON
DehydratedSource
where
parseJSON
=
withObject
"DehydratedSource"
$
\
v
->
do
display_name
<-
v
.:
"display_name"
host_organization
<-
v
.:
"host_organization"
host_organization_lineage
<-
v
.:
"host_organization_lineage"
host_organization_name
<-
v
.:
"host_organization_name"
host_organization
<-
v
.:?
"host_organization"
host_organization_lineage'
<-
v
.:?
"host_organization_lineage"
let
host_organization_lineage
=
fromMaybe
[]
host_organization_lineage'
host_organization_name
<-
v
.:?
"host_organization_name"
id
<-
v
.:
"id"
is_in_doaj
<-
v
.:
"is_in_doaj"
issn
<-
v
.:
"issn"
issn_l
<-
v
.:
"issn_l"
issn'
<-
v
.:?
"issn"
let
issn
=
fromMaybe
[]
issn'
issn_l
<-
v
.:?
"issn_l"
type_
<-
v
.:
"type"
pure
$
DehydratedSource
{
..
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment