Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
pubmed
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
2
Issues
2
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
crawlers
pubmed
Commits
31cb4d28
Commit
31cb4d28
authored
Nov 28, 2022
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Main.hs works now, returning list of docs
parent
47ed870c
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
81 additions
and
73 deletions
+81
-73
Main.hs
app/Main.hs
+9
-5
PUBMED.hs
src/PUBMED.hs
+53
-62
Client.hs
src/PUBMED/Client.hs
+6
-4
Parser.hs
src/PUBMED/Parser.hs
+1
-1
Types.hs
src/PUBMED/Types.hs
+12
-1
No files found.
app/Main.hs
View file @
31cb4d28
...
...
@@ -63,18 +63,22 @@ main = run =<< execParser opts
run
::
Command
->
IO
()
run
(
Meta
(
MetaParams
{
mp_mAPIKey
,
mp_term
}))
=
do
let
config
=
Config
{
mAPIKey
=
Text
.
pack
<$>
mp_mAPIKey
}
let
config
=
Config
{
apiKey
=
Text
.
pack
<$>
mp_mAPIKey
,
query
=
Text
.
pack
mp_term
,
perPage
=
Nothing
}
eDocs
<-
runReaderT
(
getMetadataWithC
(
Text
.
pack
mp_term
)
Nothing
)
config
eDocs
<-
runReaderT
getMetadataWithC
config
case
eDocs
of
Left
err
->
print
err
Right
(
count
,
_docsC
)
->
print
$
show
count
run
(
Fetch
(
FetchParams
{
fp_mAPIKey
,
fp_term
,
fp_limit
}))
=
do
let
config
=
Config
{
mAPIKey
=
Text
.
pack
<$>
fp_mAPIKey
}
let
config
=
Config
{
apiKey
=
Text
.
pack
<$>
fp_mAPIKey
,
query
=
Text
.
pack
fp_term
,
perPage
=
Nothing
}
eDocs
<-
runReaderT
(
getMetadataWithC
(
Text
.
pack
fp_term
)
(
Just
$
fromIntegral
fp_limit
))
config
eDocs
<-
runReaderT
getMetadataWithC
config
case
eDocs
of
Left
err
->
print
err
Right
(
count
,
docsC
)
->
do
print
$
show
count
runConduit
$
docsC
.|
mapM_C
print
runConduit
$
docsC
.|
takeC
fp_limit
.|
mapM_C
print
src/PUBMED.hs
View file @
31cb4d28
...
...
@@ -32,87 +32,60 @@ pmSearchPath = "entrez/eutils"
pmPort
::
Int
pmPort
=
443
batchSize
::
Int
batchSize
=
200
defaultClientEnv
::
IO
ClientEnv
defaultClientEnv
=
do
manager'
<-
newManager
tlsManagerSettings
pure
$
mkClientEnv
manager'
$
BaseUrl
Https
pmHost
pmPort
pmSearchPath
-- | API main function
getMetadataWith
::
Text
->
Maybe
Integer
->
Maybe
Limit
->
Env
(
Either
Text
[
PubMed
])
getMetadataWith
=
runSimpleFindPubmedAbstractRequest
getMetadataWith
::
Env
(
Either
Text
[
PubMed
])
getMetadataWith
=
runSimpleFindPubmedAbstractRequest
Nothing
getMetadataWithC
::
Text
->
Maybe
Limit
->
Env
(
Either
ClientError
(
Maybe
Integer
,
ConduitT
()
PubMed
IO
()
))
getMetadataWithC
query
limit
=
do
config
@
(
Config
{
mAPIKey
=
mAPIKey
})
<-
ask
getMetadataWithC
::
Env
(
Either
ClientError
(
Maybe
Integer
,
ConduitT
()
PubMed
IO
()
))
getMetadataWithC
=
do
config
@
(
Config
{
apiKey
,
query
,
perPage
})
<-
ask
liftIO
$
do
env
<-
defaultClientEnv
-- First, estimate the total number of documents
eRes
<-
runClientM
(
search
mAPI
Key
(
Just
query
)
Nothing
(
Just
1
))
env
pure
$
get'
config
env
query
limit
batchSize
<$>
eRes
eRes
<-
runClientM
(
search
api
Key
(
Just
query
)
Nothing
(
Just
1
))
env
pure
$
get'
config
env
(
fromMaybe
defaultPerPage
perPage
)
<$>
eRes
where
get'
::
Config
->
ClientEnv
->
Text
->
Maybe
Limit
->
Int
->
PerPage
->
BsXml
->
(
Maybe
Integer
,
ConduitT
()
PubMed
IO
()
)
get'
config
env
q
l
perPage
(
BsXml
res
)
=
get'
config
env
perPage
(
BsXml
res
)
=
(
Just
numResults
,
yieldMany
[
0
..
]
.|
takeC
(
fromInteger
numPages
)
.|
concatMapMC
(
\
pageNum
->
runReaderT
(
getPage
q
perPage
pageNum
)
config
))
.|
concatMapMC
(
\
pageNum
->
runReaderT
(
getPage
pageNum
)
config
))
where
numResults
=
fromMaybe
0
$
parseDocCount
$
TL
.
fromStrict
$
TE
.
decodeUtf8
$
LBS
.
toStrict
res
numPages
=
numResults
`
div
`
(
fromIntegral
perPage
)
+
1
getPage
::
Text
->
Int
->
Int
getPage
::
Integer
->
Env
[
PubMed
]
getPage
q
perPage
pageNum
=
do
getPage
pageNum
=
do
(
Config
{
perPage
=
mPerPage
})
<-
ask
let
perPage
=
fromMaybe
defaultPerPage
mPerPage
let
offset
=
fromIntegral
$
pageNum
*
perPage
liftIO
$
print
$
"[getPage] getting page "
<>
show
pageNum
<>
", offset: "
<>
show
offset
<>
", perPage: "
<>
show
perPage
eDocs
<-
runSimpleFindPubmedAbstractRequest
q
(
Just
offset
)
(
Just
$
fromIntegral
perPage
)
liftIO
$
case
eDocs
of
Left
err
->
panic
$
"[getPage] error: "
<>
show
err
Right
docs
->
liftIO
$
do
_
<-
threadDelay
2000000
-- One seconds
print
$
"[getPage] docs length: "
<>
show
(
length
docs
)
pure
docs
-- | TODO this parser need at least one subs at the end
-- (use endOfInput)
removeSub
::
Parser
ByteString
removeSub
=
do
dt
<-
many
textWithBalise
eo
<-
manyTill
anyChar
endOfInput
pure
$
LBS
.
fromStrict
$
pack
$
concat
dt
<>
eo
where
textWithBalise
=
manyTill
anyChar
(
sub
<|>
asub
)
sub
=
string
"<sub>"
<|>
string
"<sup>"
<|>
string
"<i>"
<|>
string
"<b>"
asub
=
string
"</sub>"
<|>
string
"</sup>"
<|>
string
"</i>"
<|>
string
"</b>"
type
Query
=
Text
type
Limit
=
Integer
eDocs
<-
runSimpleFindPubmedAbstractRequest
(
Just
offset
)
case
eDocs
of
Left
err
->
panic
$
"[getPage] error: "
<>
show
err
Right
docs
->
liftIO
$
do
_
<-
threadDelay
2000000
-- One seconds
print
$
"[getPage] docs length: "
<>
show
(
length
docs
)
pure
docs
runMultipleFPAR
::
[
Integer
]
->
Env
(
Either
Text
[
PubMed
])
runMultipleFPAR
[]
=
pure
$
Right
[]
runMultipleFPAR
ids
=
do
List
.
foldl1'
concat'
<$>
mapM
runSimpleFetchPubmedAbstractRequest
(
by
batchSize
ids
)
(
Config
{
perPage
=
mPerPage
})
<-
ask
let
perPage
=
fromInteger
$
fromMaybe
defaultPerPage
mPerPage
List
.
foldl1'
concat'
<$>
mapM
runSimpleFetchPubmedAbstractRequest
(
by
perPage
ids
)
where
by
_
[]
=
[]
by
n
ns
=
head'
:
(
by
n
tail'
)
...
...
@@ -125,11 +98,11 @@ runMultipleFPAR ids = do
runSimpleFetchPubmedAbstractRequest
::
[
Integer
]
->
Env
(
Either
Text
[
PubMed
])
runSimpleFetchPubmedAbstractRequest
ids
=
do
(
Config
{
mAPIKey
=
mAPI
Key
})
<-
ask
(
Config
{
api
Key
})
<-
ask
liftIO
$
do
env
<-
defaultClientEnv
res
<-
runClientM
(
fetch
mAPI
Key
(
Just
"pubmed"
)
(
Just
"abstract"
)
ids
)
(
fetch
api
Key
(
Just
"pubmed"
)
(
Just
"abstract"
)
ids
)
env
case
res
of
(
Left
err
)
->
pure
(
Left
.
T
.
pack
$
show
err
)
...
...
@@ -145,25 +118,23 @@ runSimpleFetchPubmedAbstractRequest ids = do
-- pure []) :: XmlException -> IO [PubMed])
-- Right <$> pure parsed
runSimpleFindPubmedAbstractRequest
::
Text
->
Maybe
Integer
->
Maybe
Limit
runSimpleFindPubmedAbstractRequest
::
Maybe
Integer
->
Env
(
Either
Text
[
PubMed
])
runSimpleFindPubmedAbstractRequest
query
offset
limi
t
=
do
eDocIds
<-
searchDocIds
query
offset
limi
t
runSimpleFindPubmedAbstractRequest
offse
t
=
do
eDocIds
<-
searchDocIds
offse
t
case
eDocIds
of
Left
err
->
pure
$
Left
err
Right
docIds
->
do
liftIO
$
print
$
"[runSimpleFindPubmedAbstractRequest] docIds"
<>
show
docIds
runMultipleFPAR
docIds
searchDocIds
::
Text
->
Maybe
Integer
->
Maybe
Limit
->
Env
(
Either
Text
[
Integer
])
searchDocIds
query
offset
limi
t
=
do
(
Config
{
mAPIKey
=
mAPIKey
})
<-
ask
searchDocIds
::
Maybe
Integer
->
Env
(
Either
Text
[
Integer
])
searchDocIds
offse
t
=
do
(
Config
{
apiKey
,
query
,
perPage
})
<-
ask
liftIO
$
do
env
<-
defaultClientEnv
res
<-
runClientM
(
search
mAPIKey
(
Just
query
)
offset
limit
)
(
search
apiKey
(
Just
query
)
offset
perPage
)
env
case
res
of
(
Left
err
)
->
pure
(
Left
$
T
.
pack
$
show
err
)
...
...
@@ -173,3 +144,23 @@ searchDocIds query offset limit = do
--runParser :: Show res => (Cursor -> res) -> LBS.ByteString -> res
--runParser parser = parser . fromDocument . parseLBS_ def
-- | TODO this parser need at least one subs at the end
-- (use endOfInput)
removeSub
::
Parser
ByteString
removeSub
=
do
dt
<-
many
textWithBalise
eo
<-
manyTill
anyChar
endOfInput
pure
$
LBS
.
fromStrict
$
pack
$
concat
dt
<>
eo
where
textWithBalise
=
manyTill
anyChar
(
sub
<|>
asub
)
sub
=
string
"<sub>"
<|>
string
"<sup>"
<|>
string
"<i>"
<|>
string
"<b>"
asub
=
string
"</sub>"
<|>
string
"</sup>"
<|>
string
"</i>"
<|>
string
"</b>"
src/PUBMED/Client.hs
View file @
31cb4d28
...
...
@@ -7,6 +7,8 @@ import Servant.Client
import
qualified
Data.Text
as
T
import
qualified
Network.HTTP.Media
as
M
import
PUBMED.Types
(
APIKey
,
Query
,
PerPage
)
data
DB
=
PUBMED
newtype
BsXml
=
BsXml
ByteString
...
...
@@ -22,14 +24,14 @@ type PUBMEDAPI =
"esearch.fcgi"
-- :> QueryParam "db" DB
-- not mandatory since the base db is pubmed
:>
QueryParam
"api_key"
T
.
Text
:>
QueryParam
"api_key"
APIKey
:>
QueryParam
"term"
T
.
Text
:>
QueryParam
"retstart"
Integer
:>
QueryParam
"retmax"
Integer
:>
Get
'[
B
sXml
]
BsXml
:<|>
"efetch.fcgi"
:>
QueryParam
"api_key"
T
.
Text
:>
QueryParam
"api_key"
APIKey
:>
QueryParam
"db"
T
.
Text
:>
QueryParam
"rettype"
T
.
Text
:>
QueryParams
"id"
Integer
...
...
@@ -38,13 +40,13 @@ type PUBMEDAPI =
pubmedApi
::
Proxy
PUBMEDAPI
pubmedApi
=
Proxy
search
::
Maybe
T
.
Text
search
::
Maybe
APIKey
->
Maybe
T
.
Text
->
Maybe
Integer
->
Maybe
Integer
->
ClientM
BsXml
fetch
::
Maybe
T
.
Text
fetch
::
Maybe
APIKey
->
Maybe
T
.
Text
->
Maybe
T
.
Text
->
[
Integer
]
...
...
src/PUBMED/Parser.hs
View file @
31cb4d28
...
...
@@ -16,7 +16,7 @@ import Panic (panic)
import
qualified
Text.Read
as
TR
import
qualified
Text.Taggy.Lens
as
TTL
namedEl
name
=
TTL
.
elements
.
TTL
.
named
(
only
name
)
namedEl
name
=
TTL
.
elements
.
TTL
.
named
(
only
name
)
contentWithChildren
::
Prism'
Node
T
.
Text
contentWithChildren
=
prism'
NodeContent
$
\
case
{
NodeContent
c
->
Just
c
...
...
src/PUBMED/Types.hs
View file @
31cb4d28
...
...
@@ -3,8 +3,19 @@ module PUBMED.Types where
import
Control.Monad.Reader
(
ReaderT
)
import
Data.Text
(
Text
)
type
APIKey
=
Text
type
Query
=
Text
type
PerPage
=
Integer
data
Config
=
Config
{
mAPIKey
::
Maybe
Text
apiKey
::
Maybe
APIKey
,
query
::
Query
,
perPage
::
Maybe
PerPage
}
type
Env
=
ReaderT
Config
IO
defaultPerPage
::
PerPage
defaultPerPage
=
200
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment