Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
pubmed
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
2
Issues
2
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
crawlers
pubmed
Commits
31cb4d28
Commit
31cb4d28
authored
Nov 28, 2022
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Main.hs works now, returning list of docs
parent
47ed870c
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
81 additions
and
73 deletions
+81
-73
Main.hs
app/Main.hs
+9
-5
PUBMED.hs
src/PUBMED.hs
+53
-62
Client.hs
src/PUBMED/Client.hs
+6
-4
Parser.hs
src/PUBMED/Parser.hs
+1
-1
Types.hs
src/PUBMED/Types.hs
+12
-1
No files found.
app/Main.hs
View file @
31cb4d28
...
@@ -63,18 +63,22 @@ main = run =<< execParser opts
...
@@ -63,18 +63,22 @@ main = run =<< execParser opts
run
::
Command
->
IO
()
run
::
Command
->
IO
()
run
(
Meta
(
MetaParams
{
mp_mAPIKey
,
mp_term
}))
=
do
run
(
Meta
(
MetaParams
{
mp_mAPIKey
,
mp_term
}))
=
do
let
config
=
Config
{
mAPIKey
=
Text
.
pack
<$>
mp_mAPIKey
}
let
config
=
Config
{
apiKey
=
Text
.
pack
<$>
mp_mAPIKey
,
query
=
Text
.
pack
mp_term
,
perPage
=
Nothing
}
eDocs
<-
runReaderT
(
getMetadataWithC
(
Text
.
pack
mp_term
)
Nothing
)
config
eDocs
<-
runReaderT
getMetadataWithC
config
case
eDocs
of
case
eDocs
of
Left
err
->
print
err
Left
err
->
print
err
Right
(
count
,
_docsC
)
->
print
$
show
count
Right
(
count
,
_docsC
)
->
print
$
show
count
run
(
Fetch
(
FetchParams
{
fp_mAPIKey
,
fp_term
,
fp_limit
}))
=
do
run
(
Fetch
(
FetchParams
{
fp_mAPIKey
,
fp_term
,
fp_limit
}))
=
do
let
config
=
Config
{
mAPIKey
=
Text
.
pack
<$>
fp_mAPIKey
}
let
config
=
Config
{
apiKey
=
Text
.
pack
<$>
fp_mAPIKey
,
query
=
Text
.
pack
fp_term
,
perPage
=
Nothing
}
eDocs
<-
runReaderT
(
getMetadataWithC
(
Text
.
pack
fp_term
)
(
Just
$
fromIntegral
fp_limit
))
config
eDocs
<-
runReaderT
getMetadataWithC
config
case
eDocs
of
case
eDocs
of
Left
err
->
print
err
Left
err
->
print
err
Right
(
count
,
docsC
)
->
do
Right
(
count
,
docsC
)
->
do
print
$
show
count
print
$
show
count
runConduit
$
docsC
.|
mapM_C
print
runConduit
$
docsC
.|
takeC
fp_limit
.|
mapM_C
print
src/PUBMED.hs
View file @
31cb4d28
...
@@ -32,87 +32,60 @@ pmSearchPath = "entrez/eutils"
...
@@ -32,87 +32,60 @@ pmSearchPath = "entrez/eutils"
pmPort
::
Int
pmPort
::
Int
pmPort
=
443
pmPort
=
443
batchSize
::
Int
batchSize
=
200
defaultClientEnv
::
IO
ClientEnv
defaultClientEnv
::
IO
ClientEnv
defaultClientEnv
=
do
defaultClientEnv
=
do
manager'
<-
newManager
tlsManagerSettings
manager'
<-
newManager
tlsManagerSettings
pure
$
mkClientEnv
manager'
$
BaseUrl
Https
pmHost
pmPort
pmSearchPath
pure
$
mkClientEnv
manager'
$
BaseUrl
Https
pmHost
pmPort
pmSearchPath
-- | API main function
-- | API main function
getMetadataWith
::
Text
->
Maybe
Integer
->
Maybe
Limit
->
Env
(
Either
Text
[
PubMed
])
getMetadataWith
::
Env
(
Either
Text
[
PubMed
])
getMetadataWith
=
runSimpleFindPubmedAbstractRequest
getMetadataWith
=
runSimpleFindPubmedAbstractRequest
Nothing
getMetadataWithC
::
Text
getMetadataWithC
::
Env
(
Either
ClientError
(
Maybe
Integer
,
ConduitT
()
PubMed
IO
()
))
->
Maybe
Limit
getMetadataWithC
=
do
->
Env
(
Either
ClientError
(
Maybe
Integer
,
ConduitT
()
PubMed
IO
()
))
config
@
(
Config
{
apiKey
,
query
,
perPage
})
<-
ask
getMetadataWithC
query
limit
=
do
config
@
(
Config
{
mAPIKey
=
mAPIKey
})
<-
ask
liftIO
$
do
liftIO
$
do
env
<-
defaultClientEnv
env
<-
defaultClientEnv
-- First, estimate the total number of documents
-- First, estimate the total number of documents
eRes
<-
runClientM
(
search
mAPI
Key
(
Just
query
)
Nothing
(
Just
1
))
env
eRes
<-
runClientM
(
search
api
Key
(
Just
query
)
Nothing
(
Just
1
))
env
pure
$
get'
config
env
query
limit
batchSize
<$>
eRes
pure
$
get'
config
env
(
fromMaybe
defaultPerPage
perPage
)
<$>
eRes
where
where
get'
::
Config
get'
::
Config
->
ClientEnv
->
ClientEnv
->
Text
->
PerPage
->
Maybe
Limit
->
Int
->
BsXml
->
BsXml
->
(
Maybe
Integer
,
ConduitT
()
PubMed
IO
()
)
->
(
Maybe
Integer
,
ConduitT
()
PubMed
IO
()
)
get'
config
env
q
l
perPage
(
BsXml
res
)
=
get'
config
env
perPage
(
BsXml
res
)
=
(
Just
numResults
(
Just
numResults
,
yieldMany
[
0
..
]
,
yieldMany
[
0
..
]
.|
takeC
(
fromInteger
numPages
)
.|
takeC
(
fromInteger
numPages
)
.|
concatMapMC
(
\
pageNum
->
runReaderT
(
getPage
q
perPage
pageNum
)
config
))
.|
concatMapMC
(
\
pageNum
->
runReaderT
(
getPage
pageNum
)
config
))
where
where
numResults
=
fromMaybe
0
$
parseDocCount
$
TL
.
fromStrict
$
TE
.
decodeUtf8
$
LBS
.
toStrict
res
numResults
=
fromMaybe
0
$
parseDocCount
$
TL
.
fromStrict
$
TE
.
decodeUtf8
$
LBS
.
toStrict
res
numPages
=
numResults
`
div
`
(
fromIntegral
perPage
)
+
1
numPages
=
numResults
`
div
`
(
fromIntegral
perPage
)
+
1
getPage
::
Text
getPage
::
Integer
->
Int
->
Int
->
Env
[
PubMed
]
->
Env
[
PubMed
]
getPage
q
perPage
pageNum
=
do
getPage
pageNum
=
do
(
Config
{
perPage
=
mPerPage
})
<-
ask
let
perPage
=
fromMaybe
defaultPerPage
mPerPage
let
offset
=
fromIntegral
$
pageNum
*
perPage
let
offset
=
fromIntegral
$
pageNum
*
perPage
liftIO
$
print
$
"[getPage] getting page "
<>
show
pageNum
<>
", offset: "
<>
show
offset
<>
", perPage: "
<>
show
perPage
liftIO
$
print
$
"[getPage] getting page "
<>
show
pageNum
<>
", offset: "
<>
show
offset
<>
", perPage: "
<>
show
perPage
eDocs
<-
runSimpleFindPubmedAbstractRequest
q
(
Just
offset
)
(
Just
$
fromIntegral
perPage
)
eDocs
<-
runSimpleFindPubmedAbstractRequest
(
Just
offset
)
liftIO
$
case
eDocs
of
case
eDocs
of
Left
err
->
panic
$
"[getPage] error: "
<>
show
err
Left
err
->
panic
$
"[getPage] error: "
<>
show
err
Right
docs
->
liftIO
$
do
Right
docs
->
liftIO
$
do
_
<-
threadDelay
2000000
-- One seconds
_
<-
threadDelay
2000000
-- One seconds
print
$
"[getPage] docs length: "
<>
show
(
length
docs
)
print
$
"[getPage] docs length: "
<>
show
(
length
docs
)
pure
docs
pure
docs
-- | TODO this parser need at least one subs at the end
-- (use endOfInput)
removeSub
::
Parser
ByteString
removeSub
=
do
dt
<-
many
textWithBalise
eo
<-
manyTill
anyChar
endOfInput
pure
$
LBS
.
fromStrict
$
pack
$
concat
dt
<>
eo
where
textWithBalise
=
manyTill
anyChar
(
sub
<|>
asub
)
sub
=
string
"<sub>"
<|>
string
"<sup>"
<|>
string
"<i>"
<|>
string
"<b>"
asub
=
string
"</sub>"
<|>
string
"</sup>"
<|>
string
"</i>"
<|>
string
"</b>"
type
Query
=
Text
type
Limit
=
Integer
runMultipleFPAR
::
[
Integer
]
runMultipleFPAR
::
[
Integer
]
->
Env
(
Either
Text
[
PubMed
])
->
Env
(
Either
Text
[
PubMed
])
runMultipleFPAR
[]
=
pure
$
Right
[]
runMultipleFPAR
[]
=
pure
$
Right
[]
runMultipleFPAR
ids
=
do
runMultipleFPAR
ids
=
do
List
.
foldl1'
concat'
<$>
mapM
runSimpleFetchPubmedAbstractRequest
(
by
batchSize
ids
)
(
Config
{
perPage
=
mPerPage
})
<-
ask
let
perPage
=
fromInteger
$
fromMaybe
defaultPerPage
mPerPage
List
.
foldl1'
concat'
<$>
mapM
runSimpleFetchPubmedAbstractRequest
(
by
perPage
ids
)
where
where
by
_
[]
=
[]
by
_
[]
=
[]
by
n
ns
=
head'
:
(
by
n
tail'
)
by
n
ns
=
head'
:
(
by
n
tail'
)
...
@@ -125,11 +98,11 @@ runMultipleFPAR ids = do
...
@@ -125,11 +98,11 @@ runMultipleFPAR ids = do
runSimpleFetchPubmedAbstractRequest
::
[
Integer
]
runSimpleFetchPubmedAbstractRequest
::
[
Integer
]
->
Env
(
Either
Text
[
PubMed
])
->
Env
(
Either
Text
[
PubMed
])
runSimpleFetchPubmedAbstractRequest
ids
=
do
runSimpleFetchPubmedAbstractRequest
ids
=
do
(
Config
{
mAPIKey
=
mAPI
Key
})
<-
ask
(
Config
{
api
Key
})
<-
ask
liftIO
$
do
liftIO
$
do
env
<-
defaultClientEnv
env
<-
defaultClientEnv
res
<-
runClientM
res
<-
runClientM
(
fetch
mAPI
Key
(
Just
"pubmed"
)
(
Just
"abstract"
)
ids
)
(
fetch
api
Key
(
Just
"pubmed"
)
(
Just
"abstract"
)
ids
)
env
env
case
res
of
case
res
of
(
Left
err
)
->
pure
(
Left
.
T
.
pack
$
show
err
)
(
Left
err
)
->
pure
(
Left
.
T
.
pack
$
show
err
)
...
@@ -145,25 +118,23 @@ runSimpleFetchPubmedAbstractRequest ids = do
...
@@ -145,25 +118,23 @@ runSimpleFetchPubmedAbstractRequest ids = do
-- pure []) :: XmlException -> IO [PubMed])
-- pure []) :: XmlException -> IO [PubMed])
-- Right <$> pure parsed
-- Right <$> pure parsed
runSimpleFindPubmedAbstractRequest
::
Text
runSimpleFindPubmedAbstractRequest
::
Maybe
Integer
->
Maybe
Integer
->
Maybe
Limit
->
Env
(
Either
Text
[
PubMed
])
->
Env
(
Either
Text
[
PubMed
])
runSimpleFindPubmedAbstractRequest
query
offset
limi
t
=
do
runSimpleFindPubmedAbstractRequest
offse
t
=
do
eDocIds
<-
searchDocIds
query
offset
limi
t
eDocIds
<-
searchDocIds
offse
t
case
eDocIds
of
case
eDocIds
of
Left
err
->
pure
$
Left
err
Left
err
->
pure
$
Left
err
Right
docIds
->
do
Right
docIds
->
do
liftIO
$
print
$
"[runSimpleFindPubmedAbstractRequest] docIds"
<>
show
docIds
liftIO
$
print
$
"[runSimpleFindPubmedAbstractRequest] docIds"
<>
show
docIds
runMultipleFPAR
docIds
runMultipleFPAR
docIds
searchDocIds
::
Text
->
Maybe
Integer
->
Maybe
Limit
->
Env
(
Either
Text
[
Integer
])
searchDocIds
::
Maybe
Integer
->
Env
(
Either
Text
[
Integer
])
searchDocIds
query
offset
limi
t
=
do
searchDocIds
offse
t
=
do
(
Config
{
mAPIKey
=
mAPIKey
})
<-
ask
(
Config
{
apiKey
,
query
,
perPage
})
<-
ask
liftIO
$
do
liftIO
$
do
env
<-
defaultClientEnv
env
<-
defaultClientEnv
res
<-
runClientM
res
<-
runClientM
(
search
mAPIKey
(
Just
query
)
offset
limit
)
(
search
apiKey
(
Just
query
)
offset
perPage
)
env
env
case
res
of
case
res
of
(
Left
err
)
->
pure
(
Left
$
T
.
pack
$
show
err
)
(
Left
err
)
->
pure
(
Left
$
T
.
pack
$
show
err
)
...
@@ -173,3 +144,23 @@ searchDocIds query offset limit = do
...
@@ -173,3 +144,23 @@ searchDocIds query offset limit = do
--runParser :: Show res => (Cursor -> res) -> LBS.ByteString -> res
--runParser :: Show res => (Cursor -> res) -> LBS.ByteString -> res
--runParser parser = parser . fromDocument . parseLBS_ def
--runParser parser = parser . fromDocument . parseLBS_ def
-- | TODO this parser need at least one subs at the end
-- (use endOfInput)
removeSub
::
Parser
ByteString
removeSub
=
do
dt
<-
many
textWithBalise
eo
<-
manyTill
anyChar
endOfInput
pure
$
LBS
.
fromStrict
$
pack
$
concat
dt
<>
eo
where
textWithBalise
=
manyTill
anyChar
(
sub
<|>
asub
)
sub
=
string
"<sub>"
<|>
string
"<sup>"
<|>
string
"<i>"
<|>
string
"<b>"
asub
=
string
"</sub>"
<|>
string
"</sup>"
<|>
string
"</i>"
<|>
string
"</b>"
src/PUBMED/Client.hs
View file @
31cb4d28
...
@@ -7,6 +7,8 @@ import Servant.Client
...
@@ -7,6 +7,8 @@ import Servant.Client
import
qualified
Data.Text
as
T
import
qualified
Data.Text
as
T
import
qualified
Network.HTTP.Media
as
M
import
qualified
Network.HTTP.Media
as
M
import
PUBMED.Types
(
APIKey
,
Query
,
PerPage
)
data
DB
=
PUBMED
data
DB
=
PUBMED
newtype
BsXml
=
BsXml
ByteString
newtype
BsXml
=
BsXml
ByteString
...
@@ -22,14 +24,14 @@ type PUBMEDAPI =
...
@@ -22,14 +24,14 @@ type PUBMEDAPI =
"esearch.fcgi"
"esearch.fcgi"
-- :> QueryParam "db" DB
-- :> QueryParam "db" DB
-- not mandatory since the base db is pubmed
-- not mandatory since the base db is pubmed
:>
QueryParam
"api_key"
T
.
Text
:>
QueryParam
"api_key"
APIKey
:>
QueryParam
"term"
T
.
Text
:>
QueryParam
"term"
T
.
Text
:>
QueryParam
"retstart"
Integer
:>
QueryParam
"retstart"
Integer
:>
QueryParam
"retmax"
Integer
:>
QueryParam
"retmax"
Integer
:>
Get
'[
B
sXml
]
BsXml
:>
Get
'[
B
sXml
]
BsXml
:<|>
:<|>
"efetch.fcgi"
"efetch.fcgi"
:>
QueryParam
"api_key"
T
.
Text
:>
QueryParam
"api_key"
APIKey
:>
QueryParam
"db"
T
.
Text
:>
QueryParam
"db"
T
.
Text
:>
QueryParam
"rettype"
T
.
Text
:>
QueryParam
"rettype"
T
.
Text
:>
QueryParams
"id"
Integer
:>
QueryParams
"id"
Integer
...
@@ -38,13 +40,13 @@ type PUBMEDAPI =
...
@@ -38,13 +40,13 @@ type PUBMEDAPI =
pubmedApi
::
Proxy
PUBMEDAPI
pubmedApi
::
Proxy
PUBMEDAPI
pubmedApi
=
Proxy
pubmedApi
=
Proxy
search
::
Maybe
T
.
Text
search
::
Maybe
APIKey
->
Maybe
T
.
Text
->
Maybe
T
.
Text
->
Maybe
Integer
->
Maybe
Integer
->
Maybe
Integer
->
Maybe
Integer
->
ClientM
BsXml
->
ClientM
BsXml
fetch
::
Maybe
T
.
Text
fetch
::
Maybe
APIKey
->
Maybe
T
.
Text
->
Maybe
T
.
Text
->
Maybe
T
.
Text
->
Maybe
T
.
Text
->
[
Integer
]
->
[
Integer
]
...
...
src/PUBMED/Parser.hs
View file @
31cb4d28
...
@@ -16,7 +16,7 @@ import Panic (panic)
...
@@ -16,7 +16,7 @@ import Panic (panic)
import
qualified
Text.Read
as
TR
import
qualified
Text.Read
as
TR
import
qualified
Text.Taggy.Lens
as
TTL
import
qualified
Text.Taggy.Lens
as
TTL
namedEl
name
=
TTL
.
elements
.
TTL
.
named
(
only
name
)
namedEl
name
=
TTL
.
elements
.
TTL
.
named
(
only
name
)
contentWithChildren
::
Prism'
Node
T
.
Text
contentWithChildren
::
Prism'
Node
T
.
Text
contentWithChildren
=
prism'
NodeContent
$
\
case
{
NodeContent
c
->
Just
c
contentWithChildren
=
prism'
NodeContent
$
\
case
{
NodeContent
c
->
Just
c
...
...
src/PUBMED/Types.hs
View file @
31cb4d28
...
@@ -3,8 +3,19 @@ module PUBMED.Types where
...
@@ -3,8 +3,19 @@ module PUBMED.Types where
import
Control.Monad.Reader
(
ReaderT
)
import
Control.Monad.Reader
(
ReaderT
)
import
Data.Text
(
Text
)
import
Data.Text
(
Text
)
type
APIKey
=
Text
type
Query
=
Text
type
PerPage
=
Integer
data
Config
=
Config
{
data
Config
=
Config
{
mAPIKey
::
Maybe
Text
apiKey
::
Maybe
APIKey
,
query
::
Query
,
perPage
::
Maybe
PerPage
}
}
type
Env
=
ReaderT
Config
IO
type
Env
=
ReaderT
Config
IO
defaultPerPage
::
PerPage
defaultPerPage
=
200
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment