Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
pubmed
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
2
Issues
2
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
crawlers
pubmed
Commits
9a375182
Commit
9a375182
authored
May 13, 2019
by
Mael NICOLAS
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add parser for search, took gg's parser for fetch
parent
eb248441
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
216 additions
and
5 deletions
+216
-5
Main.hs
app/Main.hs
+18
-2
package.yaml
package.yaml
+6
-0
PUBMED.hs
src/PUBMED.hs
+4
-0
Client.hs
src/PUBMED/Client.hs
+4
-2
Parser.hs
src/PUBMED/Parser.hs
+180
-0
stack.yaml
stack.yaml
+4
-1
No files found.
app/Main.hs
View file @
9a375182
...
...
@@ -3,17 +3,33 @@
module
Main
where
import
PUBMED.Client
import
PUBMED.Parser
import
Network.HTTP.Client
(
newManager
)
import
Network.HTTP.Client.TLS
(
tlsManagerSettings
)
import
Servant.Client
(
runClientM
,
mkClientEnv
,
BaseUrl
(
..
),
Scheme
(
..
))
import
Text.XML
(
parseLBS_
,
def
)
import
Text.XML.Cursor
(
fromDocument
,
Cursor
)
import
qualified
Data.ByteString.Lazy
as
LBS
runParser
::
Show
res
=>
(
Cursor
->
res
)
->
LBS
.
ByteString
->
res
runParser
parser
=
parser
.
fromDocument
.
parseLBS_
def
main
::
IO
()
main
=
do
manager'
<-
newManager
tlsManagerSettings
res
<-
runClientM
(
fetch
(
Just
"pubmed"
)
[
"31059770"
,
"31059556"
]
)
(
search
(
Just
"organ"
)
)
(
mkClientEnv
manager'
$
BaseUrl
Https
"eutils.ncbi.nlm.nih.gov"
443
"entrez/eutils"
)
case
res
of
(
Left
err
)
->
print
err
(
Right
ok
)
->
print
ok
(
Right
(
BsXml
docs
))
->
do
let
docIds
=
runParser
parseDocId
docs
res'
<-
runClientM
(
fetch
(
Just
"pubmed"
)
(
Just
"abstract"
)
docIds
)
(
mkClientEnv
manager'
$
BaseUrl
Https
"eutils.ncbi.nlm.nih.gov"
443
"entrez/eutils"
)
case
res'
of
(
Left
err
)
->
print
err
(
Right
(
BsXml
abstracts
))
->
do
pmeds
<-
pubMedParser
abstracts
print
pmeds
package.yaml
View file @
9a375182
...
...
@@ -29,6 +29,12 @@ dependencies:
-
http-client
-
http-client-tls
-
http-media
-
exceptions
-
conduit
-
xml-types
-
time
-
data-time-segment
-
protolude
library
:
source-dirs
:
src
...
...
src/PUBMED.hs
0 → 100644
View file @
9a375182
module
PUBMED
where
import
PUBMED.Client
import
PUBMED.Parser
src/PUBMED/Client.hs
View file @
9a375182
...
...
@@ -34,7 +34,8 @@ type PUBMEDAPI =
:<|>
"efetch.fcgi"
:>
QueryParam
"db"
T
.
Text
:>
QueryParams
"id"
T
.
Text
:>
QueryParam
"rettype"
T
.
Text
:>
QueryParams
"id"
Integer
:>
Get
'[
B
sXml
]
BsXml
pubmedApi
::
Proxy
PUBMEDAPI
...
...
@@ -43,6 +44,7 @@ pubmedApi = Proxy
search
::
Maybe
T
.
Text
->
ClientM
BsXml
fetch
::
Maybe
T
.
Text
->
[
T
.
Text
]
->
Maybe
T
.
Text
->
[
Integer
]
->
ClientM
BsXml
search
:<|>
fetch
=
client
pubmedApi
src/PUBMED/Parser.hs
0 → 100644
View file @
9a375182
{-# LANGUAGE OverloadedStrings #-}
module
PUBMED.Parser
where
import
Text.XML.Stream.Parse
import
qualified
Text.XML.Cursor
as
C
-- ((&/), (&//), Cursor, content, element)
import
Text.XML
(
Name
)
import
Data.Either
(
rights
)
import
Data.Maybe
(
Maybe
)
import
Data.Monoid
(
mconcat
)
import
Data.Conduit
(
runConduit
,
(
.|
),
ConduitT
)
import
Data.Text
(
Text
,
unpack
)
import
Data.XML.Types
(
Event
)
import
Data.Time.Segment
(
jour
)
import
Data.Time
(
UTCTime
(
..
))
import
GHC.IO
(
FilePath
)
import
Protolude
(
head
)
import
Prelude
hiding
(
head
)
import
Control.Monad.Catch
(
MonadThrow
)
import
Control.Monad
(
join
)
import
qualified
Data.Text
as
T
import
qualified
Data.Text.Read
as
T
import
qualified
Data.ByteString.Lazy
as
DBL
import
qualified
Data.Conduit.List
as
CL
parseDocId
::
C
.
Cursor
->
[
Integer
]
parseDocId
cursor
=
fst
<$>
rights
(
T
.
decimal
<$>
filter
notNullOrEOL
(
rawElement
cursor
)
)
where
rawElement
=
C
.
element
"eSearchResult"
C
.&/
C
.
element
"IdList"
C
.&//
C
.
content
notNullOrEOL
t
=
not
(
T
.
null
t
)
&&
t
/=
"
\n
"
identity
::
a
->
a
identity
x
=
x
manyTagsUntil
::
MonadThrow
m
=>
Name
->
ConduitT
Event
o
m
b
->
ConduitT
Event
o
m
(
Maybe
b
)
manyTagsUntil
name
f
=
do
_
<-
manyTagsUntil_
name
tagIgnoreAttrs
(
matching
(
==
name
))
f
-- | Utility function that matches everything but the tag given
tagUntil
::
Name
->
NameMatcher
Name
tagUntil
name
=
matching
(
/=
name
)
-- | Utility function that consumes everything but the tag given
-- usefull because we have to consume every data.
manyTagsUntil_
::
MonadThrow
m
=>
Name
->
ConduitT
Event
o
m
()
manyTagsUntil_
=
many_
.
ignoreTreeContent
.
tagUntil
manyTagsUntil_'
::
MonadThrow
m
=>
Name
->
ConduitT
Event
o
m
()
manyTagsUntil_'
=
many_
.
ignoreEmptyTag
.
tagUntil
data
PubMed
=
PubMed
{
pubmed_article
::
PubMedArticle
,
pubmed_date
Protolude
::
PubMedData
}
deriving
Show
data
PubMedArticle
=
PubMedArticle
{
pubmed_title
::
Maybe
T
.
Text
,
pubmed_journal
::
Maybe
T
.
Text
,
pubmed_abstract
::
Maybe
[
T
.
Text
]
}
deriving
(
Show
)
data
PubMedData
=
PubMedData
{
pubmedData_date
::
UTCTime
,
pubmedData_year
::
Integer
,
pubmedData_month
::
Int
,
pubmedData_day
::
Int
}
deriving
(
Show
)
readPubMedFile
::
FilePath
->
IO
[
PubMed
]
readPubMedFile
fp
=
do
input
<-
DBL
.
readFile
fp
pubMedParser
input
pubMedParser
::
DBL
.
ByteString
->
IO
[
PubMed
]
pubMedParser
bstring
=
runConduit
$
parseLBS
def
bstring
.|
parseArticleSet
.|
CL
.
consume
parseArticleSet
::
MonadThrow
m
=>
ConduitT
Event
PubMed
m
()
parseArticleSet
=
do
as
<-
force
"force"
$
tagIgnoreAttrs
"PubmedArticleSet"
$
manyYield
parsePubMedArticle
return
as
parsePubMedArticle
::
MonadThrow
m
=>
ConduitT
Event
o
m
(
Maybe
PubMed
)
parsePubMedArticle
=
do
articles
<-
tagIgnoreAttrs
"PubmedArticle"
parsePubMedArticle'
return
articles
parsePubMedArticle'
::
MonadThrow
m
=>
ConduitT
Event
o
m
(
PubMed
)
parsePubMedArticle'
=
do
article
<-
force
"MedlineCitation"
$
tagIgnoreAttrs
"MedlineCitation"
parseMedlineCitation
dates
<-
tagIgnoreAttrs
"PubmedData"
$
do
dates'
<-
tagIgnoreAttrs
"History"
$
many
$
tagIgnoreAttrs
"PubMedPubDate"
$
do
y'
<-
force
"Year"
$
tagIgnoreAttrs
"Year"
content
m'
<-
force
"Month"
$
tagIgnoreAttrs
"Month"
content
d'
<-
force
"Day"
$
tagIgnoreAttrs
"Day"
content
_
<-
many
$
ignoreAnyTreeContent
return
(
read
$
unpack
y'
,
read
$
unpack
m'
,
read
$
unpack
d'
)
_
<-
many
$
ignoreAnyTreeContent
return
dates'
_
<-
many
$
ignoreAnyTreeContent
let
(
y
,
m
,
d
)
=
maybe
(
1
,
1
,
1
)
identity
$
join
$
fmap
head
$
reverse
<$>
join
dates
return
$
PubMed
(
article
)
(
PubMedData
(
jour
y
m
d
)
y
m
d
)
parseMedlineCitation
::
MonadThrow
m
=>
ConduitT
Event
o
m
PubMedArticle
parseMedlineCitation
=
do
a
<-
force
"article"
$
manyTagsUntil
"Article"
parseArticle
_
<-
many
$
ignoreAnyTreeContent
return
a
parseArticle
::
MonadThrow
m
=>
ConduitT
Event
o
m
PubMedArticle
parseArticle
=
do
journal
<-
force
"journal"
$
manyTagsUntil
"Journal"
$
do
j
<-
manyTagsUntil
"Title"
content
_
<-
many
$
ignoreAnyTreeContent
return
j
title
<-
do
t
<-
manyTagsUntil
"ArticleTitle"
content
return
t
abstracts
<-
do
as
<-
manyTagsUntil
"Abstract"
$
many
$
do
txt
<-
tagIgnoreAttrs
"AbstractT.Text"
$
do
c
<-
content
_
<-
many
$
ignoreAnyTreeContent
return
c
_
<-
many
$
ignoreAnyTreeContent
return
txt
return
as
-- TODO add authos
_
<-
many
$
ignoreAnyTreeContent
return
$
PubMedArticle
title
journal
abstracts
pubMedData
::
DBL
.
ByteString
pubMedData
=
mconcat
[
"<?xml version=
\"
1.0
\"
?>
\n
"
,
"<!DOCTYPE PubmedArticleSet PUBLIC
\"
-//NLM//DTD PubMedArticle, 1st June 2018//EN
\"
\"
https://dtd.nlm.nih.gov/ncbi/pubmed/out/pubmed_180601.dtd
\"
>
\n
"
,
"<PubmedArticleSet>
\n
"
,
"<PubmedArticle>
\n
"
,
" <MedlineCitation Status=
\"
Publisher
\"
Owner=
\"
NLM
\"
>
\n
"
,
" <PMID Version=
\"
1
\"
>30357468</PMID>
\n
"
,
" <DateRevised>
\n
"
,
" <Year>2018</Year>
\n
"
,
" </DateRevised>
\n
"
,
" <Article PubModel=
\"
Print-Electronic
\"
>
\n
"
,
" <Journal>
\n
"
,
" <ISSN IssnType=
\"
Electronic
\"
>1432-1076</ISSN>
\n
"
,
" <Title>European journal of pediatrics</Title>
\n
"
,
" </Journal>
\n
"
,
" <ArticleTitle>Title of the Article</ArticleTitle>
\n
"
,
" <ELocationID EIdType=
\"
doi
\"
ValidYN=
\"
Y
\"
>10.1007/s00431-018-3270-3</ELocationID>
\n
"
,
" <Abstract>
\n
"
,
" <AbstractText>Abstract Text.</AbstractText>
\n
"
,
" </Abstract>
\n
"
,
" <AuthorList>
\n
"
,
" </AuthorList>
\n
"
,
" </Article>
\n
"
,
" </MedlineCitation>
\n
"
,
" <PubmedData>
\n
"
,
" <History>
\n
"
,
" </History>
\n
"
,
" </PubmedData>
\n
"
,
"</PubmedArticle>
\n
"
,
"</PubmedArticleSet>
\n
"
]
stack.yaml
View file @
9a375182
...
...
@@ -37,7 +37,10 @@ packages:
# Dependency packages to be pulled from upstream that are not in the resolver
# using the same syntax as the packages field.
# (e.g., acme-missiles-0.3)
# extra-deps: []
extra-deps
:
-
git
:
https://github.com/delanoe/data-time-segment.git
commit
:
4e3d57d80e9dfe6624c8eeaa8595fc8fe64d8723
# Override default flag values for local packages and extra-deps
# flags: {}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment