Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
pubmed
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
2
Issues
2
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
crawlers
pubmed
Commits
1ffff0cb
Commit
1ffff0cb
authored
May 13, 2019
by
Mael NICOLAS
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add author in pubmed parser
parent
9a375182
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
47 additions
and
33 deletions
+47
-33
Main.hs
app/Main.hs
+9
-6
Parser.hs
src/PUBMED/Parser.hs
+38
-27
No files found.
app/Main.hs
View file @
1ffff0cb
...
@@ -11,15 +11,16 @@ import Text.XML (parseLBS_, def)
...
@@ -11,15 +11,16 @@ import Text.XML (parseLBS_, def)
import
Text.XML.Cursor
(
fromDocument
,
Cursor
)
import
Text.XML.Cursor
(
fromDocument
,
Cursor
)
import
qualified
Data.ByteString.Lazy
as
LBS
import
qualified
Data.ByteString.Lazy
as
LBS
import
qualified
Data.Text
as
T
runParser
::
Show
res
=>
(
Cursor
->
res
)
->
LBS
.
ByteString
->
res
runParser
::
Show
res
=>
(
Cursor
->
res
)
->
LBS
.
ByteString
->
res
runParser
parser
=
parser
.
fromDocument
.
parseLBS_
def
runParser
parser
=
parser
.
fromDocument
.
parseLBS_
def
main
::
IO
()
runSimpleFindPubmedAbstractRequest
::
T
.
Text
->
IO
()
main
=
do
runSimpleFindPubmedAbstractRequest
rq
=
do
manager'
<-
newManager
tlsManagerSettings
manager'
<-
newManager
tlsManagerSettings
res
<-
runClientM
res
<-
runClientM
(
search
(
Just
"organ"
))
(
search
(
Just
rq
))
(
mkClientEnv
manager'
$
BaseUrl
Https
"eutils.ncbi.nlm.nih.gov"
443
"entrez/eutils"
)
(
mkClientEnv
manager'
$
BaseUrl
Https
"eutils.ncbi.nlm.nih.gov"
443
"entrez/eutils"
)
case
res
of
case
res
of
(
Left
err
)
->
print
err
(
Left
err
)
->
print
err
...
@@ -30,6 +31,8 @@ main = do
...
@@ -30,6 +31,8 @@ main = do
(
mkClientEnv
manager'
$
BaseUrl
Https
"eutils.ncbi.nlm.nih.gov"
443
"entrez/eutils"
)
(
mkClientEnv
manager'
$
BaseUrl
Https
"eutils.ncbi.nlm.nih.gov"
443
"entrez/eutils"
)
case
res'
of
case
res'
of
(
Left
err
)
->
print
err
(
Left
err
)
->
print
err
(
Right
(
BsXml
abstracts
))
->
do
(
Right
(
BsXml
abstracts
))
->
pmeds
<-
pubMedParser
abstracts
print
=<<
pubMedParser
abstracts
print
pmeds
main
::
IO
()
main
=
runSimpleFindPubmedAbstractRequest
"organ"
src/PUBMED/Parser.hs
View file @
1ffff0cb
...
@@ -7,7 +7,7 @@ import qualified Text.XML.Cursor as C -- ((&/), (&//), Cursor, content, element)
...
@@ -7,7 +7,7 @@ import qualified Text.XML.Cursor as C -- ((&/), (&//), Cursor, content, element)
import
Text.XML
(
Name
)
import
Text.XML
(
Name
)
import
Data.Either
(
rights
)
import
Data.Either
(
rights
)
import
Data.Maybe
(
Maybe
)
import
Data.Maybe
(
Maybe
,
fromMaybe
)
import
Data.Monoid
(
mconcat
)
import
Data.Monoid
(
mconcat
)
import
Data.Conduit
(
runConduit
,
(
.|
),
ConduitT
)
import
Data.Conduit
(
runConduit
,
(
.|
),
ConduitT
)
import
Data.Text
(
Text
,
unpack
)
import
Data.Text
(
Text
,
unpack
)
...
@@ -59,18 +59,25 @@ manyTagsUntil_' = many_ . ignoreEmptyTag . tagUntil
...
@@ -59,18 +59,25 @@ manyTagsUntil_' = many_ . ignoreEmptyTag . tagUntil
data
PubMed
=
data
PubMed
=
PubMed
{
pubmed_article
::
PubMedArticle
PubMed
{
pubmed_article
::
PubMedArticle
,
pubmed_date
Protolude
,
pubmed_date
::
PubMedData
::
PubMedData
}
deriving
Show
}
deriving
Show
data
PubMedArticle
=
data
PubMedArticle
=
PubMedArticle
{
pubmed_title
::
Maybe
T
.
Text
PubMedArticle
{
pubmed_title
::
Maybe
T
.
Text
,
pubmed_journal
::
Maybe
T
.
Text
,
pubmed_journal
::
Maybe
T
.
Text
,
pubmed_abstract
::
Maybe
[
T
.
Text
]
,
pubmed_abstract
::
Maybe
[
T
.
Text
]
,
pubmed_authors
::
Maybe
[
Author
]
}
}
deriving
(
Show
)
deriving
(
Show
)
data
Author
=
Author
{
lastName
::
Maybe
T
.
Text
,
foreName
::
Maybe
T
.
Text
,
affiliation
::
Maybe
T
.
Text
}
deriving
(
Show
)
data
PubMedData
=
data
PubMedData
=
PubMedData
{
pubmedData_date
::
UTCTime
PubMedData
{
pubmedData_date
::
UTCTime
,
pubmedData_year
::
Integer
,
pubmedData_year
::
Integer
...
@@ -89,16 +96,14 @@ pubMedParser bstring = runConduit $ parseLBS def bstring
...
@@ -89,16 +96,14 @@ pubMedParser bstring = runConduit $ parseLBS def bstring
.|
CL
.
consume
.|
CL
.
consume
parseArticleSet
::
MonadThrow
m
=>
ConduitT
Event
PubMed
m
()
parseArticleSet
::
MonadThrow
m
=>
ConduitT
Event
PubMed
m
()
parseArticleSet
=
do
parseArticleSet
=
as
<-
force
"force"
$
tagIgnoreAttrs
"PubmedArticleSet"
$
manyYield
parsePubMedArticle
force
"force"
$
tagIgnoreAttrs
"PubmedArticleSet"
$
manyYield
parsePubMedArticle
return
as
parsePubMedArticle
::
MonadThrow
m
=>
ConduitT
Event
o
m
(
Maybe
PubMed
)
parsePubMedArticle
::
MonadThrow
m
=>
ConduitT
Event
o
m
(
Maybe
PubMed
)
parsePubMedArticle
=
do
parsePubMedArticle
=
articles
<-
tagIgnoreAttrs
"PubmedArticle"
parsePubMedArticle'
tagIgnoreAttrs
"PubmedArticle"
parsePubMedArticle'
return
articles
parsePubMedArticle'
::
MonadThrow
m
=>
ConduitT
Event
o
m
(
PubMed
)
parsePubMedArticle'
::
MonadThrow
m
=>
ConduitT
Event
o
m
PubMed
parsePubMedArticle'
=
do
parsePubMedArticle'
=
do
article
<-
force
"MedlineCitation"
$
tagIgnoreAttrs
"MedlineCitation"
parseMedlineCitation
article
<-
force
"MedlineCitation"
$
tagIgnoreAttrs
"MedlineCitation"
parseMedlineCitation
dates
<-
tagIgnoreAttrs
"PubmedData"
$
do
dates
<-
tagIgnoreAttrs
"PubmedData"
$
do
...
@@ -106,44 +111,50 @@ parsePubMedArticle' = do
...
@@ -106,44 +111,50 @@ parsePubMedArticle' = do
y'
<-
force
"Year"
$
tagIgnoreAttrs
"Year"
content
y'
<-
force
"Year"
$
tagIgnoreAttrs
"Year"
content
m'
<-
force
"Month"
$
tagIgnoreAttrs
"Month"
content
m'
<-
force
"Month"
$
tagIgnoreAttrs
"Month"
content
d'
<-
force
"Day"
$
tagIgnoreAttrs
"Day"
content
d'
<-
force
"Day"
$
tagIgnoreAttrs
"Day"
content
_
<-
many
$
ignoreAnyTreeContent
_
<-
many
ignoreAnyTreeContent
return
(
read
$
unpack
y'
,
read
$
unpack
m'
,
read
$
unpack
d'
)
return
(
read
$
unpack
y'
,
read
$
unpack
m'
,
read
$
unpack
d'
)
_
<-
many
$
ignoreAnyTreeContent
_
<-
many
ignoreAnyTreeContent
return
dates'
return
dates'
_
<-
many
$
ignoreAnyTreeContent
_
<-
many
ignoreAnyTreeContent
let
(
y
,
m
,
d
)
=
maybe
(
1
,
1
,
1
)
identity
$
join
$
fmap
head
$
reverse
<$>
join
dates
let
(
y
,
m
,
d
)
=
maybe
(
1
,
1
,
1
)
identity
$
join
$
fmap
head
$
reverse
<$>
join
dates
return
$
PubMed
(
article
)
(
PubMedData
(
jour
y
m
d
)
y
m
d
)
return
$
PubMed
article
(
PubMedData
(
jour
y
m
d
)
y
m
d
)
parseMedlineCitation
::
MonadThrow
m
=>
ConduitT
Event
o
m
PubMedArticle
parseMedlineCitation
::
MonadThrow
m
=>
ConduitT
Event
o
m
PubMedArticle
parseMedlineCitation
=
do
parseMedlineCitation
=
do
a
<-
force
"article"
$
manyTagsUntil
"Article"
parseArticle
a
<-
force
"article"
$
manyTagsUntil
"Article"
parseArticle
_
<-
many
$
ignoreAnyTreeContent
_
<-
many
ignoreAnyTreeContent
return
a
return
a
parseArticle
::
MonadThrow
m
=>
ConduitT
Event
o
m
PubMedArticle
parseArticle
::
MonadThrow
m
=>
ConduitT
Event
o
m
PubMedArticle
parseArticle
=
do
parseArticle
=
do
journal
<-
force
"journal"
$
manyTagsUntil
"Journal"
$
do
journal
<-
force
"journal"
$
manyTagsUntil
"Journal"
$
do
j
<-
manyTagsUntil
"Title"
content
j
<-
manyTagsUntil
"Title"
content
_
<-
many
$
ignoreAnyTreeContent
_
<-
many
ignoreAnyTreeContent
return
j
return
j
title
<-
do
title
<-
manyTagsUntil
"ArticleTitle"
content
t
<-
manyTagsUntil
"ArticleTitle"
content
return
t
authors
<-
manyTagsUntil
"AuthorList"
.
many
$
tagIgnoreAttrs
"Author"
$
do
ln
<-
tagIgnoreAttrs
"LastName"
content
fn
<-
tagIgnoreAttrs
"ForeName"
content
affi
<-
manyTagsUntil
"AffiliationInfo"
$
tagIgnoreAttrs
"Affiliation"
content
_
<-
many
ignoreAnyTreeContent
return
Author
{
lastName
=
ln
,
foreName
=
fn
,
affiliation
=
fromMaybe
Nothing
affi
}
abstracts
<-
do
abstracts
<-
as
<-
manyTagsUntil
"Abstract"
$
many
$
do
manyTagsUntil
"Abstract"
.
many
$
do
txt
<-
tagIgnoreAttrs
"AbstractT.Text"
$
do
txt
<-
tagIgnoreAttrs
"AbstractT.Text"
$
do
c
<-
content
c
<-
content
_
<-
many
$
ignoreAnyTreeContent
_
<-
many
ignoreAnyTreeContent
return
c
return
c
_
<-
many
$
ignoreAnyTreeContent
_
<-
many
ignoreAnyTreeContent
return
txt
return
txt
return
as
-- TODO add authos
-- TODO add authos
_
<-
many
$
ignoreAnyTreeContent
_
<-
many
ignoreAnyTreeContent
return
$
PubMedArticle
title
journal
abstracts
return
$
PubMedArticle
title
journal
abstracts
authors
pubMedData
::
DBL
.
ByteString
pubMedData
::
DBL
.
ByteString
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment