Commit 1ffff0cb authored by Mael NICOLAS's avatar Mael NICOLAS

add author in pubmed parser

parent 9a375182
......@@ -11,15 +11,16 @@ import Text.XML (parseLBS_, def)
import Text.XML.Cursor (fromDocument, Cursor)
import qualified Data.ByteString.Lazy as LBS
import qualified Data.Text as T
runParser :: Show res => (Cursor -> res) -> LBS.ByteString -> res
runParser parser = parser . fromDocument . parseLBS_ def
main :: IO ()
main = do
runSimpleFindPubmedAbstractRequest :: T.Text -> IO ()
runSimpleFindPubmedAbstractRequest rq = do
manager' <- newManager tlsManagerSettings
res <- runClientM
(search (Just "organ"))
(search (Just rq))
(mkClientEnv manager' $ BaseUrl Https "eutils.ncbi.nlm.nih.gov" 443 "entrez/eutils")
case res of
(Left err) -> print err
......@@ -30,6 +31,8 @@ main = do
(mkClientEnv manager' $ BaseUrl Https "eutils.ncbi.nlm.nih.gov" 443 "entrez/eutils")
case res' of
(Left err) -> print err
(Right (BsXml abstracts)) -> do
pmeds <- pubMedParser abstracts
print pmeds
(Right (BsXml abstracts)) ->
print =<< pubMedParser abstracts
main :: IO ()
main = runSimpleFindPubmedAbstractRequest "organ"
......@@ -7,7 +7,7 @@ import qualified Text.XML.Cursor as C -- ((&/), (&//), Cursor, content, element)
import Text.XML (Name)
import Data.Either (rights)
import Data.Maybe (Maybe)
import Data.Maybe (Maybe, fromMaybe)
import Data.Monoid (mconcat)
import Data.Conduit (runConduit, (.|), ConduitT)
import Data.Text (Text, unpack)
......@@ -59,18 +59,25 @@ manyTagsUntil_' = many_ . ignoreEmptyTag . tagUntil
data PubMed =
PubMed { pubmed_article :: PubMedArticle
, pubmed_date Protolude
:: PubMedData
, pubmed_date :: PubMedData
} deriving Show
data PubMedArticle =
PubMedArticle { pubmed_title :: Maybe T.Text
, pubmed_journal :: Maybe T.Text
, pubmed_abstract :: Maybe [T.Text]
, pubmed_authors :: Maybe [Author]
}
deriving (Show)
data Author =
Author {
lastName :: Maybe T.Text,
foreName :: Maybe T.Text,
affiliation :: Maybe T.Text
}
deriving (Show)
data PubMedData =
PubMedData { pubmedData_date :: UTCTime
, pubmedData_year :: Integer
......@@ -89,16 +96,14 @@ pubMedParser bstring = runConduit $ parseLBS def bstring
.| CL.consume
parseArticleSet :: MonadThrow m => ConduitT Event PubMed m ()
parseArticleSet = do
as <- force "force" $ tagIgnoreAttrs "PubmedArticleSet" $ manyYield parsePubMedArticle
return as
parseArticleSet =
force "force" $ tagIgnoreAttrs "PubmedArticleSet" $ manyYield parsePubMedArticle
parsePubMedArticle :: MonadThrow m => ConduitT Event o m (Maybe PubMed)
parsePubMedArticle = do
articles <- tagIgnoreAttrs "PubmedArticle" parsePubMedArticle'
return articles
parsePubMedArticle =
tagIgnoreAttrs "PubmedArticle" parsePubMedArticle'
parsePubMedArticle' :: MonadThrow m => ConduitT Event o m (PubMed)
parsePubMedArticle' :: MonadThrow m => ConduitT Event o m PubMed
parsePubMedArticle' = do
article <- force "MedlineCitation" $ tagIgnoreAttrs "MedlineCitation" parseMedlineCitation
dates <- tagIgnoreAttrs "PubmedData" $ do
......@@ -106,44 +111,50 @@ parsePubMedArticle' = do
y' <- force "Year" $ tagIgnoreAttrs "Year" content
m' <- force "Month" $ tagIgnoreAttrs "Month" content
d' <- force "Day" $ tagIgnoreAttrs "Day" content
_ <- many $ ignoreAnyTreeContent
_ <- many ignoreAnyTreeContent
return (read $ unpack y', read $ unpack m', read $ unpack d')
_ <- many $ ignoreAnyTreeContent
_ <- many ignoreAnyTreeContent
return dates'
_ <- many $ ignoreAnyTreeContent
_ <- many ignoreAnyTreeContent
let (y,m,d) = maybe (1,1,1) identity $ join $ fmap head $ reverse <$> join dates
return $ PubMed (article) (PubMedData (jour y m d) y m d)
return $ PubMed article (PubMedData (jour y m d) y m d)
parseMedlineCitation :: MonadThrow m => ConduitT Event o m PubMedArticle
parseMedlineCitation = do
a <- force "article" $ manyTagsUntil "Article" parseArticle
_ <- many $ ignoreAnyTreeContent
_ <- many ignoreAnyTreeContent
return a
parseArticle :: MonadThrow m => ConduitT Event o m PubMedArticle
parseArticle = do
journal <- force "journal" $ manyTagsUntil "Journal" $ do
j <- manyTagsUntil "Title" content
_ <- many $ ignoreAnyTreeContent
_ <- many ignoreAnyTreeContent
return j
title <- do
t <- manyTagsUntil "ArticleTitle" content
return t
title <- manyTagsUntil "ArticleTitle" content
authors <- manyTagsUntil "AuthorList" . many $
tagIgnoreAttrs "Author" $ do
ln <- tagIgnoreAttrs "LastName" content
fn <- tagIgnoreAttrs "ForeName" content
affi <- manyTagsUntil "AffiliationInfo" $
tagIgnoreAttrs "Affiliation" content
_ <- many ignoreAnyTreeContent
return Author {lastName=ln, foreName=fn, affiliation=fromMaybe Nothing affi}
abstracts <- do
as <- manyTagsUntil "Abstract" $ many $ do
abstracts <-
manyTagsUntil "Abstract" . many $ do
txt <- tagIgnoreAttrs "AbstractT.Text" $ do
c <- content
_ <- many $ ignoreAnyTreeContent
_ <- many ignoreAnyTreeContent
return c
_ <- many $ ignoreAnyTreeContent
_ <- many ignoreAnyTreeContent
return txt
return as
-- TODO add authos
_ <- many $ ignoreAnyTreeContent
return $ PubMedArticle title journal abstracts
_ <- many ignoreAnyTreeContent
return $ PubMedArticle title journal abstracts authors
pubMedData :: DBL.ByteString
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment