Commit 1ffff0cb authored by Mael NICOLAS's avatar Mael NICOLAS

add author in pubmed parser

parent 9a375182
...@@ -11,15 +11,16 @@ import Text.XML (parseLBS_, def) ...@@ -11,15 +11,16 @@ import Text.XML (parseLBS_, def)
import Text.XML.Cursor (fromDocument, Cursor) import Text.XML.Cursor (fromDocument, Cursor)
import qualified Data.ByteString.Lazy as LBS import qualified Data.ByteString.Lazy as LBS
import qualified Data.Text as T
runParser :: Show res => (Cursor -> res) -> LBS.ByteString -> res runParser :: Show res => (Cursor -> res) -> LBS.ByteString -> res
runParser parser = parser . fromDocument . parseLBS_ def runParser parser = parser . fromDocument . parseLBS_ def
main :: IO () runSimpleFindPubmedAbstractRequest :: T.Text -> IO ()
main = do runSimpleFindPubmedAbstractRequest rq = do
manager' <- newManager tlsManagerSettings manager' <- newManager tlsManagerSettings
res <- runClientM res <- runClientM
(search (Just "organ")) (search (Just rq))
(mkClientEnv manager' $ BaseUrl Https "eutils.ncbi.nlm.nih.gov" 443 "entrez/eutils") (mkClientEnv manager' $ BaseUrl Https "eutils.ncbi.nlm.nih.gov" 443 "entrez/eutils")
case res of case res of
(Left err) -> print err (Left err) -> print err
...@@ -30,6 +31,8 @@ main = do ...@@ -30,6 +31,8 @@ main = do
(mkClientEnv manager' $ BaseUrl Https "eutils.ncbi.nlm.nih.gov" 443 "entrez/eutils") (mkClientEnv manager' $ BaseUrl Https "eutils.ncbi.nlm.nih.gov" 443 "entrez/eutils")
case res' of case res' of
(Left err) -> print err (Left err) -> print err
(Right (BsXml abstracts)) -> do (Right (BsXml abstracts)) ->
pmeds <- pubMedParser abstracts print =<< pubMedParser abstracts
print pmeds
main :: IO ()
main = runSimpleFindPubmedAbstractRequest "organ"
...@@ -7,7 +7,7 @@ import qualified Text.XML.Cursor as C -- ((&/), (&//), Cursor, content, element) ...@@ -7,7 +7,7 @@ import qualified Text.XML.Cursor as C -- ((&/), (&//), Cursor, content, element)
import Text.XML (Name) import Text.XML (Name)
import Data.Either (rights) import Data.Either (rights)
import Data.Maybe (Maybe) import Data.Maybe (Maybe, fromMaybe)
import Data.Monoid (mconcat) import Data.Monoid (mconcat)
import Data.Conduit (runConduit, (.|), ConduitT) import Data.Conduit (runConduit, (.|), ConduitT)
import Data.Text (Text, unpack) import Data.Text (Text, unpack)
...@@ -59,18 +59,25 @@ manyTagsUntil_' = many_ . ignoreEmptyTag . tagUntil ...@@ -59,18 +59,25 @@ manyTagsUntil_' = many_ . ignoreEmptyTag . tagUntil
data PubMed = data PubMed =
PubMed { pubmed_article :: PubMedArticle PubMed { pubmed_article :: PubMedArticle
, pubmed_date Protolude , pubmed_date :: PubMedData
:: PubMedData
} deriving Show } deriving Show
data PubMedArticle = data PubMedArticle =
PubMedArticle { pubmed_title :: Maybe T.Text PubMedArticle { pubmed_title :: Maybe T.Text
, pubmed_journal :: Maybe T.Text , pubmed_journal :: Maybe T.Text
, pubmed_abstract :: Maybe [T.Text] , pubmed_abstract :: Maybe [T.Text]
, pubmed_authors :: Maybe [Author]
} }
deriving (Show) deriving (Show)
data Author =
Author {
lastName :: Maybe T.Text,
foreName :: Maybe T.Text,
affiliation :: Maybe T.Text
}
deriving (Show)
data PubMedData = data PubMedData =
PubMedData { pubmedData_date :: UTCTime PubMedData { pubmedData_date :: UTCTime
, pubmedData_year :: Integer , pubmedData_year :: Integer
...@@ -89,16 +96,14 @@ pubMedParser bstring = runConduit $ parseLBS def bstring ...@@ -89,16 +96,14 @@ pubMedParser bstring = runConduit $ parseLBS def bstring
.| CL.consume .| CL.consume
parseArticleSet :: MonadThrow m => ConduitT Event PubMed m () parseArticleSet :: MonadThrow m => ConduitT Event PubMed m ()
parseArticleSet = do parseArticleSet =
as <- force "force" $ tagIgnoreAttrs "PubmedArticleSet" $ manyYield parsePubMedArticle force "force" $ tagIgnoreAttrs "PubmedArticleSet" $ manyYield parsePubMedArticle
return as
parsePubMedArticle :: MonadThrow m => ConduitT Event o m (Maybe PubMed) parsePubMedArticle :: MonadThrow m => ConduitT Event o m (Maybe PubMed)
parsePubMedArticle = do parsePubMedArticle =
articles <- tagIgnoreAttrs "PubmedArticle" parsePubMedArticle' tagIgnoreAttrs "PubmedArticle" parsePubMedArticle'
return articles
parsePubMedArticle' :: MonadThrow m => ConduitT Event o m (PubMed) parsePubMedArticle' :: MonadThrow m => ConduitT Event o m PubMed
parsePubMedArticle' = do parsePubMedArticle' = do
article <- force "MedlineCitation" $ tagIgnoreAttrs "MedlineCitation" parseMedlineCitation article <- force "MedlineCitation" $ tagIgnoreAttrs "MedlineCitation" parseMedlineCitation
dates <- tagIgnoreAttrs "PubmedData" $ do dates <- tagIgnoreAttrs "PubmedData" $ do
...@@ -106,44 +111,50 @@ parsePubMedArticle' = do ...@@ -106,44 +111,50 @@ parsePubMedArticle' = do
y' <- force "Year" $ tagIgnoreAttrs "Year" content y' <- force "Year" $ tagIgnoreAttrs "Year" content
m' <- force "Month" $ tagIgnoreAttrs "Month" content m' <- force "Month" $ tagIgnoreAttrs "Month" content
d' <- force "Day" $ tagIgnoreAttrs "Day" content d' <- force "Day" $ tagIgnoreAttrs "Day" content
_ <- many $ ignoreAnyTreeContent _ <- many ignoreAnyTreeContent
return (read $ unpack y', read $ unpack m', read $ unpack d') return (read $ unpack y', read $ unpack m', read $ unpack d')
_ <- many $ ignoreAnyTreeContent _ <- many ignoreAnyTreeContent
return dates' return dates'
_ <- many $ ignoreAnyTreeContent _ <- many ignoreAnyTreeContent
let (y,m,d) = maybe (1,1,1) identity $ join $ fmap head $ reverse <$> join dates let (y,m,d) = maybe (1,1,1) identity $ join $ fmap head $ reverse <$> join dates
return $ PubMed (article) (PubMedData (jour y m d) y m d) return $ PubMed article (PubMedData (jour y m d) y m d)
parseMedlineCitation :: MonadThrow m => ConduitT Event o m PubMedArticle parseMedlineCitation :: MonadThrow m => ConduitT Event o m PubMedArticle
parseMedlineCitation = do parseMedlineCitation = do
a <- force "article" $ manyTagsUntil "Article" parseArticle a <- force "article" $ manyTagsUntil "Article" parseArticle
_ <- many $ ignoreAnyTreeContent _ <- many ignoreAnyTreeContent
return a return a
parseArticle :: MonadThrow m => ConduitT Event o m PubMedArticle parseArticle :: MonadThrow m => ConduitT Event o m PubMedArticle
parseArticle = do parseArticle = do
journal <- force "journal" $ manyTagsUntil "Journal" $ do journal <- force "journal" $ manyTagsUntil "Journal" $ do
j <- manyTagsUntil "Title" content j <- manyTagsUntil "Title" content
_ <- many $ ignoreAnyTreeContent _ <- many ignoreAnyTreeContent
return j return j
title <- do title <- manyTagsUntil "ArticleTitle" content
t <- manyTagsUntil "ArticleTitle" content
return t authors <- manyTagsUntil "AuthorList" . many $
tagIgnoreAttrs "Author" $ do
ln <- tagIgnoreAttrs "LastName" content
fn <- tagIgnoreAttrs "ForeName" content
affi <- manyTagsUntil "AffiliationInfo" $
tagIgnoreAttrs "Affiliation" content
_ <- many ignoreAnyTreeContent
return Author {lastName=ln, foreName=fn, affiliation=fromMaybe Nothing affi}
abstracts <- do abstracts <-
as <- manyTagsUntil "Abstract" $ many $ do manyTagsUntil "Abstract" . many $ do
txt <- tagIgnoreAttrs "AbstractT.Text" $ do txt <- tagIgnoreAttrs "AbstractT.Text" $ do
c <- content c <- content
_ <- many $ ignoreAnyTreeContent _ <- many ignoreAnyTreeContent
return c return c
_ <- many $ ignoreAnyTreeContent _ <- many ignoreAnyTreeContent
return txt return txt
return as
-- TODO add authos -- TODO add authos
_ <- many $ ignoreAnyTreeContent _ <- many ignoreAnyTreeContent
return $ PubMedArticle title journal abstracts return $ PubMedArticle title journal abstracts authors
pubMedData :: DBL.ByteString pubMedData :: DBL.ByteString
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment