Commit 62ecec55 authored by Mael NICOLAS's avatar Mael NICOLAS

correct removeSub

parent 06476735
......@@ -6,4 +6,4 @@ import PUBMED (crawler)
main :: IO ()
main = crawler "bisphenol" (Just 3) >>= print
main = crawler "bisphenol" (Just 50) >>= print
......@@ -2,6 +2,7 @@
module PUBMED where
import Prelude hiding (takeWhile)
import Data.Text (Text)
import PUBMED.Client
import PUBMED.Parser
......@@ -31,12 +32,17 @@ runParser parser = parser . fromDocument . parseLBS_ def
removeSub :: Parser ByteString
removeSub = do
dt <- many textWithBalise
pure $ LBS.fromStrict $ pack $ concat dt
eo <- manyTill anyChar endOfInput
pure $ LBS.fromStrict $ pack $ concat dt <> eo
where
textWithBalise = manyTill anyChar (try subs)
subs = sub <|> asub -- <|> isEndOfInput
textWithBalise =
manyTill anyChar (sub <|> asub)
sub = string "<sub>"
<|> string "<sup>"
<|> string "<i>"
asub = string "</sub>"
<|> string "</sup>"
<|> string "</i>"
type Query = Text
type Limit = Integer
......@@ -51,18 +57,18 @@ runSimpleFindPubmedAbstractRequest query limit = do
(search (Just query) limit)
(mkClientEnv manager' $ BaseUrl Https "eutils.ncbi.nlm.nih.gov" 443 "entrez/eutils")
case res of
(Left _) -> pure (Left "Error: PubMed Internet connection")
(Left err) -> pure (Left $ T.pack $ show err)
(Right (BsXml docs)) -> do
let docIds = runParser parseDocId docs
res' <- runClientM
(fetch (Just "pubmed") (Just "abstract") docIds)
(mkClientEnv manager' $ BaseUrl Https "eutils.ncbi.nlm.nih.gov" 443 "entrez/eutils")
case res' of
(Left _) -> pure (Left "Error: PubMed API connection")
(Left err') -> pure (Left $ T.pack $ show err')
(Right (BsXml abstracts)) -> do
-- TODO remove "</sub>" maybe there is a cleaner way with isEndOfInput
case (parseOnly removeSub $ LBS.toStrict abstracts <> "</sub>") of
(Left err) -> pure (Left "Error: PubMed parser")
(Left err'') -> pure (Left $ T.pack err'')
(Right v) -> Right <$> pubMedParser v
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment