Commit 8504d4c5 authored by Alexandre Delanoë

[FIX] removeSub works for now but TODO inside.

parent d69b3a65
...@@ -13,24 +13,32 @@ import Data.Conduit (ConduitT) ...@@ -13,24 +13,32 @@ import Data.Conduit (ConduitT)
import Data.ByteString.Lazy (ByteString) import Data.ByteString.Lazy (ByteString)
import Data.ByteString.Char8 (pack) import Data.ByteString.Char8 (pack)
import Control.Monad.Catch (MonadThrow) import Control.Monad.Catch (MonadThrow)
import Text.Parsec hiding (runParser)
import Text.Parsec.ByteString.Lazy import Control.Applicative
import Data.Attoparsec.ByteString
import Data.Attoparsec.ByteString.Char8 (anyChar)
import qualified Data.ByteString.Lazy as LBS import qualified Data.ByteString.Lazy as LBS
import qualified Data.ByteString as DB
import qualified Data.Text as T import qualified Data.Text as T
runParser :: Show res => (Cursor -> res) -> LBS.ByteString -> res runParser :: Show res => (Cursor -> res) -> LBS.ByteString -> res
runParser parser = parser . fromDocument . parseLBS_ def runParser parser = parser . fromDocument . parseLBS_ def
-- | TODO: this parser needs at least one "<sub>" or "</sub>" tag at the end of the input
-- (use endOfInput)
removeSub :: Parser ByteString removeSub :: Parser ByteString
removeSub = do removeSub = do
dt <- many1 anyChar <* (skipMany $ sub <|> asub) dt <- many textWithBalise
return . LBS.fromStrict $ pack dt pure $ LBS.fromStrict $ pack $ concat dt
where where
sub = string "<sub>" textWithBalise = manyTill anyChar (try subs)
subs = sub <|> asub -- <|> isEndOfInput
sub = string "<sub>"
asub = string "</sub>" asub = string "</sub>"
runSimpleFindPubmedAbstractRequest :: T.Text -> IO [PubMed] runSimpleFindPubmedAbstractRequest :: T.Text -> IO [PubMed]
runSimpleFindPubmedAbstractRequest rq = do runSimpleFindPubmedAbstractRequest rq = do
manager' <- newManager tlsManagerSettings manager' <- newManager tlsManagerSettings
res <- runClientM res <- runClientM
(search (Just rq)) (search (Just rq))
...@@ -45,7 +53,8 @@ runSimpleFindPubmedAbstractRequest rq = do ...@@ -45,7 +53,8 @@ runSimpleFindPubmedAbstractRequest rq = do
case res' of case res' of
(Left err) -> return [] (Left err) -> return []
(Right (BsXml abstracts)) -> do (Right (BsXml abstracts)) -> do
case (parse removeSub "error" abstracts) of -- TODO remove "</sub>" maybe there is a cleaner way with isEndOfInput
case (parseOnly removeSub $ LBS.toStrict abstracts <> "</sub>") of
(Left _) -> return [] (Left _) -> return []
(Right v) -> pubMedParser v (Right v) -> pubMedParser v
......
...@@ -35,7 +35,7 @@ dependencies: ...@@ -35,7 +35,7 @@ dependencies:
- time - time
- data-time-segment - data-time-segment
- protolude - protolude
- parsec - attoparsec
- either - either
library: library:
......
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment