Commit d69b3a65 authored by Mael NICOLAS's avatar Mael NICOLAS

added a new pre-parser to remove <sub> (not working yet)

parent 8299b701
...@@ -9,13 +9,26 @@ import Network.HTTP.Client.TLS (tlsManagerSettings) ...@@ -9,13 +9,26 @@ import Network.HTTP.Client.TLS (tlsManagerSettings)
import Servant.Client (runClientM, mkClientEnv, BaseUrl(..), Scheme(..)) import Servant.Client (runClientM, mkClientEnv, BaseUrl(..), Scheme(..))
import Text.XML (parseLBS_, def) import Text.XML (parseLBS_, def)
import Text.XML.Cursor (fromDocument, Cursor) import Text.XML.Cursor (fromDocument, Cursor)
import Data.Conduit (ConduitT)
import Data.ByteString.Lazy (ByteString)
import Data.ByteString.Char8 (pack)
import Control.Monad.Catch (MonadThrow)
import Text.Parsec hiding (runParser)
import Text.Parsec.ByteString.Lazy
import qualified Data.ByteString.Lazy as LBS import qualified Data.ByteString.Lazy as LBS
import qualified Data.Text as T import qualified Data.Text as T
runParser :: Show res => (Cursor -> res) -> LBS.ByteString -> res runParser :: Show res => (Cursor -> res) -> LBS.ByteString -> res
runParser parser = parser . fromDocument . parseLBS_ def runParser parser = parser . fromDocument . parseLBS_ def
-- | Pre-parser that strips every literal @<sub>@ and @</sub>@ tag from the
-- input and returns all remaining characters, unchanged and in order.
--
-- The previous version ran @many1 anyChar@ first, which consumed the whole
-- input (tags included) before the tag-skipping parser could ever match, so
-- nothing was removed. Here the tags are tried before each single character.
--
-- As before, the parser fails on empty input (because of 'many1').
removeSub :: Parser ByteString
removeSub = do
  pieces <- many1 (tag <|> keep)
  return . LBS.fromStrict . pack $ concat pieces
  where
    -- A recognised tag contributes nothing to the output. 'try' is required
    -- so that a partial match (e.g. "<sup>") backtracks instead of failing
    -- after having consumed input.
    tag = [] <$ (try (string "<sub>") <|> try (string "</sub>"))
    -- Any other character is kept verbatim.
    keep = (: []) <$> anyChar
runSimpleFindPubmedAbstractRequest :: T.Text -> IO [PubMed] runSimpleFindPubmedAbstractRequest :: T.Text -> IO [PubMed]
runSimpleFindPubmedAbstractRequest rq = do runSimpleFindPubmedAbstractRequest rq = do
manager' <- newManager tlsManagerSettings manager' <- newManager tlsManagerSettings
...@@ -32,8 +45,9 @@ runSimpleFindPubmedAbstractRequest rq = do ...@@ -32,8 +45,9 @@ runSimpleFindPubmedAbstractRequest rq = do
case res' of case res' of
(Left err) -> return [] (Left err) -> return []
(Right (BsXml abstracts)) -> do (Right (BsXml abstracts)) -> do
_ <- print abstracts case (parse removeSub "error" abstracts) of
pubMedParser abstracts (Left _) -> return []
(Right v) -> pubMedParser v
main :: IO () main :: IO ()
main = do main = do
......
...@@ -35,6 +35,8 @@ dependencies: ...@@ -35,6 +35,8 @@ dependencies:
- time - time
- data-time-segment - data-time-segment
- protolude - protolude
- parsec
- either
library: library:
source-dirs: src source-dirs: src
......
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment