Commit 31cb4d28 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

Main.hs works now, returning list of docs

parent 47ed870c
...@@ -63,18 +63,22 @@ main = run =<< execParser opts ...@@ -63,18 +63,22 @@ main = run =<< execParser opts
run :: Command -> IO () run :: Command -> IO ()
run (Meta (MetaParams { mp_mAPIKey, mp_term })) = do run (Meta (MetaParams { mp_mAPIKey, mp_term })) = do
let config = Config { mAPIKey = Text.pack <$> mp_mAPIKey } let config = Config { apiKey = Text.pack <$> mp_mAPIKey
, query = Text.pack mp_term
, perPage = Nothing }
eDocs <- runReaderT (getMetadataWithC (Text.pack mp_term) Nothing) config eDocs <- runReaderT getMetadataWithC config
case eDocs of case eDocs of
Left err -> print err Left err -> print err
Right (count, _docsC) -> print $ show count Right (count, _docsC) -> print $ show count
run (Fetch (FetchParams { fp_mAPIKey, fp_term, fp_limit })) = do run (Fetch (FetchParams { fp_mAPIKey, fp_term, fp_limit })) = do
let config = Config { mAPIKey = Text.pack <$> fp_mAPIKey } let config = Config { apiKey = Text.pack <$> fp_mAPIKey
, query = Text.pack fp_term
, perPage = Nothing }
eDocs <- runReaderT (getMetadataWithC (Text.pack fp_term) (Just $ fromIntegral fp_limit)) config eDocs <- runReaderT getMetadataWithC config
case eDocs of case eDocs of
Left err -> print err Left err -> print err
Right (count, docsC) -> do Right (count, docsC) -> do
print $ show count print $ show count
runConduit $ docsC .| mapM_C print runConduit $ docsC .| takeC fp_limit .| mapM_C print
...@@ -32,87 +32,60 @@ pmSearchPath = "entrez/eutils" ...@@ -32,87 +32,60 @@ pmSearchPath = "entrez/eutils"
pmPort :: Int pmPort :: Int
pmPort = 443 pmPort = 443
batchSize :: Int
batchSize = 200
defaultClientEnv :: IO ClientEnv defaultClientEnv :: IO ClientEnv
defaultClientEnv = do defaultClientEnv = do
manager' <- newManager tlsManagerSettings manager' <- newManager tlsManagerSettings
pure $ mkClientEnv manager' $ BaseUrl Https pmHost pmPort pmSearchPath pure $ mkClientEnv manager' $ BaseUrl Https pmHost pmPort pmSearchPath
-- | API main function -- | API main function
getMetadataWith :: Text -> Maybe Integer -> Maybe Limit -> Env (Either Text [PubMed]) getMetadataWith :: Env (Either Text [PubMed])
getMetadataWith = runSimpleFindPubmedAbstractRequest getMetadataWith = runSimpleFindPubmedAbstractRequest Nothing
getMetadataWithC :: Text getMetadataWithC :: Env (Either ClientError (Maybe Integer, ConduitT () PubMed IO ()))
-> Maybe Limit getMetadataWithC = do
-> Env (Either ClientError (Maybe Integer, ConduitT () PubMed IO ())) config@(Config { apiKey, query, perPage }) <- ask
getMetadataWithC query limit = do
config@(Config { mAPIKey = mAPIKey }) <- ask
liftIO $ do liftIO $ do
env <- defaultClientEnv env <- defaultClientEnv
-- First, estimate the total number of documents -- First, estimate the total number of documents
eRes <- runClientM (search mAPIKey (Just query) Nothing (Just 1)) env eRes <- runClientM (search apiKey (Just query) Nothing (Just 1)) env
pure $ get' config env query limit batchSize <$> eRes pure $ get' config env (fromMaybe defaultPerPage perPage) <$> eRes
where where
get' :: Config get' :: Config
-> ClientEnv -> ClientEnv
-> Text -> PerPage
-> Maybe Limit
-> Int
-> BsXml -> BsXml
-> (Maybe Integer, ConduitT () PubMed IO ()) -> (Maybe Integer, ConduitT () PubMed IO ())
get' config env q l perPage (BsXml res) = get' config env perPage (BsXml res) =
( Just numResults ( Just numResults
, yieldMany [0..] , yieldMany [0..]
.| takeC (fromInteger numPages) .| takeC (fromInteger numPages)
.| concatMapMC (\pageNum -> runReaderT (getPage q perPage pageNum) config)) .| concatMapMC (\pageNum -> runReaderT (getPage pageNum) config))
where where
numResults = fromMaybe 0 $ parseDocCount $ TL.fromStrict $ TE.decodeUtf8 $ LBS.toStrict res numResults = fromMaybe 0 $ parseDocCount $ TL.fromStrict $ TE.decodeUtf8 $ LBS.toStrict res
numPages = numResults `div` (fromIntegral perPage) + 1 numPages = numResults `div` (fromIntegral perPage) + 1
getPage :: Text getPage :: Integer
-> Int
-> Int
-> Env [PubMed] -> Env [PubMed]
getPage q perPage pageNum = do getPage pageNum = do
(Config { perPage = mPerPage }) <- ask
let perPage = fromMaybe defaultPerPage mPerPage
let offset = fromIntegral $ pageNum * perPage let offset = fromIntegral $ pageNum * perPage
liftIO $ print $ "[getPage] getting page " <> show pageNum <> ", offset: " <> show offset <> ", perPage: " <> show perPage liftIO $ print $ "[getPage] getting page " <> show pageNum <> ", offset: " <> show offset <> ", perPage: " <> show perPage
eDocs <- runSimpleFindPubmedAbstractRequest q (Just offset) (Just $ fromIntegral perPage) eDocs <- runSimpleFindPubmedAbstractRequest (Just offset)
liftIO $ case eDocs of case eDocs of
Left err -> panic $ "[getPage] error: " <> show err Left err -> panic $ "[getPage] error: " <> show err
Right docs -> liftIO $ do Right docs -> liftIO $ do
_ <- threadDelay 2000000 -- One seconds _ <- threadDelay 2000000 -- One seconds
print $ "[getPage] docs length: " <> show (length docs) print $ "[getPage] docs length: " <> show (length docs)
pure docs pure docs
-- | TODO this parser need at least one subs at the end
-- (use endOfInput)
removeSub :: Parser ByteString
removeSub = do
dt <- many textWithBalise
eo <- manyTill anyChar endOfInput
pure $ LBS.fromStrict $ pack $ concat dt <> eo
where
textWithBalise =
manyTill anyChar (sub <|> asub)
sub = string "<sub>"
<|> string "<sup>"
<|> string "<i>"
<|> string "<b>"
asub = string "</sub>"
<|> string "</sup>"
<|> string "</i>"
<|> string "</b>"
type Query = Text
type Limit = Integer
runMultipleFPAR :: [Integer] runMultipleFPAR :: [Integer]
-> Env (Either Text [PubMed]) -> Env (Either Text [PubMed])
runMultipleFPAR [] = pure $ Right [] runMultipleFPAR [] = pure $ Right []
runMultipleFPAR ids = do runMultipleFPAR ids = do
List.foldl1' concat' <$> mapM runSimpleFetchPubmedAbstractRequest (by batchSize ids) (Config { perPage = mPerPage }) <- ask
let perPage = fromInteger $ fromMaybe defaultPerPage mPerPage
List.foldl1' concat' <$> mapM runSimpleFetchPubmedAbstractRequest (by perPage ids)
where where
by _ [] = [] by _ [] = []
by n ns = head' : ( by n tail') by n ns = head' : ( by n tail')
...@@ -125,11 +98,11 @@ runMultipleFPAR ids = do ...@@ -125,11 +98,11 @@ runMultipleFPAR ids = do
runSimpleFetchPubmedAbstractRequest :: [Integer] runSimpleFetchPubmedAbstractRequest :: [Integer]
-> Env (Either Text [PubMed]) -> Env (Either Text [PubMed])
runSimpleFetchPubmedAbstractRequest ids = do runSimpleFetchPubmedAbstractRequest ids = do
(Config { mAPIKey = mAPIKey }) <- ask (Config { apiKey }) <- ask
liftIO $ do liftIO $ do
env <- defaultClientEnv env <- defaultClientEnv
res <- runClientM res <- runClientM
(fetch mAPIKey (Just "pubmed") (Just "abstract") ids) (fetch apiKey (Just "pubmed") (Just "abstract") ids)
env env
case res of case res of
(Left err) -> pure (Left . T.pack $ show err) (Left err) -> pure (Left . T.pack $ show err)
...@@ -145,25 +118,23 @@ runSimpleFetchPubmedAbstractRequest ids = do ...@@ -145,25 +118,23 @@ runSimpleFetchPubmedAbstractRequest ids = do
-- pure []) :: XmlException -> IO [PubMed]) -- pure []) :: XmlException -> IO [PubMed])
-- Right <$> pure parsed -- Right <$> pure parsed
runSimpleFindPubmedAbstractRequest :: Text runSimpleFindPubmedAbstractRequest :: Maybe Integer
-> Maybe Integer
-> Maybe Limit
-> Env (Either Text [PubMed]) -> Env (Either Text [PubMed])
runSimpleFindPubmedAbstractRequest query offset limit = do runSimpleFindPubmedAbstractRequest offset = do
eDocIds <- searchDocIds query offset limit eDocIds <- searchDocIds offset
case eDocIds of case eDocIds of
Left err -> pure $ Left err Left err -> pure $ Left err
Right docIds -> do Right docIds -> do
liftIO $ print $ "[runSimpleFindPubmedAbstractRequest] docIds" <> show docIds liftIO $ print $ "[runSimpleFindPubmedAbstractRequest] docIds" <> show docIds
runMultipleFPAR docIds runMultipleFPAR docIds
searchDocIds :: Text -> Maybe Integer -> Maybe Limit -> Env (Either Text [Integer]) searchDocIds :: Maybe Integer -> Env (Either Text [Integer])
searchDocIds query offset limit = do searchDocIds offset = do
(Config { mAPIKey = mAPIKey }) <- ask (Config { apiKey, query, perPage }) <- ask
liftIO $ do liftIO $ do
env <- defaultClientEnv env <- defaultClientEnv
res <- runClientM res <- runClientM
(search mAPIKey (Just query) offset limit) (search apiKey (Just query) offset perPage)
env env
case res of case res of
(Left err) -> pure (Left $ T.pack $ show err) (Left err) -> pure (Left $ T.pack $ show err)
...@@ -173,3 +144,23 @@ searchDocIds query offset limit = do ...@@ -173,3 +144,23 @@ searchDocIds query offset limit = do
--runParser :: Show res => (Cursor -> res) -> LBS.ByteString -> res --runParser :: Show res => (Cursor -> res) -> LBS.ByteString -> res
--runParser parser = parser . fromDocument . parseLBS_ def --runParser parser = parser . fromDocument . parseLBS_ def
-- | TODO this parser need at least one subs at the end
-- (use endOfInput)
removeSub :: Parser ByteString
removeSub = do
dt <- many textWithBalise
eo <- manyTill anyChar endOfInput
pure $ LBS.fromStrict $ pack $ concat dt <> eo
where
textWithBalise =
manyTill anyChar (sub <|> asub)
sub = string "<sub>"
<|> string "<sup>"
<|> string "<i>"
<|> string "<b>"
asub = string "</sub>"
<|> string "</sup>"
<|> string "</i>"
<|> string "</b>"
...@@ -7,6 +7,8 @@ import Servant.Client ...@@ -7,6 +7,8 @@ import Servant.Client
import qualified Data.Text as T import qualified Data.Text as T
import qualified Network.HTTP.Media as M import qualified Network.HTTP.Media as M
import PUBMED.Types (APIKey, Query, PerPage)
data DB = PUBMED data DB = PUBMED
newtype BsXml = BsXml ByteString newtype BsXml = BsXml ByteString
...@@ -22,14 +24,14 @@ type PUBMEDAPI = ...@@ -22,14 +24,14 @@ type PUBMEDAPI =
"esearch.fcgi" "esearch.fcgi"
-- :> QueryParam "db" DB -- :> QueryParam "db" DB
-- not mandatory since the base db is pubmed -- not mandatory since the base db is pubmed
:> QueryParam "api_key" T.Text :> QueryParam "api_key" APIKey
:> QueryParam "term" T.Text :> QueryParam "term" T.Text
:> QueryParam "retstart" Integer :> QueryParam "retstart" Integer
:> QueryParam "retmax" Integer :> QueryParam "retmax" Integer
:> Get '[BsXml] BsXml :> Get '[BsXml] BsXml
:<|> :<|>
"efetch.fcgi" "efetch.fcgi"
:> QueryParam "api_key" T.Text :> QueryParam "api_key" APIKey
:> QueryParam "db" T.Text :> QueryParam "db" T.Text
:> QueryParam "rettype" T.Text :> QueryParam "rettype" T.Text
:> QueryParams "id" Integer :> QueryParams "id" Integer
...@@ -38,13 +40,13 @@ type PUBMEDAPI = ...@@ -38,13 +40,13 @@ type PUBMEDAPI =
pubmedApi :: Proxy PUBMEDAPI pubmedApi :: Proxy PUBMEDAPI
pubmedApi = Proxy pubmedApi = Proxy
search :: Maybe T.Text search :: Maybe APIKey
-> Maybe T.Text -> Maybe T.Text
-> Maybe Integer -> Maybe Integer
-> Maybe Integer -> Maybe Integer
-> ClientM BsXml -> ClientM BsXml
fetch :: Maybe T.Text fetch :: Maybe APIKey
-> Maybe T.Text -> Maybe T.Text
-> Maybe T.Text -> Maybe T.Text
-> [Integer] -> [Integer]
......
...@@ -16,7 +16,7 @@ import Panic (panic) ...@@ -16,7 +16,7 @@ import Panic (panic)
import qualified Text.Read as TR import qualified Text.Read as TR
import qualified Text.Taggy.Lens as TTL import qualified Text.Taggy.Lens as TTL
namedEl name = TTL.elements . TTL.named (only name) namedEl name = TTL.elements . TTL.named (only name)
contentWithChildren :: Prism' Node T.Text contentWithChildren :: Prism' Node T.Text
contentWithChildren = prism' NodeContent $ \case { NodeContent c -> Just c contentWithChildren = prism' NodeContent $ \case { NodeContent c -> Just c
......
...@@ -3,8 +3,19 @@ module PUBMED.Types where ...@@ -3,8 +3,19 @@ module PUBMED.Types where
import Control.Monad.Reader (ReaderT) import Control.Monad.Reader (ReaderT)
import Data.Text (Text) import Data.Text (Text)
type APIKey = Text
type Query = Text
type PerPage = Integer
data Config = Config { data Config = Config {
mAPIKey :: Maybe Text apiKey :: Maybe APIKey
, query :: Query
, perPage :: Maybe PerPage
} }
type Env = ReaderT Config IO type Env = ReaderT Config IO
defaultPerPage :: PerPage
defaultPerPage = 200
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment