Commit 7946fe77 authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge remote-tracking branch 'origin/151-dev-pubmed-api-key' into dev

parents b19bcd6a 8d844d13
...@@ -22,6 +22,7 @@ DATA_FILEPATH = FILEPATH_TO_CHANGE ...@@ -22,6 +22,7 @@ DATA_FILEPATH = FILEPATH_TO_CHANGE
# Data path to local files (do not use quotes) # Data path to local files (do not use quotes)
REPO_FILEPATH = FILEPATH_TO_CHANGE REPO_FILEPATH = FILEPATH_TO_CHANGE
PUBMED_API_KEY = ENTER_PUBMED_API_KEY
# [external] # [external]
......
...@@ -6,6 +6,7 @@ module Gargantext.API.Admin.Orchestrator.Types ...@@ -6,6 +6,7 @@ module Gargantext.API.Admin.Orchestrator.Types
where where
import Control.Lens hiding (elements) import Control.Lens hiding (elements)
import Control.Monad.Reader (MonadReader)
import Data.Aeson import Data.Aeson
import Data.Morpheus.Types import Data.Morpheus.Types
( GQLType ( GQLType
...@@ -23,7 +24,9 @@ import Test.QuickCheck.Arbitrary ...@@ -23,7 +24,9 @@ import Test.QuickCheck.Arbitrary
import qualified Gargantext.API.GraphQL.Utils as GQLU import qualified Gargantext.API.GraphQL.Utils as GQLU
import Gargantext.Core.Types (TODO(..)) import Gargantext.Core.Types (TODO(..))
import Gargantext.Database.Prelude (HasConfig(..))
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Prelude.Config (gc_pubmed_api_key)
------------------------------------------------------------------------ ------------------------------------------------------------------------
instance Arbitrary a => Arbitrary (JobStatus 'Safe a) where instance Arbitrary a => Arbitrary (JobStatus 'Safe a) where
...@@ -35,26 +38,41 @@ instance Arbitrary a => Arbitrary (JobOutput a) where ...@@ -35,26 +38,41 @@ instance Arbitrary a => Arbitrary (JobOutput a) where
-- | Main Types -- | Main Types
-- TODO IsidoreAuth -- TODO IsidoreAuth
data ExternalAPIs = All data ExternalAPIs = All
| PubMed | PubMed { mAPIKey :: Maybe Text }
| Arxiv | Arxiv
| HAL | HAL
| IsTex | IsTex
| Isidore | Isidore
deriving (Show, Eq, Enum, Bounded, Generic) deriving (Show, Eq, Generic)
-- | Main Instances -- | Main Instances
instance FromJSON ExternalAPIs instance FromJSON ExternalAPIs
instance ToJSON ExternalAPIs instance ToJSON ExternalAPIs
externalAPIs :: [ExternalAPIs] externalAPIs :: ( MonadReader env m
externalAPIs = [minBound..maxBound] , HasConfig env) => m [ExternalAPIs]
externalAPIs = do
pubmed_api_key <- view $ hasConfig . gc_pubmed_api_key
pure [ All
, PubMed { mAPIKey = Just pubmed_api_key }
, Arxiv
, HAL
, IsTex
, Isidore ]
instance Arbitrary ExternalAPIs instance Arbitrary ExternalAPIs
where where
arbitrary = elements externalAPIs arbitrary = elements [ All
, PubMed { mAPIKey = Nothing }
instance ToSchema ExternalAPIs , Arxiv
, HAL
, IsTex
, Isidore ]
instance ToSchema ExternalAPIs where
declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
instance ToSchema URL where instance ToSchema URL where
declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO) declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
......
...@@ -130,7 +130,10 @@ deriveJSON (unPrefix "") 'ApiInfo ...@@ -130,7 +130,10 @@ deriveJSON (unPrefix "") 'ApiInfo
instance ToSchema ApiInfo instance ToSchema ApiInfo
info :: FlowCmdM env err m => UserId -> m ApiInfo info :: FlowCmdM env err m => UserId -> m ApiInfo
info _u = pure $ ApiInfo API.externalAPIs info _u = do
ext <- API.externalAPIs
pure $ ApiInfo ext
------------------------------------------------------------------------ ------------------------------------------------------------------------
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -217,7 +220,8 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q ...@@ -217,7 +220,8 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
-- if cid is corpus -> add to corpus -- if cid is corpus -> add to corpus
-- if cid is root -> create corpus in Private -- if cid is root -> create corpus in Private
printDebug "[G.A.N.C.New] getDataText with query" q printDebug "[G.A.N.C.New] getDataText with query" q
eTxts <- mapM (\db -> getDataText db (Multi l) q maybeLimit) [database2origin dbs] databaseOrigin <- database2origin dbs
eTxts <- mapM (\db -> getDataText db (Multi l) q maybeLimit) [databaseOrigin]
let lTxts = lefts eTxts let lTxts = lefts eTxts
printDebug "[G.A.N.C.New] lTxts" lTxts printDebug "[G.A.N.C.New] lTxts" lTxts
...@@ -242,7 +246,7 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q ...@@ -242,7 +246,7 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
, _scst_remaining = Just 0 , _scst_remaining = Just 0
, _scst_events = Just [] , _scst_events = Just []
} }
(err:_) -> do (err:_) -> do
printDebug "Error: " err printDebug "Error: " err
let jl = addEvent "ERROR" (T.pack $ show err) $ let jl = addEvent "ERROR" (T.pack $ show err) $
...@@ -283,7 +287,7 @@ addToCorpusWithForm user cid (NewWithForm ft ff d l _n) logStatus jobLog = do ...@@ -283,7 +287,7 @@ addToCorpusWithForm user cid (NewWithForm ft ff d l _n) logStatus jobLog = do
CSV -> Parser.parseFormatC Parser.CsvGargV3 CSV -> Parser.parseFormatC Parser.CsvGargV3
WOS -> Parser.parseFormatC Parser.WOS WOS -> Parser.parseFormatC Parser.WOS
PresseRIS -> Parser.parseFormatC Parser.RisPresse PresseRIS -> Parser.parseFormatC Parser.RisPresse
-- TODO granularity of the logStatus -- TODO granularity of the logStatus
let data' = case ff of let data' = case ff of
Plain -> cs d Plain -> cs d
...@@ -422,4 +426,3 @@ addToCorpusWithFile user cid nwf@(NewWithFile _d _l fName) logStatus = do ...@@ -422,4 +426,3 @@ addToCorpusWithFile user cid nwf@(NewWithFile _d _l fName) logStatus = do
, _scst_remaining = Just 0 , _scst_remaining = Just 0
, _scst_events = Just [] , _scst_events = Just []
} }
...@@ -4,6 +4,7 @@ module Gargantext.API.Node.Corpus.Types where ...@@ -4,6 +4,7 @@ module Gargantext.API.Node.Corpus.Types where
import Control.Lens hiding (elements, Empty) import Control.Lens hiding (elements, Empty)
import Control.Monad.Fail (fail) import Control.Monad.Fail (fail)
import Control.Monad.Reader (MonadReader)
import Data.Aeson import Data.Aeson
import Data.Aeson.TH (deriveJSON) import Data.Aeson.TH (deriveJSON)
import Data.Monoid (mempty) import Data.Monoid (mempty)
...@@ -19,6 +20,8 @@ import Gargantext.Prelude ...@@ -19,6 +20,8 @@ import Gargantext.Prelude
import qualified Gargantext.API.Admin.Orchestrator.Types as T import qualified Gargantext.API.Admin.Orchestrator.Types as T
import Gargantext.Core.Utils.Prefix (unPrefix) import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Action.Flow (DataOrigin(..)) import Gargantext.Database.Action.Flow (DataOrigin(..))
import Gargantext.Database.Prelude (HasConfig(..))
import Gargantext.Prelude.Config (gc_pubmed_api_key)
data Database = Empty data Database = Empty
| PubMed | PubMed
...@@ -31,13 +34,17 @@ data Database = Empty ...@@ -31,13 +34,17 @@ data Database = Empty
deriveJSON (unPrefix "") ''Database deriveJSON (unPrefix "") ''Database
instance ToSchema Database instance ToSchema Database
database2origin :: Database -> DataOrigin database2origin :: ( MonadReader env m
database2origin Empty = InternalOrigin T.IsTex , HasConfig env ) => Database -> m DataOrigin
database2origin PubMed = ExternalOrigin T.PubMed database2origin Empty = pure $ InternalOrigin T.IsTex
database2origin Arxiv = ExternalOrigin T.Arxiv database2origin PubMed = do
database2origin HAL = ExternalOrigin T.HAL pubmed_api_key <- view $ hasConfig . gc_pubmed_api_key
database2origin IsTex = ExternalOrigin T.IsTex
database2origin Isidore = ExternalOrigin T.Isidore pure $ ExternalOrigin $ T.PubMed { mAPIKey = Just pubmed_api_key }
database2origin Arxiv = pure $ ExternalOrigin T.Arxiv
database2origin HAL = pure $ ExternalOrigin T.HAL
database2origin IsTex = pure $ ExternalOrigin T.IsTex
database2origin Isidore = pure $ ExternalOrigin T.Isidore
------------------------------------------------------------------------ ------------------------------------------------------------------------
data Datafield = Gargantext data Datafield = Gargantext
...@@ -66,4 +73,3 @@ instance ToSchema Datafield where ...@@ -66,4 +73,3 @@ instance ToSchema Datafield where
declareNamedSchema _ = do declareNamedSchema _ = do
return $ NamedSchema (Just "Datafield") $ mempty return $ NamedSchema (Just "Datafield") $ mempty
& type_ ?~ SwaggerObject & type_ ?~ SwaggerObject
...@@ -38,7 +38,7 @@ get :: ExternalAPIs ...@@ -38,7 +38,7 @@ get :: ExternalAPIs
-> Maybe Limit -> Maybe Limit
-- -> IO [HyperdataDocument] -- -> IO [HyperdataDocument]
-> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ())) -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get PubMed _la q limit = PUBMED.get q limit get PubMed { mAPIKey = mAPIKey } _la q limit = PUBMED.get mAPIKey q limit
--docs <- PUBMED.get q default_limit -- EN only by default --docs <- PUBMED.get q default_limit -- EN only by default
--pure (Just $ fromIntegral $ length docs, yieldMany docs) --pure (Just $ fromIntegral $ length docs, yieldMany docs)
get Arxiv la q limit = Arxiv.get la q (fromIntegral <$> limit) get Arxiv la q limit = Arxiv.get la q (fromIntegral <$> limit)
......
...@@ -14,6 +14,7 @@ module Gargantext.Core.Text.Corpus.API.Pubmed ...@@ -14,6 +14,7 @@ module Gargantext.Core.Text.Corpus.API.Pubmed
where where
import Conduit import Conduit
import Control.Monad.Reader (runReaderT)
import Data.Either (Either) import Data.Either (Either)
import Data.Maybe import Data.Maybe
import Data.Text (Text) import Data.Text (Text)
...@@ -26,18 +27,27 @@ import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) ...@@ -26,18 +27,27 @@ import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import qualified PUBMED as PubMed import qualified PUBMED as PubMed
import qualified PUBMED.Parser as PubMedDoc import qualified PUBMED.Parser as PubMedDoc
import PUBMED.Types (Config(..))
type Query = Text type Query = Text
type Limit = PubMed.Limit type Limit = Integer
-- | TODO put default pubmed query in gargantext.ini -- | TODO put default pubmed query in gargantext.ini
-- by default: 10K docs -- by default: 10K docs
get :: Query -> Maybe Limit -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ())) get :: Maybe Text
get q l = do -> Query
eRes <- PubMed.getMetadataWithC q l -> Maybe Limit
pure $ (\(len, docsC) -> (len, docsC .| mapC (toDoc EN))) <$> eRes -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get mAPIKey q l = do
eRes <- runReaderT PubMed.getMetadataWithC (Config { apiKey = mAPIKey
, query = q
, perPage = Nothing })
let takeLimit = case l of
Nothing -> mapC identity
Just l' -> takeC $ fromIntegral l'
pure $ (\(len, docsC) -> (len, docsC .| takeLimit .| mapC (toDoc EN))) <$> eRes
--either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN)) --either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN))
-- <$> PubMed.getMetadataWithC q l -- <$> PubMed.getMetadataWithC q l
...@@ -82,4 +92,3 @@ toDoc l (PubMedDoc.PubMed { pubmed_id ...@@ -82,4 +92,3 @@ toDoc l (PubMedDoc.PubMed { pubmed_id
abstract :: [Text] -> Maybe Text abstract :: [Text] -> Maybe Text
abstract [] = Nothing abstract [] = Nothing
abstract as' = Just $ Text.intercalate ", " as' abstract as' = Just $ Text.intercalate ", " as'
...@@ -50,6 +50,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list) ...@@ -50,6 +50,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
import Conduit import Conduit
import Control.Lens ((^.), view, _Just, makeLenses, over, traverse) import Control.Lens ((^.), view, _Just, makeLenses, over, traverse)
import Control.Monad.Reader (MonadReader)
import Data.Aeson.TH (deriveJSON) import Data.Aeson.TH (deriveJSON)
import Data.Conduit.Internal (zipSources) import Data.Conduit.Internal (zipSources)
import Data.Either import Data.Either
...@@ -129,9 +130,13 @@ deriveJSON (unPrefix "_do_") ''DataOrigin ...@@ -129,9 +130,13 @@ deriveJSON (unPrefix "_do_") ''DataOrigin
instance ToSchema DataOrigin where instance ToSchema DataOrigin where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_do_") declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_do_")
allDataOrigins :: [DataOrigin] allDataOrigins :: ( MonadReader env m
allDataOrigins = map InternalOrigin API.externalAPIs , HasConfig env) => m [DataOrigin]
<> map ExternalOrigin API.externalAPIs allDataOrigins = do
ext <- API.externalAPIs
pure $ map InternalOrigin ext
<> map ExternalOrigin ext
--------------- ---------------
data DataText = DataOld ![NodeId] data DataText = DataOld ![NodeId]
......
...@@ -115,7 +115,7 @@ toFilePath fp1 fp2 = fp1 <> "/" <> fp2 ...@@ -115,7 +115,7 @@ toFilePath fp1 fp2 = fp1 <> "/" <> fp2
-- | Disk operations -- | Disk operations
-- | For example, this write file with a random filepath -- | For example, this write file with a random filepath
-- better use a hash of json of Type used to parameter as input -- better use a hash of json of Type used to parameter as input
-- the functions -- the functions
writeFile :: ( MonadReader env m writeFile :: ( MonadReader env m
, HasConfig env , HasConfig env
...@@ -139,7 +139,7 @@ writeFile a = do ...@@ -139,7 +139,7 @@ writeFile a = do
--- ---
-- | Example to read a file with Type -- | Example to read a file with Type
readGargFile :: ( MonadReader env m readGargFile :: ( MonadReader env m
, HasConfig env , HasConfig env
, MonadBase IO m , MonadBase IO m
...@@ -205,4 +205,3 @@ onDisk_2 action fp1 fp2 = do ...@@ -205,4 +205,3 @@ onDisk_2 action fp1 fp2 = do
| isDoesNotExistError e = return () | isDoesNotExistError e = return ()
| otherwise = throwIO e | otherwise = throwIO e
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -32,8 +32,10 @@ ghc-options: ...@@ -32,8 +32,10 @@ ghc-options:
# "$everything": -haddock # "$everything": -haddock
extra-deps: extra-deps:
- git: https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git #- git: https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git
commit: 03c3c381ba9df6da02a7a3c8d7b78cde9a380d04 # commit: 03c3c381ba9df6da02a7a3c8d7b78cde9a380d04
- git: https://gitlab.iscpif.fr/cgenie/haskell-gargantext-prelude
commit: 7b9656ce5180d2723301561b0715f06c7b905182
- git: https://gitlab.iscpif.fr/gargantext/gargantext-graph.git - git: https://gitlab.iscpif.fr/gargantext/gargantext-graph.git
commit: 588e104fe7593210956610cab0041fd16584a4ce commit: 588e104fe7593210956610cab0041fd16584a4ce
# Data Mining Libs # Data Mining Libs
...@@ -72,7 +74,8 @@ extra-deps: ...@@ -72,7 +74,8 @@ extra-deps:
# External Data API connectors # External Data API connectors
- git: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git - git: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git
commit: 364885c891cbadcd4d8a623d2e41394b09f653aa commit: 31cb4d28dcb5d17274cede5e67b2a01914379129
#commit: 364885c891cbadcd4d8a623d2e41394b09f653aa
- git: https://gitlab.iscpif.fr/gargantext/crawlers/istex.git - git: https://gitlab.iscpif.fr/gargantext/crawlers/istex.git
commit: a34bb341236d82cf3d488210bc1d8448a98f5808 commit: a34bb341236d82cf3d488210bc1d8448a98f5808
- git: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git - git: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment