Commit 7946fe77 authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge remote-tracking branch 'origin/151-dev-pubmed-api-key' into dev

parents b19bcd6a 8d844d13
......@@ -22,6 +22,7 @@ DATA_FILEPATH = FILEPATH_TO_CHANGE
# Data path to local files (do not use quotes)
REPO_FILEPATH = FILEPATH_TO_CHANGE
PUBMED_API_KEY = ENTER_PUBMED_API_KEY
# [external]
......
......@@ -6,6 +6,7 @@ module Gargantext.API.Admin.Orchestrator.Types
where
import Control.Lens hiding (elements)
import Control.Monad.Reader (MonadReader)
import Data.Aeson
import Data.Morpheus.Types
( GQLType
......@@ -23,7 +24,9 @@ import Test.QuickCheck.Arbitrary
import qualified Gargantext.API.GraphQL.Utils as GQLU
import Gargantext.Core.Types (TODO(..))
import Gargantext.Database.Prelude (HasConfig(..))
import Gargantext.Prelude
import Gargantext.Prelude.Config (gc_pubmed_api_key)
------------------------------------------------------------------------
instance Arbitrary a => Arbitrary (JobStatus 'Safe a) where
......@@ -35,26 +38,41 @@ instance Arbitrary a => Arbitrary (JobOutput a) where
-- | Main Types
-- TODO IsidoreAuth
data ExternalAPIs = All
| PubMed
| PubMed { mAPIKey :: Maybe Text }
| Arxiv
| HAL
| IsTex
| Isidore
deriving (Show, Eq, Enum, Bounded, Generic)
deriving (Show, Eq, Generic)
-- | Main Instances
instance FromJSON ExternalAPIs
instance ToJSON ExternalAPIs
externalAPIs :: [ExternalAPIs]
externalAPIs = [minBound..maxBound]
externalAPIs :: ( MonadReader env m
, HasConfig env) => m [ExternalAPIs]
externalAPIs = do
pubmed_api_key <- view $ hasConfig . gc_pubmed_api_key
pure [ All
, PubMed { mAPIKey = Just pubmed_api_key }
, Arxiv
, HAL
, IsTex
, Isidore ]
instance Arbitrary ExternalAPIs
where
arbitrary = elements externalAPIs
instance ToSchema ExternalAPIs
arbitrary = elements [ All
, PubMed { mAPIKey = Nothing }
, Arxiv
, HAL
, IsTex
, Isidore ]
instance ToSchema ExternalAPIs where
declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
instance ToSchema URL where
declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
......
......@@ -130,7 +130,10 @@ deriveJSON (unPrefix "") 'ApiInfo
instance ToSchema ApiInfo
info :: FlowCmdM env err m => UserId -> m ApiInfo
info _u = pure $ ApiInfo API.externalAPIs
info _u = do
ext <- API.externalAPIs
pure $ ApiInfo ext
------------------------------------------------------------------------
------------------------------------------------------------------------
......@@ -217,7 +220,8 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
-- if cid is corpus -> add to corpus
-- if cid is root -> create corpus in Private
printDebug "[G.A.N.C.New] getDataText with query" q
eTxts <- mapM (\db -> getDataText db (Multi l) q maybeLimit) [database2origin dbs]
databaseOrigin <- database2origin dbs
eTxts <- mapM (\db -> getDataText db (Multi l) q maybeLimit) [databaseOrigin]
let lTxts = lefts eTxts
printDebug "[G.A.N.C.New] lTxts" lTxts
......@@ -242,7 +246,7 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
, _scst_remaining = Just 0
, _scst_events = Just []
}
(err:_) -> do
printDebug "Error: " err
let jl = addEvent "ERROR" (T.pack $ show err) $
......@@ -283,7 +287,7 @@ addToCorpusWithForm user cid (NewWithForm ft ff d l _n) logStatus jobLog = do
CSV -> Parser.parseFormatC Parser.CsvGargV3
WOS -> Parser.parseFormatC Parser.WOS
PresseRIS -> Parser.parseFormatC Parser.RisPresse
-- TODO granularity of the logStatus
let data' = case ff of
Plain -> cs d
......@@ -422,4 +426,3 @@ addToCorpusWithFile user cid nwf@(NewWithFile _d _l fName) logStatus = do
, _scst_remaining = Just 0
, _scst_events = Just []
}
......@@ -4,6 +4,7 @@ module Gargantext.API.Node.Corpus.Types where
import Control.Lens hiding (elements, Empty)
import Control.Monad.Fail (fail)
import Control.Monad.Reader (MonadReader)
import Data.Aeson
import Data.Aeson.TH (deriveJSON)
import Data.Monoid (mempty)
......@@ -19,6 +20,8 @@ import Gargantext.Prelude
import qualified Gargantext.API.Admin.Orchestrator.Types as T
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Action.Flow (DataOrigin(..))
import Gargantext.Database.Prelude (HasConfig(..))
import Gargantext.Prelude.Config (gc_pubmed_api_key)
data Database = Empty
| PubMed
......@@ -31,13 +34,17 @@ data Database = Empty
deriveJSON (unPrefix "") ''Database
instance ToSchema Database
database2origin :: Database -> DataOrigin
database2origin Empty = InternalOrigin T.IsTex
database2origin PubMed = ExternalOrigin T.PubMed
database2origin Arxiv = ExternalOrigin T.Arxiv
database2origin HAL = ExternalOrigin T.HAL
database2origin IsTex = ExternalOrigin T.IsTex
database2origin Isidore = ExternalOrigin T.Isidore
database2origin :: ( MonadReader env m
, HasConfig env ) => Database -> m DataOrigin
database2origin Empty = pure $ InternalOrigin T.IsTex
database2origin PubMed = do
pubmed_api_key <- view $ hasConfig . gc_pubmed_api_key
pure $ ExternalOrigin $ T.PubMed { mAPIKey = Just pubmed_api_key }
database2origin Arxiv = pure $ ExternalOrigin T.Arxiv
database2origin HAL = pure $ ExternalOrigin T.HAL
database2origin IsTex = pure $ ExternalOrigin T.IsTex
database2origin Isidore = pure $ ExternalOrigin T.Isidore
------------------------------------------------------------------------
data Datafield = Gargantext
......@@ -66,4 +73,3 @@ instance ToSchema Datafield where
declareNamedSchema _ = do
return $ NamedSchema (Just "Datafield") $ mempty
& type_ ?~ SwaggerObject
......@@ -38,7 +38,7 @@ get :: ExternalAPIs
-> Maybe Limit
-- -> IO [HyperdataDocument]
-> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get PubMed _la q limit = PUBMED.get q limit
get PubMed { mAPIKey = mAPIKey } _la q limit = PUBMED.get mAPIKey q limit
--docs <- PUBMED.get q default_limit -- EN only by default
--pure (Just $ fromIntegral $ length docs, yieldMany docs)
get Arxiv la q limit = Arxiv.get la q (fromIntegral <$> limit)
......
......@@ -14,6 +14,7 @@ module Gargantext.Core.Text.Corpus.API.Pubmed
where
import Conduit
import Control.Monad.Reader (runReaderT)
import Data.Either (Either)
import Data.Maybe
import Data.Text (Text)
......@@ -26,18 +27,27 @@ import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import qualified PUBMED as PubMed
import qualified PUBMED.Parser as PubMedDoc
import PUBMED.Types (Config(..))
type Query = Text
type Limit = PubMed.Limit
type Limit = Integer
-- | TODO put default pubmed query in gargantext.ini
-- by default: 10K docs
get :: Query -> Maybe Limit -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get q l = do
eRes <- PubMed.getMetadataWithC q l
pure $ (\(len, docsC) -> (len, docsC .| mapC (toDoc EN))) <$> eRes
get :: Maybe Text
-> Query
-> Maybe Limit
-> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get mAPIKey q l = do
eRes <- runReaderT PubMed.getMetadataWithC (Config { apiKey = mAPIKey
, query = q
, perPage = Nothing })
let takeLimit = case l of
Nothing -> mapC identity
Just l' -> takeC $ fromIntegral l'
pure $ (\(len, docsC) -> (len, docsC .| takeLimit .| mapC (toDoc EN))) <$> eRes
--either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN))
-- <$> PubMed.getMetadataWithC q l
......@@ -82,4 +92,3 @@ toDoc l (PubMedDoc.PubMed { pubmed_id
abstract :: [Text] -> Maybe Text
abstract [] = Nothing
abstract as' = Just $ Text.intercalate ", " as'
......@@ -50,6 +50,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
import Conduit
import Control.Lens ((^.), view, _Just, makeLenses, over, traverse)
import Control.Monad.Reader (MonadReader)
import Data.Aeson.TH (deriveJSON)
import Data.Conduit.Internal (zipSources)
import Data.Either
......@@ -129,9 +130,13 @@ deriveJSON (unPrefix "_do_") ''DataOrigin
instance ToSchema DataOrigin where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_do_")
allDataOrigins :: [DataOrigin]
allDataOrigins = map InternalOrigin API.externalAPIs
<> map ExternalOrigin API.externalAPIs
allDataOrigins :: ( MonadReader env m
, HasConfig env) => m [DataOrigin]
allDataOrigins = do
ext <- API.externalAPIs
pure $ map InternalOrigin ext
<> map ExternalOrigin ext
---------------
data DataText = DataOld ![NodeId]
......
......@@ -115,7 +115,7 @@ toFilePath fp1 fp2 = fp1 <> "/" <> fp2
-- | Disk operations
-- | For example, this write file with a random filepath
-- better use a hash of json of Type used to parameter as input
-- better use a hash of json of Type used to parameter as input
-- the functions
writeFile :: ( MonadReader env m
, HasConfig env
......@@ -139,7 +139,7 @@ writeFile a = do
---
-- | Example to read a file with Type
-- | Example to read a file with Type
readGargFile :: ( MonadReader env m
, HasConfig env
, MonadBase IO m
......@@ -205,4 +205,3 @@ onDisk_2 action fp1 fp2 = do
| isDoesNotExistError e = return ()
| otherwise = throwIO e
------------------------------------------------------------------------
......@@ -32,8 +32,10 @@ ghc-options:
# "$everything": -haddock
extra-deps:
- git: https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git
commit: 03c3c381ba9df6da02a7a3c8d7b78cde9a380d04
#- git: https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git
# commit: 03c3c381ba9df6da02a7a3c8d7b78cde9a380d04
- git: https://gitlab.iscpif.fr/cgenie/haskell-gargantext-prelude
commit: 7b9656ce5180d2723301561b0715f06c7b905182
- git: https://gitlab.iscpif.fr/gargantext/gargantext-graph.git
commit: 588e104fe7593210956610cab0041fd16584a4ce
# Data Mining Libs
......@@ -72,7 +74,8 @@ extra-deps:
# External Data API connectors
- git: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git
commit: 364885c891cbadcd4d8a623d2e41394b09f653aa
commit: 31cb4d28dcb5d17274cede5e67b2a01914379129
#commit: 364885c891cbadcd4d8a623d2e41394b09f653aa
- git: https://gitlab.iscpif.fr/gargantext/crawlers/istex.git
commit: a34bb341236d82cf3d488210bc1d8448a98f5808
- git: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment