...
 
......@@ -42,31 +42,33 @@ workerCLI (CLIW_run (WorkerArgs { .. })) = do
$ List.take 72
$ List.cycle ["_"]) :: Prelude.String)
___
putText "GarganText worker"
putText $ "worker_name: " <> worker_name
putText $ "worker toml: " <> T.pack (_SettingsFile worker_toml)
___
withWorkerEnv worker_toml $ \env -> do
let ws = env ^. hasConfig . gc_worker
case findDefinitionByName ws worker_name of
Nothing -> do
let workerNames = _wdName <$> (_wsDefinitions ws)
let availableWorkers = T.intercalate ", " workerNames
putText $ "Worker definition not found! Available workers: " <> availableWorkers
Just wd -> do
putText $ "Starting worker '" <> worker_name <> "'"
putText $ "gc config: " <> show (env ^. hasConfig)
putText $ "Worker settings: " <> show ws
___
if worker_run_single then
withPGMQWorkerSingleCtrlC env wd $ \a _state -> do
wait a
else
withPGMQWorkerCtrlC env wd $ \a _state -> do
-- _ <- runReaderT (sendJob Ping) env
wait a
let log_cfg = env ^. hasConfig . gc_logging
withLogger log_cfg $ \ioLogger -> do
___
logMsg ioLogger INFO "GarganText worker"
logMsg ioLogger INFO $ "worker_name: " <> T.unpack worker_name
logMsg ioLogger INFO $ "worker toml: " <> _SettingsFile worker_toml
___
let ws = env ^. hasConfig . gc_worker
case findDefinitionByName ws worker_name of
Nothing -> do
let workerNames = _wdName <$> (_wsDefinitions ws)
let availableWorkers = T.intercalate ", " workerNames
putText $ "Worker definition not found! Available workers: " <> availableWorkers
Just wd -> do
logMsg ioLogger INFO $ "Starting worker '" <> T.unpack worker_name <> "'"
logMsg ioLogger DEBUG $ "gc config: " <> show (env ^. hasConfig)
logMsg ioLogger DEBUG $ "Worker settings: " <> show ws
___
if worker_run_single then
withPGMQWorkerSingleCtrlC env wd $ \a _state -> do
wait a
else
withPGMQWorkerCtrlC env wd $ \a _state -> do
-- _ <- runReaderT (sendJob Ping) env
wait a
workerCLI (CLIW_runAll (WorkerAllArgs { .. })) = withWorkerEnv worker_toml $ \env -> do
let log_cfg = env ^. hasConfig . gc_logging
withLogger log_cfg $ \ioLogger -> runAllWorkers ioLogger worker_toml
......
......@@ -16,7 +16,7 @@ fi
# with the `sha256sum` result calculated on the `cabal.project` and
# `cabal.project.freeze`. This ensures the files stay deterministic so that CI
# cache can kick in.
expected_cabal_project_hash="37352ca36ca5e69d9945da11439be4c3909297b338242855fa588dffdf1ba02b"
expected_cabal_project_hash="0ce011cd483078936fe3385b3e0c90231774fd2ac05bfb9f4c646345a0208d66"
expected_cabal_project_freeze_hash="cd52143d3a9d285360b59c6371d3e258552c1bc115bd612024db3de1f7593ff7"
......
......@@ -72,7 +72,7 @@ source-repository-package
source-repository-package
type: git
location: https://gitlab.iscpif.fr/gargantext/crawlers/istex.git
tag: 521ca54f1502b13f629eff2223aaf5007e6d52ec
tag: 894482ef97eadce6b1c13ebced1edfe394b5be5e
source-repository-package
type: git
......
version: '3'
name: 'gargantext'
services:
caddy:
image: caddy:alpine
ports:
- 8108:8108
volumes:
- ./Caddyfile:/etc/caddy/Caddyfile:ro
- ../../purescript-gargantext:/srv/purescript-gargantext:ro
# caddy:
# image: caddy:alpine
# ports:
# - 8108:8108
# volumes:
# - ./Caddyfile:/etc/caddy/Caddyfile:ro
# - ../../purescript-gargantext:/srv/purescript-gargantext:ro
#postgres11:
# #image: 'postgres:latest'
......
......@@ -134,6 +134,8 @@ library
Gargantext.API.Node
Gargantext.API.Node.Contact.Types
Gargantext.API.Node.Corpus.Annuaire
Gargantext.API.Node.Corpus.Export.Types
Gargantext.API.Node.Corpus.Export.Utils
Gargantext.API.Node.Corpus.New
Gargantext.API.Node.Corpus.New.Types
Gargantext.API.Node.Corpus.Types
......@@ -344,7 +346,6 @@ library
Gargantext.API.Ngrams.NgramsTree
Gargantext.API.Node.Contact
Gargantext.API.Node.Corpus.Export
Gargantext.API.Node.Corpus.Export.Types
Gargantext.API.Node.Corpus.Searx
Gargantext.API.Node.Corpus.Subcorpus
Gargantext.API.Node.Document.Export
......@@ -621,6 +622,7 @@ library
, singletons-th >= 3.1 && < 3.3
, smtp-mail >= 0.3.0.0
, split >= 0.2.3.4
, sqlite-simple >= 0.4.19 && < 0.5
, stemmer == 0.5.2
, stm >= 2.5.1.0 && < 2.6
, stm-containers >= 1.2.0.3 && < 1.3
......@@ -759,6 +761,7 @@ common commonTestDependencies
, servant-client >= 0.20 && < 0.21
, servant-client-core >= 0.20 && < 0.21
, servant-conduit >= 0.15 && < 0.17
, servant-server >= 0.20.1 && < 0.21
, shelly >= 1.9 && < 2
, stm >= 2.5.1.0 && < 2.6
, streaming-commons
......@@ -868,12 +871,15 @@ test-suite garg-test-hspec
main-is: drivers/hspec/Main.hs
build-depends:
process ^>= 1.6.18.0
, servant >= 0.20.1 && < 0.21
, sqlite-simple >= 0.4.19 && < 0.5
, unix >= 2.7.3 && < 2.9
other-modules:
Paths_gargantext
Test.API
Test.API.Authentication
Test.API.Errors
Test.API.Export
Test.API.GraphQL
Test.API.Notifications
Test.API.Private
......
......@@ -17,6 +17,7 @@ Portability : POSIX
module Gargantext.API.Ngrams.List
where
import Data.Aeson qualified as Aeson
import Data.ByteString.Lazy qualified as BSL
import Data.Csv qualified as Tsv
import Data.HashMap.Strict (HashMap)
......@@ -25,8 +26,10 @@ import Data.Map.Strict qualified as Map
import Data.Map.Strict (toList)
import Data.Set qualified as Set
import Data.Text (concat, pack, splitOn)
import Data.Text.Encoding qualified as TE
import Data.Vector qualified as Vec
import Data.Vector (Vector)
import Database.PostgreSQL.Simple.LargeObjects qualified as PSQL
import Gargantext.API.Admin.EnvTypes (Env)
import Gargantext.API.Errors.Types (BackendInternalError(InternalServerError))
import Gargantext.API.Ngrams.List.Types (_wjf_data, _wtf_data)
......@@ -35,25 +38,25 @@ import Gargantext.API.Ngrams (setListNgrams)
import Gargantext.API.Ngrams.Types
import Gargantext.API.Prelude (GargM, serverError, HasServerError)
import Gargantext.API.Routes.Named.List qualified as Named
import Gargantext.API.Worker (serveWorkerAPI, serveWorkerAPIEJob)
import Gargantext.API.Worker (serveWorkerAPIm)
import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import Gargantext.Core.Text.Ngrams (Ngrams, NgramsType(NgramsTerms))
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Worker.Jobs.Types qualified as Jobs
import Gargantext.Database.Action.Flow (reIndexWith)
import Gargantext.Database.Admin.Types.Node ( NodeId(_NodeId), ListId )
import Gargantext.Database.Prelude (createLargeObject)
import Gargantext.Database.Query.Table.Node (getNode)
import Gargantext.Database.Schema.Ngrams ( text2ngrams, NgramsId )
import Gargantext.Database.Schema.Node (_node_parent_id)
import Gargantext.Database.Types (Indexed(..))
import Gargantext.Prelude hiding (concat, toList)
import Gargantext.System.Logging (logLocM, MonadLogger)
import Gargantext.System.Logging (logLocM, LogLevel(..), MonadLogger)
import Gargantext.Utils.Jobs.Monad (MonadJobStatus(..))
import Prelude qualified
import Protolude qualified as P
import Servant
import Servant.Server.Generic (AsServerT)
import Gargantext.System.Logging (LogLevel(..))
getAPI :: Named.GETAPI (AsServerT (GargM Env BackendInternalError))
......@@ -111,9 +114,11 @@ getTsv lId = do
------------------------------------------------------------------------
jsonPostAsync :: Named.JSONAPI (AsServerT (GargM Env BackendInternalError))
jsonPostAsync = Named.JSONAPI {
updateListJSONEp = \lId -> serveWorkerAPI $ \p ->
Jobs.JSONPost { _jp_list_id = lId
, _jp_ngrams_list = _wjf_data p }
updateListJSONEp = \lId -> serveWorkerAPIm $ \p -> do
(PSQL.Oid oId) <- createLargeObject $ TE.encodeUtf8 $ _wjf_data p
pure $ Jobs.JSONPost { _jp_list_id = lId
, _jp_ngrams_oid = fromIntegral oId }
-- , _jp_ngrams_list = _wjf_data p }
}
------------------------------------------------------------------------
......@@ -159,11 +164,14 @@ tsvAPI = tsvPostAsync
tsvPostAsync :: Named.TSVAPI (AsServerT (GargM Env BackendInternalError))
tsvPostAsync =
Named.TSVAPI {
updateListTSVEp = \lId -> serveWorkerAPIEJob $ \p ->
updateListTSVEp = \lId -> serveWorkerAPIm $ \p -> do
case ngramsListFromTSVData (_wtf_data p) of
Left err -> Left $ InternalServerError $ err500 { errReasonPhrase = err }
Right ngramsList -> Right $ Jobs.JSONPost { _jp_list_id = lId
, _jp_ngrams_list = ngramsList }
Left err -> throwError $ InternalServerError $ err500 { errReasonPhrase = err }
Right ngramsList -> do
(PSQL.Oid oId) <- createLargeObject $ BSL.toStrict $ Aeson.encode ngramsList
pure $ Jobs.JSONPost { _jp_list_id = lId
, _jp_ngrams_oid = fromIntegral oId }
-- , _jp_ngrams_list = ngramsList }
}
-- | Tries converting a text file into an 'NgramList', so that we can reuse the
......
......@@ -9,15 +9,13 @@ Portability : POSIX
-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Ngrams.List.Types where
import Data.Aeson
import Data.ByteString.Lazy qualified as BSL
-- import Data.ByteString.Lazy qualified as BSL
import Data.Swagger (ToSchema, declareNamedSchema, genericDeclareNamedSchema)
import Data.Text
import Data.Text.Encoding qualified as E
-- import Data.Text.Encoding qualified as E
import Gargantext.API.Ngrams.Types (NgramsList)
import Gargantext.API.Node.Corpus.New.Types (FileType(..))
import Gargantext.Core.Utils.Aeson (jsonOptions)
......@@ -46,16 +44,18 @@ instance ToSchema WithFile where
------------------------------------------------------------------------
data WithJsonFile = WithJsonFile
{ _wjf_data :: !NgramsList
{ -- _wjf_data :: !NgramsList
_wjf_data :: !Text
, _wjf_name :: !Text
} deriving (Eq, Show, Generic)
instance FromForm WithJsonFile where
fromForm f = do
d' <- parseUnique "_wjf_data" f
d <- case eitherDecode' (BSL.fromStrict $ E.encodeUtf8 d') of
Left s -> Left $ pack s
Right v -> Right v
d <- parseUnique "_wjf_data" f
-- d' <- parseUnique "_wjf_data" f
-- d <- case eitherDecode' (BSL.fromStrict $ E.encodeUtf8 d') of
-- Left s -> Left $ pack s
-- Right v -> Right v
n <- parseUnique "_wjf_name" f
pure $ WithJsonFile { _wjf_data = d
, _wjf_name = n }
......
......@@ -34,7 +34,7 @@ data NgramsTree = NgramsTree { mt_label :: Text
, mt_value :: Double
, mt_children :: [NgramsTree]
}
deriving (Generic, Show)
deriving (Generic, Show, Eq)
toNgramsTree :: Tree (NgramsTerm,Double) -> NgramsTree
toNgramsTree (Node (NgramsTerm l,v) xs) = NgramsTree l v (map toNgramsTree xs)
......
......@@ -22,7 +22,6 @@ Node API
-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Node
......@@ -271,7 +270,7 @@ genericNodeAPI' _ authenticatedUser targetNode = Named.NodeAPI
, moveAPI = Named.MoveAPI $ \parentId ->
withPolicy authenticatedUser (moveChecks (SourceId targetNode) (TargetId parentId)) $
moveNode loggedInUserId targetNode parentId
, fileAPI = Named.FileAPI $ fileApi targetNode
, fileAPI = Named.FileAPI { fileDownloadEp = fileApi targetNode }
, fileAsyncAPI = fileAsyncApi authenticatedUser targetNode
, dfwnAPI = DFWN.api authenticatedUser targetNode
, documentUploadAPI = DocumentUpload.api targetNode
......
{-# LANGUAGE TypeOperators #-}
{-|
Module : Gargantext.API.Node.Corpus.Export
Description : Corpus export
......@@ -17,27 +16,22 @@ Main exports of Gargantext:
module Gargantext.API.Node.Corpus.Export
where
import Data.HashMap.Strict qualified as HashMap
import Control.Exception.Safe qualified as CES
import Data.List qualified as List
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Text (pack)
import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
import Gargantext.API.Ngrams.Tools (getRepo)
import Gargantext.API.Ngrams.Types ( NgramsTerm(unNgramsTerm) )
import Gargantext.API.Node.Corpus.Export.Types ( Corpus(..) )
import Gargantext.API.Node.Corpus.Export.Types ( Corpus(..), CorpusSQLite(..) )
import Gargantext.API.Node.Corpus.Export.Utils (getContextNgrams, mkCorpusSQLite, mkCorpusSQLiteData)
import Gargantext.API.Node.Document.Export.Types qualified as DocumentExport
import Gargantext.API.Prelude (IsGargServer)
import Gargantext.Core.NodeStory.Types ( NodeListStory )
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Types.Main ( ListType(MapTerm) )
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Action.Metrics.NgramsByContext (getNgramsByContextOnlyUser)
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Database.Prelude (DBCmdExtra)
import Gargantext.Database.Query.Table.Node ( defaultList )
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
import Gargantext.Database.Schema.Context (_context_id)
import Gargantext.Prelude hiding (hash)
......@@ -48,10 +42,13 @@ import qualified Gargantext.API.Routes.Named.Corpus as Named
--------------------------------------------------
-- | Hashes are ordered by Set
getCorpus :: forall env err m. IsGargServer env err m
getCorpus :: (CES.MonadMask m, IsGargServer env err m)
=> CorpusId
-> Named.CorpusExportAPI (AsServerT m)
getCorpus cId = Named.CorpusExportAPI $ \lId nt' -> get_corpus lId nt'
getCorpus cId = Named.CorpusExportAPI {
Named.corpusExportEp = get_corpus
, Named.corpusSQLiteEp = getCorpusSQLite cId
}
where
get_corpus :: IsGargServer env err m
......@@ -89,23 +86,16 @@ getCorpus cId = Named.CorpusExportAPI $ \lId nt' -> get_corpus lId nt'
$ Corpus { _c_corpus = Map.elems r
, _c_hash = hash $ List.map DocumentExport._d_hash $ Map.elems r }
getContextNgrams :: HasNodeError err
=> CorpusId
-> ListId
-> ListType
-> NgramsType
-> NodeListStory
-> DBCmdExtra err (Map ContextId (Set NgramsTerm))
getContextNgrams cId lId listType nt repo = do
-- lId <- case lId' of
-- Nothing -> defaultList cId
-- Just l -> pure l
lIds <- selectNodesWithUsername NodeList userMaster
let ngs = filterListWithRoot [listType] $ mapTermListRoot [lId] nt repo
-- TODO HashMap
r <- getNgramsByContextOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)
pure r
getCorpusSQLite :: (CES.MonadMask m, IsGargServer env err m)
=> CorpusId
-> Maybe ListId
-> m (Headers '[Header "Content-Disposition" Text] CorpusSQLite)
getCorpusSQLite cId lId = do
corpusSQLiteData <- mkCorpusSQLiteData cId lId
corpusSQLite <- liftBase $ mkCorpusSQLite corpusSQLiteData
pure $ addHeader ("attachment; filename=GarganText_corpus-" <> pack (show cId) <> ".sqlite")
$ corpusSQLite
-- TODO
-- Exports List
......
......@@ -13,14 +13,21 @@ Portability : POSIX
module Gargantext.API.Node.Corpus.Export.Types where
import Data.Aeson.TH (deriveJSON)
import Data.Swagger ( ToSchema(..), genericDeclareNamedSchema, ToParamSchema(..) )
import Data.Text (Text)
import GHC.Generics (Generic)
import Data.ByteString.Lazy qualified as BSL
import Data.Swagger ( ToSchema(..), genericDeclareNamedSchema, ToParamSchema(..), NamedSchema(..), binarySchema )
import Data.Time.Clock (UTCTime)
import Data.Version (Version)
import Gargantext.API.Node.Document.Export.Types qualified as DocumentExport
import Gargantext.Core.Types ( TODO )
import Gargantext.API.Ngrams.Types (NgramsTerm)
import Gargantext.Core.Types ( CorpusId, ListId, TODO )
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Servant
import Gargantext.Database.Admin.Types.Hyperdata.Corpus (HyperdataCorpus)
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument)
import Gargantext.Database.Admin.Types.Hyperdata.List (HyperdataList)
import Gargantext.Database.Admin.Types.Node (ContextId, NodeId)
import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash qualified as H
import Servant (Accept(..), MimeRender(mimeRender), MimeUnrender(mimeUnrender), OctetStream)
-- Corpus Export
......@@ -37,3 +44,46 @@ instance ToSchema Corpus where
instance ToParamSchema Corpus where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
$(deriveJSON (unPrefix "_c_") ''Corpus)
-- | Wrapper around 'ByteString' to return an SQLite db containing
-- corpus
newtype CorpusSQLite =
CorpusSQLite { _cs_bs :: BSL.ByteString }
deriving (Generic, NFData)
instance Accept CorpusSQLite where
contentType _ = contentType (Proxy :: Proxy OctetStream)
instance MimeRender OctetStream CorpusSQLite where
mimeRender _ (CorpusSQLite bs) = bs
-- | Needed for tests
instance MimeUnrender OctetStream CorpusSQLite where
mimeUnrender _ bs = Right $ CorpusSQLite { _cs_bs = bs }
instance ToSchema CorpusSQLite where
declareNamedSchema _ = pure $ NamedSchema (Just "CorpusSQLite") binarySchema
-- | Contents of the SQLite export DB
-- (having such datatype makes it easier to coherently implement import/export)
data CorpusSQLiteData =
CorpusSQLiteData { _csd_version :: Version
, _csd_cId :: CorpusId
, _csd_lId :: ListId
, _csd_created :: UTCTime
, _csd_corpus_name :: Text
, _csd_corpus_hash :: Maybe H.Hash
, _csd_corpus_parent :: Maybe NodeId
, _csd_corpus_hyperdata :: HyperdataCorpus
, _csd_list_name :: Text
, _csd_list_parent :: Maybe NodeId
, _csd_list_hyperdata :: HyperdataList
, _csd_contexts :: [(NodeId, Text, UTCTime, HyperdataDocument)]
, _csd_map_context_ngrams :: Map ContextId (Set NgramsTerm)
, _csd_stop_context_ngrams :: Map ContextId (Set NgramsTerm)
, _csd_candidate_context_ngrams :: Map ContextId (Set NgramsTerm)
} deriving (Show, Eq, Generic)
This diff is collapsed.
......@@ -21,15 +21,15 @@ module Gargantext.API.Node.Corpus.New
import Conduit ((.|), yieldMany, mapMC, mapC, transPipe)
import Control.Exception.Safe (MonadMask)
import Control.Lens ( view, non )
import Data.ByteString.Base64 qualified as BSB64
import Data.Conduit.Internal (zipSources)
import Data.Swagger ( ToSchema(..) )
import Data.Text.Encoding qualified as TE
import Data.Text qualified as T
import Database.PostgreSQL.Simple.LargeObjects qualified as PSQL
import Gargantext.API.Admin.Orchestrator.Types qualified as API
import Gargantext.API.Ngrams (commitStatePatch, Versioned(..))
import Gargantext.API.Node.Corpus.New.Types ( FileFormat(..), FileType(..) )
import Gargantext.API.Node.Corpus.New.Types ( FileType(..) )
import Gargantext.API.Node.Corpus.Searx ( triggerSearxSearch )
import Gargantext.API.Node.Corpus.Types ( Datafield(Web), datafield2origin )
import Gargantext.API.Node.Corpus.Update (addLanguageToCorpus)
......@@ -51,7 +51,7 @@ import Gargantext.Database.Admin.Types.Hyperdata.Document ( ToHyperdataDocument(
import Gargantext.Database.Admin.Types.Hyperdata.File ( HyperdataFile(..) )
import Gargantext.Database.Admin.Types.Node (CorpusId, NodeType(..), ParentId)
import Gargantext.Database.GargDB qualified as GargDB
import Gargantext.Database.Prelude (IsDBCmd)
import Gargantext.Database.Prelude (readLargeObject, IsDBCmd)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node (getNodeWith, getOrMkList)
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
......@@ -214,28 +214,29 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
$(logLocM) ERROR $ "[addToCorpusWithQuery] error: " <> show err -- log the full error
markFailed (Just err) jobHandle
addToCorpusWithForm :: ( FlowCmdM env err m
, MonadJobStatus m
, HasNodeStoryImmediateSaver env
, HasNodeArchiveStoryImmediateSaver env
)
=> User
-> CorpusId
-> NewWithForm
-> JobHandle m
-> m ()
addToCorpusWithForm user cid nwf jobHandle = do
addToCorpusWithTempFile :: ( MonadMask m
, FlowCmdM env err m
, MonadJobStatus m
, HasNodeStoryImmediateSaver env
, HasNodeArchiveStoryImmediateSaver env
)
=> User
-> CorpusId
-> NewWithTempFile
-> JobHandle m
-> m ()
addToCorpusWithTempFile user cid nwtf jobHandle = do
-- printDebug "[addToCorpusWithForm] Parsing corpus: " cid
-- printDebug "[addToCorpusWithForm] fileType" ft
-- printDebug "[addToCorpusWithForm] fileFormat" ff
let l = nwf ^. wf_lang . non defaultLanguage
let l = nwtf ^. wtf_lang . non defaultLanguage
addLanguageToCorpus cid l
limit' <- view $ hasConfig . gc_jobs . jc_max_docs_parsers
let limit = fromIntegral limit' :: Integer
let
parseC = case (nwf ^. wf_filetype) of
parseC = case nwtf ^. wtf_filetype of
TSV -> Parser.parseFormatC Parser.TsvGargV3
TSV_HAL -> Parser.parseFormatC Parser.TsvHal
Iramuteq -> Parser.parseFormatC Parser.Iramuteq
......@@ -245,12 +246,11 @@ addToCorpusWithForm user cid nwf jobHandle = do
WOS -> Parser.parseFormatC Parser.WOS
-- TODO granularity of the logStatus
let data' = case (nwf ^. wf_fileformat) of
Plain -> cs (nwf ^. wf_data)
ZIP -> case BSB64.decode $ TE.encodeUtf8 (nwf ^. wf_data) of
Left err -> panicTrace $ T.pack "[addToCorpusWithForm] error decoding base64: " <> T.pack err
Right decoded -> decoded
eDocsC <- liftBase $ parseC (nwf ^. wf_fileformat) data'
let oId = PSQL.Oid $ fromIntegral $ nwtf ^. wtf_file_oid
data' <- readLargeObject oId
-- $(logLocM) DEBUG $ "[addToCorpusWithTempFile] size: " <> show size
-- $(logLocM) DEBUG $ "[addToCorpusWithTempFile] data': " <> TE.decodeUtf8 data'
eDocsC <- liftBase $ parseC (nwtf ^. wtf_fileformat) data'
case eDocsC of
Right (count, docsC) -> do
-- TODO Add progress (jobStatus) update for docs - this is a
......@@ -260,7 +260,7 @@ addToCorpusWithForm user cid nwf jobHandle = do
.| mapMC (\(idx, doc) ->
if idx > limit then do
--printDebug "[addToCorpusWithForm] number of docs exceeds the limit" (show limit)
let panicMsg' = [ "[addToCorpusWithForm] number of docs "
let panicMsg' = [ "[addToCorpusWithTempFile] number of docs "
, "exceeds the MAX_DOCS_PARSERS limit ("
, show limit
, ")" ]
......@@ -280,7 +280,7 @@ addToCorpusWithForm user cid nwf jobHandle = do
_cid' <- flowCorpus (MkCorpusUserNormalCorpusIds user [cid])
(Multi l)
(Just (nwf ^. wf_selection))
(Just (nwtf ^. wtf_selection))
--(Just $ fromIntegral $ length docs, docsC')
(count, transPipe liftBase docsC') -- TODO fix number of docs
--(map (map toHyperdataDocument) docs)
......@@ -295,7 +295,7 @@ addToCorpusWithForm user cid nwf jobHandle = do
markComplete jobHandle
Left parseErr -> do
$(logLocM) ERROR $ "[addToCorpusWithForm] parse error: " <> (Parser._ParseFormatError parseErr)
$(logLocM) ERROR $ "[addToCorpusWithTempFile] parse error: " <> Parser._ParseFormatError parseErr
markFailed (Just parseErr) jobHandle
{-
......
......@@ -23,7 +23,7 @@ import Data.Tuple.Select (sel1, sel2, sel3)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Config (GargConfig(..), hasConfig)
import Gargantext.Core.Config.Types (FramesConfig(..))
import Gargantext.Core.NLP (HasNLPServer, nlpServerGet)
import Gargantext.Core.NLP (HasNLPServer)
import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import Gargantext.Core.Text.Corpus.Query qualified as Query
import Gargantext.Core.Text.Terms (TermType(..))
......@@ -126,7 +126,6 @@ insertSearxResponse :: ( MonadBase IO m
-> m ()
insertSearxResponse _ _ _ _ (Left _) = pure ()
insertSearxResponse user cId listId l (Right (SearxResponse { _srs_results })) = do
server <- view (nlpServerGet l)
-- docs :: [Either Text HyperdataDocument]
let docs = hyperdataDocumentFromSearxResult l <$> _srs_results
--printDebug "[triggerSearxSearch] docs" docs
......@@ -141,7 +140,7 @@ insertSearxResponse user cId listId l (Right (SearxResponse { _srs_results })) =
-}
--_ <- flowDataText user (DataNew [docs']) (Multi l) cId Nothing logStatus
let mCorpus = Nothing :: Maybe HyperdataCorpus
void $ addDocumentsToHyperCorpus server mCorpus (Multi l) cId docs'
void $ addDocumentsToHyperCorpus mCorpus (Multi l) cId docs'
_ <- buildSocialList l user cId listId mCorpus Nothing
......
......@@ -27,7 +27,7 @@ import Gargantext.API.Prelude ( GargM )
import Gargantext.API.Routes.Named.Document qualified as Named
import Gargantext.API.Worker (serveWorkerAPI)
import Gargantext.Core (Lang(..))
import Gargantext.Core.NLP (nlpServerGet, HasNLPServer)
import Gargantext.Core.NLP (HasNLPServer)
import Gargantext.Core.NodeStory.Types (HasNodeStoryEnv, HasNodeArchiveStoryImmediateSaver)
import Gargantext.Core.Text.Corpus.Parsers.Date (mDateSplit)
import Gargantext.Core.Text.Terms (TermType(..))
......@@ -99,8 +99,7 @@ documentUpload nId doc = do
, _hd_institutes_tree = Nothing }
let lang = EN
ncs <- view $ nlpServerGet lang
addDocumentsToHyperCorpus ncs (Nothing :: Maybe HyperdataCorpus) (Multi lang) cId [hd]
addDocumentsToHyperCorpus (Nothing :: Maybe HyperdataCorpus) (Multi lang) cId [hd]
-- | Imports the documents contained into this 'DocumentExport' into this (local) version
-- of the running node.
......@@ -122,9 +121,8 @@ remoteImportDocuments :: ( HasNodeError err
-> m [NodeId]
remoteImportDocuments loggedInUser corpusId nodeId WorkSplit{..} documents = do
let la = Multi EN
nlpServerConfig <- view $ nlpServerGet (_tt_lang la)
$(logLocM) INFO $ "Importing " <> T.pack (show _ws_current) <> "/" <> T.pack (show _ws_total) <> " documents for corpus node " <> T.pack (show nodeId)
docs <- addDocumentsToHyperCorpus nlpServerConfig (Nothing :: Maybe HyperdataCorpus) la corpusId (map (_node_hyperdata . _d_document) documents)
docs <- addDocumentsToHyperCorpus (Nothing :: Maybe HyperdataCorpus) la corpusId (map (_node_hyperdata . _d_document) documents)
_versioned <- commitCorpus corpusId (RootId $ _auth_node_id loggedInUser)
$(logLocM) INFO $ "Done importing " <> T.pack (show _ws_current) <> "/" <> T.pack (show _ws_total) <> " documents for corpus node " <> T.pack (show nodeId)
pure docs
......@@ -11,7 +11,6 @@ Portability : POSIX
{-# OPTIONS_GHC -fno-warn-unused-matches #-}
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE IncoherentInstances #-}
module Gargantext.API.Node.File where
......@@ -60,9 +59,7 @@ fileDownload nId = do
Contents c <- GargDB.readGargFile $ T.unpack path
let (mMime, _) = DMT.guessType DMT.defaultmtd False $ T.unpack name'
mime = case mMime of
Just m -> m
Nothing -> "text/plain"
mime = fromMaybe "text/plain" mMime
pure $ addHeader (T.pack mime) $ BSResponse c
......@@ -118,3 +115,4 @@ addWithFile authenticatedUser nId nwf@(NewWithFile _d _l fName) jobHandle = do
markComplete jobHandle
where
userId = authenticatedUser ^. auth_user_id
{-|
Module : Gargantext.API.Node.File.Types
Description :
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.API.Node.File.Types where
......@@ -10,27 +19,34 @@ import Gargantext.Prelude
import Network.HTTP.Media qualified as M
import Servant
data RESPONSE deriving Typeable
instance Accept RESPONSE where
contentType _ = "text" M.// "*"
instance MimeRender RESPONSE BSResponse where
mimeRender _ (BSResponse val) = BSL.fromStrict $ val
instance MimeUnrender RESPONSE BSResponse where
mimeUnrender _ lbs = Right $ BSResponse (BSL.toStrict lbs)
newtype Contents = Contents BS.ByteString
instance GargDB.ReadFile Contents where
readFile' fp = do
c <- BS.readFile fp
pure $ Contents c
instance ToSchema Contents where
declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
instance MimeUnrender OctetStream Contents where
mimeUnrender _ lbs = Right $ Contents (BSL.toStrict lbs)
newtype BSResponse = BSResponse BS.ByteString
deriving (Generic)
instance ToSchema BSResponse where
declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
-- | Temporary file, held in database, return it's OID
newtype DBTempFile = DBTempFile Int
deriving (Generic, ToJSON)
instance ToSchema DBTempFile where
declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
......@@ -14,16 +14,17 @@ Portability : POSIX
module Gargantext.API.Node.FrameCalcUpload where
import Control.Exception.Safe (MonadMask)
import Data.ByteString.Lazy qualified as BSL
import Data.ByteString.UTF8 qualified as BSU8
import Data.Text qualified as T
import Database.PostgreSQL.Simple.LargeObjects qualified as PSQL
import Gargantext.API.Admin.Auth.Types ( auth_node_id, AuthenticatedUser )
import Gargantext.API.Admin.EnvTypes (Env)
import Gargantext.API.Errors.Types ( BackendInternalError )
import Gargantext.API.Node.Corpus.New (addToCorpusWithForm)
import Gargantext.API.Node.Corpus.New (addToCorpusWithTempFile)
import Gargantext.API.Node.Corpus.New.Types (FileFormat(..), FileType(..))
import Gargantext.API.Node.FrameCalcUpload.Types
import Gargantext.API.Node.Types (NewWithForm(..))
import Gargantext.API.Node.Types (NewWithTempFile(..))
import Gargantext.API.Prelude ( GargM )
import Gargantext.API.Routes.Named.FrameCalc qualified as Named
import Gargantext.API.Worker (serveWorkerAPI)
......@@ -35,6 +36,7 @@ import Gargantext.Database.Action.Flow.Types ( FlowCmdM )
import Gargantext.Database.Admin.Types.Hyperdata.Frame ( HyperdataFrame(..) )
import Gargantext.Database.Admin.Types.Node ( NodeId, NodeType(NodeCorpus) )
import Gargantext.Database.Query.Table.Node (getClosestParentIdByType, getNodeWith)
import Gargantext.Database.Prelude (createLargeObject)
import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude
import Gargantext.Utils.Jobs.Monad (MonadJobStatus(..), markFailureNoErr)
......@@ -54,7 +56,8 @@ api authenticatedUser nId =
frameCalcUploadAsync :: ( HasConfig env
frameCalcUploadAsync :: ( MonadMask m
, HasConfig env
, FlowCmdM env err m
, MonadJobStatus m
, HasNodeArchiveStoryImmediateSaver env
......@@ -65,7 +68,7 @@ frameCalcUploadAsync :: ( HasConfig env
-> FrameCalcUpload
-> JobHandle m
-> m ()
frameCalcUploadAsync authenticatedUser nId (FrameCalcUpload _wf_lang _wf_selection) jobHandle = do
frameCalcUploadAsync authenticatedUser nId (FrameCalcUpload _wtf_lang _wtf_selection) jobHandle = do
markStarted 5 jobHandle
-- printDebug "[frameCalcUploadAsync] uId" uId
......@@ -82,7 +85,8 @@ frameCalcUploadAsync authenticatedUser nId (FrameCalcUpload _wf_lang _wf_selecti
manager <- newManager tlsManagerSettings
req <- parseRequest $ T.unpack csvUrl
httpLbs req manager
let body = T.pack $ BSU8.toString $ BSL.toStrict $ responseBody res
let body = BSL.toStrict $ responseBody res
PSQL.Oid oId <- createLargeObject body
-- printDebug "body" body
mCId <- getClosestParentIdByType nId NodeCorpus
......@@ -92,14 +96,14 @@ frameCalcUploadAsync authenticatedUser nId (FrameCalcUpload _wf_lang _wf_selecti
Nothing -> markFailureNoErr 1 jobHandle
Just cId ->
-- FIXME(adn) Audit this conversion.
addToCorpusWithForm (RootId userNodeId)
cId
(NewWithForm { _wf_filetype = TSV
, _wf_fileformat = Plain
, _wf_data = body
, _wf_lang
, _wf_name = "calc-upload.csv"
, _wf_selection }) jobHandle
addToCorpusWithTempFile (RootId userNodeId)
cId
(NewWithTempFile { _wtf_filetype = TSV
, _wtf_fileformat = Plain
, _wtf_file_oid = fromIntegral oId
, _wtf_lang
, _wtf_name = "calc-upload.csv"
, _wtf_selection }) jobHandle
markComplete jobHandle
where
......
......@@ -31,6 +31,9 @@ import Gargantext.Prelude
import Web.FormUrlEncoded (FromForm, ToForm)
-------------------------------------------------------
-- | A file is uploaded with this type. Then, for internal job
-- creation for the worker, 'NewWithTempFile' is used with a large
-- object oid
data NewWithForm = NewWithForm
{ _wf_filetype :: !FileType
, _wf_fileformat :: !FileFormat
......@@ -50,6 +53,26 @@ instance ToJSON NewWithForm where
instance ToSchema NewWithForm where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wf_")
-------------------------------------------------------
data NewWithTempFile = NewWithTempFile
{ _wtf_filetype :: !FileType
, _wtf_fileformat :: !FileFormat
, _wtf_file_oid :: !Int
, _wtf_lang :: !(Maybe Lang)
, _wtf_name :: !Text
, _wtf_selection :: !FlowSocialListWith
} deriving (Eq, Show, Generic)
makeLenses ''NewWithTempFile
instance FromForm NewWithTempFile
instance ToForm NewWithTempFile
instance FromJSON NewWithTempFile where
parseJSON = genericParseJSON $ jsonOptions "_wtf_"
instance ToJSON NewWithTempFile where
toJSON = genericToJSON $ jsonOptions "_wtf_"
instance ToSchema NewWithTempFile where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wtf_")
-------------------------------------------------------
data NewWithFile = NewWithFile
......
......@@ -11,21 +11,30 @@ Portability : POSIX
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE UndecidableInstances #-}
module Gargantext.API.Routes
where
module Gargantext.API.Routes where
import Data.ByteString.Base64 qualified as BSB64
import Data.Text qualified as T
import Data.Text.Encoding qualified as TE
import Database.PostgreSQL.Simple.LargeObjects qualified as PSQL
import Gargantext.API.Admin.Auth.Types (AuthenticatedUser, auth_user_id)
import Gargantext.API.Admin.EnvTypes (Env)
import Gargantext.API.Errors.Types (BackendInternalError)
import Gargantext.API.Node.Corpus.New.Types ( FileFormat(..) )
import Gargantext.API.Node.Types (NewWithForm(..), NewWithTempFile(..))
import Gargantext.API.Prelude (GargServer, GargM)
import Gargantext.API.Routes.Named.Annuaire qualified as Named
import Gargantext.API.Routes.Named.Corpus qualified as Named
import Gargantext.API.Worker (serveWorkerAPI)
import Gargantext.API.Worker (serveWorkerAPI, serveWorkerAPIm)
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Worker.Jobs.Types qualified as Jobs
import Gargantext.Database.Prelude (createLargeObject)
import Gargantext.Prelude
import Gargantext.System.Logging ( logLocM, LogLevel(..) )
import Servant (Get, JSON)
import Servant.Server.Generic (AsServerT)
......@@ -63,19 +72,34 @@ addCorpusWithQuery user =
, Jobs._acq_cid = cId }
}
addCorpusWithForm :: User -> Named.AddWithForm (AsServerT (GargM Env BackendInternalError))
addCorpusWithForm user =
Named.AddWithForm {
addWithFormEp = \cId -> serveWorkerAPI $ \p ->
-- /NOTE(adinapoli)/ Track the initial steps outside 'addToCorpusWithForm', because it's
-- called in a few places, and the job status might be different between invocations.
-- markStarted 3 jHandle
-- New.addToCorpusWithForm user cid i jHandle
Jobs.AddCorpusFormAsync { Jobs._acf_args = p
, Jobs._acf_user = user
, Jobs._acf_cid = cId }
-- | Uses temporary file stored in postgres to add that file to a corpus
addWithTempFileApi :: AuthenticatedUser
-> Named.AddWithTempFile (AsServerT (GargM Env BackendInternalError))
addWithTempFileApi authenticatedUser =
Named.AddWithTempFile {
addWithTempFileEp = \cId ->
serveWorkerAPIm $ \(NewWithForm { .. }) -> do
let bs = case _wf_fileformat of
Plain -> cs _wf_data
ZIP -> case BSB64.decode $ TE.encodeUtf8 _wf_data of
Left err -> panicTrace $ T.pack "[addWithTempFileApi] error decoding base64: " <> T.pack err
Right decoded -> decoded
(PSQL.Oid oId) <- createLargeObject bs
$(logLocM) DEBUG $ "[addWithTempFileApi] oId': " <> show oId
let args = NewWithTempFile { _wtf_filetype = _wf_filetype
, _wtf_fileformat = _wf_fileformat
, _wtf_file_oid = fromIntegral oId
, _wtf_lang = _wf_lang
, _wtf_name = _wf_name
, _wtf_selection = _wf_selection }
pure $ Jobs.AddCorpusTempFileAsync { _actf_args = args
, _actf_cid = cId
, _actf_user = userId }
}
where
userId = UserDBId $ authenticatedUser ^. auth_user_id
addAnnuaireWithForm :: Named.AddAnnuaireWithForm (AsServerT (GargM Env BackendInternalError))
addAnnuaireWithForm =
Named.AddAnnuaireWithForm {
......
......@@ -14,7 +14,7 @@ Portability : POSIX
module Gargantext.API.Routes.Named.Corpus (
-- * Routes types
CorpusExportAPI(..)
, AddWithForm(..)
, AddWithTempFile(..)
, AddWithQuery(..)
, MakeSubcorpusAPI(..)
-- * Others
......@@ -25,7 +25,7 @@ import Data.Aeson.TH (deriveJSON)
import Data.Swagger (ToSchema(..), genericDeclareNamedSchema)
import Data.Text (Text)
import GHC.Generics
import Gargantext.API.Node.Corpus.Export.Types (Corpus)
import Gargantext.API.Node.Corpus.Export.Types (Corpus, CorpusSQLite)
import Gargantext.API.Node.Types (NewWithForm, WithQuery)
import Gargantext.API.Worker (WorkerAPI)
import Gargantext.Core.Text.Ngrams (NgramsType(..))
......@@ -35,22 +35,27 @@ import Gargantext.Prelude (Bool)
import Servant
--------------------------------------------------
newtype CorpusExportAPI mode = CorpusExportAPI
data CorpusExportAPI mode = CorpusExportAPI
{ corpusExportEp :: mode :- Summary "Corpus Export"
:> "export"
:> QueryParam "listId" ListId
:> QueryParam "ngramsType" NgramsType
:> Get '[JSON] (Headers '[Servant.Header "Content-Disposition" Text] Corpus)
, corpusSQLiteEp :: mode :- Summary "Corpus SQLite export"
:> "sqlite"
:> QueryParam "listId" ListId
:> Get '[OctetStream] (Headers '[Servant.Header "Content-Disposition" Text] CorpusSQLite)
} deriving Generic
newtype AddWithForm mode = AddWithForm
{ addWithFormEp :: mode :- Summary "Add with FormUrlEncoded to corpus endpoint"
:> "corpus"
:> Capture "corpus_id" CorpusId
:> "add"
:> "form"
:> "async"
:> NamedRoutes (WorkerAPI '[FormUrlEncoded] NewWithForm)
data AddWithTempFile mode = AddWithTempFile
{ addWithTempFileEp :: mode :- Summary "Add with form via temp file"
:> "corpus"
:> Capture "corpus_id" CorpusId
:> "add"
:> "form"
:> "async"
:> NamedRoutes (WorkerAPI '[FormUrlEncoded] NewWithForm)
} deriving Generic
newtype AddWithQuery mode = AddWithQuery
......
{-|
Module : Gargantext.API.Routes.Named.File
Description :
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Routes.Named.File (
......@@ -6,6 +16,7 @@ module Gargantext.API.Routes.Named.File (
, FileAsyncAPI(..)
) where
import Data.Text (Text)
import GHC.Generics (Generic)
import Gargantext.API.Node.File.Types (BSResponse, RESPONSE)
......@@ -13,6 +24,7 @@ import Gargantext.API.Node.Types (NewWithFile)
import Gargantext.API.Worker (WorkerAPI)
import Servant
data FileAPI mode = FileAPI
{ fileDownloadEp :: mode :- Summary "File download"
:> "download"
......@@ -26,4 +38,3 @@ data FileAsyncAPI mode = FileAsyncAPI
:> "add"
:> NamedRoutes (WorkerAPI '[FormUrlEncoded] NewWithFile)
} deriving Generic
{-|
Module : Gargantext.API.Routes.Named.Node
Description :
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Routes.Named.Node (
......@@ -84,7 +94,7 @@ data NodeAPI a mode = NodeAPI
, searchAPI :: mode :- "search" :> NamedRoutes (SearchAPI SearchResult)
, shareAPI :: mode :- "share" :> NamedRoutes ShareNode
, unshareEp :: mode :- "unshare" :> NamedRoutes UnshareNode
, publishAPI :: mode :- "publish" :> (PolicyChecked (NamedRoutes PublishAPI))
, publishAPI :: mode :- "publish" :> PolicyChecked (NamedRoutes PublishAPI)
---- Pairing utilities
, pairWithEp :: mode :- "pairwith" :> NamedRoutes PairWith
, pairsEp :: mode :- "pairs" :> NamedRoutes Pairs
......
......@@ -8,9 +8,8 @@ Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Routes.Named.Private (
-- * Routes types
......@@ -26,13 +25,13 @@ module Gargantext.API.Routes.Named.Private (
import Data.Text (Text)
import Gargantext.API.Admin.Auth.Types
import Gargantext.API.Auth.PolicyCheck
import Gargantext.API.Routes.Named.Contact
import Gargantext.API.Routes.Named.Context
import Gargantext.API.Routes.Named.Corpus
import Gargantext.API.Routes.Named.Count
import Gargantext.API.Routes.Named.Document
import Gargantext.API.Admin.Auth.Types (AuthenticatedUser)
import Gargantext.API.Auth.PolicyCheck (PolicyChecked)
import Gargantext.API.Routes.Named.Contact (ContactAPI)
import Gargantext.API.Routes.Named.Context (ContextAPI)
import Gargantext.API.Routes.Named.Corpus (AddWithTempFile, AddWithQuery, CorpusExportAPI, MakeSubcorpusAPI)
import Gargantext.API.Routes.Named.Count (CountAPI, Query)
import Gargantext.API.Routes.Named.Document (DocumentExportAPI)
import Gargantext.API.Routes.Named.List (GETAPI, JSONAPI, TSVAPI)
import Gargantext.API.Routes.Named.Node
import Gargantext.API.Routes.Named.Remote
......@@ -95,7 +94,7 @@ data GargPrivateAPI' mode = GargPrivateAPI'
:> Capture "tree_id" NodeId
:> NamedRoutes TreeFlatAPI
, membersAPI :: mode :- "members" :> Summary "Team node members" :> NamedRoutes MembersAPI
, addWithFormAPI :: mode :- NamedRoutes AddWithForm
, addWithTempFile :: mode :- NamedRoutes AddWithTempFile
, addWithQueryEp :: mode :- NamedRoutes AddWithQuery
, makeSubcorpusAPI :: mode :- NamedRoutes MakeSubcorpusAPI
, listGetAPI :: mode :- NamedRoutes GETAPI
......
......@@ -18,7 +18,7 @@ import Gargantext.API.Node.Phylo.Export qualified as PhyloExport
import Gargantext.API.Node qualified as Tree
import Gargantext.API.Node.ShareURL ( shareURL )
import Gargantext.API.Prelude (GargM)
import Gargantext.API.Routes (addCorpusWithForm, addCorpusWithQuery)
import Gargantext.API.Routes (addWithTempFileApi, addCorpusWithQuery)
import Gargantext.API.Routes.Named.Private qualified as Named
import Gargantext.API.Server.Named.Ngrams
import Gargantext.API.Server.Named.Viz qualified as Viz
......@@ -59,7 +59,7 @@ serverPrivateGargAPI' authenticatedUser@(AuthenticatedUser userNodeId userId)
, treeAPI = Tree.treeAPI authenticatedUser
, treeFlatAPI = Tree.treeFlatAPI authenticatedUser
, membersAPI = members
, addWithFormAPI = addCorpusWithForm (RootId userNodeId)
, addWithTempFile = addWithTempFileApi authenticatedUser
, addWithQueryEp = addCorpusWithQuery (RootId userNodeId)
, makeSubcorpusAPI = Subcorpus.makeSubcorpus userId
, listGetAPI = List.getAPI
......
......@@ -32,14 +32,7 @@ data WorkerAPI contentType input mode = WorkerAPI
serveWorkerAPI :: IsGargServer env err m
=> (input -> Job)
-> WorkerAPI contentType input (AsServerT m)
serveWorkerAPI f = WorkerAPI { workerAPIPost }
where
workerAPIPost i = do
let job = f i
logM DEBUG $ "[serveWorkerAPI] sending job " <> show job
mId <- sendJob job
pure $ JobInfo { _ji_message_id = mId
, _ji_mNode_id = getWorkerMNodeId job }
serveWorkerAPI f = serveWorkerAPIm (pure . f)
serveWorkerAPIEJob :: (MonadError err m, IsGargServer env err m)
=> (input -> Either err Job)
......@@ -54,4 +47,15 @@ serveWorkerAPIEJob f = WorkerAPI { workerAPIPost }
mId <- sendJob job
pure $ JobInfo { _ji_message_id = mId
, _ji_mNode_id = getWorkerMNodeId job }
serveWorkerAPIm :: IsGargServer env err m
=> (input -> m Job)
-> WorkerAPI contentType input (AsServerT m)
serveWorkerAPIm f = WorkerAPI { workerAPIPost }
where
workerAPIPost i = do
job <- f i
logM DEBUG $ "[serveWorkerAPIm] sending job " <> show job
mId <- sendJob job
pure $ JobInfo { _ji_message_id = mId
, _ji_mNode_id = getWorkerMNodeId job }
......@@ -20,7 +20,7 @@ import Data.Bimap (Bimap)
import Data.Bimap qualified as Bimap
import Data.LanguageCodes qualified as ISO639
import Data.Morpheus.Types (GQLType)
import Data.Swagger (ToSchema(..), defaultSchemaOptions, genericDeclareNamedSchemaUnrestricted)
import Data.Swagger (ToParamSchema, ToSchema(..), defaultSchemaOptions, genericDeclareNamedSchemaUnrestricted)
import Data.Text (pack)
import Gargantext.Prelude hiding (All)
import Prelude (userError)
......@@ -70,6 +70,7 @@ defaultLanguage = EN
instance ToJSON Lang
instance FromJSON Lang
instance ToParamSchema Lang
instance ToSchema Lang where
declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
instance FromHttpApiData Lang
......
......@@ -9,8 +9,8 @@ Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
{-# LANGUAGE ViewPatterns #-}
{-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
module Gargantext.Core.Text.Corpus.API.Arxiv
( get
......@@ -18,7 +18,7 @@ module Gargantext.Core.Text.Corpus.API.Arxiv
, convertQuery
) where
import Arxiv qualified as Arxiv
import Arxiv qualified
import Conduit
import Data.Text qualified as Text
import Gargantext.Core (Lang(..))
......
......@@ -312,7 +312,8 @@ getMultipleLinefile bl del headers res x = do
then checkNextLine bl del headers res x
else
if (length tmp > length headers) || (V.length bl == (x + 1))
then Left (pack $ "Cannot parse the file at line " <> show x <> ". Maybe because of a delimiter")
then
Left (pack $ "Cannot parse the file at line " <> show x <> ". Maybe because of a delimiter")
else do
case BL.append res <$> ((V.!?) bl (x+1)) of
Nothing -> Left "getMultipleLinefile"
......@@ -591,7 +592,7 @@ parseTsv' bs = (V.toList . V.map tsv2doc . snd) <$> readTsvLazyBS Comma bs
parseTsv' :: BL.ByteString -> Either Text [HyperdataDocument]
parseTsv' bs = do
let
result = case (testCorrectFile bs) of
result = case testCorrectFile bs of
Left _err -> Left _err
Right del -> readTsvLazyBS del bs
V.toList . V.map tsv2doc . snd <$> result
......@@ -601,7 +602,7 @@ parseTsvC :: BL.ByteString
parseTsvC bs =
(\(_h, rs) -> (fromIntegral $ V.length rs, yieldMany rs .| mapC tsv2doc)) <$> eResult
where
eResult = case (testCorrectFile bs) of
eResult = case testCorrectFile bs of
Left _err -> Left _err
Right del -> readTsvLazyBS del bs
......
......@@ -22,7 +22,7 @@ data Chart = ChartHisto | ChartScatter | ChartPie
data Histo = Histo { histo_dates :: !(Vector Text)
, histo_count :: !(Vector Int)
}
deriving (Show, Generic)
deriving (Show, Generic, Eq)
instance ToSchema Histo where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "histo_")
......
......@@ -21,19 +21,24 @@ module Gargantext.Core.Worker where
import Async.Worker.Broker.Types (toA, getMessage, messageId)
import Async.Worker qualified as W
import Async.Worker.Types qualified as W
import Control.Exception.Safe qualified as CES
import Control.Lens (to)
import Data.Aeson qualified as Aeson
import Data.ByteString.Lazy qualified as BSL
import Data.Text qualified as T
import Database.PostgreSQL.Simple.LargeObjects qualified as PSQL
import Gargantext.API.Admin.Auth (forgotUserPassword)
import Gargantext.API.Admin.Auth.Types (ForgotPasswordAsyncParams(..))
import Gargantext.API.Ngrams.List (postAsyncJSON)
import Gargantext.API.Node.Contact (addContact)
import Gargantext.API.Node.Corpus.Annuaire qualified as Annuaire
import Gargantext.API.Node.Corpus.New (addToCorpusWithForm, addToCorpusWithQuery)
import Gargantext.API.Node.Corpus.New (addToCorpusWithTempFile, addToCorpusWithQuery)
import Gargantext.API.Node.DocumentsFromWriteNodes (documentsFromWriteNodes)
import Gargantext.API.Node.DocumentUpload (documentUploadAsync, remoteImportDocuments)
import Gargantext.API.Node.File (addWithFile)
import Gargantext.API.Node.FrameCalcUpload (frameCalcUploadAsync)
import Gargantext.API.Node.New (postNode')
import Gargantext.API.Node.Types (_wtf_file_oid)
import Gargantext.API.Node.Update.Types (UpdateNodeParams(..), Granularity (..))
import Gargantext.API.Node.Update (updateNode)
import Gargantext.API.Server.Named.Ngrams (tableNgramsPostChartsAsync)
......@@ -48,6 +53,7 @@ import Gargantext.Core.Worker.Env
import Gargantext.Core.Worker.Jobs.Types (Job(..), getWorkerMNodeId, ImportRemoteDocumentsPayload(..), ImportRemoteTermsPayload(..))
import Gargantext.Core.Worker.PGMQTypes (BrokerMessage, HasWorkerBroker, WState)
import Gargantext.Core.Worker.Types (JobInfo(..))
import Gargantext.Database.Prelude (readLargeObject, removeLargeObject)
import Gargantext.Database.Query.Table.User (getUsersWithEmail)
import Gargantext.Prelude hiding (to)
import Gargantext.System.Logging ( logLocM, LogLevel(..), logMsg, withLogger )
......@@ -223,12 +229,14 @@ performAction env _state bm = do
AddContact { .. } -> runWorkerMonad env $ do
$(logLocM) DEBUG $ "[performAction] add contact"
addContact _ac_user _ac_node_id _ac_args jh
-- | Send a file with documents and index them in corpus
AddCorpusFormAsync { .. } -> runWorkerMonad env $ do
$(logLocM) DEBUG $ "[performAction] add corpus form"
addToCorpusWithForm _acf_user _acf_cid _acf_args jh
-- | Uses temporary file to add documents into corpus
AddCorpusTempFileAsync { .. } -> runWorkerMonad env $ do
-- TODO CES.filnally
$(logLocM) DEBUG "[performAction] add to corpus with temporary file"
CES.finally (addToCorpusWithTempFile _actf_user _actf_cid _actf_args jh)
(removeLargeObject $ _wtf_file_oid _actf_args)
-- | Perform external API search query and index documents in corpus
AddCorpusWithQuery { .. } -> runWorkerMonad env $ do
$(logLocM) DEBUG "[performAction] add corpus with query"
......@@ -266,7 +274,12 @@ performAction env _state bm = do
-- | Process uploaded JSON file
JSONPost { .. } -> runWorkerMonad env $ do
$(logLocM) DEBUG $ "[performAction] json post"
void $ postAsyncJSON _jp_list_id _jp_ngrams_list jh
CES.finally (do
_jp_ngrams_list' <- readLargeObject (PSQL.Oid $ fromIntegral _jp_ngrams_oid)
case Aeson.eitherDecode (BSL.fromStrict _jp_ngrams_list') of
Left err -> CES.throwString err
Right _jp_ngrams_list -> void $ postAsyncJSON _jp_list_id _jp_ngrams_list jh)
(removeLargeObject _jp_ngrams_oid)
-- | Task for updating metrics charts
NgramsPostCharts { .. } -> runWorkerMonad env $ do
......
......@@ -19,6 +19,7 @@ module Gargantext.Core.Worker.Env where
import Control.Concurrent.STM.TVar (TVar, modifyTVar, newTVarIO, readTVarIO)
import Control.Exception.Safe qualified as CES
import Control.Lens (prism', to, view)
import Control.Lens.TH
import Control.Monad.Trans.Control (MonadBaseControl)
......@@ -179,7 +180,10 @@ newtype WorkerMonad a =
, MonadBase IO
, MonadBaseControl IO
, MonadError IOException
, MonadFail )
, MonadFail
, CES.MonadThrow
, CES.MonadCatch
, CES.MonadMask )
instance HasLogger WorkerMonad where
data instance Logger WorkerMonad =
......
......@@ -50,7 +50,6 @@ sendJobWithCfg gcConfig job = do
-- | We want to fine-tune job metadata parameters, for each job type
updateJobData :: Job -> SendJob -> SendJob
updateJobData (AddCorpusFormAsync {}) sj = sj { W.timeout = 3000 }
updateJobData (AddCorpusWithQuery {}) sj = sj { W.timeout = 3000 }
updateJobData (AddToAnnuaireWithForm {}) sj = sj { W.timeout = 3000 }
updateJobData (AddWithFile {}) sj = sj { W.timeout = 3000 }
......
......@@ -28,7 +28,7 @@ import Gargantext.API.Node.DocumentsFromWriteNodes.Types qualified as DFWN
import Gargantext.API.Node.DocumentUpload.Types (DocumentUpload)
import Gargantext.API.Node.FrameCalcUpload.Types (FrameCalcUpload)
import Gargantext.API.Node.New.Types ( PostNode(..) )
import Gargantext.API.Node.Types (NewWithFile, NewWithForm, WithQuery(..))
import Gargantext.API.Node.Types (NewWithFile, NewWithTempFile, WithQuery(..))
import Gargantext.API.Node.Update.Types (UpdateNodeParams)
import Gargantext.Core.Types.Individu (User)
import Gargantext.Database.Admin.Types.Node (AnnuaireId, CorpusId, ListId, NodeId(UnsafeMkNodeId), ParentId)
......@@ -101,9 +101,9 @@ data Job =
| AddContact { _ac_args :: AddContactParams
, _ac_node_id :: NodeId
, _ac_user :: User }
| AddCorpusFormAsync { _acf_args :: NewWithForm
, _acf_user :: User
, _acf_cid :: CorpusId }
| AddCorpusTempFileAsync { _actf_args :: NewWithTempFile
, _actf_user :: User
, _actf_cid :: CorpusId }
| AddCorpusWithQuery { _acq_args :: WithQuery
, _acq_user :: User
, _acq_cid :: CorpusId }
......@@ -120,7 +120,8 @@ data Job =
, _fca_authenticatedUser :: AuthenticatedUser
, _fca_node_id :: NodeId }
| JSONPost { _jp_list_id :: ListId
, _jp_ngrams_list :: NgramsList }
, _jp_ngrams_oid :: Int }
-- , _jp_ngrams_list :: NgramsList }
| NgramsPostCharts { _npc_node_id :: NodeId
, _npc_args :: UpdateTableNgramsCharts }
| PostNodeAsync { _pna_node_id :: NodeId
......@@ -144,11 +145,11 @@ instance FromJSON Job where
_ac_node_id <- o .: "node_id"
_ac_user <- o .: "user"
return $ AddContact { .. }
"AddCorpusFormAsync" -> do
_acf_args <- o .: "args"
_acf_user <- o .: "user"
_acf_cid <- o .: "cid"
return $ AddCorpusFormAsync { .. }
"AddCorpusTempFileAsync" -> do
_actf_args <- o .: "args"
_actf_user <- o .: "user"
_actf_cid <- o .: "cid"
return $ AddCorpusTempFileAsync { .. }
"AddCorpusWithQuery" -> do
_acq_args <- o .: "args"
_acq_user <- o .: "user"
......@@ -178,7 +179,8 @@ instance FromJSON Job where
return $ FrameCalcUpload { .. }
"JSONPost" -> do
_jp_list_id <- o .: "list_id"
_jp_ngrams_list <- o .: "ngrams_list"
-- _jp_ngrams_list <- o .: "ngrams_list"
_jp_ngrams_oid <- o .: "ngrams_oid"
return $ JSONPost { .. }
"NgramsPostCharts" -> do
_npc_node_id <- o .: "node_id"
......@@ -212,11 +214,11 @@ instance ToJSON Job where
, "args" .= _ac_args
, "user" .= _ac_user
, "node_id" .= _ac_node_id ]
toJSON (AddCorpusFormAsync { .. }) =
object [ "type" .= ("AddCorpusFormAsync" :: Text)
, "args" .= _acf_args
, "user" .= _acf_user
, "cid" .= _acf_cid ]
toJSON (AddCorpusTempFileAsync { .. }) =
object [ "type" .= ("AddCorpusTempFileAsync" :: Text)
, "args" .= _actf_args
, "user" .= _actf_user
, "cid" .= _actf_cid ]
toJSON (AddCorpusWithQuery { .. }) =
object [ "type" .= ("AddCorpusWithQuery" :: Text)
, "args" .= _acq_args
......@@ -247,7 +249,8 @@ instance ToJSON Job where
toJSON (JSONPost { .. }) =
object [ "type" .= ("JSONPost" :: Text)
, "list_id" .= _jp_list_id
, "ngrams_list" .= _jp_ngrams_list ]
, "ngrams_oid" .= _jp_ngrams_oid ]
-- , "ngrams_list" .= _jp_ngrams_list ]
toJSON (NgramsPostCharts { .. }) =
object [ "type" .= ("NgramsPostCharts" :: Text)
, "node_id" .= _npc_node_id
......@@ -290,7 +293,7 @@ instance ToJSON Job where
getWorkerMNodeId :: Job -> Maybe NodeId
getWorkerMNodeId Ping = Nothing
getWorkerMNodeId (AddContact { _ac_node_id }) = Just _ac_node_id
getWorkerMNodeId (AddCorpusFormAsync { _acf_args, _acf_cid }) = Just _acf_cid
getWorkerMNodeId (AddCorpusTempFileAsync { _actf_cid }) = Just _actf_cid
getWorkerMNodeId (AddCorpusWithQuery { _acq_args = WithQuery { _wq_node_id }}) = Just $ UnsafeMkNodeId _wq_node_id
getWorkerMNodeId (AddToAnnuaireWithForm { _aawf_annuaire_id }) = Just _aawf_annuaire_id
getWorkerMNodeId (AddWithFile { _awf_node_id }) = Just _awf_node_id
......
......@@ -64,7 +64,7 @@ import Data.Set qualified as Set
import Data.Text qualified as T
import Gargantext.API.Ngrams.Tools (getTermsWith)
import Gargantext.API.Ngrams.Types (NgramsTerm)
import Gargantext.Core (Lang(..), NLPServerConfig, withDefaultLanguage)
import Gargantext.Core (Lang(..), withDefaultLanguage)
import Gargantext.Core.Notifications.CentralExchange.Types (HasCentralExchangeNotification(ce_notify), CEMessage(..))
import Gargantext.Core.Config (GargConfig(..), hasConfig)
import Gargantext.Core.Config.Types (APIsConfig(..))
......@@ -274,10 +274,9 @@ flow :: forall env err m a c.
flow c mkCorpusUser la mfslw (count, docsC) jobHandle = do
(_userId, userCorpusId, listId) <- createNodes mkCorpusUser c
-- TODO if public insertMasterDocs else insertUserDocs
nlpServer <- view $ nlpServerGet (_tt_lang la)
runConduit $ zipSources (yieldMany ([1..] :: [Int])) docsC
.| CList.chunksOf 5
.| mapM_C (addDocumentsWithProgress nlpServer userCorpusId)
.| mapM_C (addDocumentsWithProgress userCorpusId)
.| sinkNull
let u = userFromMkCorpusUser mkCorpusUser
......@@ -286,10 +285,10 @@ flow c mkCorpusUser la mfslw (count, docsC) jobHandle = do
flowCorpusUser (la ^. tt_lang) u userCorpusId listId c mfslw
where
addDocumentsWithProgress :: NLPServerConfig -> CorpusId -> [(Int, a)] -> m ()
addDocumentsWithProgress nlpServer userCorpusId docsChunk = do
addDocumentsWithProgress :: CorpusId -> [(Int, a)] -> m ()
addDocumentsWithProgress userCorpusId docsChunk = do
$(logLocM) DEBUG $ T.pack $ "calling insertDoc, ([idx], mLength) = " <> show (fst <$> docsChunk, count)
docs <- addDocumentsToHyperCorpus nlpServer c la userCorpusId (map snd docsChunk)
docs <- addDocumentsToHyperCorpus c la userCorpusId (map snd docsChunk)
markProgress (length docs) jobHandle
......@@ -297,17 +296,17 @@ flow c mkCorpusUser la mfslw (count, docsC) jobHandle = do
-- the given documents to the corpus. Returns the Ids of the inserted documents.
addDocumentsToHyperCorpus :: ( IsDBCmd env err m
, HasNodeError err
, HasNLPServer env
, FlowCorpus document
, MkCorpus corpus
)
=> NLPServerConfig
-> Maybe corpus
=> Maybe corpus
-> TermType Lang
-> CorpusId
-> [document]
-> m [DocId]
addDocumentsToHyperCorpus ncs mb_hyper la corpusId docs = do
ids <- insertMasterDocs ncs mb_hyper la docs
addDocumentsToHyperCorpus mb_hyper la corpusId docs = do
ids <- insertMasterDocs mb_hyper la docs
void $ Doc.add corpusId (map nodeId2ContextId ids)
pure ids
......@@ -401,15 +400,16 @@ buildSocialList l user userCorpusId listId ctype mfslw = do
insertMasterDocs :: ( IsDBCmd env err m
, HasNodeError err
, HasNLPServer env
, FlowCorpus a
, MkCorpus c
)
=> NLPServerConfig
-> Maybe c
=> Maybe c
-> TermType Lang
-> [a]
-> m [DocId]
insertMasterDocs ncs c lang hs = do
insertMasterDocs c lang hs = do
nlpServer <- view $ nlpServerGet (_tt_lang lang)
(masterUserId, _, masterCorpusId) <- getOrMkRootWithCorpus MkCorpusUserMaster c
(ids', documentsWithId) <- insertDocs masterUserId masterCorpusId (map (toNode masterUserId Nothing) hs )
_ <- Doc.add masterCorpusId ids'
......@@ -418,11 +418,10 @@ insertMasterDocs ncs c lang hs = do
-- add documents to the corpus (create node_node link)
-- this will enable global database monitoring
-- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
mapNgramsDocs' :: HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
<- mapNodeIdNgrams
<$> documentIdWithNgrams
(extractNgramsT ncs $ withLang lang documentsWithId)
(extractNgramsT nlpServer $ withLang lang documentsWithId)
(map (B.first contextId2NodeId) documentsWithId)
lId <- getOrMkList masterCorpusId masterUserId
......@@ -489,4 +488,3 @@ reIndexWith cId lId nt lts = do
-- Saving the indexation in database
mapM_ (saveDocNgramsWith lId . ngramsByDoc corpusLang nt ts) docs
......@@ -28,7 +28,7 @@ data HyperdataCorpus =
-- 'defaultLanguage' if we don't know which language it is.
, _hc_lang :: Maybe Lang
}
deriving (Generic, Show)
deriving (Generic, Eq, Show)
defaultHyperdataCorpus :: HyperdataCorpus
defaultHyperdataCorpus =
......
......@@ -34,7 +34,7 @@ data CorpusField = MarkdownField { _cf_text :: !Text }
, _cf_authors :: !Text
-- , _cf_resources :: ![Resource]
}
deriving (Show, Generic)
deriving (Show, Generic, Eq)
defaultCorpusField :: CorpusField
defaultCorpusField = MarkdownField "# Title"
......@@ -56,7 +56,7 @@ data HyperdataField a =
HyperdataField { _hf_type :: !CodeType
, _hf_name :: !Text
, _hf_data :: !a
} deriving (Generic, Show)
} deriving (Generic, Show, Eq)
defaultHyperdataField :: HyperdataField CorpusField
defaultHyperdataField = HyperdataField Markdown "name" defaultCorpusField
......
......@@ -34,7 +34,7 @@ data HyperdataList =
, _hl_pie :: !(HashMap TabType (ChartMetrics Histo))
, _hl_scatter :: !(HashMap TabType Metrics)
, _hl_tree :: !(HashMap TabType (ChartMetrics (Vector NgramsTree)))
} deriving (Show, Generic)
} deriving (Show, Generic, Eq)
-- HyperdataList { _hl_chart :: !(Maybe (ChartMetrics Histo))
-- , _hl_list :: !(Maybe Text)
-- , _hl_pie :: !(Maybe (ChartMetrics Histo))
......
......@@ -25,7 +25,7 @@ import Test.QuickCheck.Arbitrary
newtype Metrics = Metrics
{ metrics_data :: Vector Metric}
deriving (Generic, Show)
deriving (Generic, Show, Eq)
instance ToSchema Metrics where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "metrics_")
......@@ -38,7 +38,7 @@ data Metric = Metric
, m_x :: !Double
, m_y :: !Double
, m_cat :: !ListType
} deriving (Generic, Show)
} deriving (Generic, Show, Eq)
instance ToSchema Metric where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "m_")
......@@ -54,7 +54,7 @@ deriveJSON (unPrefix "metrics_") ''Metrics
newtype ChartMetrics a = ChartMetrics { chartMetrics_data :: a }
deriving (Generic, Show)
deriving (Generic, Show, Eq)
instance (Typeable a, ToSchema a) => ToSchema (ChartMetrics a) where
declareNamedSchema = wellNamedSchema "chartMetrics_"
......
......@@ -9,6 +9,7 @@ Portability : POSIX
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE ScopedTypeVariables #-}
......@@ -51,15 +52,20 @@ module Gargantext.Database.Prelude
, fromField'
, mkCmd
, restrictMaybe
, createLargeObject
, readLargeObject
, readLargeObjectViaTempFile
, removeLargeObject
)
where
import Control.Exception.Safe (throw)
import Control.Exception.Safe qualified as CES
import Control.Lens (Getter, view)
import Control.Monad.Random ( MonadRandom )
import Control.Monad.Trans.Control (MonadBaseControl)
import Data.Aeson (Result(..))
import Data.ByteString qualified as DB
import Data.ByteString qualified as BS
import Data.ByteString.Lazy qualified as BSL
import Data.List qualified as DL
import Data.Pool (Pool, withResource)
import Data.Profunctor.Product.Default (Default)
......@@ -67,6 +73,7 @@ import Database.PostgreSQL.Simple (Connection)
import Database.PostgreSQL.Simple qualified as PGS
import Database.PostgreSQL.Simple.FromField ( Conversion, ResultError(ConversionFailed), fromField, returnError)
import Database.PostgreSQL.Simple.Internal (Field)
import Database.PostgreSQL.Simple.LargeObjects qualified as PSQL
import Database.PostgreSQL.Simple.Types (Query(..))
import Gargantext.Core.Notifications.CentralExchange.Types qualified as CET
import Gargantext.Core.Config (HasConfig(..))
......@@ -78,6 +85,8 @@ import Opaleye.Aggregate (countRows)
import Opaleye.Internal.Constant qualified
import Opaleye.Internal.Operators qualified
import Shelly qualified as SH
import System.Directory (removeFile)
import System.IO.Temp (emptySystemTempFile)
-- $typesAndConstraints
......@@ -199,7 +208,7 @@ runCountOpaQuery q = do
-- countRows is guaranteed to return a list with exactly one row so DL.head is safe here
pure $ fromInt64ToInt $ DL.head counts
formatPGSQuery :: PGS.ToRow a => PGS.Query -> a -> DBCmd err DB.ByteString
formatPGSQuery :: PGS.ToRow a => PGS.Query -> a -> DBCmd err BS.ByteString
formatPGSQuery q a = mkCmd $ \conn -> PGS.formatQuery conn q a
runPGSQuery :: ( PGS.FromRow r, PGS.ToRow q )
......@@ -209,7 +218,7 @@ runPGSQuery q a = mkCmd $ \conn -> catch (PGS.query conn q a) (printError conn)
printError c (SomeException e) = do
q' <- PGS.formatQuery c q a
hPutStrLn stderr q'
throw (SomeException e)
CES.throw (SomeException e)
-- | TODO catch error
runPGSQuery_ :: ( PGS.FromRow r )
......@@ -218,13 +227,13 @@ runPGSQuery_ q = mkCmd $ \conn -> catch (PGS.query_ conn q) printError
where
printError (SomeException e) = do
hPutStrLn stderr (fromQuery q)
throw (SomeException e)
CES.throw (SomeException e)
execPGSQuery :: PGS.ToRow a => PGS.Query -> a -> DBCmd err Int64
execPGSQuery q a = mkCmd $ \conn -> PGS.execute conn q a
fromField' :: (Typeable b, FromJSON b) => Field -> Maybe DB.ByteString -> Conversion b
fromField' :: (Typeable b, FromJSON b) => Field -> Maybe BS.ByteString -> Conversion b
fromField' field mb = do
v <- fromField field mb
valueToHyperdata v
......@@ -263,3 +272,60 @@ createDBIfNotExists connStr dbName = do
(result,) <$> SH.lastExitCode
return ()
------------------------------
-- PostgreSQL Large Object functionality
-- https://www.postgresql.org/docs/17/largeobjects.html
-- NOTE: During development of this feature, I had problems (in tests)
-- with a hanging transaction. After debugging, it turned out this
-- was, for some reason, conflicting with our `logLocM` (though I'm no
-- sure why). Please be careful when adding debug info to large
-- objects and if you do, make sure the tests run.
createLargeObject :: BS.ByteString -> DBCmd err PSQL.Oid
createLargeObject bs = mkCmd $ \c -> PGS.withTransaction c $ do
oId <- PSQL.loCreat c
loFd <- PSQL.loOpen c oId PSQL.WriteMode
_ <- PSQL.loWrite c loFd bs
PSQL.loClose c loFd
pure oId
-- | Read a large object directly, given an oid. We read it in a
-- single transaction, looping by given chunk size
readLargeObject :: PSQL.Oid -> DBCmd err BS.ByteString
readLargeObject oId = mkCmd $ \c -> PGS.withTransaction c $ do
loFd <- PSQL.loOpen c oId PSQL.ReadMode
let chunkSize = 1024
let readChunks tell = do
c' <- PSQL.loRead c loFd chunkSize
tell' <- PSQL.loTell c loFd
if tell == tell' then
pure ([c'], tell)
else do
(cs', tell'') <- readChunks tell'
pure (c':cs', tell'')
(chunks, _size) <- readChunks 0
let s = force BSL.toStrict $ BSL.fromChunks chunks
PSQL.loClose c loFd
pure s
-- | Read large object by exporting it to a temporary file, then
-- reading that file. The difference from 'readLargeObject' is that we
-- have only 1 call inside a transaction
readLargeObjectViaTempFile :: (CES.MonadMask m, IsDBCmd env err m)
=> PSQL.Oid -> m BS.ByteString
readLargeObjectViaTempFile oId = do
CES.bracket (liftBase $ emptySystemTempFile "large-object")
(liftBase . removeFile)
(\fp -> do
mkCmd $ \c -> withTransaction c $ \_ -> PSQL.loExport c oId fp
!contents <- liftBase $ BS.readFile fp
pure contents)
where
withTransaction c = CES.bracket (PGS.begin c) (\_ -> PGS.rollback c)
removeLargeObject :: Int -> DBCmd err ()
removeLargeObject oId = mkCmd $ \c -> do
PSQL.loUnlink c $ PSQL.Oid $ fromIntegral oId
......@@ -307,6 +307,7 @@ selectNodesIdWithType nt = proc () -> do
restrict -< tn .== (sqlInt4 $ toDBid nt)
returnA -< _node_id row
-- | Get node, Hyperdata is 'Aeson.Value'
getNode :: HasNodeError err => NodeId -> DBCmd err (Node Value)
getNode nId = do
maybeNode <- headMay <$> runOpaQuery (selectNode (pgNodeId nId))
......
......@@ -158,7 +158,7 @@
git: "https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git"
subdirs:
- .
- commit: 521ca54f1502b13f629eff2223aaf5007e6d52ec
- commit: 894482ef97eadce6b1c13ebced1edfe394b5be5e
git: "https://gitlab.iscpif.fr/gargantext/crawlers/istex.git"
subdirs:
- .
......@@ -273,6 +273,10 @@ flags:
tagged: true
bitvec:
simd: true
"blaze-textual":
developer: false
"integer-simple": false
native: true
boring:
tagged: true
"bytestring-builder":
......@@ -338,6 +342,14 @@ flags:
have_strong_getauxval: false
have_weak_getauxval: false
"pkg-config": true
"direct-sqlite":
dbstat: true
fulltextsearch: true
haveusleep: true
json1: true
mathfunctions: false
systemlib: false
urifilenames: true
distributive:
semigroups: true
tagged: true
......
......@@ -5,6 +5,7 @@ import Prelude
import Test.Hspec
import qualified Test.API.Authentication as Auth
import qualified Test.API.Errors as Errors
import qualified Test.API.Export as Export
import qualified Test.API.GraphQL as GraphQL
import qualified Test.API.Notifications as Notifications
import qualified Test.API.Private as Private
......@@ -17,6 +18,7 @@ tests = describe "Gargantext API" $ do
Private.tests
GraphQL.tests
Errors.tests
Export.tests
UpdateList.tests
Notifications.tests
Worker.tests
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
module Test.API.Export (tests) where
import Data.ByteString.Lazy qualified as BSL
import Data.Version (showVersion)
import Database.SQLite.Simple qualified as S
-- import Fmt (build)
import Gargantext.API.Node.Corpus.Export.Types (CorpusSQLite(..), CorpusSQLiteData(..))
import Gargantext.API.Node.Corpus.Export.Utils (withTempSQLiteDir, mkCorpusSQLiteData)
import Gargantext.Core (Lang(EN))
import Gargantext.Core.Text.Terms (TermType(Multi))
import Gargantext.Core.Types (unNodeId)
import Gargantext.Core.Types.Individu
import Gargantext.Database.Action.Flow (addDocumentsToHyperCorpus)
import Gargantext.Database.Action.User (getUserId)
import Gargantext.Database.Admin.Types.Hyperdata.Corpus (HyperdataCorpus)
import Gargantext.Database.Admin.Types.Node (NodeType(NodeFolder, NodeCorpus, NodeFolderPrivate))
import Gargantext.Database.Query.Table.Node (getOrMkList, getNodeWith, insertDefaultNode, insertNode)
import Gargantext.Database.Query.Tree.Root (getRootId)
import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude hiding (get)
import Paths_gargantext qualified as PG -- cabal magic build module
import Servant.API.ResponseHeaders (Headers(getResponse))
import Servant.Auth.Client ()
import Servant.Client.Streaming (runClientM)
import Test.API.Prelude (checkEither)
import Test.API.Routes (get_corpus_sqlite_export)
import Test.API.Setup (withTestDBAndPort, dbEnvSetup, SpecContext (..))
import Test.API.UpdateList (createFortranDocsList)
import Test.Database.Operations.DocumentSearch (exampleDocument_01, exampleDocument_02)
import Test.Database.Types (runTestMonad)
import Test.Hspec
import Test.Hspec.Wai.Internal (withApplication)
import Test.Utils (withValidLogin)
tests :: Spec
tests = sequential $ around withTestDBAndPort $ beforeWith dbEnvSetup $ do
describe "Export API" $ do
describe "Check CorpusSQLiteData creation" $ do
it "correctly creates CorpusSQLiteData" $ \ctx -> do
flip runReaderT (_sctx_env ctx) $ runTestMonad $ do
aliceUserId <- getUserId (UserName "alice")
aliceRootId <- getRootId (UserName "alice")
alicePrivateFolderId <- insertNode NodeFolderPrivate (Just "NodeFolderPrivate") Nothing aliceRootId aliceUserId
aliceFolderId <- insertDefaultNode NodeFolder alicePrivateFolderId aliceUserId
corpusId <- insertDefaultNode NodeCorpus aliceFolderId aliceUserId
aliceListId <- getOrMkList corpusId aliceUserId
corpus <- getNodeWith corpusId (Proxy @HyperdataCorpus)
let docs = [ exampleDocument_01, exampleDocument_02 ]
let lang = EN
_ <- addDocumentsToHyperCorpus (Just $ corpus ^. node_hyperdata) (Multi lang) corpusId docs
(CorpusSQLiteData { .. }) <- mkCorpusSQLiteData corpusId Nothing
liftIO $ do
_csd_version `shouldBe` PG.version
_csd_cId `shouldBe` corpusId
_csd_lId `shouldBe` aliceListId
length _csd_contexts `shouldBe` 2
length _csd_map_context_ngrams `shouldBe` 0
length _csd_stop_context_ngrams `shouldBe` 0
length _csd_candidate_context_ngrams `shouldBe` 0
describe "GET /api/v1.0/corpus/cId/sqlite" $ do
it "returns correct SQLite db" $ \ctx -> do
let port = _sctx_port ctx
withApplication (_sctx_app ctx) $ do
withValidLogin port "alice" (GargPassword "alice") $ \clientEnv token -> do
cId <- createFortranDocsList (_sctx_env ctx) port clientEnv token
void $ liftIO $ do
(CorpusSQLite { _cs_bs }) <-
(checkEither $ runClientM (get_corpus_sqlite_export token cId) clientEnv) >>= (pure . getResponse)
withTempSQLiteDir $ \(_fp, _fname, fpath) -> do
BSL.writeFile fpath _cs_bs
S.withConnection fpath $ \conn -> do
[S.Only cId'] <- S.query_ conn "SELECT value FROM info WHERE key = 'corpusId'"
cId' `shouldBe` unNodeId cId
-- [S.Only lId'] <- S.query_ conn "SELECT value FROM info WHERE key = 'listId'"
-- lId' `shouldBe` unNodeId listId
[S.Only version] <- S.query_ conn "SELECT value FROM info WHERE key = 'gargVersion'"
version `shouldBe` showVersion PG.version
[S.Only corpoLen] <- S.query conn "SELECT COUNT(*) FROM corpus WHERE id = ?" (S.Only $ unNodeId cId)
corpoLen `shouldBe` (1 :: Int)
-- [S.Only listLen] <- S.query conn "SELECT COUNT(*) FROM lists WHERE id = ?" (S.Only $ unNodeId listId)
-- listLen `shouldBe` (1 :: Int)
[S.Only ngramsLen] <- S.query_ conn "SELECT COUNT(*) FROM ngrams"
ngramsLen `shouldBe` (0 :: Int)
[S.Only docsLen] <- S.query_ conn "SELECT COUNT(*) FROM documents"
docsLen `shouldBe` (2 :: Int)
......@@ -20,7 +20,7 @@ import Gargantext.Core.Types (NodeId, NodeType(..))
import Gargantext.Core.Worker.Env () -- instance HasNodeError
import Gargantext.Database.Action.User
import Gargantext.Database.Admin.Types.Hyperdata.Corpus
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node (insertNode, mk, getUserRootPublicNode, getUserRootPrivateNode)
import Gargantext.Database.Query.Table.Node.User (getUserByName)
import Gargantext.Database.Query.Tree.Root
import Gargantext.Database.Schema.Node (_node_id)
......
......@@ -28,6 +28,7 @@ module Test.API.Routes (
, delete_node
, add_form_to_list
, add_tsv_to_list
, get_corpus_sqlite_export
) where
import Data.Text.Encoding qualified as TE
......@@ -37,13 +38,14 @@ import Gargantext.API.Errors
import Gargantext.API.HashedResponse (HashedResponse)
import Gargantext.API.Ngrams.List.Types (WithJsonFile, WithTextFile)
import Gargantext.API.Ngrams.Types ( NgramsTable, NgramsTablePatch, OrderBy, TabType, Versioned, VersionedWithCount )
import Gargantext.API.Node.Corpus.Export.Types (CorpusSQLite)
import Gargantext.API.Routes.Client
import Gargantext.API.Routes.Named
import Gargantext.API.Routes.Named.Corpus (CorpusExportAPI(corpusSQLiteEp))
import Gargantext.API.Routes.Named.List (updateListJSONEp, updateListTSVEp)
import Gargantext.API.Routes.Named.Node hiding (treeAPI)
import Gargantext.API.Routes.Named.Private hiding (tableNgramsAPI)
import Gargantext.API.Routes.Named.Publish (PublishAPI(..))
import Gargantext.API.Routes.Named.Publish (PublishRequest(..))
import Gargantext.API.Routes.Named.Publish (PublishAPI(..), PublishRequest(..))
import Gargantext.API.Routes.Named.Table
import Gargantext.API.Routes.Named.Tree (nodeTreeEp)
import Gargantext.API.Types () -- MimeUnrender instances
......@@ -57,6 +59,7 @@ import Gargantext.Database.Query.Facet qualified as Facet
import Gargantext.Database.Query.Table.NodeNode (SourceId(..), TargetId(..))
import Gargantext.Prelude
import Network.Wai.Handler.Warp (Port)
import Servant (Headers, Header)
import Servant.Auth.Client qualified as S
import Servant.Client.Streaming
import Servant.Conduit ()
......@@ -337,3 +340,22 @@ publish_node (toServantToken -> token) sourceId policy = fmap UnsafeMkNodeId $
& publishAPI
& publishEp
& ($ PublishRequest policy)
get_corpus_sqlite_export :: Token
-> CorpusId
-> ClientM (Headers '[Header "Content-Disposition" Text] CorpusSQLite)
get_corpus_sqlite_export (toServantToken -> token) cId =
clientRoutes & apiWithCustomErrorScheme
& ($ GES_new)
& backendAPI
& backendAPI'
& mkBackEndAPI
& gargAPIVersion
& gargPrivateAPI
& mkPrivateAPI
& ($ token)
& corpusExportAPI
& ($ cId)
& corpusSQLiteEp
& ($ Nothing) -- Maybe ListId
......@@ -23,13 +23,13 @@ module Test.API.UpdateList (
, JobPollHandle(..)
, updateNode
, createDocsList
, createFortranDocsList
) where
import Control.Lens (mapped, over)
import Control.Monad.Fail (fail)
import Data.Aeson.QQ
import Data.Aeson qualified as JSON
import Data.ByteString.Lazy qualified as BSL
import Data.Aeson.QQ
import Data.Map.Strict.Patch qualified as PM
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
......@@ -46,7 +46,7 @@ import Gargantext.API.Ngrams.Types
import Gargantext.API.Node.Corpus.New.Types qualified as FType
import Gargantext.API.Node.Types
import Gargantext.API.Routes.Named
import Gargantext.API.Routes.Named.Corpus
import Gargantext.API.Routes.Named.Corpus (addWithTempFileEp)
import Gargantext.API.Routes.Named.Node
import Gargantext.API.Routes.Named.Private
import Gargantext.API.Worker (workerAPIPost)
......@@ -89,12 +89,12 @@ uploadJSONList log_cfg port token cId pathToNgrams clientEnv = do
([listId] :: [NodeId]) <- protectedJSON token "POST" (mkUrl port ("/node/" <> build cId)) [aesonQQ|{"pn_typename":"NodeList","pn_name":"Testing"}|]
-- Upload the JSON doc
simpleNgrams' <- liftIO (TIO.readFile =<< getDataFileName pathToNgrams)
let (Just simpleNgrams) = JSON.decode $ BSL.fromStrict $ encodeUtf8 simpleNgrams'
-- let (Just simpleNgrams) = JSON.decode $ BSL.fromStrict $ encodeUtf8 simpleNgrams'
-- let jsonFileFormData = [ (T.pack "_wjf_data", simpleNgrams)
-- , ("_wjf_filetype", "JSON")
-- , ("_wjf_name", "simple_ngrams.json")
-- ]
let params = WithJsonFile { _wjf_data = simpleNgrams
let params = WithJsonFile { _wjf_data = simpleNgrams'
, _wjf_name = "simple_ngrams.json" }
-- let url = "/lists/" +|listId|+ "/add/form/async"
-- let mkPollUrl j = "/corpus/" +|listId|+ "/add/form/async/" +|_jph_id j|+ "/poll?limit=1"
......@@ -401,8 +401,8 @@ add_file_async (toServantToken -> token) corpusId nwf =
& gargPrivateAPI
& mkPrivateAPI
& ($ token)
& addWithFormAPI
& addWithFormEp
& addWithTempFile
& addWithTempFileEp
& ($ corpusId)
& workerAPIPost
& (\submitForm -> submitForm nwf)
......
......@@ -12,15 +12,11 @@ Portability : POSIX
module Test.Database.Operations.DocumentSearch where
-- import Gargantext.API.Node.Update (updateDocs)
-- import Network.URI (parseURI)
import Control.Lens (view)
import Control.Monad.Reader
import Data.Aeson.QQ.Simple
import Data.Aeson.Types
import Data.Text qualified as T
import Gargantext.Core
import Gargantext.Core.NLP (nlpServerGet)
import Gargantext.Core.Text.Corpus.Query qualified as API
import Gargantext.Core.Text.Terms.Mono.Stem
import Gargantext.Core.Types.Individu
......@@ -122,9 +118,7 @@ addCorpusDocuments env = flip runReaderT env $ runTestMonad $ do
let lang = EN
let docs = [exampleDocument_01, exampleDocument_02, exampleDocument_03, exampleDocument_04]
server <- view (nlpServerGet lang)
_ <- addDocumentsToHyperCorpus server
(Just $ _node_hyperdata $ corpus)
_ <- addDocumentsToHyperCorpus (Just $ _node_hyperdata $ corpus)
(Multi lang)
corpusId
docs
......
......@@ -8,9 +8,6 @@ Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TupleSections #-}
module Test.Database.Operations.PublishNode where
import Prelude
......
......@@ -89,12 +89,12 @@ setup = do
-- putText $ "[setup] database: " <> show (gargConfig ^. gc_database_config)
-- putText $ "[setup] worker db: " <> show (gargConfig ^. gc_worker . wsDatabase)
let idleTime = 60.0
let maxResources = 2
let maxResources = 5
let poolConfig = defaultPoolConfig (PG.connectPostgreSQL (Tmp.toConnectionString db))
PG.close
idleTime
maxResources
pool <- newPool (setNumStripes (Just 2) poolConfig)
pool <- newPool (setNumStripes (Just 4) poolConfig)
bootstrapDB db pool gargConfig
ugen <- emptyCounter
test_nodeStory <- fromDBNodeStoryEnv pool
......@@ -104,7 +104,7 @@ setup = do
PG.close
idleTime
maxResources
wPool <- newPool (setNumStripes (Just 2) wPoolConfig)
wPool <- newPool (setNumStripes (Just 4) wPoolConfig)
wNodeStory <- fromDBNodeStoryEnv wPool
_w_env_job_state <- newTVarIO Nothing
withLoggerIO Mock $ \wioLogger -> do
......
......@@ -70,6 +70,9 @@ newtype TestMonad a = TestMonad { runTestMonad :: ReaderT TestEnv IO a }
, MonadBaseControl IO
, MonadFail
, MonadIO
, MonadMask
, MonadCatch
, MonadThrow
)
data TestJobHandle = TestNoJobHandle
......
......@@ -39,7 +39,7 @@ import Gargantext.API.Node.Get (GetNodeParams)
import Gargantext.API.Node.New.Types (PostNode(..))
import Gargantext.API.Node.Share.Types (ShareNodeParams(..))
import Gargantext.API.Node.Update.Types qualified as NU
import Gargantext.API.Node.Types (NewWithForm, RenameNode(..), WithQuery)
import Gargantext.API.Node.Types (NewWithForm, NewWithTempFile(..), RenameNode(..), WithQuery)
import Gargantext.API.Public.Types (PublicData(..))
import Gargantext.API.Routes.Named.Publish (PublishRequest(..))
import Gargantext.API.Search.Types (SearchQuery(..), SearchResult(..), SearchResultTypes(..), SearchType(..))
......@@ -572,6 +572,14 @@ genFrontendErr be = do
pure $ Errors.mkFrontendErr' txt $ Errors.FE_job_generic_exception err
instance Arbitrary NewWithTempFile where
arbitrary = NewWithTempFile <$> arbitrary -- _wtf_filetype
<*> arbitrary -- _wtf_fileformat
<*> arbitrary -- _wtf_file_oid
<*> arbitrary -- _wtf_lang
<*> arbitrary -- _wtf_name
<*> arbitrary -- _wtf_selection
instance Arbitrary Job where
arbitrary = oneof [ pure Ping
......@@ -591,7 +599,7 @@ instance Arbitrary Job where
, uploadDocumentGen ]
where
addContactGen = AddContact <$> arbitrary <*> arbitrary <*> arbitrary
addCorpusFormAsyncGen = AddCorpusFormAsync <$> arbitrary <*> arbitrary <*> arbitrary
addCorpusFormAsyncGen = AddCorpusTempFileAsync <$> arbitrary <*> arbitrary <*> arbitrary
addCorpusWithQueryGen = AddCorpusWithQuery <$> arbitrary <*> arbitrary <*> arbitrary
-- addWithFileGen = AddWithFile <$> arbitrary <*> arbitrary <*> arbitrary
addToAnnuaireWithFormGen = AddToAnnuaireWithForm <$> arbitrary <*> arbitrary
......
{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
{-# LANGUAGE TypeApplications #-}
module Test.Ngrams.Terms (tests) where
import Data.HashMap.Strict qualified as HashMap
......