Commit 8b9cf512 authored by Przemyslaw Kaminski

Merge branch '444-dev-temporary-file-storage' into 'dev'

Resolve "Implement temporary file storage"

See merge request !392
parents 5225daf6 0cc7e082
Pipeline #7505 passed with stages in 43 minutes and 51 seconds
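This merge request moves large uploads out of the worker job payload: the HTTP handlers write the uploaded bytes to a PostgreSQL large object with createLargeObject and only pass the OID through the job queue, and the worker reads the payload back with readLargeObject and deletes it with removeLargeObject once the job finishes. Those helpers live in Gargantext.Database.Prelude and are not shown in this diff, so the following is only a rough sketch of what they might look like, written in plain IO against an explicit postgresql-simple Connection (the real helpers run inside the project's DB monad); the chunk size and the function bodies are assumptions, not the repository's code.

module LargeObjectSketch where

import Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import Database.PostgreSQL.Simple (Connection, withTransaction)
import Database.PostgreSQL.Simple.LargeObjects
  (Oid, loClose, loCreat, loOpen, loRead, loUnlink, loWrite)
import System.IO (IOMode (ReadMode, WriteMode))

-- | Store a strict ByteString as a new large object and return its OID.
createLargeObject :: Connection -> ByteString -> IO Oid
createLargeObject conn bs = withTransaction conn $ do
  oid <- loCreat conn
  fd  <- loOpen conn oid WriteMode
  _   <- loWrite conn fd bs
  loClose conn fd
  pure oid

-- | Read a large object back in 64 kB chunks until EOF.
readLargeObject :: Connection -> Oid -> IO ByteString
readLargeObject conn oid = withTransaction conn $ do
  fd <- loOpen conn oid ReadMode
  let go acc = do
        chunk <- loRead conn fd (64 * 1024)
        if BS.null chunk
          then pure (BS.concat (reverse acc))
          else go (chunk : acc)
  bs <- go []
  loClose conn fd
  pure bs

-- | Drop the temporary object once the worker job is done with it.
removeLargeObject :: Connection -> Oid -> IO ()
removeLargeObject conn oid = withTransaction conn (loUnlink conn oid)

Keeping only an integer OID in the queued message keeps queue rows small and lets the worker fetch arbitrarily large uploads straight from the database.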
@@ -42,31 +42,33 @@ workerCLI (CLIW_run (WorkerArgs { .. })) = do
             $ List.take 72
             $ List.cycle ["_"]) :: Prelude.String)
 
-  putText "GarganText worker"
-  putText $ "worker_name: " <> worker_name
-  putText $ "worker toml: " <> T.pack (_SettingsFile worker_toml)
-
   withWorkerEnv worker_toml $ \env -> do
-    let ws = env ^. hasConfig . gc_worker
-    case findDefinitionByName ws worker_name of
-      Nothing -> do
-        let workerNames = _wdName <$> (_wsDefinitions ws)
-        let availableWorkers = T.intercalate ", " workerNames
-        putText $ "Worker definition not found! Available workers: " <> availableWorkers
-      Just wd -> do
-        putText $ "Starting worker '" <> worker_name <> "'"
-        putText $ "gc config: " <> show (env ^. hasConfig)
-        putText $ "Worker settings: " <> show ws
-
-        if worker_run_single then
-          withPGMQWorkerSingleCtrlC env wd $ \a _state -> do
-            wait a
-        else
-          withPGMQWorkerCtrlC env wd $ \a _state -> do
-            -- _ <- runReaderT (sendJob Ping) env
-            wait a
+    let log_cfg = env ^. hasConfig . gc_logging
+    withLogger log_cfg $ \ioLogger -> do
+
+      logMsg ioLogger INFO "GarganText worker"
+      logMsg ioLogger INFO $ "worker_name: " <> T.unpack worker_name
+      logMsg ioLogger INFO $ "worker toml: " <> _SettingsFile worker_toml
+
+      let ws = env ^. hasConfig . gc_worker
+      case findDefinitionByName ws worker_name of
+        Nothing -> do
+          let workerNames = _wdName <$> (_wsDefinitions ws)
+          let availableWorkers = T.intercalate ", " workerNames
+          putText $ "Worker definition not found! Available workers: " <> availableWorkers
+        Just wd -> do
+          logMsg ioLogger INFO $ "Starting worker '" <> T.unpack worker_name <> "'"
+          logMsg ioLogger DEBUG $ "gc config: " <> show (env ^. hasConfig)
+          logMsg ioLogger DEBUG $ "Worker settings: " <> show ws
+
+          if worker_run_single then
+            withPGMQWorkerSingleCtrlC env wd $ \a _state -> do
+              wait a
+          else
+            withPGMQWorkerCtrlC env wd $ \a _state -> do
+              -- _ <- runReaderT (sendJob Ping) env
+              wait a
 workerCLI (CLIW_runAll (WorkerAllArgs { .. })) = withWorkerEnv worker_toml $ \env -> do
   let log_cfg = env ^. hasConfig . gc_logging
   withLogger log_cfg $ \ioLogger -> runAllWorkers ioLogger worker_toml
......
@@ -17,7 +17,7 @@ fi
 # `cabal.project.freeze`. This ensures the files stay deterministic so that CI
 # cache can kick in.
 expected_cabal_project_hash="963418e37a17d4bb67d4b885613144b36d290f612eea80355e82abc7e76b450c"
 expected_cabal_project_freeze_hash="cd52143d3a9d285360b59c6371d3e258552c1bc115bd612024db3de1f7593ff7"
 
 cabal --store-dir=$STORE_DIR v2-build --dry-run
......
@@ -17,6 +17,7 @@ Portability : POSIX
 module Gargantext.API.Ngrams.List
   where
 
+import Data.Aeson qualified as Aeson
 import Data.ByteString.Lazy qualified as BSL
 import Data.Csv qualified as Tsv
 import Data.HashMap.Strict (HashMap)
@@ -25,8 +26,10 @@ import Data.Map.Strict (toList)
 import Data.Map.Strict qualified as Map
 import Data.Set qualified as Set
 import Data.Text (concat, pack, splitOn)
-import Data.Vector (Vector)
+import Data.Text.Encoding qualified as TE
 import Data.Vector qualified as Vec
+import Data.Vector (Vector)
+import Database.PostgreSQL.Simple.LargeObjects qualified as PSQL
 import Gargantext.API.Admin.EnvTypes (Env)
 import Gargantext.API.Errors.Types (BackendInternalError(InternalServerError))
 import Gargantext.API.Ngrams (setListNgrams)
@@ -35,19 +38,20 @@ import Gargantext.API.Ngrams.Prelude (getNgramsList)
 import Gargantext.API.Ngrams.Types
 import Gargantext.API.Prelude (GargM, serverError, HasServerError)
 import Gargantext.API.Routes.Named.List qualified as Named
-import Gargantext.API.Worker (serveWorkerAPI, serveWorkerAPIM)
+import Gargantext.API.Worker (serveWorkerAPIM)
 import Gargantext.Core.NodeStory.Types ( HasNodeStory )
 import Gargantext.Core.Text.Ngrams (Ngrams, NgramsType(NgramsTerms))
 import Gargantext.Core.Types.Main (ListType(..))
 import Gargantext.Core.Worker.Jobs.Types qualified as Jobs
 import Gargantext.Database.Action.Flow (reIndexWith)
 import Gargantext.Database.Admin.Types.Node ( NodeId(_NodeId), ListId )
+import Gargantext.Database.Prelude (createLargeObject)
 import Gargantext.Database.Query.Table.Node (getNode)
 import Gargantext.Database.Schema.Ngrams ( text2ngrams, NgramsId )
 import Gargantext.Database.Schema.Node (_node_parent_id)
 import Gargantext.Database.Types (Indexed(..))
 import Gargantext.Prelude hiding (concat, toList)
-import Gargantext.System.Logging
+import Gargantext.System.Logging (logLocM, LogLevel(..), MonadLogger)
 import Gargantext.Utils.Jobs.Monad (MonadJobStatus(..))
 import Prelude qualified
 import Protolude qualified as P
@@ -110,9 +114,11 @@ getTsv lId = do
 ------------------------------------------------------------------------
 jsonPostAsync :: Named.JSONAPI (AsServerT (GargM Env BackendInternalError))
 jsonPostAsync = Named.JSONAPI {
-    updateListJSONEp = \lId -> serveWorkerAPI $ \p ->
-      Jobs.JSONPost { _jp_list_id = lId
-                    , _jp_ngrams_list = _wjf_data p }
+    updateListJSONEp = \lId -> serveWorkerAPIM $ \p -> do
+      (PSQL.Oid oId) <- createLargeObject $ TE.encodeUtf8 $ _wjf_data p
+      pure $ Jobs.JSONPost { _jp_list_id = lId
+                           , _jp_ngrams_oid = fromIntegral oId }
+                           -- , _jp_ngrams_list = _wjf_data p }
   }
 ------------------------------------------------------------------------
@@ -162,8 +168,10 @@ tsvPostAsync =
       $(logLocM) DEBUG $ "Started to upload " <> (_wtf_name p)
       case ngramsListFromTSVData (_wtf_data p) of
         Left err -> throwError $ InternalServerError $ err500 { errReasonPhrase = err }
-        Right ngramsList -> pure $ Jobs.JSONPost { _jp_list_id = lId
-                                                 , _jp_ngrams_list = ngramsList }
+        Right ngramsList -> do
+          (PSQL.Oid oId) <- createLargeObject $ BSL.toStrict $ Aeson.encode ngramsList
+          pure $ Jobs.JSONPost { _jp_list_id = lId
+                               , _jp_ngrams_oid = fromIntegral oId }
   }
 
 -- | Tries converting a text file into an 'NgramList', so that we can reuse the
......
@@ -9,15 +9,13 @@ Portability : POSIX
 -}
 
-{-# LANGUAGE TypeOperators #-}
-
 module Gargantext.API.Ngrams.List.Types where
 
 import Data.Aeson
-import Data.ByteString.Lazy qualified as BSL
+-- import Data.ByteString.Lazy qualified as BSL
 import Data.Swagger (ToSchema, declareNamedSchema, genericDeclareNamedSchema)
 import Data.Text
-import Data.Text.Encoding qualified as E
+-- import Data.Text.Encoding qualified as E
 import Gargantext.API.Ngrams.Types (NgramsList)
 import Gargantext.API.Node.Corpus.New.Types (FileType(..))
 import Gargantext.Core.Utils.Aeson (jsonOptions)
@@ -46,16 +44,18 @@ instance ToSchema WithFile where
 ------------------------------------------------------------------------
 
 data WithJsonFile = WithJsonFile
-  { _wjf_data :: !NgramsList
+  { -- _wjf_data :: !NgramsList
+    _wjf_data :: !Text
   , _wjf_name :: !Text
   } deriving (Eq, Show, Generic)
 
 instance FromForm WithJsonFile where
   fromForm f = do
-    d' <- parseUnique "_wjf_data" f
-    d <- case eitherDecode' (BSL.fromStrict $ E.encodeUtf8 d') of
-      Left s -> Left $ pack s
-      Right v -> Right v
+    d <- parseUnique "_wjf_data" f
+    -- d' <- parseUnique "_wjf_data" f
+    -- d <- case eitherDecode' (BSL.fromStrict $ E.encodeUtf8 d') of
+    --   Left s -> Left $ pack s
+    --   Right v -> Right v
     n <- parseUnique "_wjf_name" f
    pure $ WithJsonFile { _wjf_data = d
                        , _wjf_name = n }
......
@@ -22,7 +22,6 @@ Node API
 -}
 
 {-# LANGUAGE ScopedTypeVariables #-}
-{-# LANGUAGE TemplateHaskell #-}
 {-# LANGUAGE TypeOperators #-}
 
 module Gargantext.API.Node
@@ -271,7 +270,7 @@ genericNodeAPI' _ authenticatedUser targetNode = Named.NodeAPI
   , moveAPI = Named.MoveAPI $ \parentId ->
       withPolicy authenticatedUser (moveChecks (SourceId targetNode) (TargetId parentId)) $
         moveNode loggedInUserId targetNode parentId
-  , fileAPI = Named.FileAPI $ fileApi targetNode
+  , fileAPI = Named.FileAPI { fileDownloadEp = fileApi targetNode }
   , fileAsyncAPI = fileAsyncApi authenticatedUser targetNode
   , dfwnAPI = DFWN.api authenticatedUser targetNode
   , documentUploadAPI = DocumentUpload.api targetNode
......
@@ -21,15 +21,15 @@ module Gargantext.API.Node.Corpus.New
 import Conduit ((.|), yieldMany, mapMC, mapC, transPipe)
+import Control.Exception.Safe (MonadMask)
 import Control.Lens ( view, non )
-import Data.ByteString.Base64 qualified as BSB64
 import Data.Conduit.Internal (zipSources)
 import Data.Swagger ( ToSchema(..) )
-import Data.Text.Encoding qualified as TE
 import Data.Text qualified as T
+import Database.PostgreSQL.Simple.LargeObjects qualified as PSQL
 import Gargantext.API.Admin.Orchestrator.Types qualified as API
 import Gargantext.API.Ngrams (commitStatePatch, Versioned(..))
-import Gargantext.API.Node.Corpus.New.Types ( FileFormat(..), FileType(..) )
+import Gargantext.API.Node.Corpus.New.Types ( FileType(..) )
 import Gargantext.API.Node.Corpus.Searx ( triggerSearxSearch )
 import Gargantext.API.Node.Corpus.Types ( Datafield(Web), datafield2origin )
 import Gargantext.API.Node.Corpus.Update (addLanguageToCorpus)
@@ -51,7 +51,7 @@ import Gargantext.Database.Admin.Types.Hyperdata.Document ( ToHyperdataDocument(
 import Gargantext.Database.Admin.Types.Hyperdata.File ( HyperdataFile(..) )
 import Gargantext.Database.Admin.Types.Node (CorpusId, NodeType(..), ParentId)
 import Gargantext.Database.GargDB qualified as GargDB
-import Gargantext.Database.Prelude (IsDBCmd)
+import Gargantext.Database.Prelude (readLargeObject, IsDBCmd)
 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
 import Gargantext.Database.Query.Table.Node (getNodeWith, getOrMkList)
 import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
@@ -214,28 +214,29 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
       $(logLocM) ERROR $ "[addToCorpusWithQuery] error: " <> show err -- log the full error
       markFailed (Just err) jobHandle
 
-addToCorpusWithForm :: ( FlowCmdM env err m
-                       , MonadJobStatus m
-                       , HasNodeStoryImmediateSaver env
-                       , HasNodeArchiveStoryImmediateSaver env
-                       )
-                    => User
-                    -> CorpusId
-                    -> NewWithForm
-                    -> JobHandle m
-                    -> m ()
-addToCorpusWithForm user cid nwf jobHandle = do
+addToCorpusWithTempFile :: ( MonadMask m
+                           , FlowCmdM env err m
+                           , MonadJobStatus m
+                           , HasNodeStoryImmediateSaver env
+                           , HasNodeArchiveStoryImmediateSaver env
+                           )
+                        => User
+                        -> CorpusId
+                        -> NewWithTempFile
+                        -> JobHandle m
+                        -> m ()
+addToCorpusWithTempFile user cid nwtf jobHandle = do
   -- printDebug "[addToCorpusWithForm] Parsing corpus: " cid
   -- printDebug "[addToCorpusWithForm] fileType" ft
   -- printDebug "[addToCorpusWithForm] fileFormat" ff
-  let l = nwf ^. wf_lang . non defaultLanguage
+  let l = nwtf ^. wtf_lang . non defaultLanguage
   addLanguageToCorpus cid l
 
   limit' <- view $ hasConfig . gc_jobs . jc_max_docs_parsers
  let limit = fromIntegral limit' :: Integer
  let
-    parseC = case (nwf ^. wf_filetype) of
+    parseC = case nwtf ^. wtf_filetype of
       TSV -> Parser.parseFormatC Parser.TsvGargV3
       TSV_HAL -> Parser.parseFormatC Parser.TsvHal
       Iramuteq -> Parser.parseFormatC Parser.Iramuteq
@@ -245,12 +246,11 @@ addToCorpusWithForm user cid nwf jobHandle = do
       WOS -> Parser.parseFormatC Parser.WOS
 
   -- TODO granularity of the logStatus
-  let data' = case (nwf ^. wf_fileformat) of
-        Plain -> cs (nwf ^. wf_data)
-        ZIP -> case BSB64.decode $ TE.encodeUtf8 (nwf ^. wf_data) of
-          Left err -> panicTrace $ T.pack "[addToCorpusWithForm] error decoding base64: " <> T.pack err
-          Right decoded -> decoded
-  eDocsC <- liftBase $ parseC (nwf ^. wf_fileformat) data'
+  let oId = PSQL.Oid $ fromIntegral $ nwtf ^. wtf_file_oid
+  data' <- readLargeObject oId
+  -- $(logLocM) DEBUG $ "[addToCorpusWithTempFile] size: " <> show size
+  -- $(logLocM) DEBUG $ "[addToCorpusWithTempFile] data': " <> TE.decodeUtf8 data'
+  eDocsC <- liftBase $ parseC (nwtf ^. wtf_fileformat) data'
   case eDocsC of
     Right (count, docsC) -> do
       -- TODO Add progress (jobStatus) update for docs - this is a
@@ -260,7 +260,7 @@ addToCorpusWithForm user cid nwf jobHandle = do
         .| mapMC (\(idx, doc) ->
               if idx > limit then do
                 --printDebug "[addToCorpusWithForm] number of docs exceeds the limit" (show limit)
-                let panicMsg' = [ "[addToCorpusWithForm] number of docs "
+                let panicMsg' = [ "[addToCorpusWithTempFile] number of docs "
                                 , "exceeds the MAX_DOCS_PARSERS limit ("
                                 , show limit
                                 , ")" ]
@@ -280,7 +280,7 @@ addToCorpusWithForm user cid nwf jobHandle = do
       _cid' <- flowCorpus (MkCorpusUserNormalCorpusIds user [cid])
                           (Multi l)
-                          (Just (nwf ^. wf_selection))
+                          (Just (nwtf ^. wtf_selection))
                           --(Just $ fromIntegral $ length docs, docsC')
                           (count, transPipe liftBase docsC') -- TODO fix number of docs
                           --(map (map toHyperdataDocument) docs)
@@ -295,7 +295,7 @@ addToCorpusWithForm user cid nwf jobHandle = do
       markComplete jobHandle
    Left parseErr -> do
-      $(logLocM) ERROR $ "[addToCorpusWithForm] parse error: " <> (Parser._ParseFormatError parseErr)
+      $(logLocM) ERROR $ "[addToCorpusWithTempFile] parse error: " <> Parser._ParseFormatError parseErr
      markFailed (Just parseErr) jobHandle
 
 {-
......
@@ -11,7 +11,6 @@ Portability : POSIX
 {-# OPTIONS_GHC -fno-warn-unused-matches #-}
 {-# LANGUAGE AllowAmbiguousTypes #-}
 {-# LANGUAGE IncoherentInstances #-}
 
 module Gargantext.API.Node.File where
@@ -60,9 +59,7 @@ fileDownload nId = do
   Contents c <- GargDB.readGargFile $ T.unpack path
 
   let (mMime, _) = DMT.guessType DMT.defaultmtd False $ T.unpack name'
-      mime = case mMime of
-        Just m -> m
-        Nothing -> "text/plain"
+      mime = fromMaybe "text/plain" mMime
 
   pure $ addHeader (T.pack mime) $ BSResponse c
@@ -118,3 +115,4 @@ addWithFile authenticatedUser nId nwf@(NewWithFile _d _l fName) jobHandle = do
   markComplete jobHandle
   where
     userId = authenticatedUser ^. auth_user_id
+{-|
+Module : Gargantext.API.Node.File.Types
+Description :
+Copyright : (c) CNRS, 2017
+License : AGPL + CECILL v3
+Maintainer : team@gargantext.org
+Stability : experimental
+Portability : POSIX
+-}
+
 module Gargantext.API.Node.File.Types where
@@ -10,27 +19,34 @@ import Gargantext.Prelude
 import Network.HTTP.Media qualified as M
 import Servant
 
 data RESPONSE deriving Typeable
 
 instance Accept RESPONSE where
   contentType _ = "text" M.// "*"
 
 instance MimeRender RESPONSE BSResponse where
   mimeRender _ (BSResponse val) = BSL.fromStrict $ val
 
 instance MimeUnrender RESPONSE BSResponse where
   mimeUnrender _ lbs = Right $ BSResponse (BSL.toStrict lbs)
 
 newtype Contents = Contents BS.ByteString
 
 instance GargDB.ReadFile Contents where
   readFile' fp = do
     c <- BS.readFile fp
     pure $ Contents c
 
+instance ToSchema Contents where
+  declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
+
+instance MimeUnrender OctetStream Contents where
+  mimeUnrender _ lbs = Right $ Contents (BSL.toStrict lbs)
+
 newtype BSResponse = BSResponse BS.ByteString
   deriving (Generic)
 
 instance ToSchema BSResponse where
   declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
+
+-- | Temporary file, held in database, return it's OID
+newtype DBTempFile = DBTempFile Int
+  deriving (Generic, ToJSON)
+
+instance ToSchema DBTempFile where
+  declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
@@ -14,16 +14,17 @@ Portability : POSIX
 module Gargantext.API.Node.FrameCalcUpload where
 
+import Control.Exception.Safe (MonadMask)
 import Data.ByteString.Lazy qualified as BSL
-import Data.ByteString.UTF8 qualified as BSU8
 import Data.Text qualified as T
+import Database.PostgreSQL.Simple.LargeObjects qualified as PSQL
 import Gargantext.API.Admin.Auth.Types ( auth_node_id, AuthenticatedUser )
 import Gargantext.API.Admin.EnvTypes (Env)
 import Gargantext.API.Errors.Types ( BackendInternalError )
-import Gargantext.API.Node.Corpus.New (addToCorpusWithForm)
+import Gargantext.API.Node.Corpus.New (addToCorpusWithTempFile)
 import Gargantext.API.Node.Corpus.New.Types (FileFormat(..), FileType(..))
 import Gargantext.API.Node.FrameCalcUpload.Types
-import Gargantext.API.Node.Types (NewWithForm(..))
+import Gargantext.API.Node.Types (NewWithTempFile(..))
 import Gargantext.API.Prelude ( GargM )
 import Gargantext.API.Routes.Named.FrameCalc qualified as Named
 import Gargantext.API.Worker (serveWorkerAPI)
@@ -35,6 +36,7 @@ import Gargantext.Database.Action.Flow.Types ( FlowCmdM )
 import Gargantext.Database.Admin.Types.Hyperdata.Frame ( HyperdataFrame(..) )
 import Gargantext.Database.Admin.Types.Node ( NodeId, NodeType(NodeCorpus) )
 import Gargantext.Database.Query.Table.Node (getClosestParentIdByType, getNodeWith)
+import Gargantext.Database.Prelude (createLargeObject)
 import Gargantext.Database.Schema.Node (node_hyperdata)
 import Gargantext.Prelude
 import Gargantext.Utils.Jobs.Monad (MonadJobStatus(..), markFailureNoErr)
@@ -54,7 +56,8 @@ api authenticatedUser nId =
 
-frameCalcUploadAsync :: ( HasConfig env
+frameCalcUploadAsync :: ( MonadMask m
+                        , HasConfig env
                         , FlowCmdM env err m
                         , MonadJobStatus m
                         , HasNodeArchiveStoryImmediateSaver env
@@ -65,7 +68,7 @@ frameCalcUploadAsync :: ( HasConfig env
                      -> FrameCalcUpload
                      -> JobHandle m
                      -> m ()
-frameCalcUploadAsync authenticatedUser nId (FrameCalcUpload _wf_lang _wf_selection) jobHandle = do
+frameCalcUploadAsync authenticatedUser nId (FrameCalcUpload _wtf_lang _wtf_selection) jobHandle = do
   markStarted 5 jobHandle
 
   -- printDebug "[frameCalcUploadAsync] uId" uId
@@ -82,7 +85,8 @@ frameCalcUploadAsync authenticatedUser nId (FrameCalcUpload _wf_lang _wf_selecti
           manager <- newManager tlsManagerSettings
          req <- parseRequest $ T.unpack csvUrl
          httpLbs req manager
-  let body = T.pack $ BSU8.toString $ BSL.toStrict $ responseBody res
+  let body = BSL.toStrict $ responseBody res
+  PSQL.Oid oId <- createLargeObject body
 
   -- printDebug "body" body
   mCId <- getClosestParentIdByType nId NodeCorpus
@@ -92,14 +96,14 @@ frameCalcUploadAsync authenticatedUser nId (FrameCalcUpload _wf_lang _wf_selecti
    Nothing -> markFailureNoErr 1 jobHandle
    Just cId ->
      -- FIXME(adn) Audit this conversion.
-      addToCorpusWithForm (RootId userNodeId)
-                          cId
-                          (NewWithForm { _wf_filetype = TSV
-                                       , _wf_fileformat = Plain
-                                       , _wf_data = body
-                                       , _wf_lang
-                                       , _wf_name = "calc-upload.csv"
-                                       , _wf_selection }) jobHandle
+      addToCorpusWithTempFile (RootId userNodeId)
+                              cId
+                              (NewWithTempFile { _wtf_filetype = TSV
+                                               , _wtf_fileformat = Plain
+                                               , _wtf_file_oid = fromIntegral oId
+                                               , _wtf_lang
+                                               , _wtf_name = "calc-upload.csv"
+                                               , _wtf_selection }) jobHandle
 
  markComplete jobHandle
  where
......
@@ -31,6 +31,9 @@ import Gargantext.Prelude
 import Web.FormUrlEncoded (FromForm, ToForm)
 
 -------------------------------------------------------
+-- | A file is uploaded with this type. Then, for internal job
+-- creation for the worker, 'NewWithTempFile' is used with a large
+-- object oid
 data NewWithForm = NewWithForm
   { _wf_filetype :: !FileType
   , _wf_fileformat :: !FileFormat
@@ -50,6 +53,26 @@ instance ToJSON NewWithForm where
 instance ToSchema NewWithForm where
   declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wf_")
 
+-------------------------------------------------------
+data NewWithTempFile = NewWithTempFile
+  { _wtf_filetype :: !FileType
+  , _wtf_fileformat :: !FileFormat
+  , _wtf_file_oid :: !Int
+  , _wtf_lang :: !(Maybe Lang)
+  , _wtf_name :: !Text
+  , _wtf_selection :: !FlowSocialListWith
+  } deriving (Eq, Show, Generic)
+
+makeLenses ''NewWithTempFile
+
+instance FromForm NewWithTempFile
+instance ToForm NewWithTempFile
+instance FromJSON NewWithTempFile where
+  parseJSON = genericParseJSON $ jsonOptions "_wtf_"
+instance ToJSON NewWithTempFile where
+  toJSON = genericToJSON $ jsonOptions "_wtf_"
+instance ToSchema NewWithTempFile where
+  declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wtf_")
+
 -------------------------------------------------------
 data NewWithFile = NewWithFile
......
@@ -11,21 +11,30 @@ Portability : POSIX
 {-# LANGUAGE ConstraintKinds #-}
 {-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TemplateHaskell #-}
 {-# LANGUAGE TypeFamilies #-}
 {-# LANGUAGE UndecidableInstances #-}
 
-module Gargantext.API.Routes
-      where
+module Gargantext.API.Routes where
 
+import Data.ByteString.Base64 qualified as BSB64
+import Data.Text qualified as T
+import Data.Text.Encoding qualified as TE
+import Database.PostgreSQL.Simple.LargeObjects qualified as PSQL
+import Gargantext.API.Admin.Auth.Types (AuthenticatedUser, auth_user_id)
 import Gargantext.API.Admin.EnvTypes (Env)
 import Gargantext.API.Errors.Types (BackendInternalError)
+import Gargantext.API.Node.Corpus.New.Types ( FileFormat(..) )
+import Gargantext.API.Node.Types (NewWithForm(..), NewWithTempFile(..))
 import Gargantext.API.Prelude (GargServer, GargM)
 import Gargantext.API.Routes.Named.Annuaire qualified as Named
 import Gargantext.API.Routes.Named.Corpus qualified as Named
-import Gargantext.API.Worker (serveWorkerAPI)
+import Gargantext.API.Worker (serveWorkerAPI, serveWorkerAPIM)
 import Gargantext.Core.Types.Individu (User(..))
 import Gargantext.Core.Worker.Jobs.Types qualified as Jobs
+import Gargantext.Database.Prelude (createLargeObject)
 import Gargantext.Prelude
+import Gargantext.System.Logging ( logLocM, LogLevel(..) )
 import Servant (Get, JSON)
 import Servant.Server.Generic (AsServerT)
@@ -63,19 +72,34 @@ addCorpusWithQuery user =
                                    , Jobs._acq_cid = cId }
   }
 
-addCorpusWithForm :: User -> Named.AddWithForm (AsServerT (GargM Env BackendInternalError))
-addCorpusWithForm user =
-  Named.AddWithForm {
-    addWithFormEp = \cId -> serveWorkerAPI $ \p ->
-      -- /NOTE(adinapoli)/ Track the initial steps outside 'addToCorpusWithForm', because it's
-      -- called in a few places, and the job status might be different between invocations.
-      -- markStarted 3 jHandle
-      -- New.addToCorpusWithForm user cid i jHandle
-      Jobs.AddCorpusFormAsync { Jobs._acf_args = p
-                              , Jobs._acf_user = user
-                              , Jobs._acf_cid = cId }
+-- | Uses temporary file stored in postgres to add that file to a corpus
+addWithTempFileApi :: AuthenticatedUser
+                   -> Named.AddWithTempFile (AsServerT (GargM Env BackendInternalError))
+addWithTempFileApi authenticatedUser =
+  Named.AddWithTempFile {
+    addWithTempFileEp = \cId ->
+      serveWorkerAPIM $ \(NewWithForm { .. }) -> do
+        let bs = case _wf_fileformat of
+              Plain -> cs _wf_data
+              ZIP -> case BSB64.decode $ TE.encodeUtf8 _wf_data of
+                Left err -> panicTrace $ T.pack "[addWithTempFileApi] error decoding base64: " <> T.pack err
+                Right decoded -> decoded
+        (PSQL.Oid oId) <- createLargeObject bs
+        $(logLocM) DEBUG $ "[addWithTempFileApi] oId': " <> show oId
+        let args = NewWithTempFile { _wtf_filetype = _wf_filetype
+                                   , _wtf_fileformat = _wf_fileformat
+                                   , _wtf_file_oid = fromIntegral oId
+                                   , _wtf_lang = _wf_lang
+                                   , _wtf_name = _wf_name
+                                   , _wtf_selection = _wf_selection }
+        pure $ Jobs.AddCorpusTempFileAsync { _actf_args = args
+                                           , _actf_cid = cId
+                                           , _actf_user = userId }
   }
+  where
+    userId = UserDBId $ authenticatedUser ^. auth_user_id
 
 addAnnuaireWithForm :: Named.AddAnnuaireWithForm (AsServerT (GargM Env BackendInternalError))
 addAnnuaireWithForm =
   Named.AddAnnuaireWithForm {
......
@@ -14,7 +14,7 @@ Portability : POSIX
 module Gargantext.API.Routes.Named.Corpus (
   -- * Routes types
     CorpusExportAPI(..)
-  , AddWithForm(..)
+  , AddWithTempFile(..)
  , AddWithQuery(..)
  , MakeSubcorpusAPI(..)
  -- * Others
@@ -43,14 +43,15 @@ newtype CorpusExportAPI mode = CorpusExportAPI
                   :> Get '[JSON] (Headers '[Servant.Header "Content-Disposition" Text] Corpus)
   } deriving Generic
 
-newtype AddWithForm mode = AddWithForm
-  { addWithFormEp :: mode :- Summary "Add with FormUrlEncoded to corpus endpoint"
-                            :> "corpus"
-                            :> Capture "corpus_id" CorpusId
-                            :> "add"
-                            :> "form"
-                            :> "async"
-                            :> NamedRoutes (WorkerAPI '[FormUrlEncoded] NewWithForm)
+data AddWithTempFile mode = AddWithTempFile
+  { addWithTempFileEp :: mode :- Summary "Add with form via temp file"
+                                 :> "corpus"
+                                 :> Capture "corpus_id" CorpusId
+                                 :> "add"
+                                 :> "form"
+                                 :> "async"
+                                 :> NamedRoutes (WorkerAPI '[FormUrlEncoded] NewWithForm)
   } deriving Generic
 
 newtype AddWithQuery mode = AddWithQuery
......
+{-|
+Module : Gargantext.API.Routes.Named.File
+Description :
+Copyright : (c) CNRS, 2017
+License : AGPL + CECILL v3
+Maintainer : team@gargantext.org
+Stability : experimental
+Portability : POSIX
+-}
+
 {-# LANGUAGE TypeOperators #-}
 
 module Gargantext.API.Routes.Named.File (
@@ -6,6 +16,7 @@ module Gargantext.API.Routes.Named.File (
   , FileAsyncAPI(..)
   ) where
 
 import Data.Text (Text)
 import GHC.Generics (Generic)
 import Gargantext.API.Node.File.Types (BSResponse, RESPONSE)
@@ -13,6 +24,7 @@ import Gargantext.API.Node.Types (NewWithFile)
 import Gargantext.API.Worker (WorkerAPI)
 import Servant
 
 data FileAPI mode = FileAPI
   { fileDownloadEp :: mode :- Summary "File download"
                       :> "download"
@@ -26,4 +38,3 @@ data FileAsyncAPI mode = FileAsyncAPI
                    :> "add"
                    :> NamedRoutes (WorkerAPI '[FormUrlEncoded] NewWithFile)
   } deriving Generic
+{-|
+Module : Gargantext.API.Routes.Named.Node
+Description :
+Copyright : (c) CNRS, 2017
+License : AGPL + CECILL v3
+Maintainer : team@gargantext.org
+Stability : experimental
+Portability : POSIX
+-}
+
 {-# LANGUAGE TypeOperators #-}
 
 module Gargantext.API.Routes.Named.Node (
@@ -84,7 +94,7 @@ data NodeAPI a mode = NodeAPI
   , searchAPI :: mode :- "search" :> NamedRoutes (SearchAPI SearchResult)
   , shareAPI :: mode :- "share" :> NamedRoutes ShareNode
   , unshareEp :: mode :- "unshare" :> NamedRoutes UnshareNode
-  , publishAPI :: mode :- "publish" :> (PolicyChecked (NamedRoutes PublishAPI))
+  , publishAPI :: mode :- "publish" :> PolicyChecked (NamedRoutes PublishAPI)
   ---- Pairing utilities
   , pairWithEp :: mode :- "pairwith" :> NamedRoutes PairWith
   , pairsEp :: mode :- "pairs" :> NamedRoutes Pairs
......
@@ -8,9 +8,8 @@ Stability : experimental
 Portability : POSIX
 -}
 
-{-# LANGUAGE TypeOperators #-}
-{-# LANGUAGE KindSignatures #-}
 {-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE TypeOperators #-}
 
 module Gargantext.API.Routes.Named.Private (
   -- * Routes types
@@ -26,13 +25,13 @@ module Gargantext.API.Routes.Named.Private (
 
 import Data.Text (Text)
-import Gargantext.API.Admin.Auth.Types
-import Gargantext.API.Auth.PolicyCheck
-import Gargantext.API.Routes.Named.Contact
-import Gargantext.API.Routes.Named.Context
-import Gargantext.API.Routes.Named.Corpus
-import Gargantext.API.Routes.Named.Count
-import Gargantext.API.Routes.Named.Document
+import Gargantext.API.Admin.Auth.Types (AuthenticatedUser)
+import Gargantext.API.Auth.PolicyCheck (PolicyChecked)
+import Gargantext.API.Routes.Named.Contact (ContactAPI)
+import Gargantext.API.Routes.Named.Context (ContextAPI)
+import Gargantext.API.Routes.Named.Corpus (AddWithTempFile, AddWithQuery, CorpusExportAPI, MakeSubcorpusAPI)
+import Gargantext.API.Routes.Named.Count (CountAPI, Query)
+import Gargantext.API.Routes.Named.Document (DocumentExportAPI)
 import Gargantext.API.Routes.Named.List (GETAPI, JSONAPI, TSVAPI)
 import Gargantext.API.Routes.Named.Node
 import Gargantext.API.Routes.Named.Remote
@@ -95,7 +94,7 @@ data GargPrivateAPI' mode = GargPrivateAPI'
                       :> Capture "tree_id" NodeId
                      :> NamedRoutes TreeFlatAPI
   , membersAPI :: mode :- "members" :> Summary "Team node members" :> NamedRoutes MembersAPI
-  , addWithFormAPI :: mode :- NamedRoutes AddWithForm
+  , addWithTempFile :: mode :- NamedRoutes AddWithTempFile
   , addWithQueryEp :: mode :- NamedRoutes AddWithQuery
   , makeSubcorpusAPI :: mode :- NamedRoutes MakeSubcorpusAPI
   , listGetAPI :: mode :- NamedRoutes GETAPI
......
@@ -18,7 +18,7 @@ import Gargantext.API.Node.Phylo.Export qualified as PhyloExport
 import Gargantext.API.Node qualified as Tree
 import Gargantext.API.Node.ShareURL ( shareURL )
 import Gargantext.API.Prelude (GargM)
-import Gargantext.API.Routes (addCorpusWithForm, addCorpusWithQuery)
+import Gargantext.API.Routes (addWithTempFileApi, addCorpusWithQuery)
 import Gargantext.API.Routes.Named.Private qualified as Named
 import Gargantext.API.Server.Named.Ngrams
 import Gargantext.API.Server.Named.Viz qualified as Viz
@@ -59,7 +59,7 @@ serverPrivateGargAPI' authenticatedUser@(AuthenticatedUser userNodeId userId)
   , treeAPI = Tree.treeAPI authenticatedUser
   , treeFlatAPI = Tree.treeFlatAPI authenticatedUser
   , membersAPI = members
-  , addWithFormAPI = addCorpusWithForm (RootId userNodeId)
+  , addWithTempFile = addWithTempFileApi authenticatedUser
  , addWithQueryEp = addCorpusWithQuery (RootId userNodeId)
  , makeSubcorpusAPI = Subcorpus.makeSubcorpus userId
  , listGetAPI = List.getAPI
......
@@ -59,4 +59,3 @@ serveWorkerAPIEJob f = WorkerAPI { workerAPIPost }
       mId <- sendJob job
       pure $ JobInfo { _ji_message_id = mId
                      , _ji_mNode_id = getWorkerMNodeId job }
@@ -20,7 +20,7 @@ import Data.Bimap (Bimap)
 import Data.Bimap qualified as Bimap
 import Data.LanguageCodes qualified as ISO639
 import Data.Morpheus.Types (GQLType)
-import Data.Swagger (ToSchema(..), defaultSchemaOptions, genericDeclareNamedSchemaUnrestricted)
+import Data.Swagger (ToParamSchema, ToSchema(..), defaultSchemaOptions, genericDeclareNamedSchemaUnrestricted)
 import Data.Text (pack)
 import Gargantext.Prelude hiding (All)
 import Prelude (userError)
@@ -70,6 +70,7 @@ defaultLanguage = EN
 
 instance ToJSON Lang
 instance FromJSON Lang
+instance ToParamSchema Lang
 instance ToSchema Lang where
   declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
 instance FromHttpApiData Lang
......
@@ -312,7 +312,8 @@ getMultipleLinefile bl del headers res x = do
     then checkNextLine bl del headers res x
     else
       if (length tmp > length headers) || (V.length bl == (x + 1))
-        then Left (pack $ "Cannot parse the file at line " <> show x <> ". Maybe because of a delimiter")
+        then
+          Left (pack $ "Cannot parse the file at line " <> show x <> ". Maybe because of a delimiter")
        else do
          case BL.append res <$> ((V.!?) bl (x+1)) of
            Nothing -> Left "getMultipleLinefile"
@@ -591,7 +592,7 @@ parseTsv' bs = (V.toList . V.map tsv2doc . snd) <$> readTsvLazyBS Comma bs
 parseTsv' :: BL.ByteString -> Either Text [HyperdataDocument]
 parseTsv' bs = do
   let
-    result = case (testCorrectFile bs) of
+    result = case testCorrectFile bs of
      Left _err -> Left _err
      Right del -> readTsvLazyBS del bs
  V.toList . V.map tsv2doc . snd <$> result
@@ -601,7 +602,7 @@ parseTsvC :: BL.ByteString
 parseTsvC bs =
   (\(_h, rs) -> (fromIntegral $ V.length rs, yieldMany rs .| mapC tsv2doc)) <$> eResult
   where
-    eResult = case (testCorrectFile bs) of
+    eResult = case testCorrectFile bs of
      Left _err -> Left _err
      Right del -> readTsvLazyBS del bs
......
@@ -21,19 +21,24 @@ module Gargantext.Core.Worker where
 import Async.Worker.Broker.Types (toA, getMessage, messageId)
 import Async.Worker qualified as W
 import Async.Worker.Types qualified as W
+import Control.Exception.Safe qualified as CES
 import Control.Lens (to)
+import Data.Aeson qualified as Aeson
+import Data.ByteString.Lazy qualified as BSL
 import Data.Text qualified as T
+import Database.PostgreSQL.Simple.LargeObjects qualified as PSQL
 import Gargantext.API.Admin.Auth (forgotUserPassword)
 import Gargantext.API.Admin.Auth.Types (ForgotPasswordAsyncParams(..))
 import Gargantext.API.Ngrams.List (postAsyncJSON)
 import Gargantext.API.Node.Contact (addContact)
 import Gargantext.API.Node.Corpus.Annuaire qualified as Annuaire
-import Gargantext.API.Node.Corpus.New (addToCorpusWithForm, addToCorpusWithQuery)
+import Gargantext.API.Node.Corpus.New (addToCorpusWithTempFile, addToCorpusWithQuery)
 import Gargantext.API.Node.DocumentsFromWriteNodes (documentsFromWriteNodes)
 import Gargantext.API.Node.DocumentUpload (documentUploadAsync, remoteImportDocuments)
 import Gargantext.API.Node.File (addWithFile)
 import Gargantext.API.Node.FrameCalcUpload (frameCalcUploadAsync)
 import Gargantext.API.Node.New (postNode')
+import Gargantext.API.Node.Types (_wtf_file_oid)
 import Gargantext.API.Node.Update.Types (UpdateNodeParams(..), Granularity (..))
 import Gargantext.API.Node.Update (updateNode)
 import Gargantext.API.Server.Named.Ngrams (tableNgramsPostChartsAsync)
@@ -48,6 +53,7 @@ import Gargantext.Core.Worker.Env
 import Gargantext.Core.Worker.Jobs.Types (Job(..), getWorkerMNodeId, ImportRemoteDocumentsPayload(..), ImportRemoteTermsPayload(..))
 import Gargantext.Core.Worker.PGMQTypes (BrokerMessage, HasWorkerBroker, WState)
 import Gargantext.Core.Worker.Types (JobInfo(..))
+import Gargantext.Database.Prelude (readLargeObject, removeLargeObject)
 import Gargantext.Database.Query.Table.User (getUsersWithEmail)
 import Gargantext.Prelude hiding (to)
 import Gargantext.System.Logging ( logLocM, LogLevel(..), logMsg, withLogger )
@@ -223,12 +229,14 @@ performAction env _state bm = do
     AddContact { .. } -> runWorkerMonad env $ do
       $(logLocM) DEBUG $ "[performAction] add contact"
      addContact _ac_user _ac_node_id _ac_args jh
 
-    -- | Send a file with documents and index them in corpus
-    AddCorpusFormAsync { .. } -> runWorkerMonad env $ do
-      $(logLocM) DEBUG $ "[performAction] add corpus form"
-      addToCorpusWithForm _acf_user _acf_cid _acf_args jh
+    -- | Uses temporary file to add documents into corpus
+    AddCorpusTempFileAsync { .. } -> runWorkerMonad env $ do
+      -- TODO CES.filnally
+      $(logLocM) DEBUG "[performAction] add to corpus with temporary file"
+      CES.finally (addToCorpusWithTempFile _actf_user _actf_cid _actf_args jh)
+                  (removeLargeObject $ _wtf_file_oid _actf_args)
 
     -- | Perform external API search query and index documents in corpus
     AddCorpusWithQuery { .. } -> runWorkerMonad env $ do
      $(logLocM) DEBUG "[performAction] add corpus with query"
@@ -266,7 +274,12 @@ performAction env _state bm = do
     -- | Process uploaded JSON file
     JSONPost { .. } -> runWorkerMonad env $ do
       $(logLocM) DEBUG $ "[performAction] json post"
-      void $ postAsyncJSON _jp_list_id _jp_ngrams_list jh
+      CES.finally (do
+                      _jp_ngrams_list' <- readLargeObject (PSQL.Oid $ fromIntegral _jp_ngrams_oid)
+                      case Aeson.eitherDecode (BSL.fromStrict _jp_ngrams_list') of
+                        Left err -> CES.throwString err
+                        Right _jp_ngrams_list -> void $ postAsyncJSON _jp_list_id _jp_ngrams_list jh)
+                  (removeLargeObject _jp_ngrams_oid)
 
     -- | Task for updating metrics charts
     NgramsPostCharts { .. } -> runWorkerMonad env $ do
......
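In performAction above, the new AddCorpusTempFileAsync and JSONPost branches pair the job body with removeLargeObject through CES.finally so the temporary object is deleted even when the job throws. The same read-then-cleanup pattern could be factored into a bracket-style helper; the snippet below is only a sketch of a possible refactor, not code from this merge request, and the reader/deleter arguments stand in for the project's readLargeObject and removeLargeObject.

module WithTempLargeObject where

import Control.Exception.Safe (bracket)
import Data.ByteString (ByteString)
import Database.PostgreSQL.Simple.LargeObjects (Oid)

-- Fetch the bytes of a temporary large object, run an action on them, and always
-- delete the object afterwards, whether the action succeeds or throws.
withTempLargeObject :: (Oid -> IO ByteString)  -- ^ e.g. a readLargeObject specialised to IO
                    -> (Oid -> IO ())          -- ^ e.g. a removeLargeObject specialised to IO
                    -> Oid
                    -> (ByteString -> IO a)    -- ^ the job body (decode, index, ...)
                    -> IO a
withTempLargeObject readLO removeLO oid action =
  bracket (readLO oid) (\_ -> removeLO oid) action

bracket, from safe-exceptions, guarantees the release step runs on both normal completion and exceptions, which is the same property the explicit CES.finally calls provide.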
@@ -19,6 +19,7 @@ module Gargantext.Core.Worker.Env where
 import Control.Concurrent.STM.TVar (TVar, modifyTVar, newTVarIO, readTVarIO)
+import Control.Exception.Safe qualified as CES
 import Control.Lens (prism', to, view)
 import Control.Lens.TH
 import Control.Monad.Trans.Control (MonadBaseControl)
@@ -171,7 +172,10 @@ newtype WorkerMonad a =
            , MonadBase IO
            , MonadBaseControl IO
            , MonadError IOException
-           , MonadFail )
+           , MonadFail
+           , CES.MonadThrow
+           , CES.MonadCatch
+           , CES.MonadMask )
 
 instance HasLogger WorkerMonad where
   newtype instance Logger WorkerMonad =
......
@@ -50,7 +50,6 @@ sendJobWithCfg gcConfig job = do
 
 -- | We want to fine-tune job metadata parameters, for each job type
 updateJobData :: Job -> SendJob -> SendJob
-updateJobData (AddCorpusFormAsync {}) sj = sj { W.timeout = 3000 }
 updateJobData (AddCorpusWithQuery {}) sj = sj { W.timeout = 3000 }
 updateJobData (AddToAnnuaireWithForm {}) sj = sj { W.timeout = 3000 }
 updateJobData (AddWithFile {}) sj = sj { W.timeout = 3000 }
......
@@ -28,7 +28,7 @@ import Gargantext.API.Node.DocumentsFromWriteNodes.Types qualified as DFWN
 import Gargantext.API.Node.DocumentUpload.Types (DocumentUpload)
 import Gargantext.API.Node.FrameCalcUpload.Types (FrameCalcUpload)
 import Gargantext.API.Node.New.Types ( PostNode(..) )
-import Gargantext.API.Node.Types (NewWithFile, NewWithForm, WithQuery(..))
+import Gargantext.API.Node.Types (NewWithFile, NewWithTempFile, WithQuery(..))
 import Gargantext.API.Node.Update.Types (UpdateNodeParams)
 import Gargantext.Core.Types.Individu (User)
 import Gargantext.Database.Admin.Types.Node (AnnuaireId, CorpusId, ListId, NodeId(UnsafeMkNodeId), ParentId)
@@ -101,9 +101,9 @@ data Job =
   | AddContact { _ac_args :: AddContactParams
                , _ac_node_id :: NodeId
               , _ac_user :: User }
-  | AddCorpusFormAsync { _acf_args :: NewWithForm
-                       , _acf_user :: User
-                       , _acf_cid :: CorpusId }
+  | AddCorpusTempFileAsync { _actf_args :: NewWithTempFile
+                           , _actf_user :: User
+                           , _actf_cid :: CorpusId }
   | AddCorpusWithQuery { _acq_args :: WithQuery
                        , _acq_user :: User
                        , _acq_cid :: CorpusId }
@@ -120,7 +120,8 @@ data Job =
                     , _fca_authenticatedUser :: AuthenticatedUser
                     , _fca_node_id :: NodeId }
   | JSONPost { _jp_list_id :: ListId
-             , _jp_ngrams_list :: NgramsList }
+             , _jp_ngrams_oid :: Int }
+             -- , _jp_ngrams_list :: NgramsList }
   | NgramsPostCharts { _npc_node_id :: NodeId
                      , _npc_args :: UpdateTableNgramsCharts }
   | PostNodeAsync { _pna_node_id :: NodeId
@@ -144,11 +145,11 @@ instance FromJSON Job where
           _ac_node_id <- o .: "node_id"
          _ac_user <- o .: "user"
          return $ AddContact { .. }
-        "AddCorpusFormAsync" -> do
-          _acf_args <- o .: "args"
-          _acf_user <- o .: "user"
-          _acf_cid <- o .: "cid"
-          return $ AddCorpusFormAsync { .. }
+        "AddCorpusTempFileAsync" -> do
+          _actf_args <- o .: "args"
+          _actf_user <- o .: "user"
+          _actf_cid <- o .: "cid"
+          return $ AddCorpusTempFileAsync { .. }
         "AddCorpusWithQuery" -> do
          _acq_args <- o .: "args"
         _acq_user <- o .: "user"
@@ -178,7 +179,8 @@ instance FromJSON Job where
           return $ FrameCalcUpload { .. }
         "JSONPost" -> do
           _jp_list_id <- o .: "list_id"
-          _jp_ngrams_list <- o .: "ngrams_list"
+          -- _jp_ngrams_list <- o .: "ngrams_list"
+          _jp_ngrams_oid <- o .: "ngrams_oid"
          return $ JSONPost { .. }
        "NgramsPostCharts" -> do
          _npc_node_id <- o .: "node_id"
@@ -212,11 +214,11 @@ instance ToJSON Job where
           , "args" .= _ac_args
           , "user" .= _ac_user
           , "node_id" .= _ac_node_id ]
-  toJSON (AddCorpusFormAsync { .. }) =
-    object [ "type" .= ("AddCorpusFormAsync" :: Text)
-           , "args" .= _acf_args
-           , "user" .= _acf_user
-           , "cid" .= _acf_cid ]
+  toJSON (AddCorpusTempFileAsync { .. }) =
+    object [ "type" .= ("AddCorpusTempFileAsync" :: Text)
+           , "args" .= _actf_args
+           , "user" .= _actf_user
+           , "cid" .= _actf_cid ]
   toJSON (AddCorpusWithQuery { .. }) =
     object [ "type" .= ("AddCorpusWithQuery" :: Text)
            , "args" .= _acq_args
@@ -247,7 +249,8 @@ instance ToJSON Job where
   toJSON (JSONPost { .. }) =
     object [ "type" .= ("JSONPost" :: Text)
            , "list_id" .= _jp_list_id
-           , "ngrams_list" .= _jp_ngrams_list ]
+           , "ngrams_oid" .= _jp_ngrams_oid ]
+           -- , "ngrams_list" .= _jp_ngrams_list ]
   toJSON (NgramsPostCharts { .. }) =
     object [ "type" .= ("NgramsPostCharts" :: Text)
            , "node_id" .= _npc_node_id
@@ -290,7 +293,7 @@ instance ToJSON Job where
 getWorkerMNodeId :: Job -> Maybe NodeId
 getWorkerMNodeId Ping = Nothing
 getWorkerMNodeId (AddContact { _ac_node_id }) = Just _ac_node_id
-getWorkerMNodeId (AddCorpusFormAsync { _acf_args, _acf_cid }) = Just _acf_cid
+getWorkerMNodeId (AddCorpusTempFileAsync { _actf_cid }) = Just _actf_cid
 getWorkerMNodeId (AddCorpusWithQuery { _acq_args = WithQuery { _wq_node_id }}) = Just $ UnsafeMkNodeId _wq_node_id
 getWorkerMNodeId (AddToAnnuaireWithForm { _aawf_annuaire_id }) = Just _aawf_annuaire_id
 getWorkerMNodeId (AddWithFile { _awf_node_id }) = Just _awf_node_id
......
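With the payload change above, a JSONPost job no longer embeds the whole NgramsList; it only carries the oid of a PostgreSQL large object that holds the serialized list. Below is a minimal producer-side sketch of how such a job could be enqueued. It is an illustration, not code from this commit: the `sendJob` helper and the surrounding monad are assumptions, while `createLargeObject` is the helper introduced further down in this merge request.

-- Hypothetical enqueue helper (sketch): serialize the ngrams list, store it
-- as a large object, and put only the resulting oid into the job payload.
-- Assumed qualified imports: Data.Aeson as JSON, Data.ByteString.Lazy as BSL,
-- Database.PostgreSQL.Simple.LargeObjects as PSQL.
enqueueJSONPost listId ngramsList = do
  PSQL.Oid rawOid <- createLargeObject (BSL.toStrict $ JSON.encode ngramsList)
  sendJob $ JSONPost { _jp_list_id    = listId
                     , _jp_ngrams_oid = fromIntegral rawOid }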
...@@ -418,7 +418,6 @@ insertMasterDocs ncs c lang hs = do ...@@ -418,7 +418,6 @@ insertMasterDocs ncs c lang hs = do
-- add documents to the corpus (create node_node link) -- add documents to the corpus (create node_node link)
-- this will enable global database monitoring -- this will enable global database monitoring
-- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
mapNgramsDocs' :: HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId (TermsWeight, TermsCount))) mapNgramsDocs' :: HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
<- mapNodeIdNgrams <- mapNodeIdNgrams
<$> documentIdWithNgrams <$> documentIdWithNgrams
...@@ -489,4 +488,3 @@ reIndexWith cId lId nt lts = do ...@@ -489,4 +488,3 @@ reIndexWith cId lId nt lts = do
-- Saving the indexation in database -- Saving the indexation in database
mapM_ (saveDocNgramsWith lId . ngramsByDoc corpusLang nt ts) docs mapM_ (saveDocNgramsWith lId . ngramsByDoc corpusLang nt ts) docs
...@@ -9,6 +9,7 @@ Portability : POSIX ...@@ -9,6 +9,7 @@ Portability : POSIX
-} -}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE LambdaCase #-} {-# LANGUAGE LambdaCase #-}
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ScopedTypeVariables #-}
...@@ -51,15 +52,20 @@ module Gargantext.Database.Prelude ...@@ -51,15 +52,20 @@ module Gargantext.Database.Prelude
, fromField' , fromField'
, mkCmd , mkCmd
, restrictMaybe , restrictMaybe
, createLargeObject
, readLargeObject
, readLargeObjectViaTempFile
, removeLargeObject
) )
where where
import Control.Exception.Safe (throw) import Control.Exception.Safe qualified as CES
import Control.Lens (Getter, view) import Control.Lens (Getter, view)
import Control.Monad.Random ( MonadRandom ) import Control.Monad.Random ( MonadRandom )
import Control.Monad.Trans.Control (MonadBaseControl) import Control.Monad.Trans.Control (MonadBaseControl)
import Data.Aeson (Result(..)) import Data.Aeson (Result(..))
import Data.ByteString qualified as DB import Data.ByteString qualified as BS
import Data.ByteString.Lazy qualified as BSL
import Data.List qualified as DL import Data.List qualified as DL
import Data.Pool (Pool, withResource) import Data.Pool (Pool, withResource)
import Data.Profunctor.Product.Default (Default) import Data.Profunctor.Product.Default (Default)
...@@ -67,6 +73,7 @@ import Database.PostgreSQL.Simple (Connection) ...@@ -67,6 +73,7 @@ import Database.PostgreSQL.Simple (Connection)
import Database.PostgreSQL.Simple qualified as PGS import Database.PostgreSQL.Simple qualified as PGS
import Database.PostgreSQL.Simple.FromField ( Conversion, ResultError(ConversionFailed), fromField, returnError) import Database.PostgreSQL.Simple.FromField ( Conversion, ResultError(ConversionFailed), fromField, returnError)
import Database.PostgreSQL.Simple.Internal (Field) import Database.PostgreSQL.Simple.Internal (Field)
import Database.PostgreSQL.Simple.LargeObjects qualified as PSQL
import Database.PostgreSQL.Simple.Types (Query(..)) import Database.PostgreSQL.Simple.Types (Query(..))
import Gargantext.Core.Notifications.CentralExchange.Types qualified as CET import Gargantext.Core.Notifications.CentralExchange.Types qualified as CET
import Gargantext.Core.Config (HasConfig(..)) import Gargantext.Core.Config (HasConfig(..))
...@@ -78,6 +85,8 @@ import Opaleye.Aggregate (countRows) ...@@ -78,6 +85,8 @@ import Opaleye.Aggregate (countRows)
import Opaleye.Internal.Constant qualified import Opaleye.Internal.Constant qualified
import Opaleye.Internal.Operators qualified import Opaleye.Internal.Operators qualified
import Shelly qualified as SH import Shelly qualified as SH
import System.Directory (removeFile)
import System.IO.Temp (emptySystemTempFile)
-- $typesAndConstraints -- $typesAndConstraints
...@@ -199,7 +208,7 @@ runCountOpaQuery q = do ...@@ -199,7 +208,7 @@ runCountOpaQuery q = do
-- countRows is guaranteed to return a list with exactly one row so DL.head is safe here -- countRows is guaranteed to return a list with exactly one row so DL.head is safe here
pure $ fromInt64ToInt $ DL.head counts pure $ fromInt64ToInt $ DL.head counts
formatPGSQuery :: PGS.ToRow a => PGS.Query -> a -> DBCmd err DB.ByteString formatPGSQuery :: PGS.ToRow a => PGS.Query -> a -> DBCmd err BS.ByteString
formatPGSQuery q a = mkCmd $ \conn -> PGS.formatQuery conn q a formatPGSQuery q a = mkCmd $ \conn -> PGS.formatQuery conn q a
runPGSQuery :: ( PGS.FromRow r, PGS.ToRow q ) runPGSQuery :: ( PGS.FromRow r, PGS.ToRow q )
...@@ -209,7 +218,7 @@ runPGSQuery q a = mkCmd $ \conn -> catch (PGS.query conn q a) (printError conn) ...@@ -209,7 +218,7 @@ runPGSQuery q a = mkCmd $ \conn -> catch (PGS.query conn q a) (printError conn)
printError c (SomeException e) = do printError c (SomeException e) = do
q' <- PGS.formatQuery c q a q' <- PGS.formatQuery c q a
hPutStrLn stderr q' hPutStrLn stderr q'
throw (SomeException e) CES.throw (SomeException e)
-- | TODO catch error -- | TODO catch error
runPGSQuery_ :: ( PGS.FromRow r ) runPGSQuery_ :: ( PGS.FromRow r )
...@@ -218,13 +227,13 @@ runPGSQuery_ q = mkCmd $ \conn -> catch (PGS.query_ conn q) printError ...@@ -218,13 +227,13 @@ runPGSQuery_ q = mkCmd $ \conn -> catch (PGS.query_ conn q) printError
where where
printError (SomeException e) = do printError (SomeException e) = do
hPutStrLn stderr (fromQuery q) hPutStrLn stderr (fromQuery q)
throw (SomeException e) CES.throw (SomeException e)
execPGSQuery :: PGS.ToRow a => PGS.Query -> a -> DBCmd err Int64 execPGSQuery :: PGS.ToRow a => PGS.Query -> a -> DBCmd err Int64
execPGSQuery q a = mkCmd $ \conn -> PGS.execute conn q a execPGSQuery q a = mkCmd $ \conn -> PGS.execute conn q a
fromField' :: (Typeable b, FromJSON b) => Field -> Maybe DB.ByteString -> Conversion b fromField' :: (Typeable b, FromJSON b) => Field -> Maybe BS.ByteString -> Conversion b
fromField' field mb = do fromField' field mb = do
v <- fromField field mb v <- fromField field mb
valueToHyperdata v valueToHyperdata v
...@@ -263,3 +272,60 @@ createDBIfNotExists connStr dbName = do ...@@ -263,3 +272,60 @@ createDBIfNotExists connStr dbName = do
(result,) <$> SH.lastExitCode (result,) <$> SH.lastExitCode
return () return ()
------------------------------
-- PostgreSQL Large Object functionality
-- https://www.postgresql.org/docs/17/largeobjects.html
-- NOTE: During development of this feature, I had problems (in tests)
-- with a hanging transaction. After debugging, it turned out this
-- was, for some reason, conflicting with our `logLocM` (though I'm no
-- sure why). Please be careful when adding debug info to large
-- objects and if you do, make sure the tests run.
createLargeObject :: BS.ByteString -> DBCmd err PSQL.Oid
createLargeObject bs = mkCmd $ \c -> PGS.withTransaction c $ do
oId <- PSQL.loCreat c
loFd <- PSQL.loOpen c oId PSQL.WriteMode
_ <- PSQL.loWrite c loFd bs
PSQL.loClose c loFd
pure oId
-- | Read a large object directly, given an oid. We read it in a
-- single transaction, looping over it in chunks of a given size
readLargeObject :: PSQL.Oid -> DBCmd err BS.ByteString
readLargeObject oId = mkCmd $ \c -> PGS.withTransaction c $ do
loFd <- PSQL.loOpen c oId PSQL.ReadMode
let chunkSize = 1024
let readChunks tell = do
c' <- PSQL.loRead c loFd chunkSize
tell' <- PSQL.loTell c loFd
if tell == tell' then
pure ([c'], tell)
else do
(cs', tell'') <- readChunks tell'
pure (c':cs', tell'')
(chunks, _size) <- readChunks 0
let s = force BSL.toStrict $ BSL.fromChunks chunks
PSQL.loClose c loFd
pure s
-- | Read a large object by exporting it to a temporary file, then
-- reading that file. The difference from 'readLargeObject' is that we
-- make only one call inside the transaction
readLargeObjectViaTempFile :: (CES.MonadMask m, IsDBCmd env err m)
=> PSQL.Oid -> m BS.ByteString
readLargeObjectViaTempFile oId = do
CES.bracket (liftBase $ emptySystemTempFile "large-object")
(liftBase . removeFile)
(\fp -> do
mkCmd $ \c -> withTransaction c $ \_ -> PSQL.loExport c oId fp
!contents <- liftBase $ BS.readFile fp
pure contents)
where
withTransaction c = CES.bracket (PGS.begin c) (\_ -> PGS.rollback c)
removeLargeObject :: Int -> DBCmd err ()
removeLargeObject oId = mkCmd $ \c -> do
PSQL.loUnlink c $ PSQL.Oid $ fromIntegral oId
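Taken together, the helpers above give a create/read/unlink lifecycle for large objects. The following is a hedged usage sketch, not code from the commit; it assumes `DBCmd` specialises to the same constraint set that 'readLargeObjectViaTempFile' already requires.

-- Sketch: store a ByteString as a large object, read it back via the
-- temp-file variant, then unlink it.
largeObjectRoundTrip :: (CES.MonadMask m, IsDBCmd env err m)
                     => BS.ByteString -> m BS.ByteString
largeObjectRoundTrip payload = do
  -- store the payload and keep the returned oid
  oId@(PSQL.Oid rawOid) <- createLargeObject payload
  -- read it back (only one call inside a transaction)
  contents <- readLargeObjectViaTempFile oId
  -- unlink it again; 'removeLargeObject' takes the oid as a plain Int
  removeLargeObject (fromIntegral rawOid)
  pure contents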
...@@ -355,7 +355,7 @@ flags: ...@@ -355,7 +355,7 @@ flags:
gargantext: gargantext:
"enable-benchmarks": false "enable-benchmarks": false
"no-phylo-debug-logs": true "no-phylo-debug-logs": true
"test-crypto": true "test-crypto": false
graphviz: graphviz:
"test-parsing": false "test-parsing": false
hashable: hashable:
......
...@@ -27,9 +27,8 @@ module Test.API.UpdateList ( ...@@ -27,9 +27,8 @@ module Test.API.UpdateList (
import Control.Lens (mapped, over) import Control.Lens (mapped, over)
import Control.Monad.Fail (fail) import Control.Monad.Fail (fail)
import Data.Aeson.QQ
import Data.Aeson qualified as JSON import Data.Aeson qualified as JSON
import Data.ByteString.Lazy qualified as BSL import Data.Aeson.QQ
import Data.Map.Strict.Patch qualified as PM import Data.Map.Strict.Patch qualified as PM
import Data.Map.Strict qualified as Map import Data.Map.Strict qualified as Map
import Data.Set qualified as Set import Data.Set qualified as Set
...@@ -46,7 +45,7 @@ import Gargantext.API.Ngrams.Types ...@@ -46,7 +45,7 @@ import Gargantext.API.Ngrams.Types
import Gargantext.API.Node.Corpus.New.Types qualified as FType import Gargantext.API.Node.Corpus.New.Types qualified as FType
import Gargantext.API.Node.Types import Gargantext.API.Node.Types
import Gargantext.API.Routes.Named import Gargantext.API.Routes.Named
import Gargantext.API.Routes.Named.Corpus import Gargantext.API.Routes.Named.Corpus (addWithTempFileEp)
import Gargantext.API.Routes.Named.Node import Gargantext.API.Routes.Named.Node
import Gargantext.API.Routes.Named.Private import Gargantext.API.Routes.Named.Private
import Gargantext.API.Worker (workerAPIPost) import Gargantext.API.Worker (workerAPIPost)
...@@ -89,12 +88,12 @@ uploadJSONList log_cfg port token cId pathToNgrams clientEnv = do ...@@ -89,12 +88,12 @@ uploadJSONList log_cfg port token cId pathToNgrams clientEnv = do
([listId] :: [NodeId]) <- protectedJSON token "POST" (mkUrl port ("/node/" <> build cId)) [aesonQQ|{"pn_typename":"NodeList","pn_name":"Testing"}|] ([listId] :: [NodeId]) <- protectedJSON token "POST" (mkUrl port ("/node/" <> build cId)) [aesonQQ|{"pn_typename":"NodeList","pn_name":"Testing"}|]
-- Upload the JSON doc -- Upload the JSON doc
simpleNgrams' <- liftIO (TIO.readFile =<< getDataFileName pathToNgrams) simpleNgrams' <- liftIO (TIO.readFile =<< getDataFileName pathToNgrams)
let (Just simpleNgrams) = JSON.decode $ BSL.fromStrict $ encodeUtf8 simpleNgrams' -- let (Just simpleNgrams) = JSON.decode $ BSL.fromStrict $ encodeUtf8 simpleNgrams'
-- let jsonFileFormData = [ (T.pack "_wjf_data", simpleNgrams) -- let jsonFileFormData = [ (T.pack "_wjf_data", simpleNgrams)
-- , ("_wjf_filetype", "JSON") -- , ("_wjf_filetype", "JSON")
-- , ("_wjf_name", "simple_ngrams.json") -- , ("_wjf_name", "simple_ngrams.json")
-- ] -- ]
let params = WithJsonFile { _wjf_data = simpleNgrams let params = WithJsonFile { _wjf_data = simpleNgrams'
, _wjf_name = "simple_ngrams.json" } , _wjf_name = "simple_ngrams.json" }
-- let url = "/lists/" +|listId|+ "/add/form/async" -- let url = "/lists/" +|listId|+ "/add/form/async"
-- let mkPollUrl j = "/corpus/" +|listId|+ "/add/form/async/" +|_jph_id j|+ "/poll?limit=1" -- let mkPollUrl j = "/corpus/" +|listId|+ "/add/form/async/" +|_jph_id j|+ "/poll?limit=1"
...@@ -401,8 +400,8 @@ add_file_async (toServantToken -> token) corpusId nwf = ...@@ -401,8 +400,8 @@ add_file_async (toServantToken -> token) corpusId nwf =
& gargPrivateAPI & gargPrivateAPI
& mkPrivateAPI & mkPrivateAPI
& ($ token) & ($ token)
& addWithFormAPI & addWithTempFile
& addWithFormEp & addWithTempFileEp
& ($ corpusId) & ($ corpusId)
& workerAPIPost & workerAPIPost
& (\submitForm -> submitForm nwf) & (\submitForm -> submitForm nwf)
......
...@@ -39,7 +39,7 @@ import Gargantext.API.Node.Get (GetNodeParams) ...@@ -39,7 +39,7 @@ import Gargantext.API.Node.Get (GetNodeParams)
import Gargantext.API.Node.New.Types (PostNode(..)) import Gargantext.API.Node.New.Types (PostNode(..))
import Gargantext.API.Node.Share.Types (ShareNodeParams(..)) import Gargantext.API.Node.Share.Types (ShareNodeParams(..))
import Gargantext.API.Node.Update.Types qualified as NU import Gargantext.API.Node.Update.Types qualified as NU
import Gargantext.API.Node.Types (NewWithForm, RenameNode(..), WithQuery) import Gargantext.API.Node.Types (NewWithForm, NewWithTempFile(..), RenameNode(..), WithQuery)
import Gargantext.API.Public.Types (PublicData(..)) import Gargantext.API.Public.Types (PublicData(..))
import Gargantext.API.Routes.Named.Publish (PublishRequest(..)) import Gargantext.API.Routes.Named.Publish (PublishRequest(..))
import Gargantext.API.Search.Types (SearchQuery(..), SearchResult(..), SearchResultTypes(..), SearchType(..)) import Gargantext.API.Search.Types (SearchQuery(..), SearchResult(..), SearchResultTypes(..), SearchType(..))
...@@ -572,6 +572,14 @@ genFrontendErr be = do ...@@ -572,6 +572,14 @@ genFrontendErr be = do
pure $ Errors.mkFrontendErr' txt $ Errors.FE_job_generic_exception err pure $ Errors.mkFrontendErr' txt $ Errors.FE_job_generic_exception err
instance Arbitrary NewWithTempFile where
arbitrary = NewWithTempFile <$> arbitrary -- _wtf_filetype
<*> arbitrary -- _wtf_fileformat
<*> arbitrary -- _wtf_file_oid
<*> arbitrary -- _wtf_lang
<*> arbitrary -- _wtf_name
<*> arbitrary -- _wtf_selection
instance Arbitrary Job where instance Arbitrary Job where
arbitrary = oneof [ pure Ping arbitrary = oneof [ pure Ping
...@@ -591,7 +599,7 @@ instance Arbitrary Job where ...@@ -591,7 +599,7 @@ instance Arbitrary Job where
, uploadDocumentGen ] , uploadDocumentGen ]
where where
addContactGen = AddContact <$> arbitrary <*> arbitrary <*> arbitrary addContactGen = AddContact <$> arbitrary <*> arbitrary <*> arbitrary
addCorpusFormAsyncGen = AddCorpusFormAsync <$> arbitrary <*> arbitrary <*> arbitrary addCorpusFormAsyncGen = AddCorpusTempFileAsync <$> arbitrary <*> arbitrary <*> arbitrary
addCorpusWithQueryGen = AddCorpusWithQuery <$> arbitrary <*> arbitrary <*> arbitrary addCorpusWithQueryGen = AddCorpusWithQuery <$> arbitrary <*> arbitrary <*> arbitrary
-- addWithFileGen = AddWithFile <$> arbitrary <*> arbitrary <*> arbitrary -- addWithFileGen = AddWithFile <$> arbitrary <*> arbitrary <*> arbitrary
addToAnnuaireWithFormGen = AddToAnnuaireWithForm <$> arbitrary <*> arbitrary addToAnnuaireWithFormGen = AddToAnnuaireWithForm <$> arbitrary <*> arbitrary
......
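A hedged example of how Arbitrary instances like the one above are typically exercised: a JSON round-trip property for Job, pairing the generator with the To/FromJSON instances changed earlier in this merge request. The `Eq` and `Show` instances on Job, and the wiring of the property into a test tree, are assumptions made only for the sake of the sketch.

import Data.Aeson qualified as JSON
import Test.QuickCheck

-- Round-trip: encoding a randomly generated Job and decoding it again
-- should give back the original value.
prop_job_json_roundtrip :: Job -> Property
prop_job_json_roundtrip j = JSON.decode (JSON.encode j) === Just j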
{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} {-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
{-# LANGUAGE TypeApplications #-}
module Test.Ngrams.Terms (tests) where module Test.Ngrams.Terms (tests) where
import Data.HashMap.Strict qualified as HashMap import Data.HashMap.Strict qualified as HashMap
......