Verified Commit 3e49fe87 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

Merge branch 'dev' into 362-dev-sqlite

parents 613b47b7 8b9cf512
Pipeline #7506 passed with stages
in 44 minutes and 38 seconds
......@@ -42,31 +42,33 @@ workerCLI (CLIW_run (WorkerArgs { .. })) = do
$ List.take 72
$ List.cycle ["_"]) :: Prelude.String)
___
putText "GarganText worker"
putText $ "worker_name: " <> worker_name
putText $ "worker toml: " <> T.pack (_SettingsFile worker_toml)
___
withWorkerEnv worker_toml $ \env -> do
let ws = env ^. hasConfig . gc_worker
case findDefinitionByName ws worker_name of
Nothing -> do
let workerNames = _wdName <$> (_wsDefinitions ws)
let availableWorkers = T.intercalate ", " workerNames
putText $ "Worker definition not found! Available workers: " <> availableWorkers
Just wd -> do
putText $ "Starting worker '" <> worker_name <> "'"
putText $ "gc config: " <> show (env ^. hasConfig)
putText $ "Worker settings: " <> show ws
___
if worker_run_single then
withPGMQWorkerSingleCtrlC env wd $ \a _state -> do
wait a
else
withPGMQWorkerCtrlC env wd $ \a _state -> do
-- _ <- runReaderT (sendJob Ping) env
wait a
let log_cfg = env ^. hasConfig . gc_logging
withLogger log_cfg $ \ioLogger -> do
___
logMsg ioLogger INFO "GarganText worker"
logMsg ioLogger INFO $ "worker_name: " <> T.unpack worker_name
logMsg ioLogger INFO $ "worker toml: " <> _SettingsFile worker_toml
___
let ws = env ^. hasConfig . gc_worker
case findDefinitionByName ws worker_name of
Nothing -> do
let workerNames = _wdName <$> (_wsDefinitions ws)
let availableWorkers = T.intercalate ", " workerNames
putText $ "Worker definition not found! Available workers: " <> availableWorkers
Just wd -> do
logMsg ioLogger INFO $ "Starting worker '" <> T.unpack worker_name <> "'"
logMsg ioLogger DEBUG $ "gc config: " <> show (env ^. hasConfig)
logMsg ioLogger DEBUG $ "Worker settings: " <> show ws
___
if worker_run_single then
withPGMQWorkerSingleCtrlC env wd $ \a _state -> do
wait a
else
withPGMQWorkerCtrlC env wd $ \a _state -> do
-- _ <- runReaderT (sendJob Ping) env
wait a
workerCLI (CLIW_runAll (WorkerAllArgs { .. })) = withWorkerEnv worker_toml $ \env -> do
let log_cfg = env ^. hasConfig . gc_logging
withLogger log_cfg $ \ioLogger -> runAllWorkers ioLogger worker_toml
......
......@@ -17,7 +17,7 @@ fi
# `cabal.project.freeze`. This ensures the files stay deterministic so that CI
# cache can kick in.
expected_cabal_project_hash="963418e37a17d4bb67d4b885613144b36d290f612eea80355e82abc7e76b450c"
expected_cabal_project_freeze_hash="cd52143d3a9d285360b59c6371d3e258552c1bc115bd612024db3de1f7593ff7"
expected_cabal_project_freeze_hash="cd52143d3a9d285360b59c6371d3e258552c1bc115bd612024db3de1f7593ff7"
cabal --store-dir=$STORE_DIR v2-build --dry-run
......
......@@ -17,6 +17,7 @@ Portability : POSIX
module Gargantext.API.Ngrams.List
where
import Data.Aeson qualified as Aeson
import Data.ByteString.Lazy qualified as BSL
import Data.Csv qualified as Tsv
import Data.HashMap.Strict (HashMap)
......@@ -25,8 +26,10 @@ import Data.Map.Strict (toList)
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Text (concat, pack, splitOn)
import Data.Vector (Vector)
import Data.Text.Encoding qualified as TE
import Data.Vector qualified as Vec
import Data.Vector (Vector)
import Database.PostgreSQL.Simple.LargeObjects qualified as PSQL
import Gargantext.API.Admin.EnvTypes (Env)
import Gargantext.API.Errors.Types (BackendInternalError(InternalServerError))
import Gargantext.API.Ngrams (setListNgrams)
......@@ -35,19 +38,20 @@ import Gargantext.API.Ngrams.Prelude (getNgramsList)
import Gargantext.API.Ngrams.Types
import Gargantext.API.Prelude (GargM, serverError, HasServerError)
import Gargantext.API.Routes.Named.List qualified as Named
import Gargantext.API.Worker (serveWorkerAPI, serveWorkerAPIM)
import Gargantext.API.Worker (serveWorkerAPIM)
import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import Gargantext.Core.Text.Ngrams (Ngrams, NgramsType(NgramsTerms))
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Worker.Jobs.Types qualified as Jobs
import Gargantext.Database.Action.Flow (reIndexWith)
import Gargantext.Database.Admin.Types.Node ( NodeId(_NodeId), ListId )
import Gargantext.Database.Prelude (createLargeObject)
import Gargantext.Database.Query.Table.Node (getNode)
import Gargantext.Database.Schema.Ngrams ( text2ngrams, NgramsId )
import Gargantext.Database.Schema.Node (_node_parent_id)
import Gargantext.Database.Types (Indexed(..))
import Gargantext.Prelude hiding (concat, toList)
import Gargantext.System.Logging
import Gargantext.System.Logging (logLocM, LogLevel(..), MonadLogger)
import Gargantext.Utils.Jobs.Monad (MonadJobStatus(..))
import Prelude qualified
import Protolude qualified as P
......@@ -110,9 +114,11 @@ getTsv lId = do
------------------------------------------------------------------------
jsonPostAsync :: Named.JSONAPI (AsServerT (GargM Env BackendInternalError))
jsonPostAsync = Named.JSONAPI {
updateListJSONEp = \lId -> serveWorkerAPI $ \p ->
Jobs.JSONPost { _jp_list_id = lId
, _jp_ngrams_list = _wjf_data p }
updateListJSONEp = \lId -> serveWorkerAPIM $ \p -> do
(PSQL.Oid oId) <- createLargeObject $ TE.encodeUtf8 $ _wjf_data p
pure $ Jobs.JSONPost { _jp_list_id = lId
, _jp_ngrams_oid = fromIntegral oId }
-- , _jp_ngrams_list = _wjf_data p }
}
------------------------------------------------------------------------
......@@ -162,8 +168,10 @@ tsvPostAsync =
$(logLocM) DEBUG $ "Started to upload " <> (_wtf_name p)
case ngramsListFromTSVData (_wtf_data p) of
Left err -> throwError $ InternalServerError $ err500 { errReasonPhrase = err }
Right ngramsList -> pure $ Jobs.JSONPost { _jp_list_id = lId
, _jp_ngrams_list = ngramsList }
Right ngramsList -> do
(PSQL.Oid oId) <- createLargeObject $ BSL.toStrict $ Aeson.encode ngramsList
pure $ Jobs.JSONPost { _jp_list_id = lId
, _jp_ngrams_oid = fromIntegral oId }
}
-- | Tries converting a text file into an 'NgramList', so that we can reuse the
......
......@@ -9,15 +9,13 @@ Portability : POSIX
-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Ngrams.List.Types where
import Data.Aeson
import Data.ByteString.Lazy qualified as BSL
-- import Data.ByteString.Lazy qualified as BSL
import Data.Swagger (ToSchema, declareNamedSchema, genericDeclareNamedSchema)
import Data.Text
import Data.Text.Encoding qualified as E
-- import Data.Text.Encoding qualified as E
import Gargantext.API.Ngrams.Types (NgramsList)
import Gargantext.API.Node.Corpus.New.Types (FileType(..))
import Gargantext.Core.Utils.Aeson (jsonOptions)
......@@ -46,16 +44,18 @@ instance ToSchema WithFile where
------------------------------------------------------------------------
data WithJsonFile = WithJsonFile
{ _wjf_data :: !NgramsList
{ -- _wjf_data :: !NgramsList
_wjf_data :: !Text
, _wjf_name :: !Text
} deriving (Eq, Show, Generic)
instance FromForm WithJsonFile where
fromForm f = do
d' <- parseUnique "_wjf_data" f
d <- case eitherDecode' (BSL.fromStrict $ E.encodeUtf8 d') of
Left s -> Left $ pack s
Right v -> Right v
d <- parseUnique "_wjf_data" f
-- d' <- parseUnique "_wjf_data" f
-- d <- case eitherDecode' (BSL.fromStrict $ E.encodeUtf8 d') of
-- Left s -> Left $ pack s
-- Right v -> Right v
n <- parseUnique "_wjf_name" f
pure $ WithJsonFile { _wjf_data = d
, _wjf_name = n }
......
......@@ -270,7 +270,7 @@ genericNodeAPI' _ authenticatedUser targetNode = Named.NodeAPI
, moveAPI = Named.MoveAPI $ \parentId ->
withPolicy authenticatedUser (moveChecks (SourceId targetNode) (TargetId parentId)) $
moveNode loggedInUserId targetNode parentId
, fileAPI = Named.FileAPI $ fileApi targetNode
, fileAPI = Named.FileAPI { fileDownloadEp = fileApi targetNode }
, fileAsyncAPI = fileAsyncApi authenticatedUser targetNode
, dfwnAPI = DFWN.api authenticatedUser targetNode
, documentUploadAPI = DocumentUpload.api targetNode
......
......@@ -21,15 +21,15 @@ module Gargantext.API.Node.Corpus.New
import Conduit ((.|), yieldMany, mapMC, mapC, transPipe)
import Control.Exception.Safe (MonadMask)
import Control.Lens ( view, non )
import Data.ByteString.Base64 qualified as BSB64
import Data.Conduit.Internal (zipSources)
import Data.Swagger ( ToSchema(..) )
import Data.Text.Encoding qualified as TE
import Data.Text qualified as T
import Database.PostgreSQL.Simple.LargeObjects qualified as PSQL
import Gargantext.API.Admin.Orchestrator.Types qualified as API
import Gargantext.API.Ngrams (commitStatePatch, Versioned(..))
import Gargantext.API.Node.Corpus.New.Types ( FileFormat(..), FileType(..) )
import Gargantext.API.Node.Corpus.New.Types ( FileType(..) )
import Gargantext.API.Node.Corpus.Searx ( triggerSearxSearch )
import Gargantext.API.Node.Corpus.Types ( Datafield(Web), datafield2origin )
import Gargantext.API.Node.Corpus.Update (addLanguageToCorpus)
......@@ -51,7 +51,7 @@ import Gargantext.Database.Admin.Types.Hyperdata.Document ( ToHyperdataDocument(
import Gargantext.Database.Admin.Types.Hyperdata.File ( HyperdataFile(..) )
import Gargantext.Database.Admin.Types.Node (CorpusId, NodeType(..), ParentId)
import Gargantext.Database.GargDB qualified as GargDB
import Gargantext.Database.Prelude (IsDBCmd)
import Gargantext.Database.Prelude (readLargeObject, IsDBCmd)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node (getNodeWith, getOrMkList)
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
......@@ -214,28 +214,29 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
$(logLocM) ERROR $ "[addToCorpusWithQuery] error: " <> show err -- log the full error
markFailed (Just err) jobHandle
addToCorpusWithForm :: ( FlowCmdM env err m
, MonadJobStatus m
, HasNodeStoryImmediateSaver env
, HasNodeArchiveStoryImmediateSaver env
)
=> User
-> CorpusId
-> NewWithForm
-> JobHandle m
-> m ()
addToCorpusWithForm user cid nwf jobHandle = do
addToCorpusWithTempFile :: ( MonadMask m
, FlowCmdM env err m
, MonadJobStatus m
, HasNodeStoryImmediateSaver env
, HasNodeArchiveStoryImmediateSaver env
)
=> User
-> CorpusId
-> NewWithTempFile
-> JobHandle m
-> m ()
addToCorpusWithTempFile user cid nwtf jobHandle = do
-- printDebug "[addToCorpusWithForm] Parsing corpus: " cid
-- printDebug "[addToCorpusWithForm] fileType" ft
-- printDebug "[addToCorpusWithForm] fileFormat" ff
let l = nwf ^. wf_lang . non defaultLanguage
let l = nwtf ^. wtf_lang . non defaultLanguage
addLanguageToCorpus cid l
limit' <- view $ hasConfig . gc_jobs . jc_max_docs_parsers
let limit = fromIntegral limit' :: Integer
let
parseC = case (nwf ^. wf_filetype) of
parseC = case nwtf ^. wtf_filetype of
TSV -> Parser.parseFormatC Parser.TsvGargV3
TSV_HAL -> Parser.parseFormatC Parser.TsvHal
Iramuteq -> Parser.parseFormatC Parser.Iramuteq
......@@ -245,12 +246,11 @@ addToCorpusWithForm user cid nwf jobHandle = do
WOS -> Parser.parseFormatC Parser.WOS
-- TODO granularity of the logStatus
let data' = case (nwf ^. wf_fileformat) of
Plain -> cs (nwf ^. wf_data)
ZIP -> case BSB64.decode $ TE.encodeUtf8 (nwf ^. wf_data) of
Left err -> panicTrace $ T.pack "[addToCorpusWithForm] error decoding base64: " <> T.pack err
Right decoded -> decoded
eDocsC <- liftBase $ parseC (nwf ^. wf_fileformat) data'
let oId = PSQL.Oid $ fromIntegral $ nwtf ^. wtf_file_oid
data' <- readLargeObject oId
-- $(logLocM) DEBUG $ "[addToCorpusWithTempFile] size: " <> show size
-- $(logLocM) DEBUG $ "[addToCorpusWithTempFile] data': " <> TE.decodeUtf8 data'
eDocsC <- liftBase $ parseC (nwtf ^. wtf_fileformat) data'
case eDocsC of
Right (count, docsC) -> do
-- TODO Add progress (jobStatus) update for docs - this is a
......@@ -260,7 +260,7 @@ addToCorpusWithForm user cid nwf jobHandle = do
.| mapMC (\(idx, doc) ->
if idx > limit then do
--printDebug "[addToCorpusWithForm] number of docs exceeds the limit" (show limit)
let panicMsg' = [ "[addToCorpusWithForm] number of docs "
let panicMsg' = [ "[addToCorpusWithTempFile] number of docs "
, "exceeds the MAX_DOCS_PARSERS limit ("
, show limit
, ")" ]
......@@ -280,7 +280,7 @@ addToCorpusWithForm user cid nwf jobHandle = do
_cid' <- flowCorpus (MkCorpusUserNormalCorpusIds user [cid])
(Multi l)
(Just (nwf ^. wf_selection))
(Just (nwtf ^. wtf_selection))
--(Just $ fromIntegral $ length docs, docsC')
(count, transPipe liftBase docsC') -- TODO fix number of docs
--(map (map toHyperdataDocument) docs)
......@@ -295,7 +295,7 @@ addToCorpusWithForm user cid nwf jobHandle = do
markComplete jobHandle
Left parseErr -> do
$(logLocM) ERROR $ "[addToCorpusWithForm] parse error: " <> (Parser._ParseFormatError parseErr)
$(logLocM) ERROR $ "[addToCorpusWithTempFile] parse error: " <> Parser._ParseFormatError parseErr
markFailed (Just parseErr) jobHandle
{-
......
......@@ -11,7 +11,6 @@ Portability : POSIX
{-# OPTIONS_GHC -fno-warn-unused-matches #-}
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE IncoherentInstances #-}
module Gargantext.API.Node.File where
......@@ -60,9 +59,7 @@ fileDownload nId = do
Contents c <- GargDB.readGargFile $ T.unpack path
let (mMime, _) = DMT.guessType DMT.defaultmtd False $ T.unpack name'
mime = case mMime of
Just m -> m
Nothing -> "text/plain"
mime = fromMaybe "text/plain" mMime
pure $ addHeader (T.pack mime) $ BSResponse c
......@@ -118,3 +115,4 @@ addWithFile authenticatedUser nId nwf@(NewWithFile _d _l fName) jobHandle = do
markComplete jobHandle
where
userId = authenticatedUser ^. auth_user_id
{-|
Module : Gargantext.API.Node.File.Types
Description :
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.API.Node.File.Types where
......@@ -10,27 +19,34 @@ import Gargantext.Prelude
import Network.HTTP.Media qualified as M
import Servant
data RESPONSE deriving Typeable
instance Accept RESPONSE where
contentType _ = "text" M.// "*"
instance MimeRender RESPONSE BSResponse where
mimeRender _ (BSResponse val) = BSL.fromStrict $ val
instance MimeUnrender RESPONSE BSResponse where
mimeUnrender _ lbs = Right $ BSResponse (BSL.toStrict lbs)
newtype Contents = Contents BS.ByteString
instance GargDB.ReadFile Contents where
readFile' fp = do
c <- BS.readFile fp
pure $ Contents c
instance ToSchema Contents where
declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
instance MimeUnrender OctetStream Contents where
mimeUnrender _ lbs = Right $ Contents (BSL.toStrict lbs)
newtype BSResponse = BSResponse BS.ByteString
deriving (Generic)
instance ToSchema BSResponse where
declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
-- | Temporary file, held in database, return it's OID
newtype DBTempFile = DBTempFile Int
deriving (Generic, ToJSON)
instance ToSchema DBTempFile where
declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
......@@ -14,16 +14,17 @@ Portability : POSIX
module Gargantext.API.Node.FrameCalcUpload where
import Control.Exception.Safe (MonadMask)
import Data.ByteString.Lazy qualified as BSL
import Data.ByteString.UTF8 qualified as BSU8
import Data.Text qualified as T
import Database.PostgreSQL.Simple.LargeObjects qualified as PSQL
import Gargantext.API.Admin.Auth.Types ( auth_node_id, AuthenticatedUser )
import Gargantext.API.Admin.EnvTypes (Env)
import Gargantext.API.Errors.Types ( BackendInternalError )
import Gargantext.API.Node.Corpus.New (addToCorpusWithForm)
import Gargantext.API.Node.Corpus.New (addToCorpusWithTempFile)
import Gargantext.API.Node.Corpus.New.Types (FileFormat(..), FileType(..))
import Gargantext.API.Node.FrameCalcUpload.Types
import Gargantext.API.Node.Types (NewWithForm(..))
import Gargantext.API.Node.Types (NewWithTempFile(..))
import Gargantext.API.Prelude ( GargM )
import Gargantext.API.Routes.Named.FrameCalc qualified as Named
import Gargantext.API.Worker (serveWorkerAPI)
......@@ -35,6 +36,7 @@ import Gargantext.Database.Action.Flow.Types ( FlowCmdM )
import Gargantext.Database.Admin.Types.Hyperdata.Frame ( HyperdataFrame(..) )
import Gargantext.Database.Admin.Types.Node ( NodeId, NodeType(NodeCorpus) )
import Gargantext.Database.Query.Table.Node (getClosestParentIdByType, getNodeWith)
import Gargantext.Database.Prelude (createLargeObject)
import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude
import Gargantext.Utils.Jobs.Monad (MonadJobStatus(..), markFailureNoErr)
......@@ -54,7 +56,8 @@ api authenticatedUser nId =
frameCalcUploadAsync :: ( HasConfig env
frameCalcUploadAsync :: ( MonadMask m
, HasConfig env
, FlowCmdM env err m
, MonadJobStatus m
, HasNodeArchiveStoryImmediateSaver env
......@@ -65,7 +68,7 @@ frameCalcUploadAsync :: ( HasConfig env
-> FrameCalcUpload
-> JobHandle m
-> m ()
frameCalcUploadAsync authenticatedUser nId (FrameCalcUpload _wf_lang _wf_selection) jobHandle = do
frameCalcUploadAsync authenticatedUser nId (FrameCalcUpload _wtf_lang _wtf_selection) jobHandle = do
markStarted 5 jobHandle
-- printDebug "[frameCalcUploadAsync] uId" uId
......@@ -82,7 +85,8 @@ frameCalcUploadAsync authenticatedUser nId (FrameCalcUpload _wf_lang _wf_selecti
manager <- newManager tlsManagerSettings
req <- parseRequest $ T.unpack csvUrl
httpLbs req manager
let body = T.pack $ BSU8.toString $ BSL.toStrict $ responseBody res
let body = BSL.toStrict $ responseBody res
PSQL.Oid oId <- createLargeObject body
-- printDebug "body" body
mCId <- getClosestParentIdByType nId NodeCorpus
......@@ -92,14 +96,14 @@ frameCalcUploadAsync authenticatedUser nId (FrameCalcUpload _wf_lang _wf_selecti
Nothing -> markFailureNoErr 1 jobHandle
Just cId ->
-- FIXME(adn) Audit this conversion.
addToCorpusWithForm (RootId userNodeId)
cId
(NewWithForm { _wf_filetype = TSV
, _wf_fileformat = Plain
, _wf_data = body
, _wf_lang
, _wf_name = "calc-upload.csv"
, _wf_selection }) jobHandle
addToCorpusWithTempFile (RootId userNodeId)
cId
(NewWithTempFile { _wtf_filetype = TSV
, _wtf_fileformat = Plain
, _wtf_file_oid = fromIntegral oId
, _wtf_lang
, _wtf_name = "calc-upload.csv"
, _wtf_selection }) jobHandle
markComplete jobHandle
where
......
......@@ -31,6 +31,9 @@ import Gargantext.Prelude
import Web.FormUrlEncoded (FromForm, ToForm)
-------------------------------------------------------
-- | A file is uploaded with this type. Then, for internal job
-- creation for the worker, 'NewWithTempFile' is used with a large
-- object oid
data NewWithForm = NewWithForm
{ _wf_filetype :: !FileType
, _wf_fileformat :: !FileFormat
......@@ -50,6 +53,26 @@ instance ToJSON NewWithForm where
instance ToSchema NewWithForm where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wf_")
-------------------------------------------------------
data NewWithTempFile = NewWithTempFile
{ _wtf_filetype :: !FileType
, _wtf_fileformat :: !FileFormat
, _wtf_file_oid :: !Int
, _wtf_lang :: !(Maybe Lang)
, _wtf_name :: !Text
, _wtf_selection :: !FlowSocialListWith
} deriving (Eq, Show, Generic)
makeLenses ''NewWithTempFile
instance FromForm NewWithTempFile
instance ToForm NewWithTempFile
instance FromJSON NewWithTempFile where
parseJSON = genericParseJSON $ jsonOptions "_wtf_"
instance ToJSON NewWithTempFile where
toJSON = genericToJSON $ jsonOptions "_wtf_"
instance ToSchema NewWithTempFile where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wtf_")
-------------------------------------------------------
data NewWithFile = NewWithFile
......
......@@ -11,21 +11,30 @@ Portability : POSIX
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE UndecidableInstances #-}
module Gargantext.API.Routes
where
module Gargantext.API.Routes where
import Data.ByteString.Base64 qualified as BSB64
import Data.Text qualified as T
import Data.Text.Encoding qualified as TE
import Database.PostgreSQL.Simple.LargeObjects qualified as PSQL
import Gargantext.API.Admin.Auth.Types (AuthenticatedUser, auth_user_id)
import Gargantext.API.Admin.EnvTypes (Env)
import Gargantext.API.Errors.Types (BackendInternalError)
import Gargantext.API.Node.Corpus.New.Types ( FileFormat(..) )
import Gargantext.API.Node.Types (NewWithForm(..), NewWithTempFile(..))
import Gargantext.API.Prelude (GargServer, GargM)
import Gargantext.API.Routes.Named.Annuaire qualified as Named
import Gargantext.API.Routes.Named.Corpus qualified as Named
import Gargantext.API.Worker (serveWorkerAPI)
import Gargantext.API.Worker (serveWorkerAPI, serveWorkerAPIM)
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Worker.Jobs.Types qualified as Jobs
import Gargantext.Database.Prelude (createLargeObject)
import Gargantext.Prelude
import Gargantext.System.Logging ( logLocM, LogLevel(..) )
import Servant (Get, JSON)
import Servant.Server.Generic (AsServerT)
......@@ -63,19 +72,34 @@ addCorpusWithQuery user =
, Jobs._acq_cid = cId }
}
addCorpusWithForm :: User -> Named.AddWithForm (AsServerT (GargM Env BackendInternalError))
addCorpusWithForm user =
Named.AddWithForm {
addWithFormEp = \cId -> serveWorkerAPI $ \p ->
-- /NOTE(adinapoli)/ Track the initial steps outside 'addToCorpusWithForm', because it's
-- called in a few places, and the job status might be different between invocations.
-- markStarted 3 jHandle
-- New.addToCorpusWithForm user cid i jHandle
Jobs.AddCorpusFormAsync { Jobs._acf_args = p
, Jobs._acf_user = user
, Jobs._acf_cid = cId }
-- | Uses temporary file stored in postgres to add that file to a corpus
addWithTempFileApi :: AuthenticatedUser
-> Named.AddWithTempFile (AsServerT (GargM Env BackendInternalError))
addWithTempFileApi authenticatedUser =
Named.AddWithTempFile {
addWithTempFileEp = \cId ->
serveWorkerAPIM $ \(NewWithForm { .. }) -> do
let bs = case _wf_fileformat of
Plain -> cs _wf_data
ZIP -> case BSB64.decode $ TE.encodeUtf8 _wf_data of
Left err -> panicTrace $ T.pack "[addWithTempFileApi] error decoding base64: " <> T.pack err
Right decoded -> decoded
(PSQL.Oid oId) <- createLargeObject bs
$(logLocM) DEBUG $ "[addWithTempFileApi] oId': " <> show oId
let args = NewWithTempFile { _wtf_filetype = _wf_filetype
, _wtf_fileformat = _wf_fileformat
, _wtf_file_oid = fromIntegral oId
, _wtf_lang = _wf_lang
, _wtf_name = _wf_name
, _wtf_selection = _wf_selection }
pure $ Jobs.AddCorpusTempFileAsync { _actf_args = args
, _actf_cid = cId
, _actf_user = userId }
}
where
userId = UserDBId $ authenticatedUser ^. auth_user_id
addAnnuaireWithForm :: Named.AddAnnuaireWithForm (AsServerT (GargM Env BackendInternalError))
addAnnuaireWithForm =
Named.AddAnnuaireWithForm {
......
......@@ -14,7 +14,7 @@ Portability : POSIX
module Gargantext.API.Routes.Named.Corpus (
-- * Routes types
CorpusExportAPI(..)
, AddWithForm(..)
, AddWithTempFile(..)
, AddWithQuery(..)
, MakeSubcorpusAPI(..)
-- * Others
......@@ -47,14 +47,15 @@ data CorpusExportAPI mode = CorpusExportAPI
:> Get '[OctetStream] (Headers '[Servant.Header "Content-Disposition" Text] CorpusSQLite)
} deriving Generic
newtype AddWithForm mode = AddWithForm
{ addWithFormEp :: mode :- Summary "Add with FormUrlEncoded to corpus endpoint"
:> "corpus"
:> Capture "corpus_id" CorpusId
:> "add"
:> "form"
:> "async"
:> NamedRoutes (WorkerAPI '[FormUrlEncoded] NewWithForm)
data AddWithTempFile mode = AddWithTempFile
{ addWithTempFileEp :: mode :- Summary "Add with form via temp file"
:> "corpus"
:> Capture "corpus_id" CorpusId
:> "add"
:> "form"
:> "async"
:> NamedRoutes (WorkerAPI '[FormUrlEncoded] NewWithForm)
} deriving Generic
newtype AddWithQuery mode = AddWithQuery
......
{-|
Module : Gargantext.API.Routes.Named.File
Description :
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Routes.Named.File (
......@@ -6,6 +16,7 @@ module Gargantext.API.Routes.Named.File (
, FileAsyncAPI(..)
) where
import Data.Text (Text)
import GHC.Generics (Generic)
import Gargantext.API.Node.File.Types (BSResponse, RESPONSE)
......@@ -13,6 +24,7 @@ import Gargantext.API.Node.Types (NewWithFile)
import Gargantext.API.Worker (WorkerAPI)
import Servant
data FileAPI mode = FileAPI
{ fileDownloadEp :: mode :- Summary "File download"
:> "download"
......@@ -26,4 +38,3 @@ data FileAsyncAPI mode = FileAsyncAPI
:> "add"
:> NamedRoutes (WorkerAPI '[FormUrlEncoded] NewWithFile)
} deriving Generic
{-|
Module : Gargantext.API.Routes.Named.Node
Description :
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Routes.Named.Node (
......@@ -84,7 +94,7 @@ data NodeAPI a mode = NodeAPI
, searchAPI :: mode :- "search" :> NamedRoutes (SearchAPI SearchResult)
, shareAPI :: mode :- "share" :> NamedRoutes ShareNode
, unshareEp :: mode :- "unshare" :> NamedRoutes UnshareNode
, publishAPI :: mode :- "publish" :> (PolicyChecked (NamedRoutes PublishAPI))
, publishAPI :: mode :- "publish" :> PolicyChecked (NamedRoutes PublishAPI)
---- Pairing utilities
, pairWithEp :: mode :- "pairwith" :> NamedRoutes PairWith
, pairsEp :: mode :- "pairs" :> NamedRoutes Pairs
......
......@@ -8,9 +8,8 @@ Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Routes.Named.Private (
-- * Routes types
......@@ -26,13 +25,13 @@ module Gargantext.API.Routes.Named.Private (
import Data.Text (Text)
import Gargantext.API.Admin.Auth.Types
import Gargantext.API.Auth.PolicyCheck
import Gargantext.API.Routes.Named.Contact
import Gargantext.API.Routes.Named.Context
import Gargantext.API.Routes.Named.Corpus
import Gargantext.API.Routes.Named.Count
import Gargantext.API.Routes.Named.Document
import Gargantext.API.Admin.Auth.Types (AuthenticatedUser)
import Gargantext.API.Auth.PolicyCheck (PolicyChecked)
import Gargantext.API.Routes.Named.Contact (ContactAPI)
import Gargantext.API.Routes.Named.Context (ContextAPI)
import Gargantext.API.Routes.Named.Corpus (AddWithTempFile, AddWithQuery, CorpusExportAPI, MakeSubcorpusAPI)
import Gargantext.API.Routes.Named.Count (CountAPI, Query)
import Gargantext.API.Routes.Named.Document (DocumentExportAPI)
import Gargantext.API.Routes.Named.List (GETAPI, JSONAPI, TSVAPI)
import Gargantext.API.Routes.Named.Node
import Gargantext.API.Routes.Named.Remote
......@@ -95,7 +94,7 @@ data GargPrivateAPI' mode = GargPrivateAPI'
:> Capture "tree_id" NodeId
:> NamedRoutes TreeFlatAPI
, membersAPI :: mode :- "members" :> Summary "Team node members" :> NamedRoutes MembersAPI
, addWithFormAPI :: mode :- NamedRoutes AddWithForm
, addWithTempFile :: mode :- NamedRoutes AddWithTempFile
, addWithQueryEp :: mode :- NamedRoutes AddWithQuery
, makeSubcorpusAPI :: mode :- NamedRoutes MakeSubcorpusAPI
, listGetAPI :: mode :- NamedRoutes GETAPI
......
......@@ -18,7 +18,7 @@ import Gargantext.API.Node.Phylo.Export qualified as PhyloExport
import Gargantext.API.Node qualified as Tree
import Gargantext.API.Node.ShareURL ( shareURL )
import Gargantext.API.Prelude (GargM)
import Gargantext.API.Routes (addCorpusWithForm, addCorpusWithQuery)
import Gargantext.API.Routes (addWithTempFileApi, addCorpusWithQuery)
import Gargantext.API.Routes.Named.Private qualified as Named
import Gargantext.API.Server.Named.Ngrams
import Gargantext.API.Server.Named.Viz qualified as Viz
......@@ -43,27 +43,27 @@ serverPrivateGargAPI'
:: AuthenticatedUser -> Named.GargPrivateAPI' (AsServerT (GargM Env BackendInternalError))
serverPrivateGargAPI' authenticatedUser@(AuthenticatedUser userNodeId userId)
= Named.GargPrivateAPI'
{ gargAdminAPI = serverGargAdminAPI
, nodeEp = nodeAPI authenticatedUser
, contextEp = contextAPI (Proxy :: Proxy HyperdataAny) authenticatedUser
, corpusNodeAPI = corpusNodeAPI authenticatedUser
, corpusNodeNodeAPI = nodeNodeAPI (Proxy :: Proxy HyperdataAny) authenticatedUser
, corpusExportAPI = CorpusExport.getCorpus
, annuaireEp = annuaireNodeAPI authenticatedUser
, contactAPI = contactAPI authenticatedUser
, tableNgramsAPI = apiNgramsTableDoc authenticatedUser
, phyloExportAPI = PhyloExport.api userNodeId
, documentExportAPI = documentExportAPI userNodeId
, countAPI = Count.countAPI
, graphAPI = Viz.graphAPI authenticatedUser userId
, treeAPI = Tree.treeAPI authenticatedUser
, treeFlatAPI = Tree.treeFlatAPI authenticatedUser
, membersAPI = members
, addWithFormAPI = addCorpusWithForm (RootId userNodeId)
, addWithQueryEp = addCorpusWithQuery (RootId userNodeId)
, makeSubcorpusAPI = Subcorpus.makeSubcorpus userId
, listGetAPI = List.getAPI
, listJsonAPI = List.jsonAPI
, listTsvAPI = List.tsvAPI
, shareUrlAPI = shareURL
{ gargAdminAPI = serverGargAdminAPI
, nodeEp = nodeAPI authenticatedUser
, contextEp = contextAPI (Proxy :: Proxy HyperdataAny) authenticatedUser
, corpusNodeAPI = corpusNodeAPI authenticatedUser
, corpusNodeNodeAPI = nodeNodeAPI (Proxy :: Proxy HyperdataAny) authenticatedUser
, corpusExportAPI = CorpusExport.getCorpus
, annuaireEp = annuaireNodeAPI authenticatedUser
, contactAPI = contactAPI authenticatedUser
, tableNgramsAPI = apiNgramsTableDoc authenticatedUser
, phyloExportAPI = PhyloExport.api userNodeId
, documentExportAPI = documentExportAPI userNodeId
, countAPI = Count.countAPI
, graphAPI = Viz.graphAPI authenticatedUser userId
, treeAPI = Tree.treeAPI authenticatedUser
, treeFlatAPI = Tree.treeFlatAPI authenticatedUser
, membersAPI = members
, addWithTempFile = addWithTempFileApi authenticatedUser
, addWithQueryEp = addCorpusWithQuery (RootId userNodeId)
, makeSubcorpusAPI = Subcorpus.makeSubcorpus userId
, listGetAPI = List.getAPI
, listJsonAPI = List.jsonAPI
, listTsvAPI = List.tsvAPI
, shareUrlAPI = shareURL
}
......@@ -59,4 +59,3 @@ serveWorkerAPIEJob f = WorkerAPI { workerAPIPost }
mId <- sendJob job
pure $ JobInfo { _ji_message_id = mId
, _ji_mNode_id = getWorkerMNodeId job }
......@@ -20,7 +20,7 @@ import Data.Bimap (Bimap)
import Data.Bimap qualified as Bimap
import Data.LanguageCodes qualified as ISO639
import Data.Morpheus.Types (GQLType)
import Data.Swagger (ToSchema(..), defaultSchemaOptions, genericDeclareNamedSchemaUnrestricted)
import Data.Swagger (ToParamSchema, ToSchema(..), defaultSchemaOptions, genericDeclareNamedSchemaUnrestricted)
import Data.Text (pack)
import Gargantext.Prelude hiding (All)
import Prelude (userError)
......@@ -70,6 +70,7 @@ defaultLanguage = EN
instance ToJSON Lang
instance FromJSON Lang
instance ToParamSchema Lang
instance ToSchema Lang where
declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
instance FromHttpApiData Lang
......
......@@ -312,7 +312,8 @@ getMultipleLinefile bl del headers res x = do
then checkNextLine bl del headers res x
else
if (length tmp > length headers) || (V.length bl == (x + 1))
then Left (pack $ "Cannot parse the file at line " <> show x <> ". Maybe because of a delimiter")
then
Left (pack $ "Cannot parse the file at line " <> show x <> ". Maybe because of a delimiter")
else do
case BL.append res <$> ((V.!?) bl (x+1)) of
Nothing -> Left "getMultipleLinefile"
......@@ -591,7 +592,7 @@ parseTsv' bs = (V.toList . V.map tsv2doc . snd) <$> readTsvLazyBS Comma bs
parseTsv' :: BL.ByteString -> Either Text [HyperdataDocument]
parseTsv' bs = do
let
result = case (testCorrectFile bs) of
result = case testCorrectFile bs of
Left _err -> Left _err
Right del -> readTsvLazyBS del bs
V.toList . V.map tsv2doc . snd <$> result
......@@ -601,7 +602,7 @@ parseTsvC :: BL.ByteString
parseTsvC bs =
(\(_h, rs) -> (fromIntegral $ V.length rs, yieldMany rs .| mapC tsv2doc)) <$> eResult
where
eResult = case (testCorrectFile bs) of
eResult = case testCorrectFile bs of
Left _err -> Left _err
Right del -> readTsvLazyBS del bs
......
......@@ -21,19 +21,24 @@ module Gargantext.Core.Worker where
import Async.Worker.Broker.Types (toA, getMessage, messageId)
import Async.Worker qualified as W
import Async.Worker.Types qualified as W
import Control.Exception.Safe qualified as CES
import Control.Lens (to)
import Data.Aeson qualified as Aeson
import Data.ByteString.Lazy qualified as BSL
import Data.Text qualified as T
import Database.PostgreSQL.Simple.LargeObjects qualified as PSQL
import Gargantext.API.Admin.Auth (forgotUserPassword)
import Gargantext.API.Admin.Auth.Types (ForgotPasswordAsyncParams(..))
import Gargantext.API.Ngrams.List (postAsyncJSON)
import Gargantext.API.Node.Contact (addContact)
import Gargantext.API.Node.Corpus.Annuaire qualified as Annuaire
import Gargantext.API.Node.Corpus.New (addToCorpusWithForm, addToCorpusWithQuery)
import Gargantext.API.Node.Corpus.New (addToCorpusWithTempFile, addToCorpusWithQuery)
import Gargantext.API.Node.DocumentsFromWriteNodes (documentsFromWriteNodes)
import Gargantext.API.Node.DocumentUpload (documentUploadAsync, remoteImportDocuments)
import Gargantext.API.Node.File (addWithFile)
import Gargantext.API.Node.FrameCalcUpload (frameCalcUploadAsync)
import Gargantext.API.Node.New (postNode')
import Gargantext.API.Node.Types (_wtf_file_oid)
import Gargantext.API.Node.Update.Types (UpdateNodeParams(..), Granularity (..))
import Gargantext.API.Node.Update (updateNode)
import Gargantext.API.Server.Named.Ngrams (tableNgramsPostChartsAsync)
......@@ -48,6 +53,7 @@ import Gargantext.Core.Worker.Env
import Gargantext.Core.Worker.Jobs.Types (Job(..), getWorkerMNodeId, ImportRemoteDocumentsPayload(..), ImportRemoteTermsPayload(..))
import Gargantext.Core.Worker.PGMQTypes (BrokerMessage, HasWorkerBroker, WState)
import Gargantext.Core.Worker.Types (JobInfo(..))
import Gargantext.Database.Prelude (readLargeObject, removeLargeObject)
import Gargantext.Database.Query.Table.User (getUsersWithEmail)
import Gargantext.Prelude hiding (to)
import Gargantext.System.Logging ( logLocM, LogLevel(..), logMsg, withLogger )
......@@ -223,12 +229,14 @@ performAction env _state bm = do
AddContact { .. } -> runWorkerMonad env $ do
$(logLocM) DEBUG $ "[performAction] add contact"
addContact _ac_user _ac_node_id _ac_args jh
-- | Send a file with documents and index them in corpus
AddCorpusFormAsync { .. } -> runWorkerMonad env $ do
$(logLocM) DEBUG $ "[performAction] add corpus form"
addToCorpusWithForm _acf_user _acf_cid _acf_args jh
-- | Uses temporary file to add documents into corpus
AddCorpusTempFileAsync { .. } -> runWorkerMonad env $ do
-- TODO CES.filnally
$(logLocM) DEBUG "[performAction] add to corpus with temporary file"
CES.finally (addToCorpusWithTempFile _actf_user _actf_cid _actf_args jh)
(removeLargeObject $ _wtf_file_oid _actf_args)
-- | Perform external API search query and index documents in corpus
AddCorpusWithQuery { .. } -> runWorkerMonad env $ do
$(logLocM) DEBUG "[performAction] add corpus with query"
......@@ -266,7 +274,12 @@ performAction env _state bm = do
-- | Process uploaded JSON file
JSONPost { .. } -> runWorkerMonad env $ do
$(logLocM) DEBUG $ "[performAction] json post"
void $ postAsyncJSON _jp_list_id _jp_ngrams_list jh
CES.finally (do
_jp_ngrams_list' <- readLargeObject (PSQL.Oid $ fromIntegral _jp_ngrams_oid)
case Aeson.eitherDecode (BSL.fromStrict _jp_ngrams_list') of
Left err -> CES.throwString err
Right _jp_ngrams_list -> void $ postAsyncJSON _jp_list_id _jp_ngrams_list jh)
(removeLargeObject _jp_ngrams_oid)
-- | Task for updating metrics charts
NgramsPostCharts { .. } -> runWorkerMonad env $ do
......
......@@ -19,6 +19,7 @@ module Gargantext.Core.Worker.Env where
import Control.Concurrent.STM.TVar (TVar, modifyTVar, newTVarIO, readTVarIO)
import Control.Exception.Safe qualified as CES
import Control.Lens (prism', to, view)
import Control.Lens.TH
import Control.Monad.Trans.Control (MonadBaseControl)
......@@ -171,7 +172,10 @@ newtype WorkerMonad a =
, MonadBase IO
, MonadBaseControl IO
, MonadError IOException
, MonadFail )
, MonadFail
, CES.MonadThrow
, CES.MonadCatch
, CES.MonadMask )
instance HasLogger WorkerMonad where
newtype instance Logger WorkerMonad =
......
......@@ -50,7 +50,6 @@ sendJobWithCfg gcConfig job = do
-- | We want to fine-tune job metadata parameters, for each job type
updateJobData :: Job -> SendJob -> SendJob
updateJobData (AddCorpusFormAsync {}) sj = sj { W.timeout = 3000 }
updateJobData (AddCorpusWithQuery {}) sj = sj { W.timeout = 3000 }
updateJobData (AddToAnnuaireWithForm {}) sj = sj { W.timeout = 3000 }
updateJobData (AddWithFile {}) sj = sj { W.timeout = 3000 }
......
......@@ -28,7 +28,7 @@ import Gargantext.API.Node.DocumentsFromWriteNodes.Types qualified as DFWN
import Gargantext.API.Node.DocumentUpload.Types (DocumentUpload)
import Gargantext.API.Node.FrameCalcUpload.Types (FrameCalcUpload)
import Gargantext.API.Node.New.Types ( PostNode(..) )
import Gargantext.API.Node.Types (NewWithFile, NewWithForm, WithQuery(..))
import Gargantext.API.Node.Types (NewWithFile, NewWithTempFile, WithQuery(..))
import Gargantext.API.Node.Update.Types (UpdateNodeParams)
import Gargantext.Core.Types.Individu (User)
import Gargantext.Database.Admin.Types.Node (AnnuaireId, CorpusId, ListId, NodeId(UnsafeMkNodeId), ParentId)
......@@ -101,9 +101,9 @@ data Job =
| AddContact { _ac_args :: AddContactParams
, _ac_node_id :: NodeId
, _ac_user :: User }
| AddCorpusFormAsync { _acf_args :: NewWithForm
, _acf_user :: User
, _acf_cid :: CorpusId }
| AddCorpusTempFileAsync { _actf_args :: NewWithTempFile
, _actf_user :: User
, _actf_cid :: CorpusId }
| AddCorpusWithQuery { _acq_args :: WithQuery
, _acq_user :: User
, _acq_cid :: CorpusId }
......@@ -120,7 +120,8 @@ data Job =
, _fca_authenticatedUser :: AuthenticatedUser
, _fca_node_id :: NodeId }
| JSONPost { _jp_list_id :: ListId
, _jp_ngrams_list :: NgramsList }
, _jp_ngrams_oid :: Int }
-- , _jp_ngrams_list :: NgramsList }
| NgramsPostCharts { _npc_node_id :: NodeId
, _npc_args :: UpdateTableNgramsCharts }
| PostNodeAsync { _pna_node_id :: NodeId
......@@ -144,11 +145,11 @@ instance FromJSON Job where
_ac_node_id <- o .: "node_id"
_ac_user <- o .: "user"
return $ AddContact { .. }
"AddCorpusFormAsync" -> do
_acf_args <- o .: "args"
_acf_user <- o .: "user"
_acf_cid <- o .: "cid"
return $ AddCorpusFormAsync { .. }
"AddCorpusTempFileAsync" -> do
_actf_args <- o .: "args"
_actf_user <- o .: "user"
_actf_cid <- o .: "cid"
return $ AddCorpusTempFileAsync { .. }
"AddCorpusWithQuery" -> do
_acq_args <- o .: "args"
_acq_user <- o .: "user"
......@@ -178,7 +179,8 @@ instance FromJSON Job where
return $ FrameCalcUpload { .. }
"JSONPost" -> do
_jp_list_id <- o .: "list_id"
_jp_ngrams_list <- o .: "ngrams_list"
-- _jp_ngrams_list <- o .: "ngrams_list"
_jp_ngrams_oid <- o .: "ngrams_oid"
return $ JSONPost { .. }
"NgramsPostCharts" -> do
_npc_node_id <- o .: "node_id"
......@@ -212,11 +214,11 @@ instance ToJSON Job where
, "args" .= _ac_args
, "user" .= _ac_user
, "node_id" .= _ac_node_id ]
toJSON (AddCorpusFormAsync { .. }) =
object [ "type" .= ("AddCorpusFormAsync" :: Text)
, "args" .= _acf_args
, "user" .= _acf_user
, "cid" .= _acf_cid ]
toJSON (AddCorpusTempFileAsync { .. }) =
object [ "type" .= ("AddCorpusTempFileAsync" :: Text)
, "args" .= _actf_args
, "user" .= _actf_user
, "cid" .= _actf_cid ]
toJSON (AddCorpusWithQuery { .. }) =
object [ "type" .= ("AddCorpusWithQuery" :: Text)
, "args" .= _acq_args
......@@ -247,7 +249,8 @@ instance ToJSON Job where
toJSON (JSONPost { .. }) =
object [ "type" .= ("JSONPost" :: Text)
, "list_id" .= _jp_list_id
, "ngrams_list" .= _jp_ngrams_list ]
, "ngrams_oid" .= _jp_ngrams_oid ]
-- , "ngrams_list" .= _jp_ngrams_list ]
toJSON (NgramsPostCharts { .. }) =
object [ "type" .= ("NgramsPostCharts" :: Text)
, "node_id" .= _npc_node_id
......@@ -290,7 +293,7 @@ instance ToJSON Job where
getWorkerMNodeId :: Job -> Maybe NodeId
getWorkerMNodeId Ping = Nothing
getWorkerMNodeId (AddContact { _ac_node_id }) = Just _ac_node_id
getWorkerMNodeId (AddCorpusFormAsync { _acf_args, _acf_cid }) = Just _acf_cid
getWorkerMNodeId (AddCorpusTempFileAsync { _actf_cid }) = Just _actf_cid
getWorkerMNodeId (AddCorpusWithQuery { _acq_args = WithQuery { _wq_node_id }}) = Just $ UnsafeMkNodeId _wq_node_id
getWorkerMNodeId (AddToAnnuaireWithForm { _aawf_annuaire_id }) = Just _aawf_annuaire_id
getWorkerMNodeId (AddWithFile { _awf_node_id }) = Just _awf_node_id
......
......@@ -418,7 +418,6 @@ insertMasterDocs c lang hs = do
-- add documents to the corpus (create node_node link)
-- this will enable global database monitoring
-- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
mapNgramsDocs' :: HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
<- mapNodeIdNgrams
<$> documentIdWithNgrams
......@@ -489,4 +488,3 @@ reIndexWith cId lId nt lts = do
-- Saving the indexation in database
mapM_ (saveDocNgramsWith lId . ngramsByDoc corpusLang nt ts) docs
......@@ -9,6 +9,7 @@ Portability : POSIX
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE ScopedTypeVariables #-}
......@@ -51,15 +52,20 @@ module Gargantext.Database.Prelude
, fromField'
, mkCmd
, restrictMaybe
, createLargeObject
, readLargeObject
, readLargeObjectViaTempFile
, removeLargeObject
)
where
import Control.Exception.Safe (throw)
import Control.Exception.Safe qualified as CES
import Control.Lens (Getter, view)
import Control.Monad.Random ( MonadRandom )
import Control.Monad.Trans.Control (MonadBaseControl)
import Data.Aeson (Result(..))
import Data.ByteString qualified as DB
import Data.ByteString qualified as BS
import Data.ByteString.Lazy qualified as BSL
import Data.List qualified as DL
import Data.Pool (Pool, withResource)
import Data.Profunctor.Product.Default (Default)
......@@ -67,6 +73,7 @@ import Database.PostgreSQL.Simple (Connection)
import Database.PostgreSQL.Simple qualified as PGS
import Database.PostgreSQL.Simple.FromField ( Conversion, ResultError(ConversionFailed), fromField, returnError)
import Database.PostgreSQL.Simple.Internal (Field)
import Database.PostgreSQL.Simple.LargeObjects qualified as PSQL
import Database.PostgreSQL.Simple.Types (Query(..))
import Gargantext.Core.Notifications.CentralExchange.Types qualified as CET
import Gargantext.Core.Config (HasConfig(..))
......@@ -78,6 +85,8 @@ import Opaleye.Aggregate (countRows)
import Opaleye.Internal.Constant qualified
import Opaleye.Internal.Operators qualified
import Shelly qualified as SH
import System.Directory (removeFile)
import System.IO.Temp (emptySystemTempFile)
-- $typesAndConstraints
......@@ -199,7 +208,7 @@ runCountOpaQuery q = do
-- countRows is guaranteed to return a list with exactly one row so DL.head is safe here
pure $ fromInt64ToInt $ DL.head counts
formatPGSQuery :: PGS.ToRow a => PGS.Query -> a -> DBCmd err DB.ByteString
formatPGSQuery :: PGS.ToRow a => PGS.Query -> a -> DBCmd err BS.ByteString
formatPGSQuery q a = mkCmd $ \conn -> PGS.formatQuery conn q a
runPGSQuery :: ( PGS.FromRow r, PGS.ToRow q )
......@@ -209,7 +218,7 @@ runPGSQuery q a = mkCmd $ \conn -> catch (PGS.query conn q a) (printError conn)
printError c (SomeException e) = do
q' <- PGS.formatQuery c q a
hPutStrLn stderr q'
throw (SomeException e)
CES.throw (SomeException e)
-- | TODO catch error
runPGSQuery_ :: ( PGS.FromRow r )
......@@ -218,13 +227,13 @@ runPGSQuery_ q = mkCmd $ \conn -> catch (PGS.query_ conn q) printError
where
printError (SomeException e) = do
hPutStrLn stderr (fromQuery q)
throw (SomeException e)
CES.throw (SomeException e)
execPGSQuery :: PGS.ToRow a => PGS.Query -> a -> DBCmd err Int64
execPGSQuery q a = mkCmd $ \conn -> PGS.execute conn q a
fromField' :: (Typeable b, FromJSON b) => Field -> Maybe DB.ByteString -> Conversion b
fromField' :: (Typeable b, FromJSON b) => Field -> Maybe BS.ByteString -> Conversion b
fromField' field mb = do
v <- fromField field mb
valueToHyperdata v
......@@ -263,3 +272,60 @@ createDBIfNotExists connStr dbName = do
(result,) <$> SH.lastExitCode
return ()
------------------------------
-- PostgreSQL Large Object functionality
-- https://www.postgresql.org/docs/17/largeobjects.html
-- NOTE: During development of this feature, I had problems (in tests)
-- with a hanging transaction. After debugging, it turned out this
-- was, for some reason, conflicting with our `logLocM` (though I'm no
-- sure why). Please be careful when adding debug info to large
-- objects and if you do, make sure the tests run.
createLargeObject :: BS.ByteString -> DBCmd err PSQL.Oid
createLargeObject bs = mkCmd $ \c -> PGS.withTransaction c $ do
oId <- PSQL.loCreat c
loFd <- PSQL.loOpen c oId PSQL.WriteMode
_ <- PSQL.loWrite c loFd bs
PSQL.loClose c loFd
pure oId
-- | Read a large object directly, given an oid. We read it in a
-- single transaction, looping by given chunk size
readLargeObject :: PSQL.Oid -> DBCmd err BS.ByteString
readLargeObject oId = mkCmd $ \c -> PGS.withTransaction c $ do
loFd <- PSQL.loOpen c oId PSQL.ReadMode
let chunkSize = 1024
let readChunks tell = do
c' <- PSQL.loRead c loFd chunkSize
tell' <- PSQL.loTell c loFd
if tell == tell' then
pure ([c'], tell)
else do
(cs', tell'') <- readChunks tell'
pure (c':cs', tell'')
(chunks, _size) <- readChunks 0
let s = force BSL.toStrict $ BSL.fromChunks chunks
PSQL.loClose c loFd
pure s
-- | Read large object by exporting it to a temporary file, then
-- reading that file. The difference from 'readLargeObject' is that we
-- have only 1 call inside a transaction
readLargeObjectViaTempFile :: (CES.MonadMask m, IsDBCmd env err m)
=> PSQL.Oid -> m BS.ByteString
readLargeObjectViaTempFile oId = do
CES.bracket (liftBase $ emptySystemTempFile "large-object")
(liftBase . removeFile)
(\fp -> do
mkCmd $ \c -> withTransaction c $ \_ -> PSQL.loExport c oId fp
!contents <- liftBase $ BS.readFile fp
pure contents)
where
withTransaction c = CES.bracket (PGS.begin c) (\_ -> PGS.rollback c)
removeLargeObject :: Int -> DBCmd err ()
removeLargeObject oId = mkCmd $ \c -> do
PSQL.loUnlink c $ PSQL.Oid $ fromIntegral oId
......@@ -367,7 +367,7 @@ flags:
gargantext:
"enable-benchmarks": false
"no-phylo-debug-logs": true
"test-crypto": true
"test-crypto": false
graphviz:
"test-parsing": false
hashable:
......
......@@ -28,9 +28,8 @@ module Test.API.UpdateList (
import Control.Lens (mapped, over)
import Control.Monad.Fail (fail)
import Data.Aeson.QQ
import Data.Aeson qualified as JSON
import Data.ByteString.Lazy qualified as BSL
import Data.Aeson.QQ
import Data.Map.Strict.Patch qualified as PM
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
......@@ -47,7 +46,7 @@ import Gargantext.API.Ngrams.Types
import Gargantext.API.Node.Corpus.New.Types qualified as FType
import Gargantext.API.Node.Types
import Gargantext.API.Routes.Named
import Gargantext.API.Routes.Named.Corpus
import Gargantext.API.Routes.Named.Corpus (addWithTempFileEp)
import Gargantext.API.Routes.Named.Node
import Gargantext.API.Routes.Named.Private
import Gargantext.API.Worker (workerAPIPost)
......@@ -90,12 +89,12 @@ uploadJSONList log_cfg port token cId pathToNgrams clientEnv = do
([listId] :: [NodeId]) <- protectedJSON token "POST" (mkUrl port ("/node/" <> build cId)) [aesonQQ|{"pn_typename":"NodeList","pn_name":"Testing"}|]
-- Upload the JSON doc
simpleNgrams' <- liftIO (TIO.readFile =<< getDataFileName pathToNgrams)
let (Just simpleNgrams) = JSON.decode $ BSL.fromStrict $ encodeUtf8 simpleNgrams'
-- let (Just simpleNgrams) = JSON.decode $ BSL.fromStrict $ encodeUtf8 simpleNgrams'
-- let jsonFileFormData = [ (T.pack "_wjf_data", simpleNgrams)
-- , ("_wjf_filetype", "JSON")
-- , ("_wjf_name", "simple_ngrams.json")
-- ]
let params = WithJsonFile { _wjf_data = simpleNgrams
let params = WithJsonFile { _wjf_data = simpleNgrams'
, _wjf_name = "simple_ngrams.json" }
-- let url = "/lists/" +|listId|+ "/add/form/async"
-- let mkPollUrl j = "/corpus/" +|listId|+ "/add/form/async/" +|_jph_id j|+ "/poll?limit=1"
......@@ -402,8 +401,8 @@ add_file_async (toServantToken -> token) corpusId nwf =
& gargPrivateAPI
& mkPrivateAPI
& ($ token)
& addWithFormAPI
& addWithFormEp
& addWithTempFile
& addWithTempFileEp
& ($ corpusId)
& workerAPIPost
& (\submitForm -> submitForm nwf)
......
......@@ -39,7 +39,7 @@ import Gargantext.API.Node.Get (GetNodeParams)
import Gargantext.API.Node.New.Types (PostNode(..))
import Gargantext.API.Node.Share.Types (ShareNodeParams(..))
import Gargantext.API.Node.Update.Types qualified as NU
import Gargantext.API.Node.Types (NewWithForm, RenameNode(..), WithQuery)
import Gargantext.API.Node.Types (NewWithForm, NewWithTempFile(..), RenameNode(..), WithQuery)
import Gargantext.API.Public.Types (PublicData(..))
import Gargantext.API.Routes.Named.Publish (PublishRequest(..))
import Gargantext.API.Search.Types (SearchQuery(..), SearchResult(..), SearchResultTypes(..), SearchType(..))
......@@ -572,6 +572,14 @@ genFrontendErr be = do
pure $ Errors.mkFrontendErr' txt $ Errors.FE_job_generic_exception err
instance Arbitrary NewWithTempFile where
arbitrary = NewWithTempFile <$> arbitrary -- _wtf_filetype
<*> arbitrary -- _wtf_fileformat
<*> arbitrary -- _wtf_file_oid
<*> arbitrary -- _wtf_lang
<*> arbitrary -- _wtf_name
<*> arbitrary -- _wtf_selection
instance Arbitrary Job where
arbitrary = oneof [ pure Ping
......@@ -591,7 +599,7 @@ instance Arbitrary Job where
, uploadDocumentGen ]
where
addContactGen = AddContact <$> arbitrary <*> arbitrary <*> arbitrary
addCorpusFormAsyncGen = AddCorpusFormAsync <$> arbitrary <*> arbitrary <*> arbitrary
addCorpusFormAsyncGen = AddCorpusTempFileAsync <$> arbitrary <*> arbitrary <*> arbitrary
addCorpusWithQueryGen = AddCorpusWithQuery <$> arbitrary <*> arbitrary <*> arbitrary
-- addWithFileGen = AddWithFile <$> arbitrary <*> arbitrary <*> arbitrary
addToAnnuaireWithFormGen = AddToAnnuaireWithForm <$> arbitrary <*> arbitrary
......
{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
{-# LANGUAGE TypeApplications #-}
module Test.Ngrams.Terms (tests) where
import Data.HashMap.Strict qualified as HashMap
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment