Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
191
Issues
191
List
Board
Labels
Milestones
Merge Requests
8
Merge Requests
8
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
e7ce7bcb
Commit
e7ce7bcb
authored
Sep 29, 2025
by
Fabien Maniere
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'dev-threaded-flow-poc' into 'dev'
Concurrent queries in NLP See merge request
!451
parents
3e57a2fe
d90467da
Pipeline
#7937
passed with stages
in 59 minutes and 28 seconds
Changes
8
Pipelines
2
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
43 additions
and
26 deletions
+43
-26
Ini.hs
bin/gargantext-cli/CLI/Ini.hs
+2
-1
gargantext-settings.toml_toModify
gargantext-settings.toml_toModify
+4
-0
gargantext.cabal
gargantext.cabal
+1
-0
NLP.hs
src/Gargantext/Core/Config/Ini/NLP.hs
+0
-8
NLP.hs
src/Gargantext/Core/Config/NLP.hs
+2
-3
Worker.hs
src/Gargantext/Core/Config/Worker.hs
+7
-2
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+23
-12
test_config.toml
test-data/test_config.toml
+4
-0
No files found.
bin/gargantext-cli/CLI/Ini.hs
View file @
e7ce7bcb
...
...
@@ -88,7 +88,8 @@ convertConfigs ini@(Ini.GargConfig { .. }) iniMail nlpConfig connInfo =
,
_wsLongJobTimeout
=
3000
,
_wsDefaultDelay
=
0
,
_wsAdditionalDelayAfterRead
=
5
,
_wsDatabase
=
connInfo
{
PGS
.
connectDatabase
=
"pgmq"
}
}
,
_wsDatabase
=
connInfo
{
PGS
.
connectDatabase
=
"pgmq"
}
,
_wsNlpConduitChunkSize
=
10
}
,
_gc_logging
=
Config
.
LogConfig
{
_lc_log_level
=
INFO
,
_lc_log_file
=
Nothing
...
...
gargantext-settings.toml_toModify
View file @
e7ce7bcb
...
...
@@ -166,6 +166,10 @@ default_job_timeout = 60
# default timeout for "long" jobs (in seconds)
long_job_timeout = 3000
# Batch size when sending data to NLP.
# Preferably, set as much as the number of CPUs
nlp_conduit_chunk_size = 10
# if you leave the same credentials as in [database] section above,
# workers will try to set up the `gargantext_pgmq` database
# automatically
...
...
gargantext.cabal
View file @
e7ce7bcb
...
...
@@ -570,6 +570,7 @@ library
, json-stream ^>= 0.4.2.4
, lens >= 5.2.2 && < 5.3
, lens-aeson < 1.3
, lifted-async >= 0.10 && < 0.12
, list-zipper
, massiv < 1.1
, matrix ^>= 0.3.6.1
...
...
src/Gargantext/Core/Config/Ini/NLP.hs
View file @
e7ce7bcb
...
...
@@ -9,19 +9,12 @@ Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Core.Config.Ini.NLP
(
-- * Types
NLPConfig
(
..
)
-- * Utility functions
,
readConfig
-- * Lenses
,
nlp_default
,
nlp_languages
)
where
...
...
@@ -59,4 +52,3 @@ readConfig fp = do
,
T
.
pack
$
show
m_nlp_other
]
Just
ret
->
pure
ret
makeLenses
''
N
LPConfig
src/Gargantext/Core/Config/NLP.hs
View file @
e7ce7bcb
...
...
@@ -19,7 +19,6 @@ module Gargantext.Core.Config.NLP (
-- * Lenses
,
nlp_default
,
nlp_languages
)
where
...
...
@@ -48,9 +47,9 @@ data NLPConfig = NLPConfig { _nlp_default :: URI
instance
FromValue
NLPConfig
where
fromValue
v
=
do
_nlp_default
<-
parseTableFromValue
(
reqKey
"EN"
)
v
-- _nlp_languages <- fromValue <$> getTable
MkTable
t
<-
parseTableFromValue
getTable
v
_nlp_languages
<-
mapM
fromValue
(
snd
<$>
t
)
return
$
NLPConfig
{
..
}
instance
ToValue
NLPConfig
where
toValue
=
defaultTableToValue
...
...
@@ -58,7 +57,7 @@ instance ToTable NLPConfig where
toTable
(
NLPConfig
{
..
})
=
table
([
k
.=
v
|
(
k
,
v
)
<-
Map
.
toList
_nlp_languages
]
-- output the default "EN" language as well
<>
[
(
"EN"
::
Text
)
.=
_nlp_default
])
<>
[
(
"EN"
::
Text
)
.=
_nlp_default
]
)
-- readConfig :: SettingsFile -> IO NLPConfig
...
...
src/Gargantext/Core/Config/Worker.hs
View file @
e7ce7bcb
...
...
@@ -53,6 +53,8 @@ data WorkerSettings =
,
_wsDefaultDelay
::
B
.
TimeoutS
,
_wsAdditionalDelayAfterRead
::
B
.
TimeoutS
,
_wsDefinitions
::
!
[
WorkerDefinition
]
,
_wsNlpConduitChunkSize
::
Int
}
deriving
(
Show
,
Eq
)
instance
FromValue
WorkerSettings
where
fromValue
=
parseTableFromValue
$
do
...
...
@@ -61,6 +63,7 @@ instance FromValue WorkerSettings where
_wsDefaultVisibilityTimeout
<-
reqKey
"default_visibility_timeout"
_wsDefaultJobTimeout
<-
reqKey
"default_job_timeout"
_wsLongJobTimeout
<-
reqKey
"long_job_timeout"
_wsNlpConduitChunkSize
<-
reqKey
"nlp_conduit_chunk_size"
defaultDelay
<-
reqKey
"default_delay"
additionalDelayAfterRead
<-
reqKey
"additional_delay_after_read"
return
$
WorkerSettings
{
_wsDatabase
=
unTOMLConnectInfo
dbConfig
...
...
@@ -69,7 +72,8 @@ instance FromValue WorkerSettings where
,
_wsDefinitions
,
_wsDefaultVisibilityTimeout
,
_wsDefaultDelay
=
B
.
TimeoutS
defaultDelay
,
_wsAdditionalDelayAfterRead
=
B
.
TimeoutS
additionalDelayAfterRead
}
,
_wsAdditionalDelayAfterRead
=
B
.
TimeoutS
additionalDelayAfterRead
,
_wsNlpConduitChunkSize
}
instance
ToValue
WorkerSettings
where
toValue
=
defaultTableToValue
instance
ToTable
WorkerSettings
where
...
...
@@ -80,7 +84,8 @@ instance ToTable WorkerSettings where
,
"default_visibility_timeout"
.=
_wsDefaultVisibilityTimeout
,
"default_delay"
.=
B
.
_TimeoutS
_wsDefaultDelay
,
"additional_delay_after_read"
.=
B
.
_TimeoutS
_wsAdditionalDelayAfterRead
,
"definitions"
.=
_wsDefinitions
]
,
"definitions"
.=
_wsDefinitions
,
"nlp_conduit_chunk_size"
.=
_wsNlpConduitChunkSize
]
data
WorkerDefinition
=
WorkerDefinition
{
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
e7ce7bcb
...
...
@@ -55,6 +55,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
where
import
Conduit
import
Control.Concurrent.Async.Lifted
qualified
as
AsyncL
import
Control.Exception.Safe
qualified
as
CES
import
Control.Lens
(
to
,
view
)
import
Control.Exception.Safe
(
catch
,
MonadCatch
)
...
...
@@ -70,7 +71,8 @@ import Data.Text qualified as T
import
Gargantext.API.Ngrams.Tools
(
getTermsWith
)
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
)
import
Gargantext.Core
(
Lang
(
..
),
withDefaultLanguage
,
NLPServerConfig
)
import
Gargantext.Core.Config
(
GargConfig
(
..
),
hasConfig
)
import
Gargantext.Core.Config
(
GargConfig
(
..
),
hasConfig
,
gc_worker
)
import
Gargantext.Core.Config.Worker
(
wsNlpConduitChunkSize
)
import
Gargantext.Core.Config.Types
(
APIsConfig
(
..
))
import
Gargantext.Core.Ext.IMTUser
(
readFile_Annuaire
)
import
Gargantext.Core.NLP
(
HasNLPServer
,
nlpServerGet
)
...
...
@@ -98,7 +100,8 @@ import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact )
import
Gargantext.Database.Admin.Types.Hyperdata.Corpus
(
HyperdataAnnuaire
,
HyperdataCorpus
(
_hc_lang
)
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
ToHyperdataDocument
(
toHyperdataDocument
),
HyperdataDocument
)
import
Gargantext.Database.Admin.Types.Node
hiding
(
ERROR
,
DEBUG
)
-- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
import
Gargantext.Database.Prelude
import
Gargantext.Database.Class
(
DBCmdWithEnv
,
IsDBCmd
)
import
Gargantext.Database.Transactional
(
DBUpdate
,
runDBTx
)
import
Gargantext.Database.Query.Table.ContextNodeNgrams2
(
ContextNodeNgrams2Poly
(
..
),
insertContextNodeNgrams2
)
import
Gargantext.Database.Query.Table.Node
(
MkCorpus
,
insertDefaultNodeIfNotExists
,
getOrMkList
,
getNodeWith
)
import
Gargantext.Database.Query.Table.Node.Document.Add
qualified
as
Doc
(
add
)
...
...
@@ -108,8 +111,8 @@ import Gargantext.Database.Query.Table.NodeContext (selectDocNodesOnlyId)
import
Gargantext.Database.Query.Table.NodeNgrams
(
listInsertDb
,
getCgramsId
)
import
Gargantext.Database.Query.Tree.Root
(
MkCorpusUser
(
..
),
getOrMkRoot
,
getOrMkRootWithCorpus
,
userFromMkCorpusUser
)
import
Gargantext.Database.Schema.Ngrams
(
indexNgrams
,
NgramsId
)
import
Gargantext.Database.Schema.Node
import
Gargantext.Database.Types
import
Gargantext.Database.Schema.Node
(
NodePoly
(
_node_id
,
_node_hash_id
),
node_hyperdata
)
import
Gargantext.Database.Types
(
Indexed
(
Indexed
)
)
import
Gargantext.Prelude
hiding
(
catch
,
onException
,
to
)
import
Gargantext.System.Logging
(
logLocM
,
LogLevel
(
DEBUG
,
ERROR
),
MonadLogger
)
import
Gargantext.Utils.Jobs.Monad
(
JobHandle
,
MonadJobStatus
(
..
),
markFailureNoErr
)
...
...
@@ -296,11 +299,12 @@ flow :: forall env err m a c.
->
m
CorpusId
flow
c
mkCorpusUser
la
mfslw
(
count
,
docsC
)
jobHandle
=
do
cfg
<-
view
hasConfig
let
chunkSize
=
cfg
^.
gc_worker
.
wsNlpConduitChunkSize
(
_userId
,
userCorpusId
,
listId
,
msgs
)
<-
runDBTx
$
createNodes
cfg
mkCorpusUser
c
forM_
msgs
ce_notify
-- TODO if public insertMasterDocs else insertUserDocs
runConduit
(
zipSources
(
yieldMany
([
1
..
]
::
[
Int
]))
docsC
.|
CList
.
chunksOf
5
.|
CList
.
chunksOf
chunkSize
.|
mapM_C
(
addDocumentsWithProgress
userCorpusId
)
.|
sinkNull
)
`
CES
.
catches
`
[
CES
.
Handler
$
\
(
e
::
ClientError
)
->
do
...
...
@@ -544,13 +548,20 @@ extractNgramsFromDocuments :: forall doc env err m.
->
TermType
Lang
->
[
doc
]
->
m
(
UncommittedNgrams
doc
)
extractNgramsFromDocuments
nlpServer
lang
docs
=
foldlM
go
mempty
docs
where
go
::
UncommittedNgrams
doc
->
doc
->
m
(
UncommittedNgrams
doc
)
go
!
acc
inputDoc
=
do
ngrams
<-
extractNgramsFromDocument
nlpServer
lang
inputDoc
pure
$
acc
<>
ngrams
extractNgramsFromDocuments
nlpServer
lang
docs
=
do
ret
<-
AsyncL
.
mapConcurrently
(
extractNgramsFromDocument
nlpServer
lang
)
docs
-- sem <- QSemL.newQSem 10
-- let f = extractNgramsFromDocument nlpServer lang
-- ret <- AsyncL.mapConcurrently (\doc ->
-- CEL.bracket_ (QSemL.waitQSem sem) (QSemL.signalQSem sem) (f doc)
-- ) docs
pure
$
foldl
(
<>
)
mempty
ret
-- foldlM go mempty docs
-- where
-- go :: UncommittedNgrams doc -> doc -> m (UncommittedNgrams doc)
-- go !acc inputDoc = do
-- ngrams <- extractNgramsFromDocument nlpServer lang inputDoc
-- pure $ acc <> ngrams
commitNgramsForDocuments
::
UniqParameters
doc
=>
UncommittedNgrams
doc
...
...
test-data/test_config.toml
View file @
e7ce7bcb
...
...
@@ -99,6 +99,10 @@ default_job_timeout = 60
# default timeout for "long" jobs (in seconds)
long_job_timeout
=
3000
# Batch size when sending data to NLP.
# Preferably, set as much as the number of CPUs
nlp_conduit_chunk_size
=
10
# NOTE This is overridden by Test.Database.Setup
[worker.database]
host
=
"127.0.0.1"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment