Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
191
Issues
191
List
Board
Labels
Milestones
Merge Requests
8
Merge Requests
8
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
4739268a
Commit
4739268a
authored
Sep 29, 2025
by
Alfredo Di Napoli
Browse files
Options
Browse Files
Download
Plain Diff
Merge remote-tracking branch 'refs/remotes/origin/adinapoli/issue-513' into adinapoli/issue-513
parents
d473eb5b
ea615b2e
Changes
20
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
256 additions
and
222 deletions
+256
-222
Ini.hs
bin/gargantext-cli/CLI/Ini.hs
+2
-1
gargantext-settings.toml_toModify
gargantext-settings.toml_toModify
+4
-0
gargantext.cabal
gargantext.cabal
+2
-0
Job.hs
src/Gargantext/API/Job.hs
+7
-2
Update.hs
src/Gargantext/API/Node/Update.hs
+1
-1
Viz.hs
src/Gargantext/API/Routes/Named/Viz.hs
+3
-5
NLP.hs
src/Gargantext/Core/Config/Ini/NLP.hs
+0
-8
NLP.hs
src/Gargantext/Core/Config/NLP.hs
+2
-3
Worker.hs
src/Gargantext/Core/Config/Worker.hs
+7
-2
API.hs
src/Gargantext/Core/Viz/Phylo/API.hs
+1
-33
Tools.hs
src/Gargantext/Core/Viz/Phylo/API/Tools.hs
+14
-3
PhyloMaker.hs
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
+15
-8
Env.hs
src/Gargantext/Core/Worker/Env.hs
+22
-6
Jobs.hs
src/Gargantext/Core/Worker/Jobs.hs
+5
-3
Types.hs
src/Gargantext/Core/Worker/Types.hs
+2
-1
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+23
-12
NgramsByContext.hs
src/Gargantext/Database/Action/Metrics/NgramsByContext.hs
+26
-20
Types.hs
src/Gargantext/System/Logging/Types.hs
+1
-1
test_config.toml
test-data/test_config.toml
+4
-0
Jobs.hs
test/Test/Utils/Jobs.hs
+115
-113
No files found.
bin/gargantext-cli/CLI/Ini.hs
View file @
4739268a
...
@@ -88,7 +88,8 @@ convertConfigs ini@(Ini.GargConfig { .. }) iniMail nlpConfig connInfo =
...
@@ -88,7 +88,8 @@ convertConfigs ini@(Ini.GargConfig { .. }) iniMail nlpConfig connInfo =
,
_wsLongJobTimeout
=
3000
,
_wsLongJobTimeout
=
3000
,
_wsDefaultDelay
=
0
,
_wsDefaultDelay
=
0
,
_wsAdditionalDelayAfterRead
=
5
,
_wsAdditionalDelayAfterRead
=
5
,
_wsDatabase
=
connInfo
{
PGS
.
connectDatabase
=
"pgmq"
}
}
,
_wsDatabase
=
connInfo
{
PGS
.
connectDatabase
=
"pgmq"
}
,
_wsNlpConduitChunkSize
=
10
}
,
_gc_logging
=
Config
.
LogConfig
{
,
_gc_logging
=
Config
.
LogConfig
{
_lc_log_level
=
INFO
_lc_log_level
=
INFO
,
_lc_log_file
=
Nothing
,
_lc_log_file
=
Nothing
...
...
gargantext-settings.toml_toModify
View file @
4739268a
...
@@ -166,6 +166,10 @@ default_job_timeout = 60
...
@@ -166,6 +166,10 @@ default_job_timeout = 60
# default timeout for "long" jobs (in seconds)
# default timeout for "long" jobs (in seconds)
long_job_timeout = 3000
long_job_timeout = 3000
# Batch size when sending data to NLP.
# Preferably, set as much as the number of CPUs
nlp_conduit_chunk_size = 10
# if you leave the same credentials as in [database] section above,
# if you leave the same credentials as in [database] section above,
# workers will try to set up the `gargantext_pgmq` database
# workers will try to set up the `gargantext_pgmq` database
# automatically
# automatically
...
...
gargantext.cabal
View file @
4739268a
...
@@ -570,6 +570,7 @@ library
...
@@ -570,6 +570,7 @@ library
, json-stream ^>= 0.4.2.4
, json-stream ^>= 0.4.2.4
, lens >= 5.2.2 && < 5.3
, lens >= 5.2.2 && < 5.3
, lens-aeson < 1.3
, lens-aeson < 1.3
, lifted-async >= 0.10 && < 0.12
, list-zipper
, list-zipper
, massiv < 1.1
, massiv < 1.1
, matrix ^>= 0.3.6.1
, matrix ^>= 0.3.6.1
...
@@ -753,6 +754,7 @@ common commonTestDependencies
...
@@ -753,6 +754,7 @@ common commonTestDependencies
, generic-arbitrary >= 1.0.1 && < 2
, generic-arbitrary >= 1.0.1 && < 2
, graphviz ^>= 2999.20.1.0
, graphviz ^>= 2999.20.1.0
, haskell-bee
, haskell-bee
, haskell-bee-pgmq
, hspec ^>= 2.11.1
, hspec ^>= 2.11.1
, hspec-expectations >= 0.8 && < 0.9
, hspec-expectations >= 0.8 && < 0.9
, hspec-expectations-lifted < 0.11
, hspec-expectations-lifted < 0.11
...
...
src/Gargantext/API/Job.hs
View file @
4739268a
...
@@ -23,7 +23,7 @@ module Gargantext.API.Job (
...
@@ -23,7 +23,7 @@ module Gargantext.API.Job (
,
addWarningEvent
,
addWarningEvent
)
where
)
where
import
Control.Lens
(
over
,
_Just
)
import
Control.Lens
(
(
%~
),
over
,
_Just
)
import
Data.Text
qualified
as
T
import
Data.Text
qualified
as
T
import
Gargantext.API.Admin.Orchestrator.Types
import
Gargantext.API.Admin.Orchestrator.Types
import
Gargantext.Prelude
import
Gargantext.Prelude
...
@@ -66,7 +66,12 @@ jobLogComplete jl =
...
@@ -66,7 +66,12 @@ jobLogComplete jl =
&
over
scst_remaining
(
const
(
Just
0
))
&
over
scst_remaining
(
const
(
Just
0
))
jobLogAddMore
::
Int
->
JobLog
->
JobLog
jobLogAddMore
::
Int
->
JobLog
->
JobLog
jobLogAddMore
moreSteps
jl
=
jl
&
over
(
scst_remaining
.
_Just
)
(
+
moreSteps
)
jobLogAddMore
moreSteps
jl
=
jl
&
scst_remaining
%~
(
maybe
(
Just
0
)
Just
)
&
scst_succeeded
%~
(
maybe
(
Just
0
)
Just
)
&
scst_failed
%~
(
maybe
(
Just
0
)
Just
)
&
scst_events
%~
(
maybe
(
Just
[]
)
Just
)
&
(
scst_remaining
.
_Just
)
%~
(
+
moreSteps
)
jobLogFailures
::
Int
->
JobLog
->
JobLog
jobLogFailures
::
Int
->
JobLog
->
JobLog
jobLogFailures
n
jl
=
over
(
scst_failed
.
_Just
)
(
+
n
)
$
jobLogFailures
n
jl
=
over
(
scst_failed
.
_Just
)
(
+
n
)
$
...
...
src/Gargantext/API/Node/Update.hs
View file @
4739268a
...
@@ -131,7 +131,7 @@ updateNode phyloId (UpdateNodePhylo config) jobHandle = do
...
@@ -131,7 +131,7 @@ updateNode phyloId (UpdateNodePhylo config) jobHandle = do
let
corpusId
=
fromMaybe
(
panicTrace
"no corpus id"
)
corpusId'
let
corpusId
=
fromMaybe
(
panicTrace
"no corpus id"
)
corpusId'
phy
<-
timeMeasured
"updateNode.flowPhyloAPI"
$
flowPhyloAPI
(
subConfigAPI2config
config
)
mbComputeHistory
corpusId
phy
<-
timeMeasured
"updateNode.flowPhyloAPI"
$
flowPhyloAPI
(
subConfigAPI2config
config
)
mbComputeHistory
corpusId
jobHandle
markProgress
1
jobHandle
markProgress
1
jobHandle
{-
{-
...
...
src/Gargantext/API/Routes/Named/Viz.hs
View file @
4739268a
...
@@ -4,7 +4,6 @@ module Gargantext.API.Routes.Named.Viz (
...
@@ -4,7 +4,6 @@ module Gargantext.API.Routes.Named.Viz (
-- * Routes types
-- * Routes types
PhyloAPI
(
..
)
PhyloAPI
(
..
)
,
GetPhylo
(
..
)
,
GetPhylo
(
..
)
,
PostPhylo
(
..
)
,
GraphAPI
(
..
)
,
GraphAPI
(
..
)
,
GraphAsyncAPI
(
..
)
,
GraphAsyncAPI
(
..
)
,
GraphVersionsAPI
(
..
)
,
GraphVersionsAPI
(
..
)
...
@@ -31,7 +30,6 @@ import Servant.XML.Conduit (XML)
...
@@ -31,7 +30,6 @@ import Servant.XML.Conduit (XML)
data
PhyloAPI
mode
=
PhyloAPI
data
PhyloAPI
mode
=
PhyloAPI
{
getPhyloEp
::
mode
:-
Summary
"Phylo API"
:>
NamedRoutes
GetPhylo
{
getPhyloEp
::
mode
:-
Summary
"Phylo API"
:>
NamedRoutes
GetPhylo
,
postPhyloEp
::
mode
:-
NamedRoutes
PostPhylo
}
deriving
Generic
}
deriving
Generic
...
@@ -43,9 +41,9 @@ newtype GetPhylo mode = GetPhylo
...
@@ -43,9 +41,9 @@ newtype GetPhylo mode = GetPhylo
}
deriving
Generic
}
deriving
Generic
newtype
PostPhylo
mode
=
PostPhylo
--
newtype PostPhylo mode = PostPhylo
{
postPhyloByListIdEp
::
mode
:-
QueryParam
"listId"
ListId
:>
(
Post
'[
J
SON
]
NodeId
)
--
{ postPhyloByListIdEp :: mode :- QueryParam "listId" ListId :> (Post '[JSON] NodeId)
}
deriving
Generic
--
} deriving Generic
-- | There is no Delete specific API for Graph since it can be deleted
-- | There is no Delete specific API for Graph since it can be deleted
...
...
src/Gargantext/Core/Config/Ini/NLP.hs
View file @
4739268a
...
@@ -9,19 +9,12 @@ Portability : POSIX
...
@@ -9,19 +9,12 @@ Portability : POSIX
-}
-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Core.Config.Ini.NLP
(
module
Gargantext.Core.Config.Ini.NLP
(
-- * Types
-- * Types
NLPConfig
(
..
)
NLPConfig
(
..
)
-- * Utility functions
-- * Utility functions
,
readConfig
,
readConfig
-- * Lenses
,
nlp_default
,
nlp_languages
)
)
where
where
...
@@ -59,4 +52,3 @@ readConfig fp = do
...
@@ -59,4 +52,3 @@ readConfig fp = do
,
T
.
pack
$
show
m_nlp_other
]
,
T
.
pack
$
show
m_nlp_other
]
Just
ret
->
pure
ret
Just
ret
->
pure
ret
makeLenses
''
N
LPConfig
src/Gargantext/Core/Config/NLP.hs
View file @
4739268a
...
@@ -19,7 +19,6 @@ module Gargantext.Core.Config.NLP (
...
@@ -19,7 +19,6 @@ module Gargantext.Core.Config.NLP (
-- * Lenses
-- * Lenses
,
nlp_default
,
nlp_default
,
nlp_languages
,
nlp_languages
)
)
where
where
...
@@ -48,9 +47,9 @@ data NLPConfig = NLPConfig { _nlp_default :: URI
...
@@ -48,9 +47,9 @@ data NLPConfig = NLPConfig { _nlp_default :: URI
instance
FromValue
NLPConfig
where
instance
FromValue
NLPConfig
where
fromValue
v
=
do
fromValue
v
=
do
_nlp_default
<-
parseTableFromValue
(
reqKey
"EN"
)
v
_nlp_default
<-
parseTableFromValue
(
reqKey
"EN"
)
v
-- _nlp_languages <- fromValue <$> getTable
MkTable
t
<-
parseTableFromValue
getTable
v
MkTable
t
<-
parseTableFromValue
getTable
v
_nlp_languages
<-
mapM
fromValue
(
snd
<$>
t
)
_nlp_languages
<-
mapM
fromValue
(
snd
<$>
t
)
return
$
NLPConfig
{
..
}
return
$
NLPConfig
{
..
}
instance
ToValue
NLPConfig
where
instance
ToValue
NLPConfig
where
toValue
=
defaultTableToValue
toValue
=
defaultTableToValue
...
@@ -58,7 +57,7 @@ instance ToTable NLPConfig where
...
@@ -58,7 +57,7 @@ instance ToTable NLPConfig where
toTable
(
NLPConfig
{
..
})
=
toTable
(
NLPConfig
{
..
})
=
table
([
k
.=
v
|
(
k
,
v
)
<-
Map
.
toList
_nlp_languages
]
table
([
k
.=
v
|
(
k
,
v
)
<-
Map
.
toList
_nlp_languages
]
-- output the default "EN" language as well
-- output the default "EN" language as well
<>
[
(
"EN"
::
Text
)
.=
_nlp_default
])
<>
[
(
"EN"
::
Text
)
.=
_nlp_default
]
)
-- readConfig :: SettingsFile -> IO NLPConfig
-- readConfig :: SettingsFile -> IO NLPConfig
...
...
src/Gargantext/Core/Config/Worker.hs
View file @
4739268a
...
@@ -53,6 +53,8 @@ data WorkerSettings =
...
@@ -53,6 +53,8 @@ data WorkerSettings =
,
_wsDefaultDelay
::
B
.
TimeoutS
,
_wsDefaultDelay
::
B
.
TimeoutS
,
_wsAdditionalDelayAfterRead
::
B
.
TimeoutS
,
_wsAdditionalDelayAfterRead
::
B
.
TimeoutS
,
_wsDefinitions
::
!
[
WorkerDefinition
]
,
_wsDefinitions
::
!
[
WorkerDefinition
]
,
_wsNlpConduitChunkSize
::
Int
}
deriving
(
Show
,
Eq
)
}
deriving
(
Show
,
Eq
)
instance
FromValue
WorkerSettings
where
instance
FromValue
WorkerSettings
where
fromValue
=
parseTableFromValue
$
do
fromValue
=
parseTableFromValue
$
do
...
@@ -61,6 +63,7 @@ instance FromValue WorkerSettings where
...
@@ -61,6 +63,7 @@ instance FromValue WorkerSettings where
_wsDefaultVisibilityTimeout
<-
reqKey
"default_visibility_timeout"
_wsDefaultVisibilityTimeout
<-
reqKey
"default_visibility_timeout"
_wsDefaultJobTimeout
<-
reqKey
"default_job_timeout"
_wsDefaultJobTimeout
<-
reqKey
"default_job_timeout"
_wsLongJobTimeout
<-
reqKey
"long_job_timeout"
_wsLongJobTimeout
<-
reqKey
"long_job_timeout"
_wsNlpConduitChunkSize
<-
reqKey
"nlp_conduit_chunk_size"
defaultDelay
<-
reqKey
"default_delay"
defaultDelay
<-
reqKey
"default_delay"
additionalDelayAfterRead
<-
reqKey
"additional_delay_after_read"
additionalDelayAfterRead
<-
reqKey
"additional_delay_after_read"
return
$
WorkerSettings
{
_wsDatabase
=
unTOMLConnectInfo
dbConfig
return
$
WorkerSettings
{
_wsDatabase
=
unTOMLConnectInfo
dbConfig
...
@@ -69,7 +72,8 @@ instance FromValue WorkerSettings where
...
@@ -69,7 +72,8 @@ instance FromValue WorkerSettings where
,
_wsDefinitions
,
_wsDefinitions
,
_wsDefaultVisibilityTimeout
,
_wsDefaultVisibilityTimeout
,
_wsDefaultDelay
=
B
.
TimeoutS
defaultDelay
,
_wsDefaultDelay
=
B
.
TimeoutS
defaultDelay
,
_wsAdditionalDelayAfterRead
=
B
.
TimeoutS
additionalDelayAfterRead
}
,
_wsAdditionalDelayAfterRead
=
B
.
TimeoutS
additionalDelayAfterRead
,
_wsNlpConduitChunkSize
}
instance
ToValue
WorkerSettings
where
instance
ToValue
WorkerSettings
where
toValue
=
defaultTableToValue
toValue
=
defaultTableToValue
instance
ToTable
WorkerSettings
where
instance
ToTable
WorkerSettings
where
...
@@ -80,7 +84,8 @@ instance ToTable WorkerSettings where
...
@@ -80,7 +84,8 @@ instance ToTable WorkerSettings where
,
"default_visibility_timeout"
.=
_wsDefaultVisibilityTimeout
,
"default_visibility_timeout"
.=
_wsDefaultVisibilityTimeout
,
"default_delay"
.=
B
.
_TimeoutS
_wsDefaultDelay
,
"default_delay"
.=
B
.
_TimeoutS
_wsDefaultDelay
,
"additional_delay_after_read"
.=
B
.
_TimeoutS
_wsAdditionalDelayAfterRead
,
"additional_delay_after_read"
.=
B
.
_TimeoutS
_wsAdditionalDelayAfterRead
,
"definitions"
.=
_wsDefinitions
]
,
"definitions"
.=
_wsDefinitions
,
"nlp_conduit_chunk_size"
.=
_wsNlpConduitChunkSize
]
data
WorkerDefinition
=
data
WorkerDefinition
=
WorkerDefinition
{
WorkerDefinition
{
...
...
src/Gargantext/Core/Viz/Phylo/API.hs
View file @
4739268a
...
@@ -26,14 +26,12 @@ import Gargantext.API.Routes.Named.Viz qualified as Named
...
@@ -26,14 +26,12 @@ import Gargantext.API.Routes.Named.Viz qualified as Named
import
Gargantext.API.Viz.Types
import
Gargantext.API.Viz.Types
import
Gargantext.Core.Types.Phylo
(
GraphData
(
..
))
import
Gargantext.Core.Types.Phylo
(
GraphData
(
..
))
import
Gargantext.Core.Viz.LegacyPhylo
hiding
(
Phylo
(
..
))
import
Gargantext.Core.Viz.LegacyPhylo
hiding
(
Phylo
(
..
))
import
Gargantext.Core.Viz.Phylo
(
PhyloConfig
(
..
),
defaultConfig
,
_phylo_param
,
_phyloParam_config
)
import
Gargantext.Core.Viz.Phylo
(
PhyloConfig
(
..
),
_phylo_param
,
_phyloParam_config
)
import
Gargantext.Core.Viz.Phylo.API.Tools
import
Gargantext.Core.Viz.Phylo.API.Tools
import
Gargantext.Database.Prelude
import
Gargantext.Database.Prelude
import
Gargantext.Database.Admin.Types.Hyperdata
import
Gargantext.Database.Admin.Types.Node
-- (PhyloId, ListId, CorpusId, UserId, NodeId(..))
import
Gargantext.Database.Admin.Types.Node
-- (PhyloId, ListId, CorpusId, UserId, NodeId(..))
import
Gargantext.Database.Query.Table.Node
(
getClosestParentIdByType
,
defaultList
)
import
Gargantext.Database.Query.Table.Node
(
getClosestParentIdByType
,
defaultList
)
import
Gargantext.Database.Query.Table.Node.Error
import
Gargantext.Database.Query.Table.Node.Error
import
Gargantext.Database.Query.Table.Node.UpdateOpaleye
(
updateHyperdata
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Servant
import
Servant
import
Servant.Server.Generic
(
AsServerT
)
import
Servant.Server.Generic
(
AsServerT
)
...
@@ -42,7 +40,6 @@ import Web.HttpApiData (readTextData)
...
@@ -42,7 +40,6 @@ import Web.HttpApiData (readTextData)
phyloAPI
::
IsGargServer
err
env
m
=>
PhyloId
->
Named
.
PhyloAPI
(
AsServerT
m
)
phyloAPI
::
IsGargServer
err
env
m
=>
PhyloId
->
Named
.
PhyloAPI
(
AsServerT
m
)
phyloAPI
n
=
Named
.
PhyloAPI
phyloAPI
n
=
Named
.
PhyloAPI
{
getPhyloEp
=
getPhylo
n
{
getPhyloEp
=
getPhylo
n
,
postPhyloEp
=
postPhylo
n
}
}
-- :<|> putPhylo n
-- :<|> putPhylo n
-- :<|> deletePhylo n
-- :<|> deletePhylo n
...
@@ -94,35 +91,6 @@ getPhyloDataJson phyloId = do
...
@@ -94,35 +91,6 @@ getPhyloDataJson phyloId = do
-- pure (SVG p)
-- pure (SVG p)
-- FIXME(adn) This handler mixes DB reads with updates outside of the same
-- transaction, due to the call to 'flowPhyloAPI' in the middle.
postPhylo
::
IsGargServer
err
env
m
=>
PhyloId
->
Named
.
PostPhylo
(
AsServerT
m
)
postPhylo
phyloId
=
Named
.
PostPhylo
$
\
_lId
->
do
-- TODO get Reader settings
-- s <- ask
-- let
-- _vrs = Just ("1" :: Text)
-- _sft = Just (Software "Gargantext" "4")
-- _prm = initPhyloParam vrs sft (Just q)
corpusId
<-
runDBQuery
$
getClosestParentIdByType
phyloId
NodeCorpus
-- Being the first time we ask for the Phylo, there is no historical data
-- available about computing time, so we pass 'Nothing'.
phy
<-
flowPhyloAPI
defaultConfig
Nothing
(
fromMaybe
(
panicTrace
"[G.C.V.P.API] no corpus ID found"
)
corpusId
)
-- params
-- phyloId <- insertNodes [node NodePhylo "Phylo" (HyperdataPhylo Nothing (Just phy)) (Just corpusId) userId]
_
<-
runDBTx
$
updateHyperdata
phyloId
(
HyperdataPhylo
Nothing
(
Just
phy
))
pure
phyloId
------------------------------------------------------------------------
-- | DELETE Phylo == delete a node
------------------------------------------------------------------------
------------------------------------------------------------------------
{-
type PutPhylo = (Put '[JSON] Phylo )
--putPhylo :: PhyloId -> Maybe ListId -> PhyloQueryBuild -> Phylo
putPhylo :: PhyloId -> GargServer PutPhylo
putPhylo = undefined
-}
-- | Instances
-- | Instances
instance
FromHttpApiData
DisplayMode
where
parseUrlPiece
=
readTextData
instance
FromHttpApiData
DisplayMode
where
parseUrlPiece
=
readTextData
...
...
src/Gargantext/Core/Viz/Phylo/API/Tools.hs
View file @
4739268a
...
@@ -49,6 +49,7 @@ import Gargantext.Database.Schema.Context ( ContextPoly(_context_hyperdata, _con
...
@@ -49,6 +49,7 @@ import Gargantext.Database.Schema.Context ( ContextPoly(_context_hyperdata, _con
import
Gargantext.Database.Schema.Node
(
NodePoly
(
_node_hyperdata
),
node_hyperdata
)
import
Gargantext.Database.Schema.Node
(
NodePoly
(
_node_hyperdata
),
node_hyperdata
)
import
Gargantext.Prelude
hiding
(
to
)
import
Gargantext.Prelude
hiding
(
to
)
import
Gargantext.System.Logging
(
MonadLogger
,
LogLevel
(
DEBUG
),
logLocM
)
import
Gargantext.System.Logging
(
MonadLogger
,
LogLevel
(
DEBUG
),
logLocM
)
import
Gargantext.Utils.Jobs.Monad
(
JobHandle
,
MonadJobStatus
(
markProgress
,
addMoreSteps
))
import
Gargantext.Utils.UTCTime
(
timeMeasured
,
timeMeasured''
)
import
Gargantext.Utils.UTCTime
(
timeMeasured
,
timeMeasured''
)
import
Prelude
qualified
import
Prelude
qualified
import
System.FilePath
((
</>
))
import
System.FilePath
((
</>
))
...
@@ -110,25 +111,35 @@ phylo2dot phylo = do
...
@@ -110,25 +111,35 @@ phylo2dot phylo = do
_
->
pure
value
_
->
pure
value
flowPhyloAPI
::
(
HasNodeStory
env
err
m
,
HasNodeError
err
,
MonadLogger
m
)
flowPhyloAPI
::
(
HasNodeStory
env
err
m
,
HasNodeError
err
,
MonadLogger
m
,
MonadJobStatus
m
)
=>
PhyloConfig
=>
PhyloConfig
->
Maybe
ComputeTimeHistory
->
Maybe
ComputeTimeHistory
-- ^ Previous compute time historical data, if any.
-- ^ Previous compute time historical data, if any.
->
CorpusId
->
CorpusId
->
JobHandle
m
->
m
Phylo
->
m
Phylo
flowPhyloAPI
config
mbOldComputeHistory
cId
=
do
flowPhyloAPI
config
mbOldComputeHistory
cId
jobHandle
=
do
env
<-
view
hasNodeStory
env
<-
view
hasNodeStory
addMoreSteps
5
jobHandle
corpus
<-
timeMeasured
"flowPhyloAPI.corpusIdtoDocuments"
$
runDBQuery
$
corpusIdtoDocuments
env
(
timeUnit
config
)
cId
corpus
<-
timeMeasured
"flowPhyloAPI.corpusIdtoDocuments"
$
runDBQuery
$
corpusIdtoDocuments
env
(
timeUnit
config
)
cId
markProgress
1
jobHandle
-- writePhylo phyloWithCliquesFile phyloWithCliques
-- writePhylo phyloWithCliquesFile phyloWithCliques
$
(
logLocM
)
DEBUG
$
"PhyloConfig old: "
<>
show
config
$
(
logLocM
)
DEBUG
$
"PhyloConfig old: "
<>
show
config
(
t1
,
phyloWithCliques
)
<-
timeMeasured''
DEBUG
"flowPhyloAPI.phyloWithCliques"
(
pure
$!
toPhyloWithoutLink
corpus
config
)
(
t1
,
phyloWithCliques
)
<-
timeMeasured''
DEBUG
"flowPhyloAPI.phyloWithCliques"
(
pure
$!
toPhyloWithoutLink
corpus
config
)
markProgress
1
jobHandle
(
t2
,
phyloConfigured
)
<-
timeMeasured''
DEBUG
"flowPhyloAPI.phyloConfigured"
(
pure
$!
setConfig
config
phyloWithCliques
)
(
t2
,
phyloConfigured
)
<-
timeMeasured''
DEBUG
"flowPhyloAPI.phyloConfigured"
(
pure
$!
setConfig
config
phyloWithCliques
)
markProgress
1
jobHandle
(
t3
,
finalPhylo
)
<-
timeMeasured''
DEBUG
"flowPhyloAPI.toPhylo"
(
pure
$!
toPhylo
phyloConfigured
)
(
t3
,
finalPhylo
)
<-
timeMeasured''
DEBUG
"flowPhyloAPI.toPhylo"
(
pure
$!
toPhylo
phyloConfigured
)
markProgress
1
jobHandle
-- As the phylo is computed fresh every time, without looking at the one stored (if any), we
-- As the phylo is computed fresh every time, without looking at the one stored (if any), we
-- have to manually propagate computing time across.
-- have to manually propagate computing time across.
pure
$!
trackComputeTime
(
t1
+
t2
+
t3
)
(
finalPhylo
{
_phylo_computeTime
=
mbOldComputeHistory
})
let
ret
=
trackComputeTime
(
t1
+
t2
+
t3
)
(
finalPhylo
{
_phylo_computeTime
=
mbOldComputeHistory
})
markProgress
1
jobHandle
pure
ret
--------------------------------------------------------------------
--------------------------------------------------------------------
corpusIdtoDocuments
::
HasNodeError
err
corpusIdtoDocuments
::
HasNodeError
err
...
...
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
View file @
4739268a
...
@@ -16,7 +16,7 @@ module Gargantext.Core.Viz.Phylo.PhyloMaker where
...
@@ -16,7 +16,7 @@ module Gargantext.Core.Viz.Phylo.PhyloMaker where
import
Control.Lens
hiding
(
Level
)
import
Control.Lens
hiding
(
Level
)
import
Control.Parallel.Strategies
(
parMap
,
rpar
)
import
Control.Parallel.Strategies
(
parMap
,
rpar
,
Strategy
)
import
Data.Containers.ListUtils
(
nubOrd
)
import
Data.Containers.ListUtils
(
nubOrd
)
import
Data.Discrimination
qualified
as
D
import
Data.Discrimination
qualified
as
D
import
Data.List
(
partition
,
intersect
,
tail
)
import
Data.List
(
partition
,
intersect
,
tail
)
...
@@ -37,6 +37,13 @@ import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
...
@@ -37,6 +37,13 @@ import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import
Gargantext.Core.Viz.Phylo.TemporalMatching
(
toPhyloQuality
,
temporalMatching
,
getNextPeriods
,
filterDocs
,
filterDiago
,
reduceDiagos
,
toSimilarity
)
import
Gargantext.Core.Viz.Phylo.TemporalMatching
(
toPhyloQuality
,
temporalMatching
,
getNextPeriods
,
filterDocs
,
filterDiago
,
reduceDiagos
,
toSimilarity
)
import
Gargantext.Prelude
hiding
(
empty
,
toList
)
import
Gargantext.Prelude
hiding
(
empty
,
toList
)
defaultStrategy
::
Strategy
a
defaultStrategy
=
rpar
------------------
------------------
-- | To Phylo | --
-- | To Phylo | --
------------------
------------------
...
@@ -151,7 +158,7 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd
...
@@ -151,7 +158,7 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd
--------
--------
-- 1.1) for each measure of similarity, prune the flat phylo, compute the branches and estimate the quality
-- 1.1) for each measure of similarity, prune the flat phylo, compute the branches and estimate the quality
qua
::
[
Double
]
qua
::
[
Double
]
qua
=
parMap
rpar
(
\
thr
->
qua
=
parMap
defaultStrategy
(
\
thr
->
let
edges
=
filter
(
\
edge
->
snd
edge
>=
thr
)
graph
let
edges
=
filter
(
\
edge
->
snd
edge
>=
thr
)
graph
nodes
=
nubOrd
$
concatMap
(
\
((
n
,
n'
),
_
)
->
[
n
,
n'
])
edges
nodes
=
nubOrd
$
concatMap
(
\
((
n
,
n'
),
_
)
->
[
n
,
n'
])
edges
branches
=
toRelatedComponents
nodes
edges
branches
=
toRelatedComponents
nodes
edges
...
@@ -192,7 +199,7 @@ findSeaLadder phylo = case getSeaElevation phylo of
...
@@ -192,7 +199,7 @@ findSeaLadder phylo = case getSeaElevation phylo of
docs
=
filterDocs
(
getDocsByDate
phylo
)
([
period
]
++
next
)
docs
=
filterDocs
(
getDocsByDate
phylo
)
([
period
]
++
next
)
diagos
=
filterDiago
(
getCoocByDate
phylo
)
([
period
]
++
next
)
diagos
=
filterDiago
(
getCoocByDate
phylo
)
([
period
]
++
next
)
-- 1.2) compute the kinship similarities between pairs of source & target in parallel
-- 1.2) compute the kinship similarities between pairs of source & target in parallel
pairs
=
parMap
rpar
(
\
source
->
pairs
=
parMap
defaultStrategy
(
\
source
->
let
candidates
=
filter
(
\
target
->
(
>
2
)
$
length
let
candidates
=
filter
(
\
target
->
(
>
2
)
$
length
$
intersect
(
getGroupNgrams
source
)
(
getGroupNgrams
target
))
targets
$
intersect
(
getGroupNgrams
source
)
(
getGroupNgrams
target
))
targets
in
map
(
\
target
->
in
map
(
\
target
->
...
@@ -330,7 +337,7 @@ filterCliqueBySize thr l = filter (\clq -> (length $ clq ^. clustering_roots) >=
...
@@ -330,7 +337,7 @@ filterCliqueBySize thr l = filter (\clq -> (length $ clq ^. clustering_roots) >=
-- To filter nested Fis
-- To filter nested Fis
filterCliqueByNested
::
Map
(
Date
,
Date
)
[
Clustering
]
->
Map
(
Date
,
Date
)
[
Clustering
]
filterCliqueByNested
::
Map
(
Date
,
Date
)
[
Clustering
]
->
Map
(
Date
,
Date
)
[
Clustering
]
filterCliqueByNested
m
=
filterCliqueByNested
m
=
let
clq
=
parMap
rpar
(
\
l
->
let
clq
=
parMap
defaultStrategy
(
\
l
->
foldl
(
\
mem
f
->
if
(
any
(
\
f'
->
isNested
(
f'
^.
clustering_roots
)
(
f
^.
clustering_roots
))
mem
)
foldl
(
\
mem
f
->
if
(
any
(
\
f'
->
isNested
(
f'
^.
clustering_roots
)
(
f
^.
clustering_roots
))
mem
)
then
mem
then
mem
else
else
...
@@ -358,7 +365,7 @@ toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of
...
@@ -358,7 +365,7 @@ toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of
seriesOfClustering
::
Map
(
Date
,
Date
)
[
Clustering
]
seriesOfClustering
::
Map
(
Date
,
Date
)
[
Clustering
]
seriesOfClustering
=
case
(
clique
$
getConfig
phylo
)
of
seriesOfClustering
=
case
(
clique
$
getConfig
phylo
)
of
Fis
_
_
->
Fis
_
_
->
let
fis
=
parMap
rpar
(
\
(
prd
,
docs
)
->
let
fis
=
parMap
defaultStrategy
(
\
(
prd
,
docs
)
->
case
(
corpusParser
$
getConfig
phylo
)
of
case
(
corpusParser
$
getConfig
phylo
)
of
Tsv'
_
->
let
lst
=
toList
Tsv'
_
->
let
lst
=
toList
$
fisWithSizePolyMap'
(
Segment
1
20
)
1
(
map
(
\
d
->
(
ngramsToIdx
(
text
d
)
(
getRoots
phylo
),
(
weight
d
,
(
sourcesToIdx
(
sources
d
)
(
getSources
phylo
)))))
docs
)
$
fisWithSizePolyMap'
(
Segment
1
20
)
1
(
map
(
\
d
->
(
ngramsToIdx
(
text
d
)
(
getRoots
phylo
),
(
weight
d
,
(
sourcesToIdx
(
sources
d
)
(
getSources
phylo
)))))
docs
)
...
@@ -370,7 +377,7 @@ toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of
...
@@ -370,7 +377,7 @@ toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of
$
toList
phyloDocs
$
toList
phyloDocs
in
fromList
fis
in
fromList
fis
MaxClique
_
thr
filterType
->
MaxClique
_
thr
filterType
->
let
mcl
=
parMap
rpar
(
\
(
prd
,
docs
)
->
let
mcl
=
parMap
defaultStrategy
(
\
(
prd
,
docs
)
->
let
cooc
=
map
round
let
cooc
=
map
round
$
foldl
sumCooc
empty
$
foldl
sumCooc
empty
$
map
listToMatrix
$
map
listToMatrix
...
@@ -422,7 +429,7 @@ groupDocsByPeriodRec f prds docs acc =
...
@@ -422,7 +429,7 @@ groupDocsByPeriodRec f prds docs acc =
groupDocsByPeriod'
::
(
NFData
doc
,
Ord
date
,
Enum
date
)
=>
(
doc
->
date
)
->
[(
date
,
date
)]
->
[
doc
]
->
Map
(
date
,
date
)
[
doc
]
groupDocsByPeriod'
::
(
NFData
doc
,
Ord
date
,
Enum
date
)
=>
(
doc
->
date
)
->
[(
date
,
date
)]
->
[
doc
]
->
Map
(
date
,
date
)
[
doc
]
groupDocsByPeriod'
f
pds
docs
=
groupDocsByPeriod'
f
pds
docs
=
let
docs'
=
groupBy
(
\
d
d'
->
f
d
==
f
d'
)
$
sortOn
f
docs
let
docs'
=
groupBy
(
\
d
d'
->
f
d
==
f
d'
)
$
sortOn
f
docs
periods
=
parMap
rpar
(
inPeriode
f
docs'
)
pds
periods
=
parMap
defaultStrategy
(
inPeriode
f
docs'
)
pds
in
tracePhylo
(
"
\n
"
<>
"-- | Group "
in
tracePhylo
(
"
\n
"
<>
"-- | Group "
<>
show
(
length
docs
)
<>
show
(
length
docs
)
<>
" docs by "
<>
" docs by "
...
@@ -440,7 +447,7 @@ groupDocsByPeriod' f pds docs =
...
@@ -440,7 +447,7 @@ groupDocsByPeriod' f pds docs =
groupDocsByPeriod
::
(
NFData
doc
,
Ord
date
,
Enum
date
)
=>
(
doc
->
date
)
->
[(
date
,
date
)]
->
[
doc
]
->
Map
(
date
,
date
)
[
doc
]
groupDocsByPeriod
::
(
NFData
doc
,
Ord
date
,
Enum
date
)
=>
(
doc
->
date
)
->
[(
date
,
date
)]
->
[
doc
]
->
Map
(
date
,
date
)
[
doc
]
groupDocsByPeriod
_
_
[]
=
panic
"[ERR][Viz.Phylo.PhyloMaker] Empty [Documents] can not have any periods"
groupDocsByPeriod
_
_
[]
=
panic
"[ERR][Viz.Phylo.PhyloMaker] Empty [Documents] can not have any periods"
groupDocsByPeriod
f
pds
es
=
groupDocsByPeriod
f
pds
es
=
let
periods
=
parMap
rpar
(
inPeriode
f
es
)
pds
let
periods
=
parMap
defaultStrategy
(
inPeriode
f
es
)
pds
in
tracePhylo
(
"
\n
"
<>
"-- | Group "
in
tracePhylo
(
"
\n
"
<>
"-- | Group "
<>
show
(
length
es
)
<>
" docs by "
<>
show
(
length
es
)
<>
" docs by "
...
...
src/Gargantext/Core/Worker/Env.hs
View file @
4739268a
...
@@ -9,8 +9,9 @@ Portability : POSIX
...
@@ -9,8 +9,9 @@ Portability : POSIX
-}
-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE UndecidableInstances #-}
{-# OPTIONS_GHC -Wno-orphans #-}
-- orphan HasNodeError BackendInternalError
{-# OPTIONS_GHC -Wno-orphans #-}
-- orphan HasNodeError BackendInternalError
...
@@ -25,6 +26,7 @@ import Control.Lens.TH
...
@@ -25,6 +26,7 @@ import Control.Lens.TH
import
Control.Monad.Trans.Control
(
MonadBaseControl
)
import
Control.Monad.Trans.Control
(
MonadBaseControl
)
import
Data.Maybe
(
fromJust
)
import
Data.Maybe
(
fromJust
)
import
Data.Pool
qualified
as
Pool
import
Data.Pool
qualified
as
Pool
import
Data.Text
qualified
as
T
import
Database.PostgreSQL.Simple
qualified
as
PSQL
import
Database.PostgreSQL.Simple
qualified
as
PSQL
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
,
noJobLog
)
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
,
noJobLog
)
import
Gargantext.API.Errors
(
BackendInternalError
)
import
Gargantext.API.Errors
(
BackendInternalError
)
...
@@ -42,7 +44,7 @@ import Gargantext.Core.Notifications.CentralExchange.Types qualified as CET
...
@@ -42,7 +44,7 @@ import Gargantext.Core.Notifications.CentralExchange.Types qualified as CET
import
Gargantext.Core.Worker.Types
(
JobInfo
(
..
))
import
Gargantext.Core.Worker.Types
(
JobInfo
(
..
))
import
Gargantext.Database.Prelude
(
HasConnectionPool
(
..
))
import
Gargantext.Database.Prelude
(
HasConnectionPool
(
..
))
import
Gargantext.Prelude
hiding
(
to
)
import
Gargantext.Prelude
hiding
(
to
)
import
Gargantext.System.Logging
(
HasLogger
(
..
),
Logger
,
LogLevel
(
..
),
MonadLogger
(
..
),
withLogger
,
logMsg
,
withLoggerIO
)
import
Gargantext.System.Logging
(
HasLogger
(
..
),
Logger
,
LogLevel
(
..
),
MonadLogger
(
..
),
withLogger
,
logMsg
,
logLocM
,
withLoggerIO
)
import
Gargantext.System.Logging.Loggers
import
Gargantext.System.Logging.Loggers
import
Gargantext.Utils.Jobs.Monad
(
MonadJobStatus
(
..
),
JobHandle
)
import
Gargantext.Utils.Jobs.Monad
(
MonadJobStatus
(
..
),
JobHandle
)
import
System.Log.FastLogger
qualified
as
FL
import
System.Log.FastLogger
qualified
as
FL
...
@@ -182,9 +184,21 @@ instance MonadJobStatus WorkerMonad where
...
@@ -182,9 +184,21 @@ instance MonadJobStatus WorkerMonad where
type
JobEventType
WorkerMonad
=
JobLog
type
JobEventType
WorkerMonad
=
JobLog
noJobHandle
Proxy
=
WorkerNoJobHandle
noJobHandle
Proxy
=
WorkerNoJobHandle
getLatestJobStatus
_
=
WorkerMonad
(
pure
noJobLog
)
getLatestJobStatus
WorkerNoJobHandle
=
pure
noJobLog
getLatestJobStatus
(
WorkerJobHandle
ji
)
=
do
stateTVar
<-
asks
_w_env_job_state
state'
<-
liftIO
$
readTVarIO
stateTVar
pure
$
case
state'
of
Nothing
->
noJobLog
Just
wjs
->
if
_wjs_job_info
wjs
==
ji
then
_wjs_job_log
wjs
else
noJobLog
withTracer
_
jh
n
=
n
jh
withTracer
_
jh
n
=
n
jh
markStarted
n
jh
=
updateJobProgress
jh
(
const
$
jobLogStart
$
RemainingSteps
n
)
markStarted
n
jh
=
updateJobProgress
jh
(
const
$
jobLogStart
$
RemainingSteps
n
)
markProgress
steps
jh
=
updateJobProgress
jh
(
jobLogProgress
steps
)
markProgress
steps
jh
=
updateJobProgress
jh
(
jobLogProgress
steps
)
markFailure
steps
mb_msg
jh
=
markFailure
steps
mb_msg
jh
=
updateJobProgress
jh
(
\
latest
->
case
mb_msg
of
updateJobProgress
jh
(
\
latest
->
case
mb_msg
of
...
@@ -208,7 +222,9 @@ updateJobProgress (WorkerJobHandle (ji@JobInfo { _ji_message_id })) f = do
...
@@ -208,7 +222,9 @@ updateJobProgress (WorkerJobHandle (ji@JobInfo { _ji_message_id })) f = do
case
state'
of
case
state'
of
Nothing
->
pure
()
Nothing
->
pure
()
Just
wjs
->
do
Just
wjs
->
do
CET
.
ce_notify
$
CET
.
UpdateWorkerProgress
ji
(
_wjs_job_log
wjs
)
(
CET
.
ce_notify
$
CET
.
UpdateWorkerProgress
ji
(
_wjs_job_log
wjs
))
`
CES
.
catch
`
(
\
(
e
::
SomeException
)
->
$
(
logLocM
)
WARNING
$
T
.
pack
$
displayException
e
)
where
where
updateState
mwjs
=
updateState
mwjs
=
let
initJobLog
=
let
initJobLog
=
...
...
src/Gargantext/Core/Worker/Jobs.hs
View file @
4739268a
...
@@ -47,9 +47,11 @@ sendJobWithCfg gcConfig job = do
...
@@ -47,9 +47,11 @@ sendJobWithCfg gcConfig job = do
b
<-
initBrokerWithDBCreate
(
gcConfig
^.
gc_database_config
)
ws
b
<-
initBrokerWithDBCreate
(
gcConfig
^.
gc_database_config
)
ws
let
queueName
=
_wdQueue
wd
let
queueName
=
_wdQueue
wd
let
addDelayAfterRead
=
gcConfig
^.
gc_worker
.
wsAdditionalDelayAfterRead
let
addDelayAfterRead
=
gcConfig
^.
gc_worker
.
wsAdditionalDelayAfterRead
let
job'
=
(
updateJobData
ws
job
$
W
.
mkDefaultSendJob'
b
queueName
job
)
{
W
.
delay
=
_wsDefaultDelay
let
sj
=
(
W
.
mkDefaultSendJob'
b
queueName
job
)
{
W
.
delay
=
_wsDefaultDelay
,
W
.
addDelayAfterRead
=
B
.
_TimeoutS
addDelayAfterRead
,
W
.
addDelayAfterRead
=
B
.
_TimeoutS
addDelayAfterRead
,
W
.
toStrat
=
WT
.
TSDelete
}
-- don't allow to repeat infinitely (see #495)
,
W
.
toStrat
=
WT
.
TSDelete
}
let
job'
=
updateJobData
ws
job
sj
withLogger
(
gcConfig
^.
gc_logging
)
$
\
ioL
->
withLogger
(
gcConfig
^.
gc_logging
)
$
\
ioL
->
$
(
logLoc
)
ioL
DEBUG
$
"[sendJob] sending job "
<>
show
job
<>
" (delay "
<>
show
(
W
.
delay
job'
)
<>
")"
$
(
logLoc
)
ioL
DEBUG
$
"[sendJob] sending job "
<>
show
job
<>
" (delay "
<>
show
(
W
.
delay
job'
)
<>
")"
W
.
sendJob'
job'
W
.
sendJob'
job'
...
...
src/Gargantext/Core/Worker/Types.hs
View file @
4739268a
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE StandaloneDeriving #-}
{-|
{-|
Module : Gargantext.Core.Worker.Types
Module : Gargantext.Core.Worker.Types
Description : Some useful worker types
Description : Some useful worker types
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
4739268a
...
@@ -55,6 +55,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
...
@@ -55,6 +55,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
where
where
import
Conduit
import
Conduit
import
Control.Concurrent.Async.Lifted
qualified
as
AsyncL
import
Control.Exception.Safe
qualified
as
CES
import
Control.Exception.Safe
qualified
as
CES
import
Control.Lens
(
to
,
view
)
import
Control.Lens
(
to
,
view
)
import
Control.Exception.Safe
(
catch
,
MonadCatch
)
import
Control.Exception.Safe
(
catch
,
MonadCatch
)
...
@@ -70,7 +71,8 @@ import Data.Text qualified as T
...
@@ -70,7 +71,8 @@ import Data.Text qualified as T
import
Gargantext.API.Ngrams.Tools
(
getTermsWith
)
import
Gargantext.API.Ngrams.Tools
(
getTermsWith
)
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
)
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
)
import
Gargantext.Core
(
Lang
(
..
),
withDefaultLanguage
,
NLPServerConfig
)
import
Gargantext.Core
(
Lang
(
..
),
withDefaultLanguage
,
NLPServerConfig
)
import
Gargantext.Core.Config
(
GargConfig
(
..
),
hasConfig
)
import
Gargantext.Core.Config
(
GargConfig
(
..
),
hasConfig
,
gc_worker
)
import
Gargantext.Core.Config.Worker
(
wsNlpConduitChunkSize
)
import
Gargantext.Core.Config.Types
(
APIsConfig
(
..
))
import
Gargantext.Core.Config.Types
(
APIsConfig
(
..
))
import
Gargantext.Core.Ext.IMTUser
(
readFile_Annuaire
)
import
Gargantext.Core.Ext.IMTUser
(
readFile_Annuaire
)
import
Gargantext.Core.NLP
(
HasNLPServer
,
nlpServerGet
)
import
Gargantext.Core.NLP
(
HasNLPServer
,
nlpServerGet
)
...
@@ -98,7 +100,8 @@ import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact )
...
@@ -98,7 +100,8 @@ import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact )
import
Gargantext.Database.Admin.Types.Hyperdata.Corpus
(
HyperdataAnnuaire
,
HyperdataCorpus
(
_hc_lang
)
)
import
Gargantext.Database.Admin.Types.Hyperdata.Corpus
(
HyperdataAnnuaire
,
HyperdataCorpus
(
_hc_lang
)
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
ToHyperdataDocument
(
toHyperdataDocument
),
HyperdataDocument
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
ToHyperdataDocument
(
toHyperdataDocument
),
HyperdataDocument
)
import
Gargantext.Database.Admin.Types.Node
hiding
(
ERROR
,
DEBUG
)
-- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
import
Gargantext.Database.Admin.Types.Node
hiding
(
ERROR
,
DEBUG
)
-- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
import
Gargantext.Database.Prelude
import
Gargantext.Database.Class
(
DBCmdWithEnv
,
IsDBCmd
)
import
Gargantext.Database.Transactional
(
DBUpdate
,
runDBTx
)
import
Gargantext.Database.Query.Table.ContextNodeNgrams2
(
ContextNodeNgrams2Poly
(
..
),
insertContextNodeNgrams2
)
import
Gargantext.Database.Query.Table.ContextNodeNgrams2
(
ContextNodeNgrams2Poly
(
..
),
insertContextNodeNgrams2
)
import
Gargantext.Database.Query.Table.Node
(
MkCorpus
,
insertDefaultNodeIfNotExists
,
getOrMkList
,
getNodeWith
)
import
Gargantext.Database.Query.Table.Node
(
MkCorpus
,
insertDefaultNodeIfNotExists
,
getOrMkList
,
getNodeWith
)
import
Gargantext.Database.Query.Table.Node.Document.Add
qualified
as
Doc
(
add
)
import
Gargantext.Database.Query.Table.Node.Document.Add
qualified
as
Doc
(
add
)
...
@@ -108,8 +111,8 @@ import Gargantext.Database.Query.Table.NodeContext (selectDocNodesOnlyId)
...
@@ -108,8 +111,8 @@ import Gargantext.Database.Query.Table.NodeContext (selectDocNodesOnlyId)
import
Gargantext.Database.Query.Table.NodeNgrams
(
listInsertDb
,
getCgramsId
)
import
Gargantext.Database.Query.Table.NodeNgrams
(
listInsertDb
,
getCgramsId
)
import
Gargantext.Database.Query.Tree.Root
(
MkCorpusUser
(
..
),
getOrMkRoot
,
getOrMkRootWithCorpus
,
userFromMkCorpusUser
)
import
Gargantext.Database.Query.Tree.Root
(
MkCorpusUser
(
..
),
getOrMkRoot
,
getOrMkRootWithCorpus
,
userFromMkCorpusUser
)
import
Gargantext.Database.Schema.Ngrams
(
indexNgrams
,
NgramsId
)
import
Gargantext.Database.Schema.Ngrams
(
indexNgrams
,
NgramsId
)
import
Gargantext.Database.Schema.Node
import
Gargantext.Database.Schema.Node
(
NodePoly
(
_node_id
,
_node_hash_id
),
node_hyperdata
)
import
Gargantext.Database.Types
import
Gargantext.Database.Types
(
Indexed
(
Indexed
)
)
import
Gargantext.Prelude
hiding
(
catch
,
onException
,
to
)
import
Gargantext.Prelude
hiding
(
catch
,
onException
,
to
)
import
Gargantext.System.Logging
(
logLocM
,
LogLevel
(
DEBUG
,
ERROR
),
MonadLogger
)
import
Gargantext.System.Logging
(
logLocM
,
LogLevel
(
DEBUG
,
ERROR
),
MonadLogger
)
import
Gargantext.Utils.Jobs.Monad
(
JobHandle
,
MonadJobStatus
(
..
),
markFailureNoErr
)
import
Gargantext.Utils.Jobs.Monad
(
JobHandle
,
MonadJobStatus
(
..
),
markFailureNoErr
)
...
@@ -296,11 +299,12 @@ flow :: forall env err m a c.
...
@@ -296,11 +299,12 @@ flow :: forall env err m a c.
->
m
CorpusId
->
m
CorpusId
flow
c
mkCorpusUser
la
mfslw
(
count
,
docsC
)
jobHandle
=
do
flow
c
mkCorpusUser
la
mfslw
(
count
,
docsC
)
jobHandle
=
do
cfg
<-
view
hasConfig
cfg
<-
view
hasConfig
let
chunkSize
=
cfg
^.
gc_worker
.
wsNlpConduitChunkSize
(
_userId
,
userCorpusId
,
listId
,
msgs
)
<-
runDBTx
$
createNodes
cfg
mkCorpusUser
c
(
_userId
,
userCorpusId
,
listId
,
msgs
)
<-
runDBTx
$
createNodes
cfg
mkCorpusUser
c
forM_
msgs
ce_notify
forM_
msgs
ce_notify
-- TODO if public insertMasterDocs else insertUserDocs
-- TODO if public insertMasterDocs else insertUserDocs
runConduit
(
zipSources
(
yieldMany
([
1
..
]
::
[
Int
]))
docsC
runConduit
(
zipSources
(
yieldMany
([
1
..
]
::
[
Int
]))
docsC
.|
CList
.
chunksOf
5
.|
CList
.
chunksOf
chunkSize
.|
mapM_C
(
addDocumentsWithProgress
userCorpusId
)
.|
mapM_C
(
addDocumentsWithProgress
userCorpusId
)
.|
sinkNull
)
`
CES
.
catches
`
.|
sinkNull
)
`
CES
.
catches
`
[
CES
.
Handler
$
\
(
e
::
ClientError
)
->
do
[
CES
.
Handler
$
\
(
e
::
ClientError
)
->
do
...
@@ -544,13 +548,20 @@ extractNgramsFromDocuments :: forall doc env err m.
...
@@ -544,13 +548,20 @@ extractNgramsFromDocuments :: forall doc env err m.
->
TermType
Lang
->
TermType
Lang
->
[
doc
]
->
[
doc
]
->
m
(
UncommittedNgrams
doc
)
->
m
(
UncommittedNgrams
doc
)
extractNgramsFromDocuments
nlpServer
lang
docs
=
extractNgramsFromDocuments
nlpServer
lang
docs
=
do
foldlM
go
mempty
docs
ret
<-
AsyncL
.
mapConcurrently
(
extractNgramsFromDocument
nlpServer
lang
)
docs
where
-- sem <- QSemL.newQSem 10
go
::
UncommittedNgrams
doc
->
doc
->
m
(
UncommittedNgrams
doc
)
-- let f = extractNgramsFromDocument nlpServer lang
go
!
acc
inputDoc
=
do
-- ret <- AsyncL.mapConcurrently (\doc ->
ngrams
<-
extractNgramsFromDocument
nlpServer
lang
inputDoc
-- CEL.bracket_ (QSemL.waitQSem sem) (QSemL.signalQSem sem) (f doc)
pure
$
acc
<>
ngrams
-- ) docs
pure
$
foldl
(
<>
)
mempty
ret
-- foldlM go mempty docs
-- where
-- go :: UncommittedNgrams doc -> doc -> m (UncommittedNgrams doc)
-- go !acc inputDoc = do
-- ngrams <- extractNgramsFromDocument nlpServer lang inputDoc
-- pure $ acc <> ngrams
commitNgramsForDocuments
::
UniqParameters
doc
commitNgramsForDocuments
::
UniqParameters
doc
=>
UncommittedNgrams
doc
=>
UncommittedNgrams
doc
...
...
src/Gargantext/Database/Action/Metrics/NgramsByContext.hs
View file @
4739268a
...
@@ -132,7 +132,8 @@ getOccByNgramsOnlyFast_withSample cId int nt ngs =
...
@@ -132,7 +132,8 @@ getOccByNgramsOnlyFast_withSample cId int nt ngs =
HM
.
fromListWith
(
+
)
<$>
selectNgramsOccurrencesOnlyByContextUser_withSample
cId
int
nt
ngs
HM
.
fromListWith
(
+
)
<$>
selectNgramsOccurrencesOnlyByContextUser_withSample
cId
int
nt
ngs
-- Returns occurrences of ngrams in given corpus/list (for each ngram, a list of contexts is returned)
-- | Returns occurrences of ngrams in given corpus/list (for each
-- ngram, a list of contexts is returned)
getOccByNgramsOnlyFast
::
CorpusId
getOccByNgramsOnlyFast
::
CorpusId
->
ListId
->
ListId
->
NgramsType
->
NgramsType
...
@@ -154,34 +155,39 @@ getOccByNgramsOnlyFast cId lId nt = do
...
@@ -154,34 +155,39 @@ getOccByNgramsOnlyFast cId lId nt = do
query
::
DPS
.
Query
query
::
DPS
.
Query
query
=
[
sql
|
query
=
[
sql
|
WITH cnnv AS
WITH nc AS (
( SELECT DISTINCT context_node_ngrams.context_id,
SELECT DISTINCT context_id
context_node_ngrams.ngrams_id,
FROM nodes_contexts
nodes_contexts.node_id,
WHERE node_id = ?
nodes_contexts.category
AND category > 0
FROM nodes_contexts
),
JOIN context_node_ngrams ON context_node_ngrams.context_id = nodes_contexts.context_id
cnnv AS
( SELECT DISTINCT context_id,
ngrams_id
FROM context_node_ngrams
WHERE context_id IN (SELECT context_id FROM nc)
),
),
node_context_ids AS
(SELECT context_id, ngrams_id, terms
FROM cnnv
JOIN ngrams ON cnnv.ngrams_id = ngrams.id
WHERE node_id = ? AND cnnv.category > 0
),
ncids_agg AS
ncids_agg AS
(SELECT ngrams_id, terms, array_agg(DISTINCT context_id) AS agg
( SELECT array_agg(DISTINCT context_id) AS agg,
FROM node_context_ids
ngrams_id,
GROUP BY (ngrams_id, terms)),
terms
FROM cnnv
JOIN ngrams
ON cnnv.ngrams_id = ngrams.id
GROUP BY (ngrams_id, terms)
),
ns AS
ns AS
(SELECT ngrams_id, terms
(SELECT ngrams_id, terms
FROM node_stories
FROM node_stories
JOIN ngrams ON ngrams_id = ngrams.id
JOIN ngrams
WHERE node_id = ? AND ngrams_type_id = ?
ON ngrams_id = ngrams.id
WHERE node_id = ? AND ngrams_type_id = ?
)
)
SELECT ns.terms, CASE WHEN agg IS NULL THEN '{}' ELSE agg END
SELECT ns.terms, CASE WHEN agg IS NULL THEN '{}' ELSE agg END
FROM ns
FROM ns
LEFT JOIN ncids_agg ON ns.ngrams_id = ncids_agg.ngrams_id
LEFT JOIN ncids_agg
ON ns.ngrams_id = ncids_agg.ngrams_id
|]
|]
-- query = [sql|
-- query = [sql|
-- WITH node_context_ids AS
-- WITH node_context_ids AS
...
...
src/Gargantext/System/Logging/Types.hs
View file @
4739268a
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE TypeFamilies #-}
module
Gargantext.System.Logging.Types
(
module
Gargantext.System.Logging.Types
(
LogLevel
(
..
)
LogLevel
(
..
)
...
...
test-data/test_config.toml
View file @
4739268a
...
@@ -99,6 +99,10 @@ default_job_timeout = 60
...
@@ -99,6 +99,10 @@ default_job_timeout = 60
# default timeout for "long" jobs (in seconds)
# default timeout for "long" jobs (in seconds)
long_job_timeout
=
3000
long_job_timeout
=
3000
# Batch size when sending data to NLP.
# Preferably, set as much as the number of CPUs
nlp_conduit_chunk_size
=
10
# NOTE This is overridden by Test.Database.Setup
# NOTE This is overridden by Test.Database.Setup
[worker.database]
[worker.database]
host
=
"127.0.0.1"
host
=
"127.0.0.1"
...
...
test/Test/Utils/Jobs.hs
View file @
4739268a
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment