Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
160
Issues
160
List
Board
Labels
Milestones
Merge Requests
14
Merge Requests
14
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
0d4e0554
Commit
0d4e0554
authored
Jan 13, 2025
by
Alfredo Di Napoli
1
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Move terms updating to separate job as well
parent
c62480c7
Pipeline
#7207
passed with stages
in 49 minutes and 14 seconds
Changes
6
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
80 additions
and
33 deletions
+80
-33
List.hs
src/Gargantext/API/Ngrams/List.hs
+12
-5
DocumentUpload.hs
src/Gargantext/API/Node/DocumentUpload.hs
+11
-14
Remote.hs
src/Gargantext/API/Server/Named/Remote.hs
+4
-6
Worker.hs
src/Gargantext/Core/Worker.hs
+18
-6
Jobs.hs
src/Gargantext/Core/Worker/Jobs.hs
+2
-0
Types.hs
src/Gargantext/Core/Worker/Jobs/Types.hs
+33
-2
No files found.
src/Gargantext/API/Ngrams/List.hs
View file @
0d4e0554
...
...
@@ -10,6 +10,7 @@ Portability : POSIX
-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE ViewPatterns #-}
...
...
@@ -20,17 +21,17 @@ import Data.ByteString.Lazy qualified as BSL
import
Data.Csv
qualified
as
Tsv
import
Data.HashMap.Strict
(
HashMap
)
import
Data.HashMap.Strict
qualified
as
HashMap
import
Data.Map.Strict
(
toList
)
import
Data.Map.Strict
qualified
as
Map
import
Data.Map.Strict
(
toList
)
import
Data.Set
qualified
as
Set
import
Data.Text
(
concat
,
pack
,
splitOn
)
import
Data.Vector
(
Vector
)
import
Data.Vector
qualified
as
Vec
import
Data.Vector
(
Vector
)
import
Gargantext.API.Admin.EnvTypes
(
Env
)
import
Gargantext.API.Errors.Types
(
BackendInternalError
(
InternalServerError
))
import
Gargantext.API.Ngrams
(
setListNgrams
)
import
Gargantext.API.Ngrams.List.Types
(
_wjf_data
,
_wtf_data
)
import
Gargantext.API.Ngrams.Prelude
(
getNgramsList
)
import
Gargantext.API.Ngrams
(
setListNgrams
)
import
Gargantext.API.Ngrams.Types
import
Gargantext.API.Prelude
(
GargM
,
serverError
,
HasServerError
)
import
Gargantext.API.Routes.Named.List
qualified
as
Named
...
...
@@ -46,11 +47,13 @@ import Gargantext.Database.Schema.Ngrams ( text2ngrams, NgramsId )
import
Gargantext.Database.Schema.Node
(
_node_parent_id
)
import
Gargantext.Database.Types
(
Indexed
(
..
))
import
Gargantext.Prelude
hiding
(
concat
,
toList
)
import
Gargantext.System.Logging
(
logLocM
,
MonadLogger
)
import
Gargantext.Utils.Jobs.Monad
(
MonadJobStatus
(
..
))
import
Prelude
qualified
import
Protolude
qualified
as
P
import
Servant
import
Servant.Server.Generic
(
AsServerT
)
import
Gargantext.System.Logging
(
LogLevel
(
..
))
getAPI
::
Named
.
GETAPI
(
AsServerT
(
GargM
Env
BackendInternalError
))
...
...
@@ -114,7 +117,7 @@ jsonPostAsync = Named.JSONAPI {
}
------------------------------------------------------------------------
postAsyncJSON
::
(
HasNodeStory
env
err
m
,
MonadJobStatus
m
)
postAsyncJSON
::
(
HasNodeStory
env
err
m
,
MonadJobStatus
m
,
MonadLogger
m
)
=>
ListId
->
NgramsList
->
JobHandle
m
...
...
@@ -123,13 +126,17 @@ postAsyncJSON l ngramsList jobHandle = do
markStarted
2
jobHandle
$
(
logLocM
)
DEBUG
"[postAsyncJSON] Setting the Ngrams list ..."
setList
$
(
logLocM
)
DEBUG
"[postAsyncJSON] Done."
markProgress
1
jobHandle
corpus_node
<-
getNode
l
-- (Proxy :: Proxy HyperdataList)
let
corpus_id
=
fromMaybe
(
panicTrace
"no parent_id"
)
(
_node_parent_id
corpus_node
)
$
(
logLocM
)
DEBUG
"[postAsyncJSON] Executing re-indexing..."
_
<-
reIndexWith
corpus_id
l
NgramsTerms
(
Set
.
fromList
[
MapTerm
,
CandidateTerm
])
$
(
logLocM
)
DEBUG
"[postAsyncJSON] Re-indexing done."
markComplete
jobHandle
...
...
@@ -205,7 +212,7 @@ tsvToNgramsTableMap record = case Vec.toList record of
-- | This is for debugging the TSV parser in the REPL
importTsvFile
::
forall
env
err
m
.
(
HasNodeStory
env
err
m
,
HasServerError
err
,
MonadJobStatus
m
)
importTsvFile
::
forall
env
err
m
.
(
HasNodeStory
env
err
m
,
HasServerError
err
,
MonadJobStatus
m
,
MonadLogger
m
)
=>
ListId
->
P
.
FilePath
->
m
()
importTsvFile
lId
fp
=
do
contents
<-
liftBase
$
P
.
readFile
fp
...
...
src/Gargantext/API/Node/DocumentUpload.hs
View file @
0d4e0554
...
...
@@ -38,10 +38,10 @@ import Gargantext.Database.Action.Flow (addDocumentsToHyperCorpus)
import
Gargantext.Database.Action.Flow.Types
(
FlowCmdM
)
import
Gargantext.Database.Admin.Types.Hyperdata.Corpus
(
HyperdataCorpus
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Admin.Types.Node
(
DocId
,
NodeId
,
NodeType
(
NodeCorpus
)
)
import
Gargantext.Database.Admin.Types.Node
(
DocId
,
NodeId
,
NodeType
(
NodeCorpus
)
,
ParentId
)
import
Gargantext.Database.Prelude
(
IsDBCmd
)
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
)
import
Gargantext.Database.Query.Table.Node
(
getClosestParentIdByType'
,
getClosestParentIdByType
)
import
Gargantext.Database.Query.Table.Node
(
getClosestParentIdByType'
)
import
Gargantext.Database.Schema.Node
(
_node_hyperdata
)
import
Gargantext.Prelude
import
Gargantext.System.Logging
(
logLocM
,
LogLevel
(
..
),
MonadLogger
)
...
...
@@ -114,18 +114,15 @@ remoteImportDocuments :: ( HasNodeError err
,
MonadLogger
m
,
MonadIO
m
)
=>
AuthenticatedUser
->
ParentId
->
NodeId
->
DocumentExport
->
m
[
NodeId
]
remoteImportDocuments
loggedInUser
nodeId
(
DocumentExport
documents
_gargVersion
)
=
do
mb_corpusId
<-
getClosestParentIdByType
nodeId
NodeCorpus
case
mb_corpusId
of
Nothing
->
panicTrace
$
"remoteImportDocuments: impossible, freshly imported doc node without parent corpus"
Just
corpusId
->
do
let
la
=
Multi
EN
nlpServerConfig
<-
view
$
nlpServerGet
(
_tt_lang
la
)
$
(
logLocM
)
INFO
$
"Importing "
<>
T
.
pack
(
show
$
length
documents
)
<>
" documents for corpus node "
<>
T
.
pack
(
show
nodeId
)
docs
<-
addDocumentsToHyperCorpus
nlpServerConfig
(
Nothing
::
Maybe
HyperdataCorpus
)
la
corpusId
(
map
(
_node_hyperdata
.
_d_document
)
documents
)
_versioned
<-
commitCorpus
corpusId
(
RootId
$
_auth_node_id
loggedInUser
)
$
(
logLocM
)
INFO
$
"Done importing "
<>
T
.
pack
(
show
$
length
documents
)
<>
" documents for corpus node "
<>
T
.
pack
(
show
nodeId
)
pure
docs
remoteImportDocuments
loggedInUser
corpusId
nodeId
(
DocumentExport
documents
_gargVersion
)
=
do
let
la
=
Multi
EN
nlpServerConfig
<-
view
$
nlpServerGet
(
_tt_lang
la
)
$
(
logLocM
)
INFO
$
"Importing "
<>
T
.
pack
(
show
$
length
documents
)
<>
" documents for corpus node "
<>
T
.
pack
(
show
nodeId
)
docs
<-
addDocumentsToHyperCorpus
nlpServerConfig
(
Nothing
::
Maybe
HyperdataCorpus
)
la
corpusId
(
map
(
_node_hyperdata
.
_d_document
)
documents
)
_versioned
<-
commitCorpus
corpusId
(
RootId
$
_auth_node_id
loggedInUser
)
$
(
logLocM
)
INFO
$
"Done importing "
<>
T
.
pack
(
show
$
length
documents
)
<>
" documents for corpus node "
<>
T
.
pack
(
show
nodeId
)
pure
docs
src/Gargantext/API/Server/Named/Remote.hs
View file @
0d4e0554
...
...
@@ -16,7 +16,7 @@ import Control.Exception.Safe qualified as Safe
import
Control.Exception
(
toException
)
import
Control.Lens
(
view
,
(
#
))
import
Control.Monad.Except
(
throwError
,
MonadError
)
import
Control.Monad
(
void
)
import
Control.Monad
(
void
,
liftM2
)
import
Data.Aeson
qualified
as
JSON
import
Data.ByteString.Builder
qualified
as
B
import
Data.ByteString.Lazy
qualified
as
BL
...
...
@@ -116,15 +116,13 @@ remoteImportHandler loggedInUser c = do
Just
ty
->
do
new_node
<-
insertNodeWithHyperdata
ty
(
_node_name
x
)
(
_node_hyperdata
x
)
mb_parent
(
_auth_user_id
loggedInUser
)
$
(
logLocM
)
INFO
$
"Created a new node "
<>
T
.
pack
(
show
$
new_node
)
<>
" of type "
<>
T
.
pack
(
show
ty
)
for_
mb_docs
$
\
docsList
->
do
for_
(
liftM2
(,)
mb_docs
mb_parent
)
$
\
(
docsList
,
parentId
)
->
do
$
(
logLocM
)
INFO
$
"Found document list to import..."
let
payload
=
Jobs
.
ImportRemoteDocumentsPayload
loggedInUser
new_node
docsList
let
payload
=
Jobs
.
ImportRemoteDocumentsPayload
loggedInUser
parentId
new_node
docsList
void
$
sendJob
$
Jobs
.
ImportRemoteDocuments
payload
for_
mb_terms
$
\
ngramsList
->
do
$
(
logLocM
)
INFO
$
"Found ngrams list to import..."
void
$
sendJob
$
Jobs
.
JSONPost
{
_jp_list_id
=
new_node
,
_jp_ngrams_list
=
ngramsList
}
void
$
sendJob
$
Jobs
.
ImportRemoteTerms
$
Jobs
.
ImportRemoteTermsPayload
new_node
ngramsList
pure
new_node
insertTrees
::
Maybe
NodeId
->
[
NodeId
]
->
Tree
ExportableNode
->
m
[
NodeId
]
...
...
src/Gargantext/Core/Worker.hs
View file @
0d4e0554
...
...
@@ -26,14 +26,15 @@ import Data.Text qualified as T
import
Gargantext.API.Admin.Auth
(
forgotUserPassword
)
import
Gargantext.API.Admin.Auth.Types
(
ForgotPasswordAsyncParams
(
..
))
import
Gargantext.API.Ngrams.List
(
postAsyncJSON
)
import
Gargantext.API.Node.Corpus.Annuaire
qualified
as
Annuaire
import
Gargantext.API.Node.Contact
(
addContact
)
import
Gargantext.API.Node.Corpus.Annuaire
qualified
as
Annuaire
import
Gargantext.API.Node.Corpus.New
(
addToCorpusWithForm
,
addToCorpusWithQuery
)
import
Gargantext.API.Node.DocumentsFromWriteNodes
(
documentsFromWriteNodes
)
import
Gargantext.API.Node.DocumentUpload
(
documentUploadAsync
,
remoteImportDocuments
)
import
Gargantext.API.Node.FrameCalcUpload
(
frameCalcUploadAsync
)
import
Gargantext.API.Node.File
(
addWithFile
)
import
Gargantext.API.Node.FrameCalcUpload
(
frameCalcUploadAsync
)
import
Gargantext.API.Node.New
(
postNode'
)
import
Gargantext.API.Node.Update.Types
(
UpdateNodeParams
(
..
),
Granularity
(
..
))
import
Gargantext.API.Node.Update
(
updateNode
)
import
Gargantext.API.Server.Named.Ngrams
(
tableNgramsPostChartsAsync
)
import
Gargantext.Core.Config
(
hasConfig
,
gc_database_config
,
gc_jobs
,
gc_notifications_config
,
gc_worker
)
...
...
@@ -44,8 +45,8 @@ import Gargantext.Core.Notifications.CentralExchange.Types qualified as CET
import
Gargantext.Core.Viz.Graph.API
(
graphRecompute
)
import
Gargantext.Core.Worker.Broker
(
initBrokerWithDBCreate
)
import
Gargantext.Core.Worker.Env
import
Gargantext.Core.Worker.Jobs.Types
(
Job
(
..
),
getWorkerMNodeId
,
ImportRemoteDocumentsPayload
(
..
),
ImportRemoteTermsPayload
(
..
))
import
Gargantext.Core.Worker.PGMQTypes
(
BrokerMessage
,
HasWorkerBroker
,
WState
)
import
Gargantext.Core.Worker.Jobs.Types
(
Job
(
..
),
getWorkerMNodeId
,
ImportRemoteDocumentsPayload
(
..
))
import
Gargantext.Core.Worker.Types
(
JobInfo
(
..
))
import
Gargantext.Database.Query.Table.User
(
getUsersWithEmail
)
import
Gargantext.Prelude
hiding
(
to
)
...
...
@@ -298,6 +299,17 @@ performAction env _state bm = do
void
$
documentUploadAsync
_ud_node_id
_ud_args
jh
-- | Remotely import documents
ImportRemoteDocuments
(
ImportRemoteDocumentsPayload
loggedInUser
corpusId
docs
)
->
runWorkerMonad
env
$
do
$
(
logLocM
)
DEBUG
$
"[performAction] import remote documents"
void
$
remoteImportDocuments
loggedInUser
corpusId
docs
ImportRemoteTerms
(
ImportRemoteTermsPayload
list_id
ngrams_list
)
->
runWorkerMonad
env
$
do
$
(
logLocM
)
DEBUG
$
"[performAction] import remote terms"
void
$
postAsyncJSON
list_id
ngrams_list
jh
-- Trigger an 'UpdateNode' job to update the score(s)
$
(
logLocM
)
DEBUG
$
"Updating node scores for corpus node "
<>
T
.
pack
(
show
list_id
)
void
$
updateNode
list_id
(
UpdateNodeParamsTexts
Both
)
jh
$
(
logLocM
)
DEBUG
$
"Done updating node scores for corpus node "
<>
T
.
pack
(
show
list_id
)
-- | Remotely import documents
ImportRemoteDocuments
(
ImportRemoteDocumentsPayload
loggedInUser
parentId
corpusId
docs
)
->
runWorkerMonad
env
$
do
$
(
logLocM
)
DEBUG
$
"[performAction] import remote documents"
void
$
remoteImportDocuments
loggedInUser
parentId
corpusId
docs
src/Gargantext/Core/Worker/Jobs.hs
View file @
0d4e0554
...
...
@@ -61,6 +61,8 @@ updateJobData (NgramsPostCharts {}) sj = sj { W.timeout = 3000 }
updateJobData
(
RecomputeGraph
{})
sj
=
sj
{
W
.
timeout
=
3000
}
updateJobData
(
UpdateNode
{})
sj
=
sj
{
W
.
timeout
=
3000
}
updateJobData
(
UploadDocument
{})
sj
=
sj
{
W
.
timeout
=
3000
}
updateJobData
(
ImportRemoteDocuments
{})
sj
=
sj
{
W
.
timeout
=
3000
}
updateJobData
(
ImportRemoteTerms
{})
sj
=
sj
{
W
.
timeout
=
3000
}
-- | ForgotPasswordAsync, PostNodeAsync
updateJobData
_
sj
=
sj
{
W
.
resendOnKill
=
False
,
W
.
timeout
=
60
}
src/Gargantext/Core/Worker/Jobs/Types.hs
View file @
0d4e0554
...
...
@@ -31,12 +31,31 @@ import Gargantext.API.Node.New.Types ( PostNode(..) )
import
Gargantext.API.Node.Types
(
NewWithFile
,
NewWithForm
,
WithQuery
(
..
))
import
Gargantext.API.Node.Update.Types
(
UpdateNodeParams
)
import
Gargantext.Core.Types.Individu
(
User
)
import
Gargantext.Database.Admin.Types.Node
(
AnnuaireId
,
CorpusId
,
ListId
,
NodeId
(
UnsafeMkNodeId
))
import
Gargantext.Database.Admin.Types.Node
(
AnnuaireId
,
CorpusId
,
ListId
,
NodeId
(
UnsafeMkNodeId
)
,
ParentId
)
import
Gargantext.Prelude
data
ImportRemoteTermsPayload
=
ImportRemoteTermsPayload
{
_irtp_list_id
::
ListId
,
_irtp_ngrams_list
::
NgramsList
}
deriving
(
Show
,
Eq
)
instance
ToJSON
ImportRemoteTermsPayload
where
toJSON
ImportRemoteTermsPayload
{
..
}
=
object
[
"list_id"
.=
_irtp_list_id
,
"ngrams_list"
.=
_irtp_ngrams_list
]
instance
FromJSON
ImportRemoteTermsPayload
where
parseJSON
=
withObject
"ImportRemoteTermsPayload"
$
\
o
->
do
_irtp_list_id
<-
o
.:
"list_id"
_irtp_ngrams_list
<-
o
.:
"ngrams_list"
pure
ImportRemoteTermsPayload
{
..
}
data
ImportRemoteDocumentsPayload
=
ImportRemoteDocumentsPayload
{
_irdp_user
::
AuthenticatedUser
,
_irdp_parent_id
::
ParentId
,
_irdp_corpus_id
::
NodeId
,
_irdp_document_export
::
DocumentExport
}
deriving
(
Show
,
Eq
)
...
...
@@ -45,12 +64,14 @@ instance ToJSON ImportRemoteDocumentsPayload where
toJSON
ImportRemoteDocumentsPayload
{
..
}
=
object
[
"user"
.=
_irdp_user
,
"corpus_id"
.=
_irdp_corpus_id
,
"parent_id"
.=
_irdp_parent_id
,
"document_export"
.=
_irdp_document_export
]
instance
FromJSON
ImportRemoteDocumentsPayload
where
parseJSON
=
withObject
"ImportRemoteDocumentsPayload"
$
\
o
->
do
_irdp_user
<-
o
.:
"user"
_irdp_parent_id
<-
o
.:
"parent_id"
_irdp_corpus_id
<-
o
.:
"corpus_id"
_irdp_document_export
<-
o
.:
"document_export"
pure
ImportRemoteDocumentsPayload
{
..
}
...
...
@@ -91,6 +112,7 @@ data Job =
|
UploadDocument
{
_ud_node_id
::
NodeId
,
_ud_args
::
DocumentUpload
}
|
ImportRemoteDocuments
!
ImportRemoteDocumentsPayload
|
ImportRemoteTerms
!
ImportRemoteTermsPayload
deriving
(
Show
,
Eq
)
instance
FromJSON
Job
where
parseJSON
=
withObject
"Job"
$
\
o
->
do
...
...
@@ -160,6 +182,8 @@ instance FromJSON Job where
return
$
UploadDocument
{
..
}
"ImportRemoteDocuments"
->
ImportRemoteDocuments
<$>
parseJSON
(
JS
.
Object
o
)
"ImportRemoteTerms"
->
ImportRemoteTerms
<$>
parseJSON
(
JS
.
Object
o
)
s
->
prependFailure
"parsing job type failed, "
(
typeMismatch
"type"
s
)
instance
ToJSON
Job
where
toJSON
Ping
=
object
[
"type"
.=
(
"Ping"
::
Text
)
]
...
...
@@ -230,6 +254,12 @@ instance ToJSON Job where
let
o1
=
KM
.
fromList
[
(
"type"
,
toJSON
@
T
.
Text
"ImportRemoteDocuments"
)
]
in
JS
.
Object
$
o1
<>
o
_
->
errorTrace
"impossible, toJSON ImportRemoteDocuments did not return an Object."
toJSON
(
ImportRemoteTerms
payload
)
=
case
toJSON
payload
of
(
JS
.
Object
o
)
->
let
o1
=
KM
.
fromList
[
(
"type"
,
toJSON
@
T
.
Text
"ImportRemoteTerms"
)
]
in
JS
.
Object
$
o1
<>
o
_
->
errorTrace
"impossible, toJSON ImportRemoteTerms did not return an Object."
-- | We want to have a way to specify 'Maybe NodeId' from given worker
-- parameters. The given 'Maybe CorpusId' is an alternative, when
...
...
@@ -253,4 +283,5 @@ getWorkerMNodeId (PostNodeAsync { _pna_node_id }) = Just _pna_node_id
getWorkerMNodeId
(
RecomputeGraph
{
_rg_node_id
})
=
Just
_rg_node_id
getWorkerMNodeId
(
UpdateNode
{
_un_node_id
})
=
Just
_un_node_id
getWorkerMNodeId
(
UploadDocument
{
_ud_node_id
})
=
Just
_ud_node_id
getWorkerMNodeId
(
ImportRemoteDocuments
(
ImportRemoteDocumentsPayload
_
corpusId
_
))
=
Just
corpusId
getWorkerMNodeId
(
ImportRemoteDocuments
(
ImportRemoteDocumentsPayload
_
_
corpusId
_
))
=
Just
corpusId
getWorkerMNodeId
(
ImportRemoteTerms
(
ImportRemoteTermsPayload
listId
_
))
=
Just
listId
Przemyslaw Kaminski
@cgenie
mentioned in commit
942e663f
·
Jan 29, 2025
mentioned in commit
942e663f
mentioned in commit 942e663f539b287b4cc0469fe2bcf735813b4ff2
Toggle commit list
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment