[worker] various fixes

- fix openalex parser
- fix cli message about 'init' needed
- more granular progress reports for node updates
- fix error message when worker fails (though I'm not sure such a
- detailed message is neede for the end user?)
parent 945fd8d0
Pipeline #7359 failed with stages
in 24 minutes and 18 seconds
...@@ -104,7 +104,7 @@ source-repository-package ...@@ -104,7 +104,7 @@ source-repository-package
source-repository-package source-repository-package
type: git type: git
location: https://gitlab.iscpif.fr/gargantext/crawlers/openalex.git location: https://gitlab.iscpif.fr/gargantext/crawlers/openalex.git
tag: a80e0ea57379d23f5e18a412606a71471b8ef681 tag: d2949cdeaaf3193e2b53e06bcae1d2234581e519
source-repository-package source-repository-package
type: git type: git
......
...@@ -101,7 +101,7 @@ startGargantext mode port sf@(SettingsFile settingsFile) = withLoggerHoisted mod ...@@ -101,7 +101,7 @@ startGargantext mode port sf@(SettingsFile settingsFile) = withLoggerHoisted mod
case r of case r of
Right True -> pure () Right True -> pure ()
Right False -> panicTrace $ Right False -> panicTrace $
"You must run 'gargantext init " <> pack settingsFile <> "You must run 'gargantext init -c " <> pack settingsFile <>
"' before running gargantext-server (only the first time)." "' before running gargantext-server (only the first time)."
Left err -> panicTrace $ "Unexpected exception:" <> show err Left err -> panicTrace $ "Unexpected exception:" <> show err
oneHour = Clock.fromNanoSecs 3600_000_000_000 oneHour = Clock.fromNanoSecs 3600_000_000_000
......
...@@ -10,7 +10,6 @@ Portability : POSIX ...@@ -10,7 +10,6 @@ Portability : POSIX
-} -}
{-# LANGUAGE TypeApplications #-} {-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Node.Update module Gargantext.API.Node.Update
where where
...@@ -67,6 +66,7 @@ updateNode nId (UpdateNodeParamsGraph ...@@ -67,6 +66,7 @@ updateNode nId (UpdateNodeParamsGraph
(UpdateNodeConfigGraph metric partitionMethod bridgeMethod strength nt1 nt2)) jobHandle = do (UpdateNodeConfigGraph metric partitionMethod bridgeMethod strength nt1 nt2)) jobHandle = do
markStarted 2 jobHandle markStarted 2 jobHandle
markProgress 1 jobHandle
-- printDebug "Computing graph: " method -- printDebug "Computing graph: " method
_ <- recomputeGraph nId partitionMethod bridgeMethod (Just metric) (Just strength) nt1 nt2 True _ <- recomputeGraph nId partitionMethod bridgeMethod (Just metric) (Just strength) nt1 nt2 True
-- printDebug "Graph computed: " method -- printDebug "Graph computed: " method
...@@ -74,6 +74,7 @@ updateNode nId (UpdateNodeParamsGraph ...@@ -74,6 +74,7 @@ updateNode nId (UpdateNodeParamsGraph
updateNode nid1 (LinkNodeReq nt nid2) jobHandle = do updateNode nid1 (LinkNodeReq nt nid2) jobHandle = do
markStarted 2 jobHandle markStarted 2 jobHandle
markProgress 1 jobHandle
_ <- case nt of _ <- case nt of
NodeAnnuaire -> pairing nid2 nid1 Nothing -- defaultList NodeAnnuaire -> pairing nid2 nid1 Nothing -- defaultList
NodeCorpus -> pairing nid1 nid2 Nothing -- defaultList NodeCorpus -> pairing nid1 nid2 Nothing -- defaultList
...@@ -84,7 +85,7 @@ updateNode nid1 (LinkNodeReq nt nid2) jobHandle = do ...@@ -84,7 +85,7 @@ updateNode nid1 (LinkNodeReq nt nid2) jobHandle = do
-- | `Advanced` to update graphs -- | `Advanced` to update graphs
updateNode lId (UpdateNodeParamsList Advanced) jobHandle = do updateNode lId (UpdateNodeParamsList Advanced) jobHandle = do
markStarted 3 jobHandle markStarted 4 jobHandle
corpusId <- view node_parent_id <$> getNode lId corpusId <- view node_parent_id <$> getNode lId
markProgress 1 jobHandle markProgress 1 jobHandle
...@@ -92,7 +93,9 @@ updateNode lId (UpdateNodeParamsList Advanced) jobHandle = do ...@@ -92,7 +93,9 @@ updateNode lId (UpdateNodeParamsList Advanced) jobHandle = do
_ <- case corpusId of _ <- case corpusId of
Just cId -> do Just cId -> do
_ <- Metrics.updatePie cId (Just lId) NgramsTypes.Authors Nothing _ <- Metrics.updatePie cId (Just lId) NgramsTypes.Authors Nothing
markProgress 1 jobHandle
_ <- Metrics.updateTree cId (Just lId) NgramsTypes.Institutes MapTerm _ <- Metrics.updateTree cId (Just lId) NgramsTypes.Institutes MapTerm
markProgress 1 jobHandle
_ <- Metrics.updatePie cId (Just lId) NgramsTypes.Sources Nothing _ <- Metrics.updatePie cId (Just lId) NgramsTypes.Sources Nothing
pure () pure ()
Nothing -> pure () Nothing -> pure ()
...@@ -108,6 +111,7 @@ updateNode lId (UpdateNodeParamsList _mode) jobHandle = do ...@@ -108,6 +111,7 @@ updateNode lId (UpdateNodeParamsList _mode) jobHandle = do
_ <- case corpusId of _ <- case corpusId of
Just cId -> do Just cId -> do
_ <- reIndexWith cId lId NgramsTerms (Set.singleton MapTerm) _ <- reIndexWith cId lId NgramsTerms (Set.singleton MapTerm)
markProgress 1 jobHandle
_ <- updateNgramsOccurrences cId lId _ <- updateNgramsOccurrences cId lId
pure () pure ()
Nothing -> pure () Nothing -> pure ()
...@@ -124,7 +128,7 @@ updateNode phyloId (UpdateNodePhylo config) jobHandle = do ...@@ -124,7 +128,7 @@ updateNode phyloId (UpdateNodePhylo config) jobHandle = do
let corpusId = fromMaybe (panicTrace "no corpus id") corpusId' let corpusId = fromMaybe (panicTrace "no corpus id") corpusId'
phy <- timeMeasured "updateNode.flowPhyloAPI" $ flowPhyloAPI (subConfigAPI2config config) mbComputeHistory corpusId phy <- timeMeasured "updateNode.flowPhyloAPI" $ flowPhyloAPI (subConfigAPI2config config) mbComputeHistory corpusId
markProgress 2 jobHandle markProgress 1 jobHandle
{- {-
logStatus JobLog { _scst_succeeded = Just 2 logStatus JobLog { _scst_succeeded = Just 2
...@@ -140,12 +144,12 @@ updateNode phyloId (UpdateNodePhylo config) jobHandle = do ...@@ -140,12 +144,12 @@ updateNode phyloId (UpdateNodePhylo config) jobHandle = do
markComplete jobHandle markComplete jobHandle
updateNode tId (UpdateNodeParamsTexts _mode) jobHandle = do updateNode tId (UpdateNodeParamsTexts _mode) jobHandle = do
markStarted 3 jobHandle markStarted 2 jobHandle
corpusId <- view node_parent_id <$> getNode tId corpusId <- view node_parent_id <$> getNode tId
markProgress 1 jobHandle markProgress 1 jobHandle
_ <- case corpusId of _ <- case corpusId of
Just cId -> updateDocs cId Just cId -> updateDocs cId jobHandle
Nothing -> do Nothing -> do
_ <- panicTrace "[G.A.N.Update] updateNode/UpdateNodeParamsText: no corpus Id given" _ <- panicTrace "[G.A.N.Update] updateNode/UpdateNodeParamsText: no corpus Id given"
pure () pure ()
...@@ -155,7 +159,7 @@ updateNode tId (UpdateNodeParamsTexts _mode) jobHandle = do ...@@ -155,7 +159,7 @@ updateNode tId (UpdateNodeParamsTexts _mode) jobHandle = do
updateNode tId updateNode tId
(UpdateNodeParamsCorpus methodGraph methodPhylo methodTexts methodList) (UpdateNodeParamsCorpus methodGraph methodPhylo methodTexts methodList)
jobHandle = do jobHandle = do
markStarted 3 jobHandle markStarted 5 jobHandle
markProgress 1 jobHandle markProgress 1 jobHandle
_ <- getNode tId _ <- getNode tId
...@@ -165,8 +169,11 @@ updateNode tId ...@@ -165,8 +169,11 @@ updateNode tId
childNodeLists <- getChildrenByType tId NodeList childNodeLists <- getChildrenByType tId NodeList
mapM_ (\cId -> updateNode cId (UpdateNodeParamsTexts methodTexts) jobHandle) childTexts mapM_ (\cId -> updateNode cId (UpdateNodeParamsTexts methodTexts) jobHandle) childTexts
markProgress 1 jobHandle
mapM_ (\cId -> updateNode cId (UpdateNodeParamsGraph methodGraph) jobHandle) childGraphs mapM_ (\cId -> updateNode cId (UpdateNodeParamsGraph methodGraph) jobHandle) childGraphs
markProgress 1 jobHandle
mapM_ (\cId -> updateNode cId (UpdateNodePhylo methodPhylo) jobHandle) childPhylos mapM_ (\cId -> updateNode cId (UpdateNodePhylo methodPhylo) jobHandle) childPhylos
markProgress 1 jobHandle
mapM_ (\cId -> updateNode cId (UpdateNodeParamsList methodList) jobHandle) childNodeLists mapM_ (\cId -> updateNode cId (UpdateNodeParamsList methodList) jobHandle) childNodeLists
markComplete jobHandle markComplete jobHandle
...@@ -175,14 +182,23 @@ updateNode _nId _p jobHandle = do ...@@ -175,14 +182,23 @@ updateNode _nId _p jobHandle = do
simuLogs jobHandle 10 simuLogs jobHandle 10
------------------------------------------------------------------------ ------------------------------------------------------------------------
updateDocs :: (HasNodeStory env err m) updateDocs :: ( HasNodeStory env err m
=> NodeId -> m () , MonadJobStatus m
updateDocs cId = do , MonadLogger m )
=> NodeId
-> JobHandle m
-> m ()
updateDocs cId jobHandle = do
markStarted 4 jobHandle
lId <- defaultList cId lId <- defaultList cId
_ <- reIndexWith cId lId NgramsTerms (Set.singleton MapTerm) _ <- reIndexWith cId lId NgramsTerms (Set.singleton MapTerm)
markProgress 1 jobHandle
_ <- updateNgramsOccurrences cId lId _ <- updateNgramsOccurrences cId lId
markProgress 1 jobHandle
_ <- updateContextScore cId lId _ <- updateContextScore cId lId
markProgress 1 jobHandle
_ <- Metrics.updateChart' cId lId NgramsTypes.Docs Nothing _ <- Metrics.updateChart' cId lId NgramsTypes.Docs Nothing
markProgress 1 jobHandle
-- printDebug "updateContextsScore" (cId, lId, u) -- printDebug "updateContextsScore" (cId, lId, u)
pure () pure ()
...@@ -137,7 +137,7 @@ notifyJobFailed env (W.State { name }) bm exc = do ...@@ -137,7 +137,7 @@ notifyJobFailed env (W.State { name }) bm exc = do
let ji = JobInfo { _ji_message_id = messageId bm let ji = JobInfo { _ji_message_id = messageId bm
, _ji_mNode_id = getWorkerMNodeId job } , _ji_mNode_id = getWorkerMNodeId job }
let jh = WorkerJobHandle { _w_job_info = ji } let jh = WorkerJobHandle { _w_job_info = ji }
runWorkerMonad env $ markFailed (Just $ UnsafeMkHumanFriendlyErrorText "Worker job failed") jh runWorkerMonad env $ markFailed (Just $ UnsafeMkHumanFriendlyErrorText $ "Worker job failed: " <> show exc) jh
notifyJobKilled :: (HasWorkerBroker, HasCallStack) notifyJobKilled :: (HasWorkerBroker, HasCallStack)
=> WorkerEnv => WorkerEnv
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment