[phylo] add more progress reporting to phylo

Also, factor out the common strategy used by par.
parent 1a7797aa
......@@ -131,7 +131,7 @@ updateNode phyloId (UpdateNodePhylo config) jobHandle = do
let corpusId = fromMaybe (panicTrace "no corpus id") corpusId'
phy <- timeMeasured "updateNode.flowPhyloAPI" $ flowPhyloAPI (subConfigAPI2config config) mbComputeHistory corpusId
phy <- timeMeasured "updateNode.flowPhyloAPI" $ flowPhyloAPI (subConfigAPI2config config) mbComputeHistory corpusId jobHandle
markProgress 1 jobHandle
{-
......
......@@ -49,6 +49,7 @@ import Gargantext.Database.Schema.Context ( ContextPoly(_context_hyperdata, _con
import Gargantext.Database.Schema.Node ( NodePoly(_node_hyperdata), node_hyperdata )
import Gargantext.Prelude hiding (to)
import Gargantext.System.Logging ( MonadLogger, LogLevel(DEBUG), logLocM )
import Gargantext.Utils.Jobs.Monad (JobHandle, MonadJobStatus(markProgress, addMoreSteps))
import Gargantext.Utils.UTCTime (timeMeasured, timeMeasured'')
import Prelude qualified
import System.FilePath ((</>))
......@@ -110,25 +111,35 @@ phylo2dot phylo = do
_ -> pure value
flowPhyloAPI :: (HasNodeStory env err m, HasNodeError err, MonadLogger m)
flowPhyloAPI :: (HasNodeStory env err m, HasNodeError err, MonadLogger m, MonadJobStatus m)
=> PhyloConfig
-> Maybe ComputeTimeHistory
-- ^ Previous compute time historical data, if any.
-> CorpusId
-> JobHandle m
-> m Phylo
flowPhyloAPI config mbOldComputeHistory cId = do
flowPhyloAPI config mbOldComputeHistory cId jobHandle = do
env <- view hasNodeStory
addMoreSteps 5 jobHandle
corpus <- timeMeasured "flowPhyloAPI.corpusIdtoDocuments" $ runDBQuery $ corpusIdtoDocuments env (timeUnit config) cId
markProgress 1 jobHandle
-- writePhylo phyloWithCliquesFile phyloWithCliques
$(logLocM) DEBUG $ "PhyloConfig old: " <> show config
(t1, phyloWithCliques) <- timeMeasured'' DEBUG "flowPhyloAPI.phyloWithCliques" (pure $! toPhyloWithoutLink corpus config)
markProgress 1 jobHandle
(t2, phyloConfigured) <- timeMeasured'' DEBUG "flowPhyloAPI.phyloConfigured" (pure $! setConfig config phyloWithCliques)
markProgress 1 jobHandle
(t3, finalPhylo) <- timeMeasured'' DEBUG "flowPhyloAPI.toPhylo" (pure $! toPhylo phyloConfigured)
markProgress 1 jobHandle
-- As the phylo is computed fresh every time, without looking at the one stored (if any), we
-- have to manually propagate computing time across.
pure $! trackComputeTime (t1 + t2 + t3) (finalPhylo { _phylo_computeTime = mbOldComputeHistory })
let ret = trackComputeTime (t1 + t2 + t3) (finalPhylo { _phylo_computeTime = mbOldComputeHistory })
markProgress 1 jobHandle
pure ret
--------------------------------------------------------------------
corpusIdtoDocuments :: HasNodeError err
......
......@@ -16,7 +16,7 @@ module Gargantext.Core.Viz.Phylo.PhyloMaker where
import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parMap, rpar)
import Control.Parallel.Strategies (parMap, rpar, Strategy)
import Data.Containers.ListUtils (nubOrd)
import Data.Discrimination qualified as D
import Data.List (partition, intersect, tail)
......@@ -37,6 +37,13 @@ import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Viz.Phylo.TemporalMatching (toPhyloQuality, temporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toSimilarity)
import Gargantext.Prelude hiding (empty, toList)
defaultStrategy :: Strategy a
defaultStrategy = rpar
------------------
-- | To Phylo | --
------------------
......@@ -135,7 +142,7 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd
--------
-- 1.1) for each measure of similarity, prune the flat phylo, compute the branches and estimate the quality
qua :: [Double]
qua = parMap rpar (\thr ->
qua = parMap defaultStrategy (\thr ->
let edges = filter (\edge -> snd edge >= thr) graph
nodes = nubOrd $ concat $ map (\((n,n'),_) -> [n,n']) edges
branches = toRelatedComponents nodes edges
......@@ -176,7 +183,7 @@ findSeaLadder phylo = case getSeaElevation phylo of
docs = filterDocs (getDocsByDate phylo) ([period] ++ next)
diagos = filterDiago (getCoocByDate phylo) ([period] ++ next)
-- 1.2) compute the kinship similarities between pairs of source & target in parallel
pairs = parMap rpar (\source ->
pairs = parMap defaultStrategy (\source ->
let candidates = filter (\target -> (> 2) $ length
$ intersect (getGroupNgrams source) (getGroupNgrams target)) targets
in map (\target ->
......@@ -314,7 +321,7 @@ filterCliqueBySize thr l = filter (\clq -> (length $ clq ^. clustering_roots) >=
-- To filter nested Fis
filterCliqueByNested :: Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering]
filterCliqueByNested m =
let clq = parMap rpar (\l ->
let clq = parMap defaultStrategy (\l ->
foldl (\mem f -> if (any (\f' -> isNested (f' ^. clustering_roots) (f ^. clustering_roots)) mem)
then mem
else
......@@ -342,7 +349,7 @@ toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of
seriesOfClustering :: Map (Date,Date) [Clustering]
seriesOfClustering = case (clique $ getConfig phylo) of
Fis _ _ ->
let fis = parMap rpar (\(prd,docs) ->
let fis = parMap defaultStrategy (\(prd,docs) ->
case (corpusParser $ getConfig phylo) of
Tsv' _ -> let lst = toList
$ fisWithSizePolyMap' (Segment 1 20) 1 (map (\d -> (ngramsToIdx (text d) (getRoots phylo), (weight d, (sourcesToIdx (sources d) (getSources phylo))))) docs)
......@@ -354,7 +361,7 @@ toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of
$ toList phyloDocs
in fromList fis
MaxClique _ thr filterType ->
let mcl = parMap rpar (\(prd,docs) ->
let mcl = parMap defaultStrategy (\(prd,docs) ->
let cooc = map round
$ foldl sumCooc empty
$ map listToMatrix
......@@ -406,7 +413,7 @@ groupDocsByPeriodRec f prds docs acc =
groupDocsByPeriod' :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod' f pds docs =
let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs
periods = parMap rpar (inPeriode f docs') pds
periods = parMap defaultStrategy (inPeriode f docs') pds
in tracePhylo ("\n" <> "-- | Group "
<> show(length docs)
<> " docs by "
......@@ -424,7 +431,7 @@ groupDocsByPeriod' f pds docs =
groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.PhyloMaker] Empty [Documents] can not have any periods"
groupDocsByPeriod f pds es =
let periods = parMap rpar (inPeriode f es) pds
let periods = parMap defaultStrategy (inPeriode f es) pds
in tracePhylo ("\n" <> "-- | Group "
<> show(length es) <> " docs by "
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment