Commit 504b2514 authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Thread old compute time in flowPhyloAPI

This allows us to collect a /history/ of computing times.
parent 7beb5d72
Pipeline #6899 failed with stages
in 49 minutes and 22 seconds
......@@ -9,12 +9,13 @@ Portability : POSIX
-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Node.Update
where
import Control.Lens (view)
import Control.Lens (view, (^?), _Just)
import Data.Set qualified as Set
import Gargantext.API.Admin.EnvTypes (GargJob(..), Env)
import Gargantext.API.Admin.Orchestrator.Types (AsyncJobs(..))
......@@ -28,16 +29,16 @@ import Gargantext.Core.NodeStory.Types (HasNodeStory)
import Gargantext.Core.Text.Ngrams (NgramsType(NgramsTerms))
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Viz.Graph.API (recomputeGraph)
import Gargantext.Core.Viz.Phylo (subConfigAPI2config)
import Gargantext.Core.Viz.Phylo (subConfigAPI2config, phylo_computeTime)
import Gargantext.Core.Viz.Phylo.API.Tools (flowPhyloAPI)
import Gargantext.Database.Action.Flow (reIndexWith)
import Gargantext.Database.Action.Flow.Pairing (pairing)
import Gargantext.Database.Action.Metrics (updateNgramsOccurrences, updateContextScore)
import Gargantext.Database.Admin.Types.Hyperdata.Phylo ( HyperdataPhylo(HyperdataPhylo) )
import Gargantext.Database.Admin.Types.Hyperdata.Phylo ( HyperdataPhylo(..), hp_data )
import Gargantext.Database.Admin.Types.Node ( NodeId, NodeType(NodeCorpus, NodeAnnuaire, NodeTexts, NodeGraph, NodePhylo, NodeList) )
import Gargantext.Database.Query.Table.Node (defaultList, getNode, getChildrenByType)
import Gargantext.Database.Query.Table.Node (defaultList, getNode, getChildrenByType, getNodeWith)
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Database.Schema.Node (node_parent_id)
import Gargantext.Database.Schema.Node (node_parent_id, node_hyperdata)
import Gargantext.Prelude
import Gargantext.System.Logging ( MonadLogger )
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
......@@ -111,12 +112,14 @@ updateNode lId (UpdateNodeParamsList _mode) jobHandle = do
updateNode phyloId (UpdateNodePhylo config) jobHandle = do
markStarted 3 jobHandle
corpusId' <- view node_parent_id <$> getNode phyloId
oldPhylo <- getNodeWith phyloId (Proxy @HyperdataPhylo)
let corpusId' = view node_parent_id oldPhylo
let mbComputeHistory = oldPhylo ^? node_hyperdata . hp_data . traverse . phylo_computeTime . _Just
markProgress 1 jobHandle
let corpusId = fromMaybe (panicTrace "no corpus id") corpusId'
phy <- timeMeasured "updateNode.flowPhyloAPI" $ flowPhyloAPI (subConfigAPI2config config) corpusId
phy <- timeMeasured "updateNode.flowPhyloAPI" $ flowPhyloAPI (subConfigAPI2config config) mbComputeHistory corpusId
markProgress 2 jobHandle
{-
......
......@@ -101,7 +101,9 @@ postPhylo phyloId = Named.PostPhylo $ \_lId -> do
-- _sft = Just (Software "Gargantext" "4")
-- _prm = initPhyloParam vrs sft (Just q)
corpusId <- getClosestParentIdByType phyloId NodeCorpus
phy <- flowPhyloAPI defaultConfig (fromMaybe (panicTrace "[G.C.V.P.API] no corpus ID found") corpusId) -- params
-- Being the first time we ask for the Phylo, there is no historical data
-- available about computing time, so we pass 'Nothing'.
phy <- flowPhyloAPI defaultConfig Nothing (fromMaybe (panicTrace "[G.C.V.P.API] no corpus ID found") corpusId) -- params
-- phyloId <- insertNodes [node NodePhylo "Phylo" (HyperdataPhylo Nothing (Just phy)) (Just corpusId) userId]
_ <- updateHyperdata phyloId (HyperdataPhylo Nothing (Just phy))
pure phyloId
......
......@@ -33,7 +33,7 @@ import Gargantext.Core.NodeStory.Types (HasNodeStory)
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, termsInText)
import Gargantext.Core.Types.Main (ListType(MapTerm))
import Gargantext.Core.Viz.Phylo (TimeUnit(..), Date, Document(..), PhyloConfig(..), Phylo, trackComputeTime)
import Gargantext.Core.Viz.Phylo (TimeUnit(..), Date, Document(..), PhyloConfig(..), Phylo (_phylo_computeTime), trackComputeTime, ComputeTimeHistory)
import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink)
import Gargantext.Core.Viz.Phylo.PhyloTools ({-printIOMsg, printIOComment,-} setConfig)
......@@ -111,8 +111,12 @@ phylo2dot phylo = do
flowPhyloAPI :: (HasNodeStory env err m, HasNodeError err, MonadLogger m)
=> PhyloConfig -> CorpusId -> m Phylo
flowPhyloAPI config cId = do
=> PhyloConfig
-> Maybe ComputeTimeHistory
-- ^ Previous compute time historical data, if any.
-> CorpusId
-> m Phylo
flowPhyloAPI config mbOldComputeHistory cId = do
corpus <- timeMeasured "flowPhyloAPI.corpusIdtoDocuments" $ corpusIdtoDocuments (timeUnit config) cId
-- writePhylo phyloWithCliquesFile phyloWithCliques
$(logLocM) DEBUG $ "PhyloConfig old: " <> show config
......@@ -121,7 +125,9 @@ flowPhyloAPI config cId = do
(t2, phyloConfigured) <- timeMeasured'' DEBUG "flowPhyloAPI.phyloConfigured" (pure $! setConfig config phyloWithCliques)
(t3, finalPhylo) <- timeMeasured'' DEBUG "flowPhyloAPI.toPhylo" (pure $! toPhylo phyloConfigured)
pure $! trackComputeTime (t1 + t2 + t3) finalPhylo
-- As the phylo is computed fresh every time, without looking at the one stored (if any), we
-- have to manually propagate computing time across.
pure $! trackComputeTime (t1 + t2 + t3) (finalPhylo { _phylo_computeTime = mbOldComputeHistory })
--------------------------------------------------------------------
corpusIdtoDocuments :: (HasNodeStory env err m, HasNodeError err)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment