Commit 7b630704 authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge remote-tracking branch 'origin/adinapoli/issue-409' into dev

parents c1eede02 c08eb0a0
......@@ -9,12 +9,13 @@ Portability : POSIX
-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Node.Update
where
import Control.Lens (view)
import Control.Lens (view, (^?), _Just)
import Data.Set qualified as Set
import Gargantext.API.Admin.EnvTypes (GargJob(..), Env)
import Gargantext.API.Admin.Orchestrator.Types (AsyncJobs(..))
......@@ -28,16 +29,16 @@ import Gargantext.Core.NodeStory.Types (HasNodeStory)
import Gargantext.Core.Text.Ngrams (NgramsType(NgramsTerms))
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Viz.Graph.API (recomputeGraph)
import Gargantext.Core.Viz.Phylo (subConfigAPI2config)
import Gargantext.Core.Viz.Phylo (subConfigAPI2config, phylo_computeTime)
import Gargantext.Core.Viz.Phylo.API.Tools (flowPhyloAPI)
import Gargantext.Database.Action.Flow (reIndexWith)
import Gargantext.Database.Action.Flow.Pairing (pairing)
import Gargantext.Database.Action.Metrics (updateNgramsOccurrences, updateContextScore)
import Gargantext.Database.Admin.Types.Hyperdata.Phylo ( HyperdataPhylo(HyperdataPhylo) )
import Gargantext.Database.Admin.Types.Hyperdata.Phylo ( HyperdataPhylo(..), hp_data )
import Gargantext.Database.Admin.Types.Node ( NodeId, NodeType(NodeCorpus, NodeAnnuaire, NodeTexts, NodeGraph, NodePhylo, NodeList) )
import Gargantext.Database.Query.Table.Node (defaultList, getNode, getChildrenByType)
import Gargantext.Database.Query.Table.Node (defaultList, getNode, getChildrenByType, getNodeWith)
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Database.Schema.Node (node_parent_id)
import Gargantext.Database.Schema.Node (node_parent_id, node_hyperdata)
import Gargantext.Prelude
import Gargantext.System.Logging ( MonadLogger )
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
......@@ -111,12 +112,14 @@ updateNode lId (UpdateNodeParamsList _mode) jobHandle = do
updateNode phyloId (UpdateNodePhylo config) jobHandle = do
markStarted 3 jobHandle
corpusId' <- view node_parent_id <$> getNode phyloId
oldPhylo <- getNodeWith phyloId (Proxy @HyperdataPhylo)
let corpusId' = view node_parent_id oldPhylo
let mbComputeHistory = oldPhylo ^? node_hyperdata . hp_data . traverse . phylo_computeTime . _Just
markProgress 1 jobHandle
let corpusId = fromMaybe (panicTrace "no corpus id") corpusId'
phy <- timeMeasured "updateNode.flowPhyloAPI" $ flowPhyloAPI (subConfigAPI2config config) corpusId
phy <- timeMeasured "updateNode.flowPhyloAPI" $ flowPhyloAPI (subConfigAPI2config config) mbComputeHistory corpusId
markProgress 2 jobHandle
{-
......
......@@ -23,16 +23,23 @@ one 8, e54847.
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE TypeApplications #-}
module Gargantext.Core.Viz.Phylo where
import Control.Lens (over)
import Data.Swagger
import Data.Text (pack)
import Data.Text.Lazy qualified as TextLazy
import Data.TreeDiff (ToExpr)
import Data.Text (pack)
import Data.TreeDiff (ToExpr (..))
import Data.Vector (Vector)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Prelude
import Gargantext.Utils.UTCTime (ElapsedSeconds)
import qualified Data.Aeson.Types as JS
import qualified Data.List.NonEmpty as NE
import Test.QuickCheck
import Test.QuickCheck.Instances.Text()
import Test.QuickCheck.Instances.Vector()
......@@ -110,8 +117,6 @@ data Synchrony =
instance ToSchema Synchrony where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
data TimeUnit =
Epoch
{ _epoch_period :: Int
......@@ -425,7 +430,13 @@ type Period = (Date,Date)
type PeriodStr = (DateStr,DateStr)
newtype ComputeTimeHistory
= ComputeTimeHistory (NonEmpty ElapsedSeconds)
deriving stock (Show, Eq, Generic)
deriving newtype ToExpr
instance ToSchema ComputeTimeHistory where
declareNamedSchema _ = declareNamedSchema (Proxy @[ElapsedSeconds])
-- | Phylo datatype of a phylomemy
-- foundations : the foundations of the phylo
......@@ -442,6 +453,12 @@ data Phylo =
, _phylo_periods :: Map Period PhyloPeriod
, _phylo_quality :: Double
, _phylo_level :: Double
-- See #409, store historical data on
-- how many seconds it took to generate
-- a given phylomemy graph, to give a rough
-- estimate to end users. The field is optional
-- to make it backward compatible.
, _phylo_computeTime :: !(Maybe ComputeTimeHistory)
}
deriving (Generic, Show, Eq, ToExpr)
......@@ -685,6 +702,15 @@ instance ToJSON Software
instance FromJSON PhyloGroup
instance ToJSON PhyloGroup
instance ToJSON ComputeTimeHistory where
toJSON = \case
ComputeTimeHistory runs
-> toJSON runs
instance FromJSON ComputeTimeHistory where
parseJSON (JS.Array runs) = ComputeTimeHistory <$> parseJSON (JS.Array runs)
parseJSON ty = JS.typeMismatch "ComputeTimeHistory" ty
$(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)
instance FromJSON Phylo
......@@ -708,6 +734,7 @@ instance NFData PhyloParam
instance NFData PhyloFoundations
instance NFData PhyloCounts
instance NFData PhyloSources
instance NFData ComputeTimeHistory
instance NFData Phylo
instance NFData PhyloPeriod
instance NFData PhyloScale
......@@ -798,3 +825,36 @@ instance Arbitrary Filter where
instance Arbitrary PhyloParam where
arbitrary = pure defaultPhyloParam
instance Arbitrary ComputeTimeHistory where
arbitrary = oneof [ ComputeTimeHistory . NE.fromList . getNonEmpty <$> arbitrary ]
-- The 'resize' ensure our tests won't take too long as
-- we won't be generating very long lists.
instance Arbitrary Phylo where
arbitrary = Phylo <$> resize 6 arbitrary
<*> resize 6 arbitrary
<*> resize 6 arbitrary
<*> resize 6 arbitrary
<*> resize 6 arbitrary
<*> resize 6 arbitrary
<*> resize 6 arbitrary
<*> resize 6 arbitrary
<*> resize 6 arbitrary
--
-- Functions that uses the lenses
--
-- | Adds the input 'ElapsedSeconds' to the 'Philo', in the 'ComputeTimeHistory'.
trackComputeTime :: ElapsedSeconds -> Phylo -> Phylo
trackComputeTime elapsedSecs = over phylo_computeTime update_time
where
-- In case we have more than one historical data available, we take only the last 5
-- runs, to not make the list unbounded.
update_time :: Maybe ComputeTimeHistory -> Maybe ComputeTimeHistory
update_time Nothing
= Just $ ComputeTimeHistory (NE.singleton elapsedSecs)
update_time (Just (ComputeTimeHistory (r NE.:| runs))) =
Just $ ComputeTimeHistory (elapsedSecs NE.:| (r : take 3 runs))
......@@ -101,7 +101,9 @@ postPhylo phyloId = Named.PostPhylo $ \_lId -> do
-- _sft = Just (Software "Gargantext" "4")
-- _prm = initPhyloParam vrs sft (Just q)
corpusId <- getClosestParentIdByType phyloId NodeCorpus
phy <- flowPhyloAPI defaultConfig (fromMaybe (panicTrace "[G.C.V.P.API] no corpus ID found") corpusId) -- params
-- Being the first time we ask for the Phylo, there is no historical data
-- available about computing time, so we pass 'Nothing'.
phy <- flowPhyloAPI defaultConfig Nothing (fromMaybe (panicTrace "[G.C.V.P.API] no corpus ID found") corpusId) -- params
-- phyloId <- insertNodes [node NodePhylo "Phylo" (HyperdataPhylo Nothing (Just phy)) (Just corpusId) userId]
_ <- updateHyperdata phyloId (HyperdataPhylo Nothing (Just phy))
pure phyloId
......
......@@ -33,7 +33,7 @@ import Gargantext.Core.NodeStory.Types (HasNodeStory)
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, termsInText)
import Gargantext.Core.Types.Main (ListType(MapTerm))
import Gargantext.Core.Viz.Phylo (TimeUnit(..), Date, Document(..), PhyloConfig(..), Phylo)
import Gargantext.Core.Viz.Phylo (TimeUnit(..), Date, Document(..), PhyloConfig(..), Phylo (_phylo_computeTime), trackComputeTime, ComputeTimeHistory)
import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink)
import Gargantext.Core.Viz.Phylo.PhyloTools ({-printIOMsg, printIOComment,-} setConfig)
......@@ -49,7 +49,7 @@ import Gargantext.Database.Schema.Context ( ContextPoly(_context_hyperdata, _con
import Gargantext.Database.Schema.Node ( NodePoly(_node_hyperdata), node_hyperdata )
import Gargantext.Prelude hiding (to)
import Gargantext.System.Logging ( MonadLogger, LogLevel(DEBUG), logLocM )
import Gargantext.Utils.UTCTime (timeMeasured)
import Gargantext.Utils.UTCTime (timeMeasured, timeMeasured'')
import Prelude qualified
import System.FilePath ((</>))
import System.IO.Temp (withTempDirectory)
......@@ -111,19 +111,23 @@ phylo2dot phylo = do
flowPhyloAPI :: (HasNodeStory env err m, HasNodeError err, MonadLogger m)
=> PhyloConfig -> CorpusId -> m Phylo
flowPhyloAPI config cId = do
corpus <- corpusIdtoDocuments (timeUnit config) cId
let !phyloWithCliques = toPhyloWithoutLink corpus config
=> PhyloConfig
-> Maybe ComputeTimeHistory
-- ^ Previous compute time historical data, if any.
-> CorpusId
-> m Phylo
flowPhyloAPI config mbOldComputeHistory cId = do
corpus <- timeMeasured "flowPhyloAPI.corpusIdtoDocuments" $ corpusIdtoDocuments (timeUnit config) cId
-- writePhylo phyloWithCliquesFile phyloWithCliques
$(logLocM) DEBUG $ "PhyloConfig old: " <> show config
_ <- timeMeasured "flowPhyloAPI.phyloWithCliques" (pure $! phyloWithCliques)
(t1, phyloWithCliques) <- timeMeasured'' DEBUG "flowPhyloAPI.phyloWithCliques" (pure $! toPhyloWithoutLink corpus config)
(t2, phyloConfigured) <- timeMeasured'' DEBUG "flowPhyloAPI.phyloConfigured" (pure $! setConfig config phyloWithCliques)
(t3, finalPhylo) <- timeMeasured'' DEBUG "flowPhyloAPI.toPhylo" (pure $! toPhylo phyloConfigured)
let !phyloConfigured = setConfig config phyloWithCliques
_ <- timeMeasured "flowPhyloAPI.phyloConfigured" (pure $! phyloConfigured)
pure $! toPhylo phyloConfigured
-- As the phylo is computed fresh every time, without looking at the one stored (if any), we
-- have to manually propagate computing time across.
pure $! trackComputeTime (t1 + t2 + t3) (finalPhylo { _phylo_computeTime = mbOldComputeHistory })
--------------------------------------------------------------------
corpusIdtoDocuments :: (HasNodeStory env err m, HasNodeError err)
......
......@@ -551,3 +551,4 @@ initPhylo docs conf =
(fromList $ map (\prd -> (prd, PhyloPeriod prd ("","") (initPhyloScales 1 prd))) periods)
0
(_qua_granularity $ phyloQuality $ _phyloParam_config params)
Nothing
......@@ -12,20 +12,25 @@ Portability : POSIX
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE TypeApplications #-}
module Gargantext.Utils.UTCTime where
import Data.Fixed (Fixed(..))
import Data.Morpheus.Kind (SCALAR)
import Data.Morpheus.Types (GQLType(..), DecodeScalar(..), EncodeScalar(..))
import Data.Morpheus.Types qualified as DMT
import Data.String (fromString)
import Data.Swagger (ToSchema)
import Data.Swagger (ToSchema (..))
import Data.Text qualified as T
import Data.Time (UTCTime)
import Data.Time.Clock.POSIX (getPOSIXTime)
import Data.Time.Clock.POSIX (getPOSIXTime, POSIXTime)
import Data.Time (UTCTime, nominalDiffTimeToSeconds)
import Data.TreeDiff.Class
import Gargantext.Prelude
import Gargantext.System.Logging
import Prelude (String)
import Test.QuickCheck hiding (label)
newtype NUTCTime = NUTCTime UTCTime
......@@ -43,6 +48,23 @@ instance FromJSON NUTCTime
instance ToJSON NUTCTime
instance ToSchema NUTCTime
newtype ElapsedSeconds = ElapsedSeconds { _Seconds :: POSIXTime }
deriving stock (Show, Eq, Generic)
deriving newtype (FromJSON, ToJSON, Num)
instance NFData ElapsedSeconds
instance ToExpr ElapsedSeconds where
toExpr (ElapsedSeconds x) =
let (MkFixed secs) = nominalDiffTimeToSeconds x
in toExpr secs
instance ToSchema ElapsedSeconds where
declareNamedSchema _ = declareNamedSchema (Proxy @Int)
instance Arbitrary ElapsedSeconds where
arbitrary = ElapsedSeconds . fromInteger . getPositive <$> arbitrary
timeMeasured :: (MonadLogger m, MonadBase IO m, HasCallStack)
=> String
-- ^ A label
......@@ -51,18 +73,31 @@ timeMeasured :: (MonadLogger m, MonadBase IO m, HasCallStack)
-> m a
timeMeasured = withFrozenCallStack $ timeMeasured' DEBUG
timeMeasured' :: (MonadLogger m, MonadBase IO m, HasCallStack)
-- | A version of timeMeasured that also returns the elapsed time, in seconds.
timeMeasured'' :: (MonadLogger m, MonadBase IO m, HasCallStack)
=> LogLevel
-- ^ The severity of the log
-> String
-- ^ A label to identify the action.
-> m a
-- ^ The action to run
-> m a
timeMeasured' severity label action = withFrozenCallStack $ do
-> m (ElapsedSeconds, a)
timeMeasured'' severity label action = withFrozenCallStack $ do
startTime <- liftBase getPOSIXTime
res <- action
endTime <- liftBase getPOSIXTime
let msg = label <> " took " <> (show $ endTime - startTime) <> " seconds to execute."
let finalTime = endTime - startTime
let msg = label <> " took " <> (show finalTime) <> " seconds to execute."
$(logLocM) severity (fromString msg)
return res
pure (ElapsedSeconds finalTime, res)
timeMeasured' :: (MonadLogger m, MonadBase IO m, HasCallStack)
=> LogLevel
-- ^ The severity of the log
-> String
-- ^ A label to identify the action.
-> m a
-- ^ The action to run
-> m a
timeMeasured' severity label action = withFrozenCallStack $
snd <$> timeMeasured'' severity label action
{
"_phylo_computeTime": null,
"_phylo_counts": {
"coocByDate": {
"2006": [
{
"_phylo_computeTime": null,
"_phylo_counts": {
"coocByDate": {
"1": [
......
{
"_phylo_computeTime": null,
"_phylo_counts": {
"coocByDate": {
"1000": [],
......@@ -6,14 +6,15 @@
module Test.Offline.JSON (tests) where
import Data.Aeson
import Data.ByteString qualified as B
import Data.ByteString.Lazy.Char8 qualified as C8
import Data.ByteString qualified as B
import Data.Either
import Gargantext.API.Errors
import Gargantext.API.Node.Corpus.Types
import Gargantext.API.Node.Types
import Gargantext.API.Viz.Types
import Gargantext.Core.Types.Phylo
import qualified Gargantext.Core.Viz.Phylo as VizPhylo
import Gargantext.Database.Admin.Types.Node
import Paths_gargantext
import Prelude
......@@ -62,6 +63,8 @@ tests = testGroup "JSON" [
, testProperty "GraphDataData" (jsonRoundtrip @GraphDataData)
, testProperty "ObjectData" (jsonRoundtrip @ObjectData)
, testProperty "PhyloData" (jsonRoundtrip @PhyloData)
, testProperty "ComputeTimeHistory" (jsonRoundtrip @VizPhylo.ComputeTimeHistory)
, testProperty "Phylo" (jsonRoundtrip @VizPhylo.Phylo)
, testProperty "LayerData" (jsonRoundtrip @LayerData)
, testCase "can parse bpa_phylo_test.json" testParseBpaPhylo
, testCase "can parse open_science.json" testOpenSciencePhylo
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment