Verified Commit 7d263e4d authored by Przemyslaw Kaminski

Merge branch 'dev' into 494-dev-phylo-hh-mm-ss-2

parents d36a9126 706734c5
Pipeline #7981 passed with stages in 53 minutes and 43 seconds
## Version 0.0.7.5.3
* [BACK][FIX][Resolve "[Server slowness] With the dev branch on the dev instance, we're experiencing a real slowness" (JobInfo changes)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/merge_requests/429)
* [BACK][OPTIM][Concurrent queries in NLP](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/merge_requests/451)
* [FRONT][FIX][[graph] restore proportional labels and make edges transparent](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/merge_requests/522)
* [BACK][FIX][Resolve "On IMT Instance : Error message or crashes when changing the status of terms in large batch"](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/merge_requests/445)
* [FRONT][FIX][graph: rework edge renderer based on rectangle renderer](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/merge_requests/523)
* [BACK][FIX][Break loops in Ngrams graphs](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/merge_requests/453)
## Version 0.0.7.5.2
* [BACK/FRONT][FIX][[query] a prefixed query like ~prefix should transform to 'prefix', not '"prefix'](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/merge_requests/443)
......
......@@ -88,7 +88,8 @@ convertConfigs ini@(Ini.GargConfig { .. }) iniMail nlpConfig connInfo =
, _wsLongJobTimeout = 3000
, _wsDefaultDelay = 0
, _wsAdditionalDelayAfterRead = 5
, _wsDatabase = connInfo { PGS.connectDatabase = "pgmq"} }
, _wsDatabase = connInfo { PGS.connectDatabase = "pgmq"}
, _wsNlpConduitChunkSize = 10 }
, _gc_logging = Config.LogConfig {
_lc_log_level = INFO
, _lc_log_file = Nothing
......
......@@ -166,6 +166,10 @@ default_job_timeout = 60
# default timeout for "long" jobs (in seconds)
long_job_timeout = 3000
# Batch size when sending data to NLP.
# Preferably, set this to the number of CPUs
nlp_conduit_chunk_size = 10
# if you leave the same credentials as in [database] section above,
# workers will try to set up the `gargantext_pgmq` database
# automatically
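The new `nlp_conduit_chunk_size` setting controls how many documents are grouped per NLP request (see the `CList.chunksOf chunkSize` call in `Gargantext.Database.Action.Flow` below). A minimal, self-contained sketch of that batching behaviour with the conduit library:

```haskell
import Conduit (runConduitPure, yieldMany, sinkList, (.|))
import qualified Data.Conduit.List as CList

-- Group a stream into chunks of a configurable size, mirroring how the
-- worker batches documents before each NLP call.
batches :: Int -> [a] -> [[a]]
batches chunkSize xs =
  runConduitPure $ yieldMany xs .| CList.chunksOf chunkSize .| sinkList

-- batches 10 [1..25] == [[1..10], [11..20], [21..25]]
```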
......
......@@ -5,7 +5,7 @@ cabal-version: 3.4
-- see: https://github.com/sol/hpack
name: gargantext
version: 0.0.7.5.2
version: 0.0.7.5.3
synopsis: Search, map, share
description: Please see README.md
category: Data
......@@ -570,6 +570,7 @@ library
, json-stream ^>= 0.4.2.4
, lens >= 5.2.2 && < 5.3
, lens-aeson < 1.3
, lifted-async >= 0.10 && < 0.12
, list-zipper
, massiv < 1.1
, matrix ^>= 0.3.6.1
......@@ -753,6 +754,7 @@ common commonTestDependencies
, generic-arbitrary >= 1.0.1 && < 2
, graphviz ^>= 2999.20.1.0
, haskell-bee
, haskell-bee-pgmq
, hspec ^>= 2.11.1
, hspec-expectations >= 0.8 && < 0.9
, hspec-expectations-lifted < 0.11
......
......@@ -23,7 +23,7 @@ module Gargantext.API.Job (
, addWarningEvent
) where
import Control.Lens (over, _Just)
import Control.Lens ((%~), over, _Just)
import Data.Text qualified as T
import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.Prelude
......@@ -66,7 +66,12 @@ jobLogComplete jl =
& over scst_remaining (const (Just 0))
jobLogAddMore :: Int -> JobLog -> JobLog
jobLogAddMore moreSteps jl = jl & over (scst_remaining . _Just) (+ moreSteps)
jobLogAddMore moreSteps jl =
jl & scst_remaining %~ (maybe (Just 0) Just)
& scst_succeeded %~ (maybe (Just 0) Just)
& scst_failed %~ (maybe (Just 0) Just)
& scst_events %~ (maybe (Just []) Just)
& (scst_remaining . _Just) %~ (+ moreSteps)
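The rewritten `jobLogAddMore` first normalises every `Nothing` field to a zero or empty default before bumping the remaining-steps counter, so adding steps to a freshly initialised `JobLog` can no longer be a silent no-op. The normalisation idiom, as a small standalone sketch:

```haskell
-- `maybe (Just def) Just` keeps an existing value intact and replaces
-- Nothing with a default; applied to each counter before adding moreSteps.
orDefault :: a -> Maybe a -> Maybe a
orDefault def = maybe (Just def) Just

-- orDefault 0 Nothing  == Just 0
-- orDefault 0 (Just 5) == Just 5
```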
jobLogFailures :: Int -> JobLog -> JobLog
jobLogFailures n jl = over (scst_failed . _Just) (+ n) $
......
......@@ -17,14 +17,12 @@ add get
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE IncoherentInstances #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.API.Ngrams
(
......@@ -114,7 +112,7 @@ import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory hiding (buildForest)
import Gargantext.Core.NodeStory qualified as NodeStory
import Gargantext.Core.Text.Ngrams (Ngrams, NgramsType)
import Gargantext.Core.Types (ListType(..), NodeId, ListId, TODO, assertValid, ContextId, HasValidationError)
import Gargantext.Core.Types (ListType(..), NodeId, ListId, TODO, assertValid, HasValidationError)
import Gargantext.Core.Types.Query (Limit(..), Offset(..), MinSize(..), MaxSize(..))
import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
import Gargantext.Database.Prelude
......@@ -451,21 +449,23 @@ matchingNode listType minSize maxSize searchFn (Node inputNode children) =
-- | Version of 'buildForest' specialised over the 'NgramsElement' as the values of the tree.
-- /IMPORTANT/: This function returns an error in case a loop is found.
buildForest :: Map NgramsTerm NgramsElement -> Either BuildForestError (Forest NgramsElement)
buildForest = fmap (map (fmap snd)) . NodeStory.buildForest
buildForest :: OnLoopDetectedStrategy -> Map NgramsTerm NgramsElement -> Either BuildForestError (Forest NgramsElement)
buildForest onLoopStrategy = fmap (map (fmap snd)) . NodeStory.buildForest onLoopStrategy
-- | Folds an Ngrams forest back to a table map.
-- This function doesn't aggregate information, but merely reconstructs the original
-- map without loss of information. To perform operations on the forest, use the appropriate
-- functions.
destroyForest :: Forest NgramsElement -> Map NgramsTerm NgramsElement
destroyForest f = Map.fromList . map (foldTree destroyTree) $ f
-- /NOTA BENE:/ We return a list and not a Map because we might have sorted the forest, and
-- converting into a map would trash the carefully-constructed sorting.
destroyForest :: Forest NgramsElement -> [(NgramsTerm, NgramsElement)]
destroyForest f = concatMap (map (\el -> (_ne_ngrams el, el)) . flatten) $ f
where
destroyTree :: NgramsElement -> [(NgramsTerm, NgramsElement)] -> (NgramsTerm, NgramsElement)
destroyTree rootEl childrenEl = (_ne_ngrams rootEl, squashElements rootEl childrenEl)
-- _destroyTree :: NgramsElement -> [(NgramsTerm, [NgramsElement])] -> (NgramsTerm, [NgramsElement])
-- _destroyTree rootEl childrenEl = (_ne_ngrams rootEl, childrenEl)
squashElements :: NgramsElement -> [(NgramsTerm, NgramsElement)] -> NgramsElement
squashElements r _ = r
-- _squashElements :: NgramsElement -> [(NgramsTerm, NgramsElement)] -> NgramsElement
-- _squashElements r _ = r
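`buildForest` now takes an `OnLoopDetectedStrategy`, and `destroyForest` returns an association list so that any ordering applied to the forest survives. For an acyclic table the two still round-trip (this is checked by a property test further below); a sketch:

```haskell
import qualified Data.Map.Strict as Map

-- Rebuild the original table from its forest form; for acyclic input,
-- roundTrip mp == Right mp.
roundTrip :: Map NgramsTerm NgramsElement
          -> Either BuildForestError (Map NgramsTerm NgramsElement)
roundTrip mp = Map.fromList . destroyForest <$> buildForest FailOnLoop mp
```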
-- | TODO Errors management
-- TODO: polymorphic for Annuaire or Corpus or ...
......@@ -482,58 +482,63 @@ searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement)
-> Either BuildForestError (VersionedWithCount NgramsTable)
searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
let tableMap = versionedTableMap ^. v_data
in case buildForest tableMap of
in case keepRoots <$> buildForest (BreakLoop LBA_just_do_it) tableMap of
Left err -> Left err
Right fs ->
let forestRoots = Set.fromList
. Map.elems
. destroyForest
. filterNgramsNodes _nsq_listType _nsq_minSize _nsq_maxSize _nsq_searchQuery
$ fs
let forestRoots = filterNgramsNodes _nsq_listType _nsq_minSize _nsq_maxSize _nsq_searchQuery $ fs
tableMapSorted = versionedTableMap
& v_data .~ (NgramsTable . sortAndPaginate . withInners tableMap $ forestRoots)
in Right $ toVersionedWithCount (Set.size forestRoots) tableMapSorted
& v_data .~ (NgramsTable . map snd
. destroyForest
. sortAndPaginateForest _nsq_offset _nsq_limit _nsq_orderBy
. withInnersForest
$ forestRoots
)
in Right $ toVersionedWithCount (length forestRoots) tableMapSorted
keepRoots :: Forest NgramsElement -> Forest NgramsElement
keepRoots = filter (\(Node r _) -> isNothing (_ne_root r) || isNothing (_ne_parent r))
-- | For each input root, extends its occurrence count with
-- the information found in the subforest.
withInnersForest :: Forest NgramsElement -> Forest NgramsElement
withInnersForest = map sumSubitemsOccurrences
where
-- Sorts the input 'NgramsElement' list.
-- /IMPORTANT/: As we might be sorting ngrams in all sorts of languages,
-- some of them might include letters with accents and other unicode symbols,
-- but we need to filter those /diacritics/ out so that the sorting would
-- happen in the way users would expect. See ticket #331.
sortOnOrder :: Maybe OrderBy -> ([NgramsElement] -> [NgramsElement])
sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
sortOnOrder (Just TermAsc) = List.sortBy ngramTermsAscSorter
sortOnOrder (Just TermDesc) = List.sortBy ngramTermsDescSorter
sortOnOrder (Just ScoreAsc) = List.sortOn $ view (ne_occurrences . to Set.size)
sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view (ne_occurrences . to Set.size)
ngramTermsAscSorter = on unicodeDUCETSorter (unNgramsTerm . view ne_ngrams)
ngramTermsDescSorter = on (\n1 n2 -> unicodeDUCETSorter n2 n1) (unNgramsTerm . view ne_ngrams)
-- | For each input root, extends its occurrence count with
-- the information found in the subitems.
withInners :: Map NgramsTerm NgramsElement -> Set NgramsElement -> Set NgramsElement
withInners tblMap roots = Set.map addSubitemsOccurrences roots
where
addSubitemsOccurrences :: NgramsElement -> NgramsElement
addSubitemsOccurrences e =
e { _ne_occurrences = foldl' alterOccurrences (e ^. ne_occurrences) (e ^. ne_children) }
alterOccurrences :: Set ContextId -> NgramsTerm -> Set ContextId
alterOccurrences occs t = case Map.lookup t tblMap of
Nothing -> occs
Just e' -> occs <> e' ^. ne_occurrences
-- | Paginate the results
sortAndPaginate :: Set NgramsElement -> [NgramsElement]
sortAndPaginate xs =
let offset' = getOffset $ maybe 0 identity _nsq_offset
in take (getLimit _nsq_limit)
. drop offset'
. sortOnOrder _nsq_orderBy
. Set.toList
$ xs
sumSubitemsOccurrences :: Tree NgramsElement -> Tree NgramsElement
sumSubitemsOccurrences (Node root children) =
let children' = withInnersForest children
root' = root { _ne_occurrences = (_ne_occurrences root) <> foldMap (_ne_occurrences . rootLabel) children' }
in Node root' children'
sortAndPaginateForest :: Maybe Offset
-> Limit
-> Maybe OrderBy
-> Forest NgramsElement
-> Forest NgramsElement
sortAndPaginateForest mb_offset limit orderBy xs =
let offset' = getOffset $ maybe 0 identity mb_offset
in take (getLimit limit)
. drop offset'
. sortOnOrderForest orderBy
$ xs
-- Sorts the input 'NgramsElement' list.
-- /IMPORTANT/: As we might be sorting ngrams in all sorts of languages,
-- some of them might include letters with accents and other unicode symbols,
-- but we need to filter those /diacritics/ out so that the sorting would
-- happen in the way users would expect. See ticket #331.
sortOnOrderForest :: Maybe OrderBy -> (Forest NgramsElement -> Forest NgramsElement)
sortOnOrderForest Nothing = sortOnOrderForest (Just ScoreDesc)
sortOnOrderForest (Just TermAsc) = List.sortBy (\(Node t1 _) (Node t2 _) -> ngramTermsAscSorter t1 t2)
sortOnOrderForest (Just TermDesc) = List.sortBy (\(Node t1 _) (Node t2 _) -> ngramTermsDescSorter t1 t2)
sortOnOrderForest (Just ScoreAsc) = List.sortOn $ \(Node root _) -> root ^. (ne_occurrences . to Set.size)
sortOnOrderForest (Just ScoreDesc) = List.sortOn $ Down . (\(Node root _) -> root ^. (ne_occurrences . to Set.size))
ngramTermsAscSorter :: NgramsElement -> NgramsElement -> Ordering
ngramTermsAscSorter = on unicodeDUCETSorter (unNgramsTerm . view ne_ngrams)
ngramTermsDescSorter :: NgramsElement -> NgramsElement -> Ordering
ngramTermsDescSorter = on (\n1 n2 -> unicodeDUCETSorter n2 n1) (unNgramsTerm . view ne_ngrams)
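Sorting and pagination now move whole trees: a root and its children travel together, which is also why counts are taken over roots rather than over all elements. A minimal sketch of the ScoreDesc case on a plain forest, assuming an `Int` score in the node label:

```haskell
import Data.List (sortOn)
import Data.Ord (Down(..))
import Data.Tree (Forest, rootLabel)

-- Order a forest by the score stored at each root, highest first;
-- subtrees stay attached to their root and keep their internal order.
sortForestScoreDesc :: Forest (String, Int) -> Forest (String, Int)
sortForestScoreDesc = sortOn (Down . snd . rootLabel)
```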
-- | This function allows sorting two texts via their unicode sorting
-- (as opposed as the standard lexicographical sorting) by relying on
......
......@@ -111,6 +111,9 @@ mSetFromSet = MSet . Map.fromSet (const ())
mSetFromList :: Ord a => [a] -> MSet a
mSetFromList = MSet . Map.fromList . map (\x -> (x, ()))
mSetDifference :: Ord a => MSet a -> MSet a -> MSet a
mSetDifference (MSet m1) (MSet m2) = MSet (m1 `Map.difference` m2)
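`mSetDifference` simply delegates to `Map.difference`, keeping the keys of the first set that are absent from the second. For instance (a sketch using the `mSetFromList`/`mSetToList` helpers defined in this module):

```haskell
demoDifference :: [Int]
demoDifference =
  mSetToList (mSetFromList [1, 2, 3] `mSetDifference` mSetFromList [2, 3])
-- demoDifference == [1]
```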
-- mSetToSet :: Ord a => MSet a -> Set a
-- mSetToSet (MSet a) = Set.fromList ( Map.keys a)
mSetToSet :: Ord a => MSet a -> Set a
......
......@@ -131,7 +131,7 @@ updateNode phyloId (UpdateNodePhylo config) jobHandle = do
let corpusId = fromMaybe (panicTrace "no corpus id") corpusId'
phy <- timeMeasured "updateNode.flowPhyloAPI" $ flowPhyloAPI (subConfigAPI2config config) mbComputeHistory corpusId
phy <- timeMeasured "updateNode.flowPhyloAPI" $ flowPhyloAPI (subConfigAPI2config config) mbComputeHistory corpusId jobHandle
markProgress 1 jobHandle
{-
......
......@@ -4,7 +4,6 @@ module Gargantext.API.Routes.Named.Viz (
-- * Routes types
PhyloAPI(..)
, GetPhylo(..)
, PostPhylo(..)
, GraphAPI(..)
, GraphAsyncAPI(..)
, GraphVersionsAPI(..)
......@@ -31,7 +30,6 @@ import Servant.XML.Conduit (XML)
data PhyloAPI mode = PhyloAPI
{ getPhyloEp :: mode :- Summary "Phylo API" :> NamedRoutes GetPhylo
, postPhyloEp :: mode :- NamedRoutes PostPhylo
} deriving Generic
......@@ -43,9 +41,9 @@ newtype GetPhylo mode = GetPhylo
} deriving Generic
newtype PostPhylo mode = PostPhylo
{ postPhyloByListIdEp :: mode :- QueryParam "listId" ListId :> (Post '[JSON] NodeId)
} deriving Generic
-- newtype PostPhylo mode = PostPhylo
-- { postPhyloByListIdEp :: mode :- QueryParam "listId" ListId :> (Post '[JSON] NodeId)
-- } deriving Generic
-- | There is no Delete specific API for Graph since it can be deleted
......
......@@ -9,19 +9,12 @@ Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Config.Ini.NLP (
-- * Types
NLPConfig(..)
-- * Utility functions
, readConfig
-- * Lenses
, nlp_default
, nlp_languages
)
where
......@@ -59,4 +52,3 @@ readConfig fp = do
, T.pack $ show m_nlp_other ]
Just ret -> pure ret
makeLenses ''NLPConfig
......@@ -19,7 +19,6 @@ module Gargantext.Core.Config.NLP (
-- * Lenses
, nlp_default
, nlp_languages
)
where
......@@ -48,9 +47,9 @@ data NLPConfig = NLPConfig { _nlp_default :: URI
instance FromValue NLPConfig where
fromValue v = do
_nlp_default <- parseTableFromValue (reqKey "EN") v
-- _nlp_languages <- fromValue <$> getTable
MkTable t <- parseTableFromValue getTable v
_nlp_languages <- mapM fromValue (snd <$> t)
return $ NLPConfig { .. }
instance ToValue NLPConfig where
toValue = defaultTableToValue
......@@ -58,7 +57,7 @@ instance ToTable NLPConfig where
toTable (NLPConfig { .. }) =
table ([ k .= v | (k, v) <- Map.toList _nlp_languages ]
-- output the default "EN" language as well
<> [ ("EN" :: Text) .= _nlp_default ])
<> [ ("EN" :: Text) .= _nlp_default ] )
-- readConfig :: SettingsFile -> IO NLPConfig
......
......@@ -53,6 +53,8 @@ data WorkerSettings =
, _wsDefaultDelay :: B.TimeoutS
, _wsAdditionalDelayAfterRead :: B.TimeoutS
, _wsDefinitions :: ![WorkerDefinition]
, _wsNlpConduitChunkSize :: Int
} deriving (Show, Eq)
instance FromValue WorkerSettings where
fromValue = parseTableFromValue $ do
......@@ -61,6 +63,7 @@ instance FromValue WorkerSettings where
_wsDefaultVisibilityTimeout <- reqKey "default_visibility_timeout"
_wsDefaultJobTimeout <- reqKey "default_job_timeout"
_wsLongJobTimeout <- reqKey "long_job_timeout"
_wsNlpConduitChunkSize <- reqKey "nlp_conduit_chunk_size"
defaultDelay <- reqKey "default_delay"
additionalDelayAfterRead <- reqKey "additional_delay_after_read"
return $ WorkerSettings { _wsDatabase = unTOMLConnectInfo dbConfig
......@@ -69,7 +72,8 @@ instance FromValue WorkerSettings where
, _wsDefinitions
, _wsDefaultVisibilityTimeout
, _wsDefaultDelay = B.TimeoutS defaultDelay
, _wsAdditionalDelayAfterRead = B.TimeoutS additionalDelayAfterRead }
, _wsAdditionalDelayAfterRead = B.TimeoutS additionalDelayAfterRead
, _wsNlpConduitChunkSize }
instance ToValue WorkerSettings where
toValue = defaultTableToValue
instance ToTable WorkerSettings where
......@@ -80,7 +84,8 @@ instance ToTable WorkerSettings where
, "default_visibility_timeout" .= _wsDefaultVisibilityTimeout
, "default_delay" .= B._TimeoutS _wsDefaultDelay
, "additional_delay_after_read" .= B._TimeoutS _wsAdditionalDelayAfterRead
, "definitions" .= _wsDefinitions ]
, "definitions" .= _wsDefinitions
, "nlp_conduit_chunk_size" .= _wsNlpConduitChunkSize ]
data WorkerDefinition =
WorkerDefinition {
......
......@@ -42,6 +42,7 @@ module Gargantext.Core.NodeStory.Types
, ArchiveState
, ArchiveStateSet
, ArchiveStateList
, allVisitedNgramsTerms
-- * Errors
, HasNodeStoryError(..)
......@@ -197,7 +198,7 @@ data BuildForestError
= -- We found a loop, something that shouldn't normally happen if the calling
-- code is correct by construction, but if that does happen, the value will
-- contain the full path to the cycle.
BFE_loop_detected !(Set VisitedNode)
BFE_loop_detected !(Set VisitedNode)
deriving (Show, Eq)
instance ToHumanFriendlyError BuildForestError where
......@@ -217,6 +218,9 @@ data VisitedNode =
VN { _vn_position :: !Int, _vn_term :: !NgramsTerm }
deriving (Show)
allVisitedNgramsTerms :: Set VisitedNode -> Set NgramsTerm
allVisitedNgramsTerms = Set.map _vn_term
-- /NOTA BENE/: It's important to use this custom instance for the loop detector
-- to work correctly. If we stop comparing on the terms, the loop detector itself will loop.
instance Eq VisitedNode where
......
......@@ -26,14 +26,12 @@ import Gargantext.API.Routes.Named.Viz qualified as Named
import Gargantext.API.Viz.Types
import Gargantext.Core.Types.Phylo (GraphData(..))
import Gargantext.Core.Viz.LegacyPhylo hiding (Phylo(..))
import Gargantext.Core.Viz.Phylo (PhyloConfig(..), defaultConfig, _phylo_param, _phyloParam_config)
import Gargantext.Core.Viz.Phylo (PhyloConfig(..), _phylo_param, _phyloParam_config)
import Gargantext.Core.Viz.Phylo.API.Tools
import Gargantext.Database.Prelude
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node -- (PhyloId, ListId, CorpusId, UserId, NodeId(..))
import Gargantext.Database.Query.Table.Node (getClosestParentIdByType, defaultList)
import Gargantext.Database.Query.Table.Node.Error
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Prelude
import Servant
import Servant.Server.Generic (AsServerT)
......@@ -42,7 +40,6 @@ import Web.HttpApiData (readTextData)
phyloAPI :: IsGargServer err env m => PhyloId -> Named.PhyloAPI (AsServerT m)
phyloAPI n = Named.PhyloAPI
{ getPhyloEp = getPhylo n
, postPhyloEp = postPhylo n
}
-- :<|> putPhylo n
-- :<|> deletePhylo n
......@@ -94,35 +91,6 @@ getPhyloDataJson phyloId = do
-- pure (SVG p)
-- FIXME(adn) This handler mixes DB reads with updates outside of the same
-- transaction, due to the call to 'flowPhyloAPI' in the middle.
postPhylo :: IsGargServer err env m => PhyloId -> Named.PostPhylo (AsServerT m)
postPhylo phyloId = Named.PostPhylo $ \_lId -> do
-- TODO get Reader settings
-- s <- ask
-- let
-- _vrs = Just ("1" :: Text)
-- _sft = Just (Software "Gargantext" "4")
-- _prm = initPhyloParam vrs sft (Just q)
corpusId <- runDBQuery $ getClosestParentIdByType phyloId NodeCorpus
-- Being the first time we ask for the Phylo, there is no historical data
-- available about computing time, so we pass 'Nothing'.
phy <- flowPhyloAPI defaultConfig Nothing (fromMaybe (panicTrace "[G.C.V.P.API] no corpus ID found") corpusId) -- params
-- phyloId <- insertNodes [node NodePhylo "Phylo" (HyperdataPhylo Nothing (Just phy)) (Just corpusId) userId]
_ <- runDBTx $ updateHyperdata phyloId (HyperdataPhylo Nothing (Just phy))
pure phyloId
------------------------------------------------------------------------
-- | DELETE Phylo == delete a node
------------------------------------------------------------------------
------------------------------------------------------------------------
{-
type PutPhylo = (Put '[JSON] Phylo )
--putPhylo :: PhyloId -> Maybe ListId -> PhyloQueryBuild -> Phylo
putPhylo :: PhyloId -> GargServer PutPhylo
putPhylo = undefined
-}
-- | Instances
instance FromHttpApiData DisplayMode where parseUrlPiece = readTextData
......
......@@ -51,6 +51,7 @@ import Gargantext.Database.Schema.Context ( ContextPoly(_context_hyperdata, _con
import Gargantext.Database.Schema.Node ( NodePoly(_node_hyperdata), node_hyperdata )
import Gargantext.Prelude hiding (to)
import Gargantext.System.Logging ( MonadLogger, LogLevel(DEBUG), logLocM )
import Gargantext.Utils.Jobs.Monad (JobHandle, MonadJobStatus(markProgress, addMoreSteps))
import Gargantext.Utils.UTCTime (timeMeasured, timeMeasured'')
import Prelude qualified
import System.FilePath ((</>))
......@@ -112,25 +113,35 @@ phylo2dot phylo = do
_ -> pure value
flowPhyloAPI :: (HasNodeStory env err m, HasNodeError err, MonadLogger m)
flowPhyloAPI :: (HasNodeStory env err m, HasNodeError err, MonadLogger m, MonadJobStatus m)
=> PhyloConfig
-> Maybe ComputeTimeHistory
-- ^ Previous compute time historical data, if any.
-> CorpusId
-> JobHandle m
-> m Phylo
flowPhyloAPI config mbOldComputeHistory cId = do
flowPhyloAPI config mbOldComputeHistory cId jobHandle = do
env <- view hasNodeStory
addMoreSteps 5 jobHandle
corpus <- timeMeasured "flowPhyloAPI.corpusIdtoDocuments" $ runDBQuery $ corpusIdtoDocuments env (timeUnit config) cId
markProgress 1 jobHandle
-- writePhylo phyloWithCliquesFile phyloWithCliques
$(logLocM) DEBUG $ "PhyloConfig old: " <> show config
(t1, phyloWithCliques) <- timeMeasured'' DEBUG "flowPhyloAPI.phyloWithCliques" (pure $! toPhyloWithoutLink corpus config)
markProgress 1 jobHandle
(t2, phyloConfigured) <- timeMeasured'' DEBUG "flowPhyloAPI.phyloConfigured" (pure $! setConfig config phyloWithCliques)
markProgress 1 jobHandle
(t3, finalPhylo) <- timeMeasured'' DEBUG "flowPhyloAPI.toPhylo" (pure $! toPhylo phyloConfigured)
markProgress 1 jobHandle
-- As the phylo is computed fresh every time, without looking at the one stored (if any), we
-- have to manually propagate computing time across.
pure $! trackComputeTime (t1 + t2 + t3) (finalPhylo { _phylo_computeTime = mbOldComputeHistory })
let ret = trackComputeTime (t1 + t2 + t3) (finalPhylo { _phylo_computeTime = mbOldComputeHistory })
markProgress 1 jobHandle
pure ret
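`flowPhyloAPI` now declares its extra steps up front with `addMoreSteps 5` and ticks one off after each expensive phase, so the job's progress bar reflects phylo computation instead of a single opaque step. A small helper capturing the pattern (a sketch against the `MonadJobStatus` interface; `withStep` is a hypothetical name):

```haskell
-- Run an action, then mark one step of the surrounding job as done.
withStep :: MonadJobStatus m => JobHandle m -> m a -> m a
withStep jobHandle action = do
  res <- action
  markProgress 1 jobHandle
  pure res
```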
--------------------------------------------------------------------
corpusIdtoDocuments :: HasNodeError err
......
......@@ -16,7 +16,7 @@ module Gargantext.Core.Viz.Phylo.PhyloMaker where
import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parMap, rpar)
import Control.Parallel.Strategies (parMap, rpar, Strategy)
import Data.Containers.ListUtils (nubOrd)
import Data.Discrimination qualified as D
import Data.List (partition, intersect)
......@@ -37,6 +37,13 @@ import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Viz.Phylo.TemporalMatching (toPhyloQuality, temporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toSimilarity)
import Gargantext.Prelude hiding (empty, toList)
defaultStrategy :: Strategy a
defaultStrategy = rpar
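Factoring the evaluation strategy into `defaultStrategy` lets every `parMap` call site below be switched in one place (e.g. to `rseq` while debugging). A runnable sketch of `parMap` with an explicit strategy:

```haskell
import Control.Parallel.Strategies (parMap, rpar)

-- Each element is sparked for parallel evaluation; with `rpar` the RTS
-- may run sparks on any free capability (compile -threaded, run +RTS -N).
squaresInParallel :: [Int] -> [Int]
squaresInParallel = parMap rpar (\x -> x * x)
```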
------------------
-- | To Phylo | --
------------------
......@@ -151,7 +158,7 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd
--------
-- 1.1) for each measure of similarity, prune the flat phylo, compute the branches and estimate the quality
qua :: [Double]
qua = parMap rpar (\thr ->
qua = parMap defaultStrategy (\thr ->
let edges = filter (\edge -> snd edge >= thr) graph
nodes = nubOrd $ concatMap (\((n,n'),_) -> [n,n']) edges
branches = toRelatedComponents nodes edges
......@@ -192,7 +199,7 @@ findSeaLadder phylo = case getSeaElevation phylo of
docs = filterDocs (getDocsByDate phylo) ([period] ++ next)
diagos = filterDiago (getCoocByDate phylo) ([period] ++ next)
-- 1.2) compute the kinship similarities between pairs of source & target in parallel
pairs = parMap rpar (\source ->
pairs = parMap defaultStrategy (\source ->
let candidates = filter (\target -> (> 2) $ length
$ intersect (getGroupNgrams source) (getGroupNgrams target)) targets
in map (\target ->
......@@ -329,7 +336,7 @@ filterCliqueBySize thr l = filter (\clq -> (length $ clq ^. clustering_roots) >=
-- To filter nested Fis
filterCliqueByNested :: Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering]
filterCliqueByNested m =
let clq = parMap rpar (\l ->
let clq = parMap defaultStrategy (\l ->
foldl (\mem f -> if (any (\f' -> isNested (f' ^. clustering_roots) (f ^. clustering_roots)) mem)
then mem
else
......@@ -448,7 +455,7 @@ groupDocsByPeriodRec f prds docs acc =
groupDocsByPeriod' :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod' f pds docs =
let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs
periods = parMap rpar (inPeriode f docs') pds
periods = parMap defaultStrategy (inPeriode f docs') pds
in tracePhylo ("\n" <> "-- | Group "
<> show (length docs)
<> " docs by "
......@@ -466,7 +473,7 @@ groupDocsByPeriod' f pds docs =
groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.PhyloMaker] Empty [Documents] can not have any periods"
groupDocsByPeriod f pds es =
let periods = parMap rpar (inPeriode f es) pds
let periods = parMap defaultStrategy (inPeriode f es) pds
in tracePhylo ("\n" <> "-- | Group "
<> show (length es) <> " docs by "
......
......@@ -9,8 +9,9 @@ Portability : POSIX
-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE UndecidableInstances #-}
{-# OPTIONS_GHC -Wno-orphans #-} -- orphan HasNodeError BackendInternalError
......@@ -25,6 +26,7 @@ import Control.Lens.TH
import Control.Monad.Trans.Control (MonadBaseControl)
import Data.Maybe (fromJust)
import Data.Pool qualified as Pool
import Data.Text qualified as T
import Database.PostgreSQL.Simple qualified as PSQL
import Gargantext.API.Admin.Orchestrator.Types (JobLog, noJobLog)
import Gargantext.API.Errors (BackendInternalError)
......@@ -42,7 +44,7 @@ import Gargantext.Core.Notifications.CentralExchange.Types qualified as CET
import Gargantext.Core.Worker.Types (JobInfo(..))
import Gargantext.Database.Prelude (HasConnectionPool(..))
import Gargantext.Prelude hiding (to)
import Gargantext.System.Logging (HasLogger(..), Logger, LogLevel(..), MonadLogger(..), withLogger, logMsg, withLoggerIO)
import Gargantext.System.Logging (HasLogger(..), Logger, LogLevel(..), MonadLogger(..), withLogger, logMsg, logLocM, withLoggerIO)
import Gargantext.System.Logging.Loggers
import Gargantext.Utils.Jobs.Monad ( MonadJobStatus(..), JobHandle )
import System.Log.FastLogger qualified as FL
......@@ -182,9 +184,21 @@ instance MonadJobStatus WorkerMonad where
type JobEventType WorkerMonad = JobLog
noJobHandle Proxy = WorkerNoJobHandle
getLatestJobStatus _ = WorkerMonad (pure noJobLog)
getLatestJobStatus WorkerNoJobHandle = pure noJobLog
getLatestJobStatus (WorkerJobHandle ji) = do
stateTVar <- asks _w_env_job_state
state' <- liftIO $ readTVarIO stateTVar
pure $ case state' of
Nothing -> noJobLog
Just wjs ->
if _wjs_job_info wjs == ji
then
_wjs_job_log wjs
else
noJobLog
withTracer _ jh n = n jh
markStarted n jh = updateJobProgress jh (const $ jobLogStart $ RemainingSteps n)
markStarted n jh =
updateJobProgress jh (const $ jobLogStart $ RemainingSteps n)
markProgress steps jh = updateJobProgress jh (jobLogProgress steps)
markFailure steps mb_msg jh =
updateJobProgress jh (\latest -> case mb_msg of
......@@ -208,7 +222,9 @@ updateJobProgress (WorkerJobHandle (ji@JobInfo { _ji_message_id })) f = do
case state' of
Nothing -> pure ()
Just wjs -> do
CET.ce_notify $ CET.UpdateWorkerProgress ji (_wjs_job_log wjs)
(CET.ce_notify $ CET.UpdateWorkerProgress ji (_wjs_job_log wjs))
`CES.catch` (\(e :: SomeException) ->
$(logLocM) WARNING $ T.pack $ displayException e)
where
updateState mwjs =
let initJobLog =
......
......@@ -47,9 +47,11 @@ sendJobWithCfg gcConfig job = do
b <- initBrokerWithDBCreate (gcConfig ^. gc_database_config) ws
let queueName = _wdQueue wd
let addDelayAfterRead = gcConfig ^. gc_worker . wsAdditionalDelayAfterRead
let job' = (updateJobData ws job $ W.mkDefaultSendJob' b queueName job) { W.delay = _wsDefaultDelay
, W.addDelayAfterRead = B._TimeoutS addDelayAfterRead
, W.toStrat = WT.TSDelete }
let sj = (W.mkDefaultSendJob' b queueName job) { W.delay = _wsDefaultDelay
, W.addDelayAfterRead = B._TimeoutS addDelayAfterRead
-- don't allow the job to repeat infinitely (see #495)
, W.toStrat = WT.TSDelete }
let job' = updateJobData ws job sj
withLogger (gcConfig ^. gc_logging) $ \ioL ->
$(logLoc) ioL DEBUG $ "[sendJob] sending job " <> show job <> " (delay " <> show (W.delay job') <> ")"
W.sendJob' job'
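The fix is in the ordering: retry behaviour (`W.toStrat = WT.TSDelete`, preventing a job from being re-delivered forever, see #495) is set on the default send job first, and `updateJobData` is applied last so that per-job settings are no longer clobbered. Why ordering matters with record updates, shown on a hypothetical record:

```haskell
data Job = Job { delay :: Int, priority :: Int } deriving Show

tweak :: Job -> Job          -- stands in for updateJobData
tweak j = j { priority = 9 }

broken, fixed :: Job -> Job
broken base = (tweak base) { priority = 1 }  -- tweak's change is overwritten
fixed  base = tweak (base { priority = 1 })  -- tweak is applied last and wins
```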
......
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE StandaloneDeriving #-}
{-|
Module : Gargantext.Core.Worker.Types
Description : Some useful worker types
......
......@@ -55,6 +55,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
where
import Conduit
import Control.Concurrent.Async.Lifted qualified as AsyncL
import Control.Exception.Safe qualified as CES
import Control.Lens ( to, view )
import Control.Exception.Safe (catch, MonadCatch)
......@@ -70,7 +71,8 @@ import Data.Text qualified as T
import Gargantext.API.Ngrams.Tools (getTermsWith)
import Gargantext.API.Ngrams.Types (NgramsTerm)
import Gargantext.Core (Lang(..), withDefaultLanguage, NLPServerConfig)
import Gargantext.Core.Config (GargConfig(..), hasConfig)
import Gargantext.Core.Config (GargConfig(..), hasConfig, gc_worker)
import Gargantext.Core.Config.Worker (wsNlpConduitChunkSize)
import Gargantext.Core.Config.Types (APIsConfig(..))
import Gargantext.Core.Ext.IMTUser (readFile_Annuaire)
import Gargantext.Core.NLP (HasNLPServer, nlpServerGet)
......@@ -98,7 +100,8 @@ import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact )
import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( HyperdataAnnuaire, HyperdataCorpus(_hc_lang) )
import Gargantext.Database.Admin.Types.Hyperdata.Document ( ToHyperdataDocument(toHyperdataDocument), HyperdataDocument )
import Gargantext.Database.Admin.Types.Node hiding (ERROR, DEBUG) -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
import Gargantext.Database.Prelude
import Gargantext.Database.Class ( DBCmdWithEnv, IsDBCmd )
import Gargantext.Database.Transactional ( DBUpdate, runDBTx )
import Gargantext.Database.Query.Table.ContextNodeNgrams2 ( ContextNodeNgrams2Poly(..), insertContextNodeNgrams2 )
import Gargantext.Database.Query.Table.Node ( MkCorpus, insertDefaultNodeIfNotExists, getOrMkList, getNodeWith )
import Gargantext.Database.Query.Table.Node.Document.Add qualified as Doc (add)
......@@ -108,8 +111,8 @@ import Gargantext.Database.Query.Table.NodeContext (selectDocNodesOnlyId)
import Gargantext.Database.Query.Table.NodeNgrams (listInsertDb , getCgramsId)
import Gargantext.Database.Query.Tree.Root (MkCorpusUser(..), getOrMkRoot, getOrMkRootWithCorpus, userFromMkCorpusUser)
import Gargantext.Database.Schema.Ngrams ( indexNgrams, NgramsId )
import Gargantext.Database.Schema.Node
import Gargantext.Database.Types
import Gargantext.Database.Schema.Node ( NodePoly(_node_id, _node_hash_id), node_hyperdata )
import Gargantext.Database.Types ( Indexed(Indexed) )
import Gargantext.Prelude hiding (catch, onException, to)
import Gargantext.System.Logging ( logLocM, LogLevel(DEBUG, ERROR), MonadLogger )
import Gargantext.Utils.Jobs.Monad ( JobHandle, MonadJobStatus(..), markFailureNoErr )
......@@ -296,11 +299,12 @@ flow :: forall env err m a c.
-> m CorpusId
flow c mkCorpusUser la mfslw (count, docsC) jobHandle = do
cfg <- view hasConfig
let chunkSize = cfg ^. gc_worker . wsNlpConduitChunkSize
(_userId, userCorpusId, listId, msgs) <- runDBTx $ createNodes cfg mkCorpusUser c
forM_ msgs ce_notify
-- TODO if public insertMasterDocs else insertUserDocs
runConduit (zipSources (yieldMany ([1..] :: [Int])) docsC
.| CList.chunksOf 5
.| CList.chunksOf chunkSize
.| mapM_C (addDocumentsWithProgress userCorpusId)
.| sinkNull) `CES.catches`
[ CES.Handler $ \(e :: ClientError) -> do
......@@ -544,13 +548,20 @@ extractNgramsFromDocuments :: forall doc env err m.
-> TermType Lang
-> [doc]
-> m (UncommittedNgrams doc)
extractNgramsFromDocuments nlpServer lang docs =
foldlM go mempty docs
where
go :: UncommittedNgrams doc -> doc -> m (UncommittedNgrams doc)
go !acc inputDoc = do
ngrams <- extractNgramsFromDocument nlpServer lang inputDoc
pure $ acc <> ngrams
extractNgramsFromDocuments nlpServer lang docs = do
ret <- AsyncL.mapConcurrently (extractNgramsFromDocument nlpServer lang) docs
-- sem <- QSemL.newQSem 10
-- let f = extractNgramsFromDocument nlpServer lang
-- ret <- AsyncL.mapConcurrently (\doc ->
-- CEL.bracket_ (QSemL.waitQSem sem) (QSemL.signalQSem sem) (f doc)
-- ) docs
pure $ foldl (<>) mempty ret
-- foldlM go mempty docs
-- where
-- go :: UncommittedNgrams doc -> doc -> m (UncommittedNgrams doc)
-- go !acc inputDoc = do
-- ngrams <- extractNgramsFromDocument nlpServer lang inputDoc
-- pure $ acc <> ngrams
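Ngrams extraction now fans out with `mapConcurrently` (via `lifted-async`, hence the new cabal dependency), one thread per document; the commented-out code sketches how a `QSem` could cap the number of in-flight NLP requests. That bounded variant, as a self-contained `IO` sketch:

```haskell
import Control.Concurrent.Async (mapConcurrently)
import Control.Concurrent.QSem (newQSem, signalQSem, waitQSem)
import Control.Exception (bracket_)

-- Run `f` over all inputs concurrently, with at most `n` in flight;
-- bracket_ guarantees the semaphore is released even if `f` throws.
mapConcurrentlyBounded :: Int -> (a -> IO b) -> [a] -> IO [b]
mapConcurrentlyBounded n f xs = do
  sem <- newQSem n
  mapConcurrently (\x -> bracket_ (waitQSem sem) (signalQSem sem) (f x)) xs
```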
commitNgramsForDocuments :: UniqParameters doc
=> UncommittedNgrams doc
......
......@@ -132,7 +132,8 @@ getOccByNgramsOnlyFast_withSample cId int nt ngs =
HM.fromListWith (+) <$> selectNgramsOccurrencesOnlyByContextUser_withSample cId int nt ngs
-- Returns occurrences of ngrams in given corpus/list (for each ngram, a list of contexts is returned)
-- | Returns occurrences of ngrams in given corpus/list (for each
-- ngram, a list of contexts is returned)
getOccByNgramsOnlyFast :: CorpusId
-> ListId
-> NgramsType
......@@ -154,34 +155,39 @@ getOccByNgramsOnlyFast cId lId nt = do
query :: DPS.Query
query = [sql|
WITH cnnv AS
( SELECT DISTINCT context_node_ngrams.context_id,
context_node_ngrams.ngrams_id,
nodes_contexts.node_id,
nodes_contexts.category
FROM nodes_contexts
JOIN context_node_ngrams ON context_node_ngrams.context_id = nodes_contexts.context_id
WITH nc AS (
SELECT DISTINCT context_id
FROM nodes_contexts
WHERE node_id = ?
AND category > 0
),
cnnv AS
( SELECT DISTINCT context_id,
ngrams_id
FROM context_node_ngrams
WHERE context_id IN (SELECT context_id FROM nc)
),
node_context_ids AS
(SELECT context_id, ngrams_id, terms
FROM cnnv
JOIN ngrams ON cnnv.ngrams_id = ngrams.id
WHERE node_id = ? AND cnnv.category > 0
),
ncids_agg AS
(SELECT ngrams_id, terms, array_agg(DISTINCT context_id) AS agg
FROM node_context_ids
GROUP BY (ngrams_id, terms)),
( SELECT array_agg(DISTINCT context_id) AS agg,
ngrams_id,
terms
FROM cnnv
JOIN ngrams
ON cnnv.ngrams_id = ngrams.id
GROUP BY (ngrams_id, terms)
),
ns AS
(SELECT ngrams_id, terms
FROM node_stories
JOIN ngrams ON ngrams_id = ngrams.id
WHERE node_id = ? AND ngrams_type_id = ?
JOIN ngrams
ON ngrams_id = ngrams.id
WHERE node_id = ? AND ngrams_type_id = ?
)
SELECT ns.terms, CASE WHEN agg IS NULL THEN '{}' ELSE agg END
FROM ns
LEFT JOIN ncids_agg ON ns.ngrams_id = ncids_agg.ngrams_id
LEFT JOIN ncids_agg
ON ns.ngrams_id = ncids_agg.ngrams_id
|]
-- query = [sql|
-- WITH node_context_ids AS
......
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE TypeFamilies #-}
module Gargantext.System.Logging.Types (
LogLevel(..)
......
......@@ -99,6 +99,10 @@ default_job_timeout = 60
# default timeout for "long" jobs (in seconds)
long_job_timeout = 3000
# Batch size when sending data to NLP.
# Preferably, set this to the number of CPUs
nlp_conduit_chunk_size = 10
# NOTE This is overridden by Test.Database.Setup
[worker.database]
host = "127.0.0.1"
......
......@@ -214,22 +214,23 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
eRes <- runClientM (get_table_ngrams token cId APINgrams.Terms listId 50 Nothing (Just MapTerm) Nothing Nothing Nothing Nothing) clientEnv
eRes `shouldSatisfy` isRight
let (Right res) = eRes
-- /NOTA BENE/ The count is 1 because the count applies to roots only.
Just res `shouldBe` JSON.decode [json| {"version":5
,"count":3
,"count":1
,"data":[
{"ngrams":"guitar pedals"
{"ngrams":"overdrives"
,"size":1
,"list":"MapTerm"
,"root":"overdrives"
,"parent":"overdrives"
,"occurrences":[]
,"children":["tube screamers"]
,"children":["guitar pedals"]
},
{"ngrams":"overdrives"
{"ngrams":"guitar pedals"
,"size":1
,"list":"MapTerm"
,"root":"overdrives"
,"parent":"overdrives"
,"occurrences":[]
,"children":["guitar pedals"]
,"children":["tube screamers"]
},
{"ngrams":"tube screamers"
,"size":1
......@@ -309,16 +310,8 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
-- check that new term is parent of old one
checkNgrams getNgrams [json| {"version": 2
,"count":2
,"count":1
,"data":[
{"ngrams":"abelian group"
,"size":2
,"list":"MapTerm"
,"root": "new abelian group"
,"parent": "new abelian group"
,"occurrences":[]
,"children":[]
},
{"ngrams":"new abelian group"
,"size":1
,"list":"MapTerm"
......@@ -326,6 +319,14 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
,"parent":null
,"occurrences":[]
,"children":["abelian group"]
},
{"ngrams":"abelian group"
,"size":2
,"list":"MapTerm"
,"root": "new abelian group"
,"parent": "new abelian group"
,"occurrences":[]
,"children":[]
}
]
}
......@@ -341,16 +342,8 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
-- In essence, this JSON needs to be exactly the same as the previous one,
-- i.e. important doesn't change the topology.
checkNgrams getNgrams [json| {"version": 2
,"count":2
,"count":1
,"data":[
{"ngrams":"abelian group"
,"size":2
,"list":"MapTerm"
,"root": "new abelian group"
,"parent": "new abelian group"
,"occurrences":[]
,"children":[]
},
{"ngrams":"new abelian group"
,"size":1
,"list":"MapTerm"
......@@ -358,6 +351,14 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
,"parent":null
,"occurrences":[]
,"children":["abelian group"]
},
{"ngrams":"abelian group"
,"size":2
,"list":"MapTerm"
,"root": "new abelian group"
,"parent": "new abelian group"
,"occurrences":[]
,"children":[]
}
]
}
......@@ -610,8 +611,8 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
liftIO $ exportedNgrams `shouldBe` exportedNgrams2
-- We test that if we try to import terms which, when merged with the existing,
-- would cause a loop, GGTX is capable of rejecting the request.
it "refuses to import terms which will lead to a loop" $ \(SpecContext testEnv port app _) -> do
-- would cause a loop, GGTX is capable of breaking them and serving the request.
it "allows importing terms which will lead to a loop (because it can break them)" $ \(SpecContext testEnv port app _) -> do
cId <- newCorpusForUser testEnv "alice"
let log_cfg = (test_config testEnv) ^. gc_logging
withApplication app $ do
......@@ -684,14 +685,14 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
}
ji <- checkEither $ liftIO $ runClientM (add_form_to_list token listId params) clientEnv
ji' <- pollUntilWorkFinished log_cfg port ji
-- Unfortunately we don't have a better way than to match on the stringified exception, sigh.
case _scst_events ji' of
Just [ScraperEvent{..}]
| Just msg <- _scev_message
-> liftIO $ msg `shouldSatisfy` \txt -> "Loop detected in terms: foo -> bar -> foo" `T.isInfixOf` txt
, "Loop detected in terms: foo -> bar -> foo" `T.isInfixOf` msg
-> fail (T.unpack msg)
| otherwise
-> fail "No suitable message in ScraperEvent."
_ -> fail "Expected job to fail, but it didn't"
-> pure () -- no loop!
_ -> pure () -- no loop!
createDocsList :: FilePath
-> TestEnv
......
......@@ -13,6 +13,7 @@ Portability : POSIX
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
module Test.Instances where
......@@ -52,6 +53,7 @@ import Gargantext.API.Search.Types (SearchQuery(..), SearchResult(..), SearchRes
import Gargantext.API.Table.Types (TableQuery(..))
import Gargantext.API.Viz.Types (PhyloData)
import Gargantext.Core (Lang)
import Gargantext.Core.NodeStory hiding (buildForest)
import Gargantext.Core.Notifications.CentralExchange.Types qualified as CET
import Gargantext.Core.Notifications.Dispatcher.Types qualified as DET
import Gargantext.Core.Text.Corpus.Query (RawQuery(..))
......@@ -785,16 +787,23 @@ genCorpusWithMatchingElement = do
depth <- choose (1, 5)
let mkEntry = do
trm <- arbitrary
el <- over ne_children (breakLoop trm) <$> (resize depth arbitrary)
el <- over ne_children (breakLoop trm) . makeItRoot <$> (resize depth arbitrary)
pure (trm, el { _ne_ngrams = trm })
-- Let's build the map first, so that duplicates will be overwritten.
fullMap <- (Map.fromList <$> vectorOf depth mkEntry) `suchThat` (\x -> isRight (buildForest x)) -- exclude loops
let (hd NE.:| _) = NE.fromList $ Map.elems fullMap
pure $ AcyclicTableMap fullMap hd
fullMapE <- buildForest (BreakLoop LBA_just_do_it) . Map.fromList <$> vectorOf depth mkEntry
case fullMapE of
Left e -> panicTrace (show e)
Right (destroyForest -> fullMap) -> do
let (hd NE.:| _) = NE.fromList $ map snd fullMap
pure $ AcyclicTableMap (Map.fromList fullMap) hd
where
breakLoop :: NgramsTerm -> MSet NgramsTerm -> MSet NgramsTerm
breakLoop t = mSetFromSet . Set.delete t . mSetToSet
makeItRoot :: NgramsElement -> NgramsElement
makeItRoot ne = ne & ne_root .~ Nothing
& ne_parent .~ Nothing
instance Arbitrary AcyclicTableMap where
arbitrary = genCorpusWithMatchingElement
shrink = shrinkTree
......
......@@ -235,8 +235,8 @@ testFlat05 = do
testForestSearchProp :: Property
testForestSearchProp = forAll arbitrary $ \(AcyclicTableMap ngramsTable el) -> do
case searchTableNgrams (Versioned 0 ngramsTable) (searchQuery el) of
Left (BFE_loop_detected err) -> fail (T.unpack $ renderLoop err)
Right res -> res ^. vc_data `shouldSatisfy` (elem (_ne_ngrams el) . map _ne_ngrams . getNgramsTable)
Left (BFE_loop_detected err) -> fail (T.unpack $ renderLoop err)
Right res -> res ^. vc_data `shouldSatisfy` (any (containsTerm (_ne_ngrams el)) . getNgramsTable)
where
searchQuery term = NgramsSearchQuery {
_nsq_limit = Limit 5
......@@ -255,7 +255,9 @@ testSearchNestedTerms :: Assertion
testSearchNestedTerms = do
case searchTableNgrams (Versioned 0 hierarchicalTableMap) searchQuery of
Left (BFE_loop_detected err) -> fail (T.unpack $ renderLoop err)
Right res -> res ^. vc_data `shouldSatisfy` (elem "ford" . map _ne_ngrams . getNgramsTable)
Right res ->
-- it should appear at the top level or as one of the children.
res ^. vc_data `shouldSatisfy` (any (containsTerm "ford") . getNgramsTable)
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 5
......@@ -267,6 +269,11 @@ testSearchNestedTerms = do
, _nsq_searchQuery = mockQueryFn (Just "ford")
}
-- | Returns True if the input 'NgramsElement' contains (either in the root or in the children)
-- the input term.
containsTerm :: NgramsTerm -> NgramsElement -> Bool
containsTerm t (NgramsElement{..}) = _ne_ngrams == t || any ((==) t) (mSetToList _ne_children)
-- Pagination tests
test_pagination_allTerms :: Assertion
......@@ -390,7 +397,7 @@ test_paginationQuantum = do
Left err -> fail (show err)
Right res -> do
let elems = coerce @NgramsTable @[NgramsElement] $ _vc_data res
length elems @?= 10
countRoots elems @?= 10
forM_ elems $ \term ->
assertBool ("found " <> show (_ne_list term) <> " in: " <> show elems) (_ne_list term == MapTerm)
where
......@@ -404,13 +411,20 @@ test_paginationQuantum = do
, _nsq_searchQuery = mockQueryFn Nothing
}
countRoots :: [NgramsElement] -> Int
countRoots [] = 0
countRoots (x:xs) =
if isNothing (_ne_root x) || isNothing (_ne_parent x)
then 1 + countRoots xs
else countRoots xs
test_paginationQuantum_02 :: Assertion
test_paginationQuantum_02 = do
case searchTableNgrams quantumComputingCorpus searchQuery of
Left err -> fail (show err)
Right res -> do
let elems = coerce @NgramsTable @[NgramsElement] $ _vc_data res
assertBool ("found only " <> show (length elems) <> " in: " <> show elems) (length elems == 10)
assertBool ("found only " <> show (length elems) <> " in: " <> show elems) (countRoots elems == 10)
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 10
......
......@@ -11,12 +11,15 @@ import Data.Char (isSpace)
import Data.Char qualified as Char
import Data.Map.Strict (Map)
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Text qualified as T
import Data.Tree
import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.Types
import Gargantext.API.Ngrams.Types qualified as NT
import Gargantext.Core
import Gargantext.Core.NodeStory (LoopBreakAlgorithm(..), OnLoopDetectedStrategy(..))
import Gargantext.Core.NodeStory.Types (VisitedNode(..))
import Gargantext.Core.Text.Terms.Mono (isSep)
import Gargantext.Core.Text.Terms.WithList
import Gargantext.Core.Types
......@@ -109,6 +112,8 @@ tests = describe "Ngrams" $ do
it "building a complex deep tree works" testBuildNgramsTree_03
it "pruning a simple tree works" testPruningNgramsForest_01
it "pruning a complex tree works" testPruningNgramsForest_02
it "fails on loops if asked to do nothing" testLoopBreaker_01
it "breaks a simple loop (just do it algorithm)" testLoopBreaker_02
prop "destroyForest . buildForest === id" buildDestroyForestRoundtrips
describe "hierarchical grouping" $ do
it "filterNgramsNodes with empty query is identity" testFilterNgramsNodesEmptyQuery
......@@ -140,7 +145,12 @@ instance Show ASCIIForest where
show (ASCIIForest x) = x
buildForestOrFail :: Map NgramsTerm NgramsElement -> Forest NgramsElement
buildForestOrFail mp = case buildForest mp of
buildForestOrFail mp = case buildForest FailOnLoop mp of
Left (BFE_loop_detected treeLoop) -> error (T.unpack $ renderLoop treeLoop)
Right x -> x
buildForestOrBreakLoop :: Map NgramsTerm NgramsElement -> Forest NgramsElement
buildForestOrBreakLoop mp = case buildForest (BreakLoop LBA_just_do_it) mp of
Left (BFE_loop_detected treeLoop) -> error (T.unpack $ renderLoop treeLoop)
Right x -> x
......@@ -269,7 +279,7 @@ testBuildNgramsTree_03 =
-- /PRECONDITION/: The '_ne_ngrams' field always matches the 'NgramsTerm' key of the map.
buildDestroyForestRoundtrips :: AcyclicTableMap -> Property
buildDestroyForestRoundtrips (AcyclicTableMap mp _) =
(destroyForest . buildForestOrFail $ mp) === mp
(Map.fromList . destroyForest . buildForestOrFail $ mp) === mp
testPruningNgramsForest_01 :: Property
testPruningNgramsForest_01 =
......@@ -292,3 +302,21 @@ testPruningNgramsForest_02 =
`- ford
|]
testLoopBreaker_01 :: Property
testLoopBreaker_01 =
let t1 = Map.fromList [ ( "foo", mkMapTerm "foo" & ne_children .~ mSetFromList ["bar"])
, ( "bar", mkMapTerm "bar" & ne_children .~ mSetFromList ["foo"])
]
in (buildForest FailOnLoop t1) === Left (BFE_loop_detected $ Set.fromList [ VN 0 "foo", VN 1 "bar"])
testLoopBreaker_02 :: Property
testLoopBreaker_02 =
let t1 = Map.fromList [ ( "foo", mkMapTerm "foo" & ne_children .~ mSetFromList ["bar"])
, ( "bar", mkMapTerm "bar" & ne_children .~ mSetFromList ["foo"])
]
in (pruneForest $ buildForestOrBreakLoop t1) `compareForestVisually` [r|
bar
|
`- foo
|]