Verified Commit 7d263e4d authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

Merge branch 'dev' into 494-dev-phylo-hh-mm-ss-2

parents d36a9126 706734c5
Pipeline #7981 passed with stages
in 53 minutes and 43 seconds
## Version 0.0.7.5.3
* [BACK][FIX][Resolve "[Server slowness] With the dev branch on the dev instance, we're experiencing a real slowness" (JobInfo changes)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/merge_requests/429)
* [BACK][OPTIM][Concurrent queries in NLP](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/merge_requests/451)
* [FRONT][FIX][[graph] restore proportional labels and make edges transparent](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/merge_requests/522)
* [BACK][FIX][Resolve "On IMT Instance : Error message or crashes when changing the status of terms in large batch"](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/merge_requests/445)
* [FRONT][FIX][graph: rework edge renderer based on rectangle renderer](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/merge_requests/523)
* [BACK][FIX][Break loops in Ngrams graphs](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/merge_requests/453)
## Version 0.0.7.5.2 ## Version 0.0.7.5.2
* [BACK/FRONT][FIX][[query] a prefixed query like ~prefix should transform to 'prefix', not '"prefix'](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/merge_requests/443) * [BACK/FRONT][FIX][[query] a prefixed query like ~prefix should transform to 'prefix', not '"prefix'](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/merge_requests/443)
......
...@@ -88,7 +88,8 @@ convertConfigs ini@(Ini.GargConfig { .. }) iniMail nlpConfig connInfo = ...@@ -88,7 +88,8 @@ convertConfigs ini@(Ini.GargConfig { .. }) iniMail nlpConfig connInfo =
, _wsLongJobTimeout = 3000 , _wsLongJobTimeout = 3000
, _wsDefaultDelay = 0 , _wsDefaultDelay = 0
, _wsAdditionalDelayAfterRead = 5 , _wsAdditionalDelayAfterRead = 5
, _wsDatabase = connInfo { PGS.connectDatabase = "pgmq"} } , _wsDatabase = connInfo { PGS.connectDatabase = "pgmq"}
, _wsNlpConduitChunkSize = 10 }
, _gc_logging = Config.LogConfig { , _gc_logging = Config.LogConfig {
_lc_log_level = INFO _lc_log_level = INFO
, _lc_log_file = Nothing , _lc_log_file = Nothing
......
...@@ -166,6 +166,10 @@ default_job_timeout = 60 ...@@ -166,6 +166,10 @@ default_job_timeout = 60
# default timeout for "long" jobs (in seconds) # default timeout for "long" jobs (in seconds)
long_job_timeout = 3000 long_job_timeout = 3000
# Batch size when sending data to NLP.
# Preferably, set as much as the number of CPUs
nlp_conduit_chunk_size = 10
# if you leave the same credentials as in [database] section above, # if you leave the same credentials as in [database] section above,
# workers will try to set up the `gargantext_pgmq` database # workers will try to set up the `gargantext_pgmq` database
# automatically # automatically
......
...@@ -5,7 +5,7 @@ cabal-version: 3.4 ...@@ -5,7 +5,7 @@ cabal-version: 3.4
-- see: https://github.com/sol/hpack -- see: https://github.com/sol/hpack
name: gargantext name: gargantext
version: 0.0.7.5.2 version: 0.0.7.5.3
synopsis: Search, map, share synopsis: Search, map, share
description: Please see README.md description: Please see README.md
category: Data category: Data
...@@ -570,6 +570,7 @@ library ...@@ -570,6 +570,7 @@ library
, json-stream ^>= 0.4.2.4 , json-stream ^>= 0.4.2.4
, lens >= 5.2.2 && < 5.3 , lens >= 5.2.2 && < 5.3
, lens-aeson < 1.3 , lens-aeson < 1.3
, lifted-async >= 0.10 && < 0.12
, list-zipper , list-zipper
, massiv < 1.1 , massiv < 1.1
, matrix ^>= 0.3.6.1 , matrix ^>= 0.3.6.1
...@@ -753,6 +754,7 @@ common commonTestDependencies ...@@ -753,6 +754,7 @@ common commonTestDependencies
, generic-arbitrary >= 1.0.1 && < 2 , generic-arbitrary >= 1.0.1 && < 2
, graphviz ^>= 2999.20.1.0 , graphviz ^>= 2999.20.1.0
, haskell-bee , haskell-bee
, haskell-bee-pgmq
, hspec ^>= 2.11.1 , hspec ^>= 2.11.1
, hspec-expectations >= 0.8 && < 0.9 , hspec-expectations >= 0.8 && < 0.9
, hspec-expectations-lifted < 0.11 , hspec-expectations-lifted < 0.11
......
...@@ -23,7 +23,7 @@ module Gargantext.API.Job ( ...@@ -23,7 +23,7 @@ module Gargantext.API.Job (
, addWarningEvent , addWarningEvent
) where ) where
import Control.Lens (over, _Just) import Control.Lens ((%~), over, _Just)
import Data.Text qualified as T import Data.Text qualified as T
import Gargantext.API.Admin.Orchestrator.Types import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.Prelude import Gargantext.Prelude
...@@ -66,7 +66,12 @@ jobLogComplete jl = ...@@ -66,7 +66,12 @@ jobLogComplete jl =
& over scst_remaining (const (Just 0)) & over scst_remaining (const (Just 0))
jobLogAddMore :: Int -> JobLog -> JobLog jobLogAddMore :: Int -> JobLog -> JobLog
jobLogAddMore moreSteps jl = jl & over (scst_remaining . _Just) (+ moreSteps) jobLogAddMore moreSteps jl =
jl & scst_remaining %~ (maybe (Just 0) Just)
& scst_succeeded %~ (maybe (Just 0) Just)
& scst_failed %~ (maybe (Just 0) Just)
& scst_events %~ (maybe (Just []) Just)
& (scst_remaining . _Just) %~ (+ moreSteps)
jobLogFailures :: Int -> JobLog -> JobLog jobLogFailures :: Int -> JobLog -> JobLog
jobLogFailures n jl = over (scst_failed . _Just) (+ n) $ jobLogFailures n jl = over (scst_failed . _Just) (+ n) $
......
...@@ -17,14 +17,12 @@ add get ...@@ -17,14 +17,12 @@ add get
{-# LANGUAGE BangPatterns #-} {-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE IncoherentInstances #-}
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.API.Ngrams module Gargantext.API.Ngrams
( (
...@@ -114,7 +112,7 @@ import Gargantext.API.Ngrams.Types ...@@ -114,7 +112,7 @@ import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory hiding (buildForest) import Gargantext.Core.NodeStory hiding (buildForest)
import Gargantext.Core.NodeStory qualified as NodeStory import Gargantext.Core.NodeStory qualified as NodeStory
import Gargantext.Core.Text.Ngrams (Ngrams, NgramsType) import Gargantext.Core.Text.Ngrams (Ngrams, NgramsType)
import Gargantext.Core.Types (ListType(..), NodeId, ListId, TODO, assertValid, ContextId, HasValidationError) import Gargantext.Core.Types (ListType(..), NodeId, ListId, TODO, assertValid, HasValidationError)
import Gargantext.Core.Types.Query (Limit(..), Offset(..), MinSize(..), MaxSize(..)) import Gargantext.Core.Types.Query (Limit(..), Offset(..), MinSize(..), MaxSize(..))
import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast) import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
import Gargantext.Database.Prelude import Gargantext.Database.Prelude
...@@ -451,21 +449,23 @@ matchingNode listType minSize maxSize searchFn (Node inputNode children) = ...@@ -451,21 +449,23 @@ matchingNode listType minSize maxSize searchFn (Node inputNode children) =
-- | Version of 'buildForest' specialised over the 'NgramsElement' as the values of the tree. -- | Version of 'buildForest' specialised over the 'NgramsElement' as the values of the tree.
-- /IMPORTANT/: This functions returns an error in case we found a loop. -- /IMPORTANT/: This functions returns an error in case we found a loop.
buildForest :: Map NgramsTerm NgramsElement -> Either BuildForestError (Forest NgramsElement) buildForest :: OnLoopDetectedStrategy -> Map NgramsTerm NgramsElement -> Either BuildForestError (Forest NgramsElement)
buildForest = fmap (map (fmap snd)) . NodeStory.buildForest buildForest onLoopStrategy = fmap (map (fmap snd)) . NodeStory.buildForest onLoopStrategy
-- | Folds an Ngrams forest back to a table map. -- | Folds an Ngrams forest back to a table map.
-- This function doesn't aggregate information, but merely just recostructs the original -- This function doesn't aggregate information, but merely just recostructs the original
-- map without loss of information. To perform operations on the forest, use the appropriate -- map without loss of information. To perform operations on the forest, use the appropriate
-- functions. -- functions.
destroyForest :: Forest NgramsElement -> Map NgramsTerm NgramsElement -- /NOTA BENE:/ We return a list and not a Map because we might have sorted the forest, and
destroyForest f = Map.fromList . map (foldTree destroyTree) $ f -- converting into a map would trash the carefully-constructed sorting.
destroyForest :: Forest NgramsElement -> [(NgramsTerm, NgramsElement)]
destroyForest f = concatMap (map (\el -> (_ne_ngrams el, el)) . flatten) $ f
where where
destroyTree :: NgramsElement -> [(NgramsTerm, NgramsElement)] -> (NgramsTerm, NgramsElement) -- _destroyTree :: NgramsElement -> [(NgramsTerm, [NgramsElement])] -> (NgramsTerm, [NgramsElement])
destroyTree rootEl childrenEl = (_ne_ngrams rootEl, squashElements rootEl childrenEl) -- _destroyTree rootEl childrenEl = (_ne_ngrams rootEl, childrenEl)
squashElements :: NgramsElement -> [(NgramsTerm, NgramsElement)] -> NgramsElement -- _squashElements :: NgramsElement -> [(NgramsTerm, NgramsElement)] -> NgramsElement
squashElements r _ = r -- _squashElements r _ = r
-- | TODO Errors management -- | TODO Errors management
-- TODO: polymorphic for Annuaire or Corpus or ... -- TODO: polymorphic for Annuaire or Corpus or ...
...@@ -482,58 +482,63 @@ searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement) ...@@ -482,58 +482,63 @@ searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement)
-> Either BuildForestError (VersionedWithCount NgramsTable) -> Either BuildForestError (VersionedWithCount NgramsTable)
searchTableNgrams versionedTableMap NgramsSearchQuery{..} = searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
let tableMap = versionedTableMap ^. v_data let tableMap = versionedTableMap ^. v_data
in case buildForest tableMap of in case keepRoots <$> buildForest (BreakLoop LBA_just_do_it) tableMap of
Left err -> Left err Left err -> Left err
Right fs -> Right fs ->
let forestRoots = Set.fromList let forestRoots = filterNgramsNodes _nsq_listType _nsq_minSize _nsq_maxSize _nsq_searchQuery $ fs
. Map.elems
. destroyForest
. filterNgramsNodes _nsq_listType _nsq_minSize _nsq_maxSize _nsq_searchQuery
$ fs
tableMapSorted = versionedTableMap tableMapSorted = versionedTableMap
& v_data .~ (NgramsTable . sortAndPaginate . withInners tableMap $ forestRoots) & v_data .~ (NgramsTable . map snd
. destroyForest
in Right $ toVersionedWithCount (Set.size forestRoots) tableMapSorted . sortAndPaginateForest _nsq_offset _nsq_limit _nsq_orderBy
. withInnersForest
$ forestRoots
)
in Right $ toVersionedWithCount (length forestRoots) tableMapSorted
keepRoots :: Forest NgramsElement -> Forest NgramsElement
keepRoots = filter (\(Node r _) -> isNothing (_ne_root r) || isNothing (_ne_parent r))
-- | For each input root, extends its occurrence count with
-- the information found in the subforest.
withInnersForest :: Forest NgramsElement -> Forest NgramsElement
withInnersForest = map sumSubitemsOccurrences
where where
sumSubitemsOccurrences :: Tree NgramsElement -> Tree NgramsElement
-- Sorts the input 'NgramsElement' list. sumSubitemsOccurrences (Node root children) =
-- /IMPORTANT/: As we might be sorting ngrams in all sorts of language, let children' = withInnersForest children
-- some of them might include letters with accents and other unicode symbols, root' = root { _ne_occurrences = (_ne_occurrences root) <> foldMap (_ne_occurrences . rootLabel) children' }
-- but we need to filter those /diacritics/ out so that the sorting would in Node root' children'
-- happen in the way users would expect. See ticket #331.
sortOnOrder :: Maybe OrderBy -> ([NgramsElement] -> [NgramsElement]) sortAndPaginateForest :: Maybe Offset
sortOnOrder Nothing = sortOnOrder (Just ScoreDesc) -> Limit
sortOnOrder (Just TermAsc) = List.sortBy ngramTermsAscSorter -> Maybe OrderBy
sortOnOrder (Just TermDesc) = List.sortBy ngramTermsDescSorter -> Forest NgramsElement
sortOnOrder (Just ScoreAsc) = List.sortOn $ view (ne_occurrences . to Set.size) -> Forest NgramsElement
sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view (ne_occurrences . to Set.size) sortAndPaginateForest mb_offset limit orderBy xs =
let offset' = getOffset $ maybe 0 identity mb_offset
ngramTermsAscSorter = on unicodeDUCETSorter (unNgramsTerm . view ne_ngrams) in take (getLimit limit)
ngramTermsDescSorter = on (\n1 n2 -> unicodeDUCETSorter n2 n1) (unNgramsTerm . view ne_ngrams) . drop offset'
. sortOnOrderForest orderBy
-- | For each input root, extends its occurrence count with $ xs
-- the information found in the subitems.
withInners :: Map NgramsTerm NgramsElement -> Set NgramsElement -> Set NgramsElement -- Sorts the input 'NgramsElement' list.
withInners tblMap roots = Set.map addSubitemsOccurrences roots -- /IMPORTANT/: As we might be sorting ngrams in all sorts of language,
where -- some of them might include letters with accents and other unicode symbols,
addSubitemsOccurrences :: NgramsElement -> NgramsElement -- but we need to filter those /diacritics/ out so that the sorting would
addSubitemsOccurrences e = -- happen in the way users would expect. See ticket #331.
e { _ne_occurrences = foldl' alterOccurrences (e ^. ne_occurrences) (e ^. ne_children) } sortOnOrderForest :: Maybe OrderBy -> (Forest NgramsElement -> Forest NgramsElement)
sortOnOrderForest Nothing = sortOnOrderForest (Just ScoreDesc)
alterOccurrences :: Set ContextId -> NgramsTerm -> Set ContextId sortOnOrderForest (Just TermAsc) = List.sortBy (\(Node t1 _) (Node t2 _) -> ngramTermsAscSorter t1 t2)
alterOccurrences occs t = case Map.lookup t tblMap of sortOnOrderForest (Just TermDesc) = List.sortBy (\(Node t1 _) (Node t2 _) -> ngramTermsDescSorter t1 t2)
Nothing -> occs sortOnOrderForest (Just ScoreAsc) = List.sortOn $ \(Node root _) -> root ^. (ne_occurrences . to Set.size)
Just e' -> occs <> e' ^. ne_occurrences sortOnOrderForest (Just ScoreDesc) = List.sortOn $ Down . (\(Node root _) -> root ^. (ne_occurrences . to Set.size))
-- | Paginate the results ngramTermsAscSorter :: NgramsElement -> NgramsElement -> Ordering
sortAndPaginate :: Set NgramsElement -> [NgramsElement] ngramTermsAscSorter = on unicodeDUCETSorter (unNgramsTerm . view ne_ngrams)
sortAndPaginate xs =
let offset' = getOffset $ maybe 0 identity _nsq_offset ngramTermsDescSorter :: NgramsElement -> NgramsElement -> Ordering
in take (getLimit _nsq_limit) ngramTermsDescSorter = on (\n1 n2 -> unicodeDUCETSorter n2 n1) (unNgramsTerm . view ne_ngrams)
. drop offset'
. sortOnOrder _nsq_orderBy
. Set.toList
$ xs
-- | This function allows sorting two texts via their unicode sorting -- | This function allows sorting two texts via their unicode sorting
-- (as opposed as the standard lexicographical sorting) by relying on -- (as opposed as the standard lexicographical sorting) by relying on
......
...@@ -111,6 +111,9 @@ mSetFromSet = MSet . Map.fromSet (const ()) ...@@ -111,6 +111,9 @@ mSetFromSet = MSet . Map.fromSet (const ())
mSetFromList :: Ord a => [a] -> MSet a mSetFromList :: Ord a => [a] -> MSet a
mSetFromList = MSet . Map.fromList . map (\x -> (x, ())) mSetFromList = MSet . Map.fromList . map (\x -> (x, ()))
mSetDifference :: Ord a => MSet a -> MSet a -> MSet a
mSetDifference (MSet m1) (MSet m2) = MSet (m1 `Map.difference` m2)
-- mSetToSet :: Ord a => MSet a -> Set a -- mSetToSet :: Ord a => MSet a -> Set a
-- mSetToSet (MSet a) = Set.fromList ( Map.keys a) -- mSetToSet (MSet a) = Set.fromList ( Map.keys a)
mSetToSet :: Ord a => MSet a -> Set a mSetToSet :: Ord a => MSet a -> Set a
......
...@@ -131,7 +131,7 @@ updateNode phyloId (UpdateNodePhylo config) jobHandle = do ...@@ -131,7 +131,7 @@ updateNode phyloId (UpdateNodePhylo config) jobHandle = do
let corpusId = fromMaybe (panicTrace "no corpus id") corpusId' let corpusId = fromMaybe (panicTrace "no corpus id") corpusId'
phy <- timeMeasured "updateNode.flowPhyloAPI" $ flowPhyloAPI (subConfigAPI2config config) mbComputeHistory corpusId phy <- timeMeasured "updateNode.flowPhyloAPI" $ flowPhyloAPI (subConfigAPI2config config) mbComputeHistory corpusId jobHandle
markProgress 1 jobHandle markProgress 1 jobHandle
{- {-
......
...@@ -4,7 +4,6 @@ module Gargantext.API.Routes.Named.Viz ( ...@@ -4,7 +4,6 @@ module Gargantext.API.Routes.Named.Viz (
-- * Routes types -- * Routes types
PhyloAPI(..) PhyloAPI(..)
, GetPhylo(..) , GetPhylo(..)
, PostPhylo(..)
, GraphAPI(..) , GraphAPI(..)
, GraphAsyncAPI(..) , GraphAsyncAPI(..)
, GraphVersionsAPI(..) , GraphVersionsAPI(..)
...@@ -31,7 +30,6 @@ import Servant.XML.Conduit (XML) ...@@ -31,7 +30,6 @@ import Servant.XML.Conduit (XML)
data PhyloAPI mode = PhyloAPI data PhyloAPI mode = PhyloAPI
{ getPhyloEp :: mode :- Summary "Phylo API" :> NamedRoutes GetPhylo { getPhyloEp :: mode :- Summary "Phylo API" :> NamedRoutes GetPhylo
, postPhyloEp :: mode :- NamedRoutes PostPhylo
} deriving Generic } deriving Generic
...@@ -43,9 +41,9 @@ newtype GetPhylo mode = GetPhylo ...@@ -43,9 +41,9 @@ newtype GetPhylo mode = GetPhylo
} deriving Generic } deriving Generic
newtype PostPhylo mode = PostPhylo -- newtype PostPhylo mode = PostPhylo
{ postPhyloByListIdEp :: mode :- QueryParam "listId" ListId :> (Post '[JSON] NodeId) -- { postPhyloByListIdEp :: mode :- QueryParam "listId" ListId :> (Post '[JSON] NodeId)
} deriving Generic -- } deriving Generic
-- | There is no Delete specific API for Graph since it can be deleted -- | There is no Delete specific API for Graph since it can be deleted
......
...@@ -9,19 +9,12 @@ Portability : POSIX ...@@ -9,19 +9,12 @@ Portability : POSIX
-} -}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Config.Ini.NLP ( module Gargantext.Core.Config.Ini.NLP (
-- * Types -- * Types
NLPConfig(..) NLPConfig(..)
-- * Utility functions -- * Utility functions
, readConfig , readConfig
-- * Lenses
, nlp_default
, nlp_languages
) )
where where
...@@ -59,4 +52,3 @@ readConfig fp = do ...@@ -59,4 +52,3 @@ readConfig fp = do
, T.pack $ show m_nlp_other ] , T.pack $ show m_nlp_other ]
Just ret -> pure ret Just ret -> pure ret
makeLenses ''NLPConfig
...@@ -19,7 +19,6 @@ module Gargantext.Core.Config.NLP ( ...@@ -19,7 +19,6 @@ module Gargantext.Core.Config.NLP (
-- * Lenses -- * Lenses
, nlp_default , nlp_default
, nlp_languages , nlp_languages
) )
where where
...@@ -48,9 +47,9 @@ data NLPConfig = NLPConfig { _nlp_default :: URI ...@@ -48,9 +47,9 @@ data NLPConfig = NLPConfig { _nlp_default :: URI
instance FromValue NLPConfig where instance FromValue NLPConfig where
fromValue v = do fromValue v = do
_nlp_default <- parseTableFromValue (reqKey "EN") v _nlp_default <- parseTableFromValue (reqKey "EN") v
-- _nlp_languages <- fromValue <$> getTable
MkTable t <- parseTableFromValue getTable v MkTable t <- parseTableFromValue getTable v
_nlp_languages <- mapM fromValue (snd <$> t) _nlp_languages <- mapM fromValue (snd <$> t)
return $ NLPConfig { .. } return $ NLPConfig { .. }
instance ToValue NLPConfig where instance ToValue NLPConfig where
toValue = defaultTableToValue toValue = defaultTableToValue
...@@ -58,7 +57,7 @@ instance ToTable NLPConfig where ...@@ -58,7 +57,7 @@ instance ToTable NLPConfig where
toTable (NLPConfig { .. }) = toTable (NLPConfig { .. }) =
table ([ k .= v | (k, v) <- Map.toList _nlp_languages ] table ([ k .= v | (k, v) <- Map.toList _nlp_languages ]
-- output the default "EN" language as well -- output the default "EN" language as well
<> [ ("EN" :: Text) .= _nlp_default ]) <> [ ("EN" :: Text) .= _nlp_default ] )
-- readConfig :: SettingsFile -> IO NLPConfig -- readConfig :: SettingsFile -> IO NLPConfig
......
...@@ -53,6 +53,8 @@ data WorkerSettings = ...@@ -53,6 +53,8 @@ data WorkerSettings =
, _wsDefaultDelay :: B.TimeoutS , _wsDefaultDelay :: B.TimeoutS
, _wsAdditionalDelayAfterRead :: B.TimeoutS , _wsAdditionalDelayAfterRead :: B.TimeoutS
, _wsDefinitions :: ![WorkerDefinition] , _wsDefinitions :: ![WorkerDefinition]
, _wsNlpConduitChunkSize :: Int
} deriving (Show, Eq) } deriving (Show, Eq)
instance FromValue WorkerSettings where instance FromValue WorkerSettings where
fromValue = parseTableFromValue $ do fromValue = parseTableFromValue $ do
...@@ -61,6 +63,7 @@ instance FromValue WorkerSettings where ...@@ -61,6 +63,7 @@ instance FromValue WorkerSettings where
_wsDefaultVisibilityTimeout <- reqKey "default_visibility_timeout" _wsDefaultVisibilityTimeout <- reqKey "default_visibility_timeout"
_wsDefaultJobTimeout <- reqKey "default_job_timeout" _wsDefaultJobTimeout <- reqKey "default_job_timeout"
_wsLongJobTimeout <- reqKey "long_job_timeout" _wsLongJobTimeout <- reqKey "long_job_timeout"
_wsNlpConduitChunkSize <- reqKey "nlp_conduit_chunk_size"
defaultDelay <- reqKey "default_delay" defaultDelay <- reqKey "default_delay"
additionalDelayAfterRead <- reqKey "additional_delay_after_read" additionalDelayAfterRead <- reqKey "additional_delay_after_read"
return $ WorkerSettings { _wsDatabase = unTOMLConnectInfo dbConfig return $ WorkerSettings { _wsDatabase = unTOMLConnectInfo dbConfig
...@@ -69,7 +72,8 @@ instance FromValue WorkerSettings where ...@@ -69,7 +72,8 @@ instance FromValue WorkerSettings where
, _wsDefinitions , _wsDefinitions
, _wsDefaultVisibilityTimeout , _wsDefaultVisibilityTimeout
, _wsDefaultDelay = B.TimeoutS defaultDelay , _wsDefaultDelay = B.TimeoutS defaultDelay
, _wsAdditionalDelayAfterRead = B.TimeoutS additionalDelayAfterRead } , _wsAdditionalDelayAfterRead = B.TimeoutS additionalDelayAfterRead
, _wsNlpConduitChunkSize }
instance ToValue WorkerSettings where instance ToValue WorkerSettings where
toValue = defaultTableToValue toValue = defaultTableToValue
instance ToTable WorkerSettings where instance ToTable WorkerSettings where
...@@ -80,7 +84,8 @@ instance ToTable WorkerSettings where ...@@ -80,7 +84,8 @@ instance ToTable WorkerSettings where
, "default_visibility_timeout" .= _wsDefaultVisibilityTimeout , "default_visibility_timeout" .= _wsDefaultVisibilityTimeout
, "default_delay" .= B._TimeoutS _wsDefaultDelay , "default_delay" .= B._TimeoutS _wsDefaultDelay
, "additional_delay_after_read" .= B._TimeoutS _wsAdditionalDelayAfterRead , "additional_delay_after_read" .= B._TimeoutS _wsAdditionalDelayAfterRead
, "definitions" .= _wsDefinitions ] , "definitions" .= _wsDefinitions
, "nlp_conduit_chunk_size" .= _wsNlpConduitChunkSize ]
data WorkerDefinition = data WorkerDefinition =
WorkerDefinition { WorkerDefinition {
......
This diff is collapsed.
...@@ -42,6 +42,7 @@ module Gargantext.Core.NodeStory.Types ...@@ -42,6 +42,7 @@ module Gargantext.Core.NodeStory.Types
, ArchiveState , ArchiveState
, ArchiveStateSet , ArchiveStateSet
, ArchiveStateList , ArchiveStateList
, allVisitedNgramsTerms
-- * Errors -- * Errors
, HasNodeStoryError(..) , HasNodeStoryError(..)
...@@ -197,7 +198,7 @@ data BuildForestError ...@@ -197,7 +198,7 @@ data BuildForestError
= -- We found a loop, something that shouldn't normally happen if the calling = -- We found a loop, something that shouldn't normally happen if the calling
-- code is correct by construction, but if that does happen, the value will -- code is correct by construction, but if that does happen, the value will
-- contain the full path to the cycle. -- contain the full path to the cycle.
BFE_loop_detected !(Set VisitedNode) BFE_loop_detected !(Set VisitedNode)
deriving (Show, Eq) deriving (Show, Eq)
instance ToHumanFriendlyError BuildForestError where instance ToHumanFriendlyError BuildForestError where
...@@ -217,6 +218,9 @@ data VisitedNode = ...@@ -217,6 +218,9 @@ data VisitedNode =
VN { _vn_position :: !Int, _vn_term :: !NgramsTerm } VN { _vn_position :: !Int, _vn_term :: !NgramsTerm }
deriving (Show) deriving (Show)
allVisitedNgramsTerms :: Set VisitedNode -> Set NgramsTerm
allVisitedNgramsTerms = Set.map _vn_term
-- /NOTA BENE/: It's important to use this custom instance for the loop detector -- /NOTA BENE/: It's important to use this custom instance for the loop detector
-- to work correctly. If we stop comparing on the terms the loop detector .. will loop. -- to work correctly. If we stop comparing on the terms the loop detector .. will loop.
instance Eq VisitedNode where instance Eq VisitedNode where
......
...@@ -26,14 +26,12 @@ import Gargantext.API.Routes.Named.Viz qualified as Named ...@@ -26,14 +26,12 @@ import Gargantext.API.Routes.Named.Viz qualified as Named
import Gargantext.API.Viz.Types import Gargantext.API.Viz.Types
import Gargantext.Core.Types.Phylo (GraphData(..)) import Gargantext.Core.Types.Phylo (GraphData(..))
import Gargantext.Core.Viz.LegacyPhylo hiding (Phylo(..)) import Gargantext.Core.Viz.LegacyPhylo hiding (Phylo(..))
import Gargantext.Core.Viz.Phylo (PhyloConfig(..), defaultConfig, _phylo_param, _phyloParam_config) import Gargantext.Core.Viz.Phylo (PhyloConfig(..), _phylo_param, _phyloParam_config)
import Gargantext.Core.Viz.Phylo.API.Tools import Gargantext.Core.Viz.Phylo.API.Tools
import Gargantext.Database.Prelude import Gargantext.Database.Prelude
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node -- (PhyloId, ListId, CorpusId, UserId, NodeId(..)) import Gargantext.Database.Admin.Types.Node -- (PhyloId, ListId, CorpusId, UserId, NodeId(..))
import Gargantext.Database.Query.Table.Node (getClosestParentIdByType, defaultList) import Gargantext.Database.Query.Table.Node (getClosestParentIdByType, defaultList)
import Gargantext.Database.Query.Table.Node.Error import Gargantext.Database.Query.Table.Node.Error
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Prelude import Gargantext.Prelude
import Servant import Servant
import Servant.Server.Generic (AsServerT) import Servant.Server.Generic (AsServerT)
...@@ -42,7 +40,6 @@ import Web.HttpApiData (readTextData) ...@@ -42,7 +40,6 @@ import Web.HttpApiData (readTextData)
phyloAPI :: IsGargServer err env m => PhyloId -> Named.PhyloAPI (AsServerT m) phyloAPI :: IsGargServer err env m => PhyloId -> Named.PhyloAPI (AsServerT m)
phyloAPI n = Named.PhyloAPI phyloAPI n = Named.PhyloAPI
{ getPhyloEp = getPhylo n { getPhyloEp = getPhylo n
, postPhyloEp = postPhylo n
} }
-- :<|> putPhylo n -- :<|> putPhylo n
-- :<|> deletePhylo n -- :<|> deletePhylo n
...@@ -94,35 +91,6 @@ getPhyloDataJson phyloId = do ...@@ -94,35 +91,6 @@ getPhyloDataJson phyloId = do
-- pure (SVG p) -- pure (SVG p)
-- FIXME(adn) This handler mixes DB reads with updates outside of the same
-- transaction, due to the call to 'flowPhyloAPI' in the middle.
postPhylo :: IsGargServer err env m => PhyloId -> Named.PostPhylo (AsServerT m)
postPhylo phyloId = Named.PostPhylo $ \_lId -> do
-- TODO get Reader settings
-- s <- ask
-- let
-- _vrs = Just ("1" :: Text)
-- _sft = Just (Software "Gargantext" "4")
-- _prm = initPhyloParam vrs sft (Just q)
corpusId <- runDBQuery $ getClosestParentIdByType phyloId NodeCorpus
-- Being the first time we ask for the Phylo, there is no historical data
-- available about computing time, so we pass 'Nothing'.
phy <- flowPhyloAPI defaultConfig Nothing (fromMaybe (panicTrace "[G.C.V.P.API] no corpus ID found") corpusId) -- params
-- phyloId <- insertNodes [node NodePhylo "Phylo" (HyperdataPhylo Nothing (Just phy)) (Just corpusId) userId]
_ <- runDBTx $ updateHyperdata phyloId (HyperdataPhylo Nothing (Just phy))
pure phyloId
------------------------------------------------------------------------
-- | DELETE Phylo == delete a node
------------------------------------------------------------------------
------------------------------------------------------------------------
{-
type PutPhylo = (Put '[JSON] Phylo )
--putPhylo :: PhyloId -> Maybe ListId -> PhyloQueryBuild -> Phylo
putPhylo :: PhyloId -> GargServer PutPhylo
putPhylo = undefined
-}
-- | Instances -- | Instances
instance FromHttpApiData DisplayMode where parseUrlPiece = readTextData instance FromHttpApiData DisplayMode where parseUrlPiece = readTextData
......
...@@ -51,6 +51,7 @@ import Gargantext.Database.Schema.Context ( ContextPoly(_context_hyperdata, _con ...@@ -51,6 +51,7 @@ import Gargantext.Database.Schema.Context ( ContextPoly(_context_hyperdata, _con
import Gargantext.Database.Schema.Node ( NodePoly(_node_hyperdata), node_hyperdata ) import Gargantext.Database.Schema.Node ( NodePoly(_node_hyperdata), node_hyperdata )
import Gargantext.Prelude hiding (to) import Gargantext.Prelude hiding (to)
import Gargantext.System.Logging ( MonadLogger, LogLevel(DEBUG), logLocM ) import Gargantext.System.Logging ( MonadLogger, LogLevel(DEBUG), logLocM )
import Gargantext.Utils.Jobs.Monad (JobHandle, MonadJobStatus(markProgress, addMoreSteps))
import Gargantext.Utils.UTCTime (timeMeasured, timeMeasured'') import Gargantext.Utils.UTCTime (timeMeasured, timeMeasured'')
import Prelude qualified import Prelude qualified
import System.FilePath ((</>)) import System.FilePath ((</>))
...@@ -112,25 +113,35 @@ phylo2dot phylo = do ...@@ -112,25 +113,35 @@ phylo2dot phylo = do
_ -> pure value _ -> pure value
flowPhyloAPI :: (HasNodeStory env err m, HasNodeError err, MonadLogger m) flowPhyloAPI :: (HasNodeStory env err m, HasNodeError err, MonadLogger m, MonadJobStatus m)
=> PhyloConfig => PhyloConfig
-> Maybe ComputeTimeHistory -> Maybe ComputeTimeHistory
-- ^ Previous compute time historical data, if any. -- ^ Previous compute time historical data, if any.
-> CorpusId -> CorpusId
-> JobHandle m
-> m Phylo -> m Phylo
flowPhyloAPI config mbOldComputeHistory cId = do flowPhyloAPI config mbOldComputeHistory cId jobHandle = do
env <- view hasNodeStory env <- view hasNodeStory
addMoreSteps 5 jobHandle
corpus <- timeMeasured "flowPhyloAPI.corpusIdtoDocuments" $ runDBQuery $ corpusIdtoDocuments env (timeUnit config) cId corpus <- timeMeasured "flowPhyloAPI.corpusIdtoDocuments" $ runDBQuery $ corpusIdtoDocuments env (timeUnit config) cId
markProgress 1 jobHandle
-- writePhylo phyloWithCliquesFile phyloWithCliques -- writePhylo phyloWithCliquesFile phyloWithCliques
$(logLocM) DEBUG $ "PhyloConfig old: " <> show config $(logLocM) DEBUG $ "PhyloConfig old: " <> show config
(t1, phyloWithCliques) <- timeMeasured'' DEBUG "flowPhyloAPI.phyloWithCliques" (pure $! toPhyloWithoutLink corpus config) (t1, phyloWithCliques) <- timeMeasured'' DEBUG "flowPhyloAPI.phyloWithCliques" (pure $! toPhyloWithoutLink corpus config)
markProgress 1 jobHandle
(t2, phyloConfigured) <- timeMeasured'' DEBUG "flowPhyloAPI.phyloConfigured" (pure $! setConfig config phyloWithCliques) (t2, phyloConfigured) <- timeMeasured'' DEBUG "flowPhyloAPI.phyloConfigured" (pure $! setConfig config phyloWithCliques)
markProgress 1 jobHandle
(t3, finalPhylo) <- timeMeasured'' DEBUG "flowPhyloAPI.toPhylo" (pure $! toPhylo phyloConfigured) (t3, finalPhylo) <- timeMeasured'' DEBUG "flowPhyloAPI.toPhylo" (pure $! toPhylo phyloConfigured)
markProgress 1 jobHandle
-- As the phylo is computed fresh every time, without looking at the one stored (if any), we -- As the phylo is computed fresh every time, without looking at the one stored (if any), we
-- have to manually propagate computing time across. -- have to manually propagate computing time across.
pure $! trackComputeTime (t1 + t2 + t3) (finalPhylo { _phylo_computeTime = mbOldComputeHistory }) let ret = trackComputeTime (t1 + t2 + t3) (finalPhylo { _phylo_computeTime = mbOldComputeHistory })
markProgress 1 jobHandle
pure ret
-------------------------------------------------------------------- --------------------------------------------------------------------
corpusIdtoDocuments :: HasNodeError err corpusIdtoDocuments :: HasNodeError err
......
...@@ -16,7 +16,7 @@ module Gargantext.Core.Viz.Phylo.PhyloMaker where ...@@ -16,7 +16,7 @@ module Gargantext.Core.Viz.Phylo.PhyloMaker where
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parMap, rpar) import Control.Parallel.Strategies (parMap, rpar, Strategy)
import Data.Containers.ListUtils (nubOrd) import Data.Containers.ListUtils (nubOrd)
import Data.Discrimination qualified as D import Data.Discrimination qualified as D
import Data.List (partition, intersect) import Data.List (partition, intersect)
...@@ -37,6 +37,13 @@ import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering) ...@@ -37,6 +37,13 @@ import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Viz.Phylo.TemporalMatching (toPhyloQuality, temporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toSimilarity) import Gargantext.Core.Viz.Phylo.TemporalMatching (toPhyloQuality, temporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toSimilarity)
import Gargantext.Prelude hiding (empty, toList) import Gargantext.Prelude hiding (empty, toList)
defaultStrategy :: Strategy a
defaultStrategy = rpar
------------------ ------------------
-- | To Phylo | -- -- | To Phylo | --
------------------ ------------------
...@@ -151,7 +158,7 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd ...@@ -151,7 +158,7 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd
-------- --------
-- 1.1) for each measure of similarity, prune the flat phylo, compute the branches and estimate the quality -- 1.1) for each measure of similarity, prune the flat phylo, compute the branches and estimate the quality
qua :: [Double] qua :: [Double]
qua = parMap rpar (\thr -> qua = parMap defaultStrategy (\thr ->
let edges = filter (\edge -> snd edge >= thr) graph let edges = filter (\edge -> snd edge >= thr) graph
nodes = nubOrd $ concatMap (\((n,n'),_) -> [n,n']) edges nodes = nubOrd $ concatMap (\((n,n'),_) -> [n,n']) edges
branches = toRelatedComponents nodes edges branches = toRelatedComponents nodes edges
...@@ -192,7 +199,7 @@ findSeaLadder phylo = case getSeaElevation phylo of ...@@ -192,7 +199,7 @@ findSeaLadder phylo = case getSeaElevation phylo of
docs = filterDocs (getDocsByDate phylo) ([period] ++ next) docs = filterDocs (getDocsByDate phylo) ([period] ++ next)
diagos = filterDiago (getCoocByDate phylo) ([period] ++ next) diagos = filterDiago (getCoocByDate phylo) ([period] ++ next)
-- 1.2) compute the kinship similarities between pairs of source & target in parallel -- 1.2) compute the kinship similarities between pairs of source & target in parallel
pairs = parMap rpar (\source -> pairs = parMap defaultStrategy (\source ->
let candidates = filter (\target -> (> 2) $ length let candidates = filter (\target -> (> 2) $ length
$ intersect (getGroupNgrams source) (getGroupNgrams target)) targets $ intersect (getGroupNgrams source) (getGroupNgrams target)) targets
in map (\target -> in map (\target ->
...@@ -329,7 +336,7 @@ filterCliqueBySize thr l = filter (\clq -> (length $ clq ^. clustering_roots) >= ...@@ -329,7 +336,7 @@ filterCliqueBySize thr l = filter (\clq -> (length $ clq ^. clustering_roots) >=
-- To filter nested Fis -- To filter nested Fis
filterCliqueByNested :: Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering] filterCliqueByNested :: Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering]
filterCliqueByNested m = filterCliqueByNested m =
let clq = parMap rpar (\l -> let clq = parMap defaultStrategy (\l ->
foldl (\mem f -> if (any (\f' -> isNested (f' ^. clustering_roots) (f ^. clustering_roots)) mem) foldl (\mem f -> if (any (\f' -> isNested (f' ^. clustering_roots) (f ^. clustering_roots)) mem)
then mem then mem
else else
...@@ -448,7 +455,7 @@ groupDocsByPeriodRec f prds docs acc = ...@@ -448,7 +455,7 @@ groupDocsByPeriodRec f prds docs acc =
groupDocsByPeriod' :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] groupDocsByPeriod' :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod' f pds docs = groupDocsByPeriod' f pds docs =
let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs
periods = parMap rpar (inPeriode f docs') pds periods = parMap defaultStrategy (inPeriode f docs') pds
in tracePhylo ("\n" <> "-- | Group " in tracePhylo ("\n" <> "-- | Group "
<> show (length docs) <> show (length docs)
<> " docs by " <> " docs by "
...@@ -466,7 +473,7 @@ groupDocsByPeriod' f pds docs = ...@@ -466,7 +473,7 @@ groupDocsByPeriod' f pds docs =
groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.PhyloMaker] Empty [Documents] can not have any periods" groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.PhyloMaker] Empty [Documents] can not have any periods"
groupDocsByPeriod f pds es = groupDocsByPeriod f pds es =
let periods = parMap rpar (inPeriode f es) pds let periods = parMap defaultStrategy (inPeriode f es) pds
in tracePhylo ("\n" <> "-- | Group " in tracePhylo ("\n" <> "-- | Group "
<> show (length es) <> " docs by " <> show (length es) <> " docs by "
......
...@@ -9,8 +9,9 @@ Portability : POSIX ...@@ -9,8 +9,9 @@ Portability : POSIX
-} -}
{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE UndecidableInstances #-} {-# LANGUAGE UndecidableInstances #-}
{-# OPTIONS_GHC -Wno-orphans #-} -- orphan HasNodeError BackendInternalError {-# OPTIONS_GHC -Wno-orphans #-} -- orphan HasNodeError BackendInternalError
...@@ -25,6 +26,7 @@ import Control.Lens.TH ...@@ -25,6 +26,7 @@ import Control.Lens.TH
import Control.Monad.Trans.Control (MonadBaseControl) import Control.Monad.Trans.Control (MonadBaseControl)
import Data.Maybe (fromJust) import Data.Maybe (fromJust)
import Data.Pool qualified as Pool import Data.Pool qualified as Pool
import Data.Text qualified as T
import Database.PostgreSQL.Simple qualified as PSQL import Database.PostgreSQL.Simple qualified as PSQL
import Gargantext.API.Admin.Orchestrator.Types (JobLog, noJobLog) import Gargantext.API.Admin.Orchestrator.Types (JobLog, noJobLog)
import Gargantext.API.Errors (BackendInternalError) import Gargantext.API.Errors (BackendInternalError)
...@@ -42,7 +44,7 @@ import Gargantext.Core.Notifications.CentralExchange.Types qualified as CET ...@@ -42,7 +44,7 @@ import Gargantext.Core.Notifications.CentralExchange.Types qualified as CET
import Gargantext.Core.Worker.Types (JobInfo(..)) import Gargantext.Core.Worker.Types (JobInfo(..))
import Gargantext.Database.Prelude (HasConnectionPool(..)) import Gargantext.Database.Prelude (HasConnectionPool(..))
import Gargantext.Prelude hiding (to) import Gargantext.Prelude hiding (to)
import Gargantext.System.Logging (HasLogger(..), Logger, LogLevel(..), MonadLogger(..), withLogger, logMsg, withLoggerIO) import Gargantext.System.Logging (HasLogger(..), Logger, LogLevel(..), MonadLogger(..), withLogger, logMsg, logLocM, withLoggerIO)
import Gargantext.System.Logging.Loggers import Gargantext.System.Logging.Loggers
import Gargantext.Utils.Jobs.Monad ( MonadJobStatus(..), JobHandle ) import Gargantext.Utils.Jobs.Monad ( MonadJobStatus(..), JobHandle )
import System.Log.FastLogger qualified as FL import System.Log.FastLogger qualified as FL
...@@ -182,9 +184,21 @@ instance MonadJobStatus WorkerMonad where ...@@ -182,9 +184,21 @@ instance MonadJobStatus WorkerMonad where
type JobEventType WorkerMonad = JobLog type JobEventType WorkerMonad = JobLog
noJobHandle Proxy = WorkerNoJobHandle noJobHandle Proxy = WorkerNoJobHandle
getLatestJobStatus _ = WorkerMonad (pure noJobLog) getLatestJobStatus WorkerNoJobHandle = pure noJobLog
getLatestJobStatus (WorkerJobHandle ji) = do
stateTVar <- asks _w_env_job_state
state' <- liftIO $ readTVarIO stateTVar
pure $ case state' of
Nothing -> noJobLog
Just wjs ->
if _wjs_job_info wjs == ji
then
_wjs_job_log wjs
else
noJobLog
withTracer _ jh n = n jh withTracer _ jh n = n jh
markStarted n jh = updateJobProgress jh (const $ jobLogStart $ RemainingSteps n) markStarted n jh =
updateJobProgress jh (const $ jobLogStart $ RemainingSteps n)
markProgress steps jh = updateJobProgress jh (jobLogProgress steps) markProgress steps jh = updateJobProgress jh (jobLogProgress steps)
markFailure steps mb_msg jh = markFailure steps mb_msg jh =
updateJobProgress jh (\latest -> case mb_msg of updateJobProgress jh (\latest -> case mb_msg of
...@@ -208,7 +222,9 @@ updateJobProgress (WorkerJobHandle (ji@JobInfo { _ji_message_id })) f = do ...@@ -208,7 +222,9 @@ updateJobProgress (WorkerJobHandle (ji@JobInfo { _ji_message_id })) f = do
case state' of case state' of
Nothing -> pure () Nothing -> pure ()
Just wjs -> do Just wjs -> do
CET.ce_notify $ CET.UpdateWorkerProgress ji (_wjs_job_log wjs) (CET.ce_notify $ CET.UpdateWorkerProgress ji (_wjs_job_log wjs))
`CES.catch` (\(e :: SomeException) ->
$(logLocM) WARNING $ T.pack $ displayException e)
where where
updateState mwjs = updateState mwjs =
let initJobLog = let initJobLog =
......
...@@ -47,9 +47,11 @@ sendJobWithCfg gcConfig job = do ...@@ -47,9 +47,11 @@ sendJobWithCfg gcConfig job = do
b <- initBrokerWithDBCreate (gcConfig ^. gc_database_config) ws b <- initBrokerWithDBCreate (gcConfig ^. gc_database_config) ws
let queueName = _wdQueue wd let queueName = _wdQueue wd
let addDelayAfterRead = gcConfig ^. gc_worker . wsAdditionalDelayAfterRead let addDelayAfterRead = gcConfig ^. gc_worker . wsAdditionalDelayAfterRead
let job' = (updateJobData ws job $ W.mkDefaultSendJob' b queueName job) { W.delay = _wsDefaultDelay let sj = (W.mkDefaultSendJob' b queueName job) { W.delay = _wsDefaultDelay
, W.addDelayAfterRead = B._TimeoutS addDelayAfterRead , W.addDelayAfterRead = B._TimeoutS addDelayAfterRead
, W.toStrat = WT.TSDelete } -- don't allow to repeat infinitely (see #495)
, W.toStrat = WT.TSDelete }
let job' = updateJobData ws job sj
withLogger (gcConfig ^. gc_logging) $ \ioL -> withLogger (gcConfig ^. gc_logging) $ \ioL ->
$(logLoc) ioL DEBUG $ "[sendJob] sending job " <> show job <> " (delay " <> show (W.delay job') <> ")" $(logLoc) ioL DEBUG $ "[sendJob] sending job " <> show job <> " (delay " <> show (W.delay job') <> ")"
W.sendJob' job' W.sendJob' job'
......
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE StandaloneDeriving #-}
{-| {-|
Module : Gargantext.Core.Worker.Types Module : Gargantext.Core.Worker.Types
Description : Some useful worker types Description : Some useful worker types
......
...@@ -55,6 +55,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list) ...@@ -55,6 +55,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
where where
import Conduit import Conduit
import Control.Concurrent.Async.Lifted qualified as AsyncL
import Control.Exception.Safe qualified as CES import Control.Exception.Safe qualified as CES
import Control.Lens ( to, view ) import Control.Lens ( to, view )
import Control.Exception.Safe (catch, MonadCatch) import Control.Exception.Safe (catch, MonadCatch)
...@@ -70,7 +71,8 @@ import Data.Text qualified as T ...@@ -70,7 +71,8 @@ import Data.Text qualified as T
import Gargantext.API.Ngrams.Tools (getTermsWith) import Gargantext.API.Ngrams.Tools (getTermsWith)
import Gargantext.API.Ngrams.Types (NgramsTerm) import Gargantext.API.Ngrams.Types (NgramsTerm)
import Gargantext.Core (Lang(..), withDefaultLanguage, NLPServerConfig) import Gargantext.Core (Lang(..), withDefaultLanguage, NLPServerConfig)
import Gargantext.Core.Config (GargConfig(..), hasConfig) import Gargantext.Core.Config (GargConfig(..), hasConfig, gc_worker)
import Gargantext.Core.Config.Worker (wsNlpConduitChunkSize)
import Gargantext.Core.Config.Types (APIsConfig(..)) import Gargantext.Core.Config.Types (APIsConfig(..))
import Gargantext.Core.Ext.IMTUser (readFile_Annuaire) import Gargantext.Core.Ext.IMTUser (readFile_Annuaire)
import Gargantext.Core.NLP (HasNLPServer, nlpServerGet) import Gargantext.Core.NLP (HasNLPServer, nlpServerGet)
...@@ -98,7 +100,8 @@ import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact ) ...@@ -98,7 +100,8 @@ import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact )
import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( HyperdataAnnuaire, HyperdataCorpus(_hc_lang) ) import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( HyperdataAnnuaire, HyperdataCorpus(_hc_lang) )
import Gargantext.Database.Admin.Types.Hyperdata.Document ( ToHyperdataDocument(toHyperdataDocument), HyperdataDocument ) import Gargantext.Database.Admin.Types.Hyperdata.Document ( ToHyperdataDocument(toHyperdataDocument), HyperdataDocument )
import Gargantext.Database.Admin.Types.Node hiding (ERROR, DEBUG) -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId) import Gargantext.Database.Admin.Types.Node hiding (ERROR, DEBUG) -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
import Gargantext.Database.Prelude import Gargantext.Database.Class ( DBCmdWithEnv, IsDBCmd )
import Gargantext.Database.Transactional ( DBUpdate, runDBTx )
import Gargantext.Database.Query.Table.ContextNodeNgrams2 ( ContextNodeNgrams2Poly(..), insertContextNodeNgrams2 ) import Gargantext.Database.Query.Table.ContextNodeNgrams2 ( ContextNodeNgrams2Poly(..), insertContextNodeNgrams2 )
import Gargantext.Database.Query.Table.Node ( MkCorpus, insertDefaultNodeIfNotExists, getOrMkList, getNodeWith ) import Gargantext.Database.Query.Table.Node ( MkCorpus, insertDefaultNodeIfNotExists, getOrMkList, getNodeWith )
import Gargantext.Database.Query.Table.Node.Document.Add qualified as Doc (add) import Gargantext.Database.Query.Table.Node.Document.Add qualified as Doc (add)
...@@ -108,8 +111,8 @@ import Gargantext.Database.Query.Table.NodeContext (selectDocNodesOnlyId) ...@@ -108,8 +111,8 @@ import Gargantext.Database.Query.Table.NodeContext (selectDocNodesOnlyId)
import Gargantext.Database.Query.Table.NodeNgrams (listInsertDb , getCgramsId) import Gargantext.Database.Query.Table.NodeNgrams (listInsertDb , getCgramsId)
import Gargantext.Database.Query.Tree.Root (MkCorpusUser(..), getOrMkRoot, getOrMkRootWithCorpus, userFromMkCorpusUser) import Gargantext.Database.Query.Tree.Root (MkCorpusUser(..), getOrMkRoot, getOrMkRootWithCorpus, userFromMkCorpusUser)
import Gargantext.Database.Schema.Ngrams ( indexNgrams, NgramsId ) import Gargantext.Database.Schema.Ngrams ( indexNgrams, NgramsId )
import Gargantext.Database.Schema.Node import Gargantext.Database.Schema.Node ( NodePoly(_node_id, _node_hash_id), node_hyperdata )
import Gargantext.Database.Types import Gargantext.Database.Types ( Indexed(Indexed) )
import Gargantext.Prelude hiding (catch, onException, to) import Gargantext.Prelude hiding (catch, onException, to)
import Gargantext.System.Logging ( logLocM, LogLevel(DEBUG, ERROR), MonadLogger ) import Gargantext.System.Logging ( logLocM, LogLevel(DEBUG, ERROR), MonadLogger )
import Gargantext.Utils.Jobs.Monad ( JobHandle, MonadJobStatus(..), markFailureNoErr ) import Gargantext.Utils.Jobs.Monad ( JobHandle, MonadJobStatus(..), markFailureNoErr )
...@@ -296,11 +299,12 @@ flow :: forall env err m a c. ...@@ -296,11 +299,12 @@ flow :: forall env err m a c.
-> m CorpusId -> m CorpusId
flow c mkCorpusUser la mfslw (count, docsC) jobHandle = do flow c mkCorpusUser la mfslw (count, docsC) jobHandle = do
cfg <- view hasConfig cfg <- view hasConfig
let chunkSize = cfg ^. gc_worker . wsNlpConduitChunkSize
(_userId, userCorpusId, listId, msgs) <- runDBTx $ createNodes cfg mkCorpusUser c (_userId, userCorpusId, listId, msgs) <- runDBTx $ createNodes cfg mkCorpusUser c
forM_ msgs ce_notify forM_ msgs ce_notify
-- TODO if public insertMasterDocs else insertUserDocs -- TODO if public insertMasterDocs else insertUserDocs
runConduit (zipSources (yieldMany ([1..] :: [Int])) docsC runConduit (zipSources (yieldMany ([1..] :: [Int])) docsC
.| CList.chunksOf 5 .| CList.chunksOf chunkSize
.| mapM_C (addDocumentsWithProgress userCorpusId) .| mapM_C (addDocumentsWithProgress userCorpusId)
.| sinkNull) `CES.catches` .| sinkNull) `CES.catches`
[ CES.Handler $ \(e :: ClientError) -> do [ CES.Handler $ \(e :: ClientError) -> do
...@@ -544,13 +548,20 @@ extractNgramsFromDocuments :: forall doc env err m. ...@@ -544,13 +548,20 @@ extractNgramsFromDocuments :: forall doc env err m.
-> TermType Lang -> TermType Lang
-> [doc] -> [doc]
-> m (UncommittedNgrams doc) -> m (UncommittedNgrams doc)
extractNgramsFromDocuments nlpServer lang docs = extractNgramsFromDocuments nlpServer lang docs = do
foldlM go mempty docs ret <- AsyncL.mapConcurrently (extractNgramsFromDocument nlpServer lang) docs
where -- sem <- QSemL.newQSem 10
go :: UncommittedNgrams doc -> doc -> m (UncommittedNgrams doc) -- let f = extractNgramsFromDocument nlpServer lang
go !acc inputDoc = do -- ret <- AsyncL.mapConcurrently (\doc ->
ngrams <- extractNgramsFromDocument nlpServer lang inputDoc -- CEL.bracket_ (QSemL.waitQSem sem) (QSemL.signalQSem sem) (f doc)
pure $ acc <> ngrams -- ) docs
pure $ foldl (<>) mempty ret
-- foldlM go mempty docs
-- where
-- go :: UncommittedNgrams doc -> doc -> m (UncommittedNgrams doc)
-- go !acc inputDoc = do
-- ngrams <- extractNgramsFromDocument nlpServer lang inputDoc
-- pure $ acc <> ngrams
commitNgramsForDocuments :: UniqParameters doc commitNgramsForDocuments :: UniqParameters doc
=> UncommittedNgrams doc => UncommittedNgrams doc
......
...@@ -132,7 +132,8 @@ getOccByNgramsOnlyFast_withSample cId int nt ngs = ...@@ -132,7 +132,8 @@ getOccByNgramsOnlyFast_withSample cId int nt ngs =
HM.fromListWith (+) <$> selectNgramsOccurrencesOnlyByContextUser_withSample cId int nt ngs HM.fromListWith (+) <$> selectNgramsOccurrencesOnlyByContextUser_withSample cId int nt ngs
-- Returns occurrences of ngrams in given corpus/list (for each ngram, a list of contexts is returned) -- | Returns occurrences of ngrams in given corpus/list (for each
-- ngram, a list of contexts is returned)
getOccByNgramsOnlyFast :: CorpusId getOccByNgramsOnlyFast :: CorpusId
-> ListId -> ListId
-> NgramsType -> NgramsType
...@@ -154,34 +155,39 @@ getOccByNgramsOnlyFast cId lId nt = do ...@@ -154,34 +155,39 @@ getOccByNgramsOnlyFast cId lId nt = do
query :: DPS.Query query :: DPS.Query
query = [sql| query = [sql|
WITH cnnv AS WITH nc AS (
( SELECT DISTINCT context_node_ngrams.context_id, SELECT DISTINCT context_id
context_node_ngrams.ngrams_id, FROM nodes_contexts
nodes_contexts.node_id, WHERE node_id = ?
nodes_contexts.category AND category > 0
FROM nodes_contexts ),
JOIN context_node_ngrams ON context_node_ngrams.context_id = nodes_contexts.context_id cnnv AS
( SELECT DISTINCT context_id,
ngrams_id
FROM context_node_ngrams
WHERE context_id IN (SELECT context_id FROM nc)
), ),
node_context_ids AS
(SELECT context_id, ngrams_id, terms
FROM cnnv
JOIN ngrams ON cnnv.ngrams_id = ngrams.id
WHERE node_id = ? AND cnnv.category > 0
),
ncids_agg AS ncids_agg AS
(SELECT ngrams_id, terms, array_agg(DISTINCT context_id) AS agg ( SELECT array_agg(DISTINCT context_id) AS agg,
FROM node_context_ids ngrams_id,
GROUP BY (ngrams_id, terms)), terms
FROM cnnv
JOIN ngrams
ON cnnv.ngrams_id = ngrams.id
GROUP BY (ngrams_id, terms)
),
ns AS ns AS
(SELECT ngrams_id, terms (SELECT ngrams_id, terms
FROM node_stories FROM node_stories
JOIN ngrams ON ngrams_id = ngrams.id JOIN ngrams
WHERE node_id = ? AND ngrams_type_id = ? ON ngrams_id = ngrams.id
WHERE node_id = ? AND ngrams_type_id = ?
) )
SELECT ns.terms, CASE WHEN agg IS NULL THEN '{}' ELSE agg END SELECT ns.terms, CASE WHEN agg IS NULL THEN '{}' ELSE agg END
FROM ns FROM ns
LEFT JOIN ncids_agg ON ns.ngrams_id = ncids_agg.ngrams_id LEFT JOIN ncids_agg
ON ns.ngrams_id = ncids_agg.ngrams_id
|] |]
-- query = [sql| -- query = [sql|
-- WITH node_context_ids AS -- WITH node_context_ids AS
......
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE LambdaCase #-} {-# LANGUAGE LambdaCase #-}
{-# LANGUAGE TypeFamilies #-}
module Gargantext.System.Logging.Types ( module Gargantext.System.Logging.Types (
LogLevel(..) LogLevel(..)
......
...@@ -99,6 +99,10 @@ default_job_timeout = 60 ...@@ -99,6 +99,10 @@ default_job_timeout = 60
# default timeout for "long" jobs (in seconds) # default timeout for "long" jobs (in seconds)
long_job_timeout = 3000 long_job_timeout = 3000
# Batch size when sending data to NLP.
# Preferably, set as much as the number of CPUs
nlp_conduit_chunk_size = 10
# NOTE This is overridden by Test.Database.Setup # NOTE This is overridden by Test.Database.Setup
[worker.database] [worker.database]
host = "127.0.0.1" host = "127.0.0.1"
......
...@@ -214,22 +214,23 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do ...@@ -214,22 +214,23 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
eRes <- runClientM (get_table_ngrams token cId APINgrams.Terms listId 50 Nothing (Just MapTerm) Nothing Nothing Nothing Nothing) clientEnv eRes <- runClientM (get_table_ngrams token cId APINgrams.Terms listId 50 Nothing (Just MapTerm) Nothing Nothing Nothing Nothing) clientEnv
eRes `shouldSatisfy` isRight eRes `shouldSatisfy` isRight
let (Right res) = eRes let (Right res) = eRes
-- /NOTA BENE/ The count is 1 because the count applies to roots only.
Just res `shouldBe` JSON.decode [json| {"version":5 Just res `shouldBe` JSON.decode [json| {"version":5
,"count":3 ,"count":1
,"data":[ ,"data":[
{"ngrams":"guitar pedals" {"ngrams":"overdrives"
,"size":1 ,"size":1
,"list":"MapTerm" ,"list":"MapTerm"
,"root":"overdrives"
,"parent":"overdrives"
,"occurrences":[] ,"occurrences":[]
,"children":["tube screamers"] ,"children":["guitar pedals"]
}, },
{"ngrams":"overdrives" {"ngrams":"guitar pedals"
,"size":1 ,"size":1
,"list":"MapTerm" ,"list":"MapTerm"
,"root":"overdrives"
,"parent":"overdrives"
,"occurrences":[] ,"occurrences":[]
,"children":["guitar pedals"] ,"children":["tube screamers"]
}, },
{"ngrams":"tube screamers" {"ngrams":"tube screamers"
,"size":1 ,"size":1
...@@ -309,16 +310,8 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do ...@@ -309,16 +310,8 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
-- check that new term is parent of old one -- check that new term is parent of old one
checkNgrams getNgrams [json| {"version": 2 checkNgrams getNgrams [json| {"version": 2
,"count":2 ,"count":1
,"data":[ ,"data":[
{"ngrams":"abelian group"
,"size":2
,"list":"MapTerm"
,"root": "new abelian group"
,"parent": "new abelian group"
,"occurrences":[]
,"children":[]
},
{"ngrams":"new abelian group" {"ngrams":"new abelian group"
,"size":1 ,"size":1
,"list":"MapTerm" ,"list":"MapTerm"
...@@ -326,6 +319,14 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do ...@@ -326,6 +319,14 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
,"parent":null ,"parent":null
,"occurrences":[] ,"occurrences":[]
,"children":["abelian group"] ,"children":["abelian group"]
},
{"ngrams":"abelian group"
,"size":2
,"list":"MapTerm"
,"root": "new abelian group"
,"parent": "new abelian group"
,"occurrences":[]
,"children":[]
} }
] ]
} }
...@@ -341,16 +342,8 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do ...@@ -341,16 +342,8 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
-- In essence, this JSON needs to be exactly the same as the previous one, -- In essence, this JSON needs to be exactly the same as the previous one,
-- i.e. important doesn't change the topology. -- i.e. important doesn't change the topology.
checkNgrams getNgrams [json| {"version": 2 checkNgrams getNgrams [json| {"version": 2
,"count":2 ,"count":1
,"data":[ ,"data":[
{"ngrams":"abelian group"
,"size":2
,"list":"MapTerm"
,"root": "new abelian group"
,"parent": "new abelian group"
,"occurrences":[]
,"children":[]
},
{"ngrams":"new abelian group" {"ngrams":"new abelian group"
,"size":1 ,"size":1
,"list":"MapTerm" ,"list":"MapTerm"
...@@ -358,6 +351,14 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do ...@@ -358,6 +351,14 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
,"parent":null ,"parent":null
,"occurrences":[] ,"occurrences":[]
,"children":["abelian group"] ,"children":["abelian group"]
},
{"ngrams":"abelian group"
,"size":2
,"list":"MapTerm"
,"root": "new abelian group"
,"parent": "new abelian group"
,"occurrences":[]
,"children":[]
} }
] ]
} }
...@@ -610,8 +611,8 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do ...@@ -610,8 +611,8 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
liftIO $ exportedNgrams `shouldBe` exportedNgrams2 liftIO $ exportedNgrams `shouldBe` exportedNgrams2
-- We test that if we try to import terms which, when merged with the existing, -- We test that if we try to import terms which, when merged with the existing,
-- would cause a loop, GGTX is capable of rejecting the request. -- would cause a loop but GGTX is capable of breaking them, serving the request.
it "refuses to import terms which will lead to a loop" $ \(SpecContext testEnv port app _) -> do it "allows importing terms which will lead to a loop (because it can break them)" $ \(SpecContext testEnv port app _) -> do
cId <- newCorpusForUser testEnv "alice" cId <- newCorpusForUser testEnv "alice"
let log_cfg = (test_config testEnv) ^. gc_logging let log_cfg = (test_config testEnv) ^. gc_logging
withApplication app $ do withApplication app $ do
...@@ -684,14 +685,14 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do ...@@ -684,14 +685,14 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
} }
ji <- checkEither $ liftIO $ runClientM (add_form_to_list token listId params) clientEnv ji <- checkEither $ liftIO $ runClientM (add_form_to_list token listId params) clientEnv
ji' <- pollUntilWorkFinished log_cfg port ji ji' <- pollUntilWorkFinished log_cfg port ji
-- Unfortunately we don't have a better way then to match on the stringified exception, sigh.
case _scst_events ji' of case _scst_events ji' of
Just [ScraperEvent{..}] Just [ScraperEvent{..}]
| Just msg <- _scev_message | Just msg <- _scev_message
-> liftIO $ msg `shouldSatisfy` \txt -> "Loop detected in terms: foo -> bar -> foo" `T.isInfixOf` txt , "Loop detected in terms: foo -> bar -> foo" `T.isInfixOf` msg
-> fail (T.unpack msg)
| otherwise | otherwise
-> fail "No suitable message in ScraperEvent." -> pure () -- no loop!
_ -> fail "Expected job to fail, but it didn't" _ -> pure () -- no loop!
createDocsList :: FilePath createDocsList :: FilePath
-> TestEnv -> TestEnv
......
...@@ -13,6 +13,7 @@ Portability : POSIX ...@@ -13,6 +13,7 @@ Portability : POSIX
{-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
module Test.Instances where module Test.Instances where
...@@ -52,6 +53,7 @@ import Gargantext.API.Search.Types (SearchQuery(..), SearchResult(..), SearchRes ...@@ -52,6 +53,7 @@ import Gargantext.API.Search.Types (SearchQuery(..), SearchResult(..), SearchRes
import Gargantext.API.Table.Types (TableQuery(..)) import Gargantext.API.Table.Types (TableQuery(..))
import Gargantext.API.Viz.Types (PhyloData) import Gargantext.API.Viz.Types (PhyloData)
import Gargantext.Core (Lang) import Gargantext.Core (Lang)
import Gargantext.Core.NodeStory hiding (buildForest)
import Gargantext.Core.Notifications.CentralExchange.Types qualified as CET import Gargantext.Core.Notifications.CentralExchange.Types qualified as CET
import Gargantext.Core.Notifications.Dispatcher.Types qualified as DET import Gargantext.Core.Notifications.Dispatcher.Types qualified as DET
import Gargantext.Core.Text.Corpus.Query (RawQuery(..)) import Gargantext.Core.Text.Corpus.Query (RawQuery(..))
...@@ -785,16 +787,23 @@ genCorpusWithMatchingElement = do ...@@ -785,16 +787,23 @@ genCorpusWithMatchingElement = do
depth <- choose (1, 5) depth <- choose (1, 5)
let mkEntry = do let mkEntry = do
trm <- arbitrary trm <- arbitrary
el <- over ne_children (breakLoop trm) <$> (resize depth arbitrary) el <- over ne_children (breakLoop trm) . makeItRoot <$> (resize depth arbitrary)
pure (trm, el { _ne_ngrams = trm }) pure (trm, el { _ne_ngrams = trm })
-- Let's build the map first, so that duplicates will be overwritten. -- Let's build the map first, so that duplicates will be overwritten.
fullMap <- (Map.fromList <$> vectorOf depth mkEntry) `suchThat` (\x -> isRight (buildForest x)) -- exclude loops fullMapE <- buildForest (BreakLoop LBA_just_do_it) . Map.fromList <$> vectorOf depth mkEntry
let (hd NE.:| _) = NE.fromList $ Map.elems fullMap case fullMapE of
pure $ AcyclicTableMap fullMap hd Left e -> panicTrace (show e)
Right (destroyForest -> fullMap) -> do
let (hd NE.:| _) = NE.fromList $ map snd fullMap
pure $ AcyclicTableMap (Map.fromList fullMap) hd
where where
breakLoop :: NgramsTerm -> MSet NgramsTerm -> MSet NgramsTerm breakLoop :: NgramsTerm -> MSet NgramsTerm -> MSet NgramsTerm
breakLoop t = mSetFromSet . Set.delete t . mSetToSet breakLoop t = mSetFromSet . Set.delete t . mSetToSet
makeItRoot :: NgramsElement -> NgramsElement
makeItRoot ne = ne & ne_root .~ Nothing
& ne_parent .~ Nothing
instance Arbitrary AcyclicTableMap where instance Arbitrary AcyclicTableMap where
arbitrary = genCorpusWithMatchingElement arbitrary = genCorpusWithMatchingElement
shrink = shrinkTree shrink = shrinkTree
......
...@@ -235,8 +235,8 @@ testFlat05 = do ...@@ -235,8 +235,8 @@ testFlat05 = do
testForestSearchProp :: Property testForestSearchProp :: Property
testForestSearchProp = forAll arbitrary $ \(AcyclicTableMap ngramsTable el) -> do testForestSearchProp = forAll arbitrary $ \(AcyclicTableMap ngramsTable el) -> do
case searchTableNgrams (Versioned 0 ngramsTable) (searchQuery el) of case searchTableNgrams (Versioned 0 ngramsTable) (searchQuery el) of
Left (BFE_loop_detected err) -> fail (T.unpack $ renderLoop err) Left (BFE_loop_detected err) -> fail (T.unpack $ renderLoop err)
Right res -> res ^. vc_data `shouldSatisfy` (elem (_ne_ngrams el) . map _ne_ngrams . getNgramsTable) Right res -> res ^. vc_data `shouldSatisfy` (any (containsTerm (_ne_ngrams el)) . getNgramsTable)
where where
searchQuery term = NgramsSearchQuery { searchQuery term = NgramsSearchQuery {
_nsq_limit = Limit 5 _nsq_limit = Limit 5
...@@ -255,7 +255,9 @@ testSearchNestedTerms :: Assertion ...@@ -255,7 +255,9 @@ testSearchNestedTerms :: Assertion
testSearchNestedTerms = do testSearchNestedTerms = do
case searchTableNgrams (Versioned 0 hierarchicalTableMap) searchQuery of case searchTableNgrams (Versioned 0 hierarchicalTableMap) searchQuery of
Left (BFE_loop_detected err) -> fail (T.unpack $ renderLoop err) Left (BFE_loop_detected err) -> fail (T.unpack $ renderLoop err)
Right res -> res ^. vc_data `shouldSatisfy` (elem "ford" . map _ne_ngrams . getNgramsTable) Right res ->
-- it should appear at the top level or as one of the children.
res ^. vc_data `shouldSatisfy` (any (containsTerm "ford") . getNgramsTable)
where where
searchQuery = NgramsSearchQuery { searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 5 _nsq_limit = Limit 5
...@@ -267,6 +269,11 @@ testSearchNestedTerms = do ...@@ -267,6 +269,11 @@ testSearchNestedTerms = do
, _nsq_searchQuery = mockQueryFn (Just "ford") , _nsq_searchQuery = mockQueryFn (Just "ford")
} }
-- | Returns True if the input 'NgramsElement' contains (either in the root or in the children)
-- the input term.
containsTerm :: NgramsTerm -> NgramsElement -> Bool
containsTerm t (NgramsElement{..}) = _ne_ngrams == t || any ((==) t) (mSetToList _ne_children)
-- Pagination tests -- Pagination tests
test_pagination_allTerms :: Assertion test_pagination_allTerms :: Assertion
...@@ -390,7 +397,7 @@ test_paginationQuantum = do ...@@ -390,7 +397,7 @@ test_paginationQuantum = do
Left err -> fail (show err) Left err -> fail (show err)
Right res -> do Right res -> do
let elems = coerce @NgramsTable @[NgramsElement] $ _vc_data res let elems = coerce @NgramsTable @[NgramsElement] $ _vc_data res
length elems @?= 10 countRoots elems @?= 10
forM_ elems $ \term -> forM_ elems $ \term ->
assertBool ("found " <> show (_ne_list term) <> " in: " <> show elems) (_ne_list term == MapTerm) assertBool ("found " <> show (_ne_list term) <> " in: " <> show elems) (_ne_list term == MapTerm)
where where
...@@ -404,13 +411,20 @@ test_paginationQuantum = do ...@@ -404,13 +411,20 @@ test_paginationQuantum = do
, _nsq_searchQuery = mockQueryFn Nothing , _nsq_searchQuery = mockQueryFn Nothing
} }
countRoots :: [NgramsElement] -> Int
countRoots [] = 0
countRoots (x:xs) =
if isNothing (_ne_root x) || isNothing (_ne_parent x)
then 1 + countRoots xs
else countRoots xs
test_paginationQuantum_02 :: Assertion test_paginationQuantum_02 :: Assertion
test_paginationQuantum_02 = do test_paginationQuantum_02 = do
case searchTableNgrams quantumComputingCorpus searchQuery of case searchTableNgrams quantumComputingCorpus searchQuery of
Left err -> fail (show err) Left err -> fail (show err)
Right res -> do Right res -> do
let elems = coerce @NgramsTable @[NgramsElement] $ _vc_data res let elems = coerce @NgramsTable @[NgramsElement] $ _vc_data res
assertBool ("found only " <> show (length elems) <> " in: " <> show elems) (length elems == 10) assertBool ("found only " <> show (length elems) <> " in: " <> show elems) (countRoots elems == 10)
where where
searchQuery = NgramsSearchQuery { searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 10 _nsq_limit = Limit 10
......
...@@ -11,12 +11,15 @@ import Data.Char (isSpace) ...@@ -11,12 +11,15 @@ import Data.Char (isSpace)
import Data.Char qualified as Char import Data.Char qualified as Char
import Data.Map.Strict (Map) import Data.Map.Strict (Map)
import Data.Map.Strict qualified as Map import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Text qualified as T import Data.Text qualified as T
import Data.Tree import Data.Tree
import Gargantext.API.Ngrams import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
import Gargantext.API.Ngrams.Types qualified as NT import Gargantext.API.Ngrams.Types qualified as NT
import Gargantext.Core import Gargantext.Core
import Gargantext.Core.NodeStory (LoopBreakAlgorithm(..), OnLoopDetectedStrategy(..))
import Gargantext.Core.NodeStory.Types (VisitedNode(..))
import Gargantext.Core.Text.Terms.Mono (isSep) import Gargantext.Core.Text.Terms.Mono (isSep)
import Gargantext.Core.Text.Terms.WithList import Gargantext.Core.Text.Terms.WithList
import Gargantext.Core.Types import Gargantext.Core.Types
...@@ -109,6 +112,8 @@ tests = describe "Ngrams" $ do ...@@ -109,6 +112,8 @@ tests = describe "Ngrams" $ do
it "building a complex deep tree works" testBuildNgramsTree_03 it "building a complex deep tree works" testBuildNgramsTree_03
it "pruning a simple tree works" testPruningNgramsForest_01 it "pruning a simple tree works" testPruningNgramsForest_01
it "pruning a complex tree works" testPruningNgramsForest_02 it "pruning a complex tree works" testPruningNgramsForest_02
it "fails on loops if asked to do nothing" testLoopBreaker_01
it "breaks a simple loop (just do it algorithm)" testLoopBreaker_02
prop "destroyForest . buildForest === id" buildDestroyForestRoundtrips prop "destroyForest . buildForest === id" buildDestroyForestRoundtrips
describe "hierarchical grouping" $ do describe "hierarchical grouping" $ do
it "filterNgramsNodes with empty query is identity" testFilterNgramsNodesEmptyQuery it "filterNgramsNodes with empty query is identity" testFilterNgramsNodesEmptyQuery
...@@ -140,7 +145,12 @@ instance Show ASCIIForest where ...@@ -140,7 +145,12 @@ instance Show ASCIIForest where
show (ASCIIForest x) = x show (ASCIIForest x) = x
buildForestOrFail :: Map NgramsTerm NgramsElement -> Forest NgramsElement buildForestOrFail :: Map NgramsTerm NgramsElement -> Forest NgramsElement
buildForestOrFail mp = case buildForest mp of buildForestOrFail mp = case buildForest FailOnLoop mp of
Left (BFE_loop_detected treeLoop) -> error (T.unpack $ renderLoop treeLoop)
Right x -> x
buildForestOrBreakLoop :: Map NgramsTerm NgramsElement -> Forest NgramsElement
buildForestOrBreakLoop mp = case buildForest (BreakLoop LBA_just_do_it) mp of
Left (BFE_loop_detected treeLoop) -> error (T.unpack $ renderLoop treeLoop) Left (BFE_loop_detected treeLoop) -> error (T.unpack $ renderLoop treeLoop)
Right x -> x Right x -> x
...@@ -269,7 +279,7 @@ testBuildNgramsTree_03 = ...@@ -269,7 +279,7 @@ testBuildNgramsTree_03 =
-- /PRECONDITION/: The '_ne_ngrams' field always matches the 'NgramsTerm', key of the map. -- /PRECONDITION/: The '_ne_ngrams' field always matches the 'NgramsTerm', key of the map.
buildDestroyForestRoundtrips :: AcyclicTableMap -> Property buildDestroyForestRoundtrips :: AcyclicTableMap -> Property
buildDestroyForestRoundtrips (AcyclicTableMap mp _) = buildDestroyForestRoundtrips (AcyclicTableMap mp _) =
(destroyForest . buildForestOrFail $ mp) === mp (Map.fromList . destroyForest . buildForestOrFail $ mp) === mp
testPruningNgramsForest_01 :: Property testPruningNgramsForest_01 :: Property
testPruningNgramsForest_01 = testPruningNgramsForest_01 =
...@@ -292,3 +302,21 @@ testPruningNgramsForest_02 = ...@@ -292,3 +302,21 @@ testPruningNgramsForest_02 =
`- ford `- ford
|] |]
testLoopBreaker_01 :: Property
testLoopBreaker_01 =
let t1 = Map.fromList [ ( "foo", mkMapTerm "foo" & ne_children .~ mSetFromList ["bar"])
, ( "bar", mkMapTerm "bar" & ne_children .~ mSetFromList ["foo"])
]
in (buildForest FailOnLoop t1) === Left (BFE_loop_detected $ Set.fromList [ VN 0 "foo", VN 1 "bar"])
testLoopBreaker_02 :: Property
testLoopBreaker_02 =
let t1 = Map.fromList [ ( "foo", mkMapTerm "foo" & ne_children .~ mSetFromList ["bar"])
, ( "bar", mkMapTerm "bar" & ne_children .~ mSetFromList ["foo"])
]
in (pruneForest $ buildForestOrBreakLoop t1) `compareForestVisually` [r|
bar
|
`- foo
|]
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment