Commit 9c5b373c authored by Fabien Maniere's avatar Fabien Maniere

Merge branch 'adinapoli/issue-513-v2' into 'dev'

Break loops in Ngrams graphs

Closes #513

See merge request !453
parents b3892b96 3735bef1
Pipeline #7951 passed with stages
in 59 minutes and 58 seconds
......@@ -449,8 +449,8 @@ matchingNode listType minSize maxSize searchFn (Node inputNode children) =
-- | Version of 'buildForest' specialised over the 'NgramsElement' as the values of the tree.
-- /IMPORTANT/: This functions returns an error in case we found a loop.
buildForest :: Map NgramsTerm NgramsElement -> Either BuildForestError (Forest NgramsElement)
buildForest = fmap (map (fmap snd)) . NodeStory.buildForest
buildForest :: OnLoopDetectedStrategy -> Map NgramsTerm NgramsElement -> Either BuildForestError (Forest NgramsElement)
buildForest onLoopStrategy = fmap (map (fmap snd)) . NodeStory.buildForest onLoopStrategy
-- | Folds an Ngrams forest back to a table map.
-- This function doesn't aggregate information, but merely just recostructs the original
......@@ -482,7 +482,7 @@ searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement)
-> Either BuildForestError (VersionedWithCount NgramsTable)
searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
let tableMap = versionedTableMap ^. v_data
in case keepRoots <$> buildForest tableMap of
in case keepRoots <$> buildForest (BreakLoop LBA_just_do_it) tableMap of
Left err -> Left err
Right fs ->
let forestRoots = filterNgramsNodes _nsq_listType _nsq_minSize _nsq_maxSize _nsq_searchQuery $ fs
......
......@@ -111,6 +111,9 @@ mSetFromSet = MSet . Map.fromSet (const ())
mSetFromList :: Ord a => [a] -> MSet a
mSetFromList = MSet . Map.fromList . map (\x -> (x, ()))
mSetDifference :: Ord a => MSet a -> MSet a -> MSet a
mSetDifference (MSet m1) (MSet m2) = MSet (m1 `Map.difference` m2)
-- mSetToSet :: Ord a => MSet a -> Set a
-- mSetToSet (MSet a) = Set.fromList ( Map.keys a)
mSetToSet :: Ord a => MSet a -> Set a
......
This diff is collapsed.
......@@ -42,6 +42,7 @@ module Gargantext.Core.NodeStory.Types
, ArchiveState
, ArchiveStateSet
, ArchiveStateList
, allVisitedNgramsTerms
-- * Errors
, HasNodeStoryError(..)
......@@ -217,6 +218,9 @@ data VisitedNode =
VN { _vn_position :: !Int, _vn_term :: !NgramsTerm }
deriving (Show)
allVisitedNgramsTerms :: Set VisitedNode -> Set NgramsTerm
allVisitedNgramsTerms = Set.map _vn_term
-- /NOTA BENE/: It's important to use this custom instance for the loop detector
-- to work correctly. If we stop comparing on the terms the loop detector .. will loop.
instance Eq VisitedNode where
......
......@@ -611,8 +611,8 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
liftIO $ exportedNgrams `shouldBe` exportedNgrams2
-- We test that if we try to import terms which, when merged with the existing,
-- would cause a loop, GGTX is capable of rejecting the request.
it "refuses to import terms which will lead to a loop" $ \(SpecContext testEnv port app _) -> do
-- would cause a loop but GGTX is capable of breaking them, serving the request.
it "allows importing terms which will lead to a loop (because it can break them)" $ \(SpecContext testEnv port app _) -> do
cId <- newCorpusForUser testEnv "alice"
let log_cfg = (test_config testEnv) ^. gc_logging
withApplication app $ do
......@@ -685,14 +685,14 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
}
ji <- checkEither $ liftIO $ runClientM (add_form_to_list token listId params) clientEnv
ji' <- pollUntilWorkFinished log_cfg port ji
-- Unfortunately we don't have a better way then to match on the stringified exception, sigh.
case _scst_events ji' of
Just [ScraperEvent{..}]
| Just msg <- _scev_message
-> liftIO $ msg `shouldSatisfy` \txt -> "Loop detected in terms: foo -> bar -> foo" `T.isInfixOf` txt
, "Loop detected in terms: foo -> bar -> foo" `T.isInfixOf` msg
-> fail (T.unpack msg)
| otherwise
-> fail "No suitable message in ScraperEvent."
_ -> fail "Expected job to fail, but it didn't"
-> pure () -- no loop!
_ -> pure () -- no loop!
createDocsList :: FilePath
-> TestEnv
......
......@@ -13,6 +13,7 @@ Portability : POSIX
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
module Test.Instances where
......@@ -52,6 +53,7 @@ import Gargantext.API.Search.Types (SearchQuery(..), SearchResult(..), SearchRes
import Gargantext.API.Table.Types (TableQuery(..))
import Gargantext.API.Viz.Types (PhyloData)
import Gargantext.Core (Lang)
import Gargantext.Core.NodeStory hiding (buildForest)
import Gargantext.Core.Notifications.CentralExchange.Types qualified as CET
import Gargantext.Core.Notifications.Dispatcher.Types qualified as DET
import Gargantext.Core.Text.Corpus.Query (RawQuery(..))
......@@ -771,9 +773,12 @@ genCorpusWithMatchingElement = do
el <- over ne_children (breakLoop trm) . makeItRoot <$> (resize depth arbitrary)
pure (trm, el { _ne_ngrams = trm })
-- Let's build the map first, so that duplicates will be overwritten.
fullMap <- (Map.fromList <$> vectorOf depth mkEntry) `suchThat` (\x -> isRight (buildForest x)) -- exclude loops
let (hd NE.:| _) = NE.fromList $ Map.elems fullMap
pure $ AcyclicTableMap fullMap hd
fullMapE <- buildForest (BreakLoop LBA_just_do_it) . Map.fromList <$> vectorOf depth mkEntry
case fullMapE of
Left e -> panicTrace (show e)
Right (destroyForest -> fullMap) -> do
let (hd NE.:| _) = NE.fromList $ map snd fullMap
pure $ AcyclicTableMap (Map.fromList fullMap) hd
where
breakLoop :: NgramsTerm -> MSet NgramsTerm -> MSet NgramsTerm
breakLoop t = mSetFromSet . Set.delete t . mSetToSet
......
......@@ -11,12 +11,15 @@ import Data.Char (isSpace)
import Data.Char qualified as Char
import Data.Map.Strict (Map)
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Text qualified as T
import Data.Tree
import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.Types
import Gargantext.API.Ngrams.Types qualified as NT
import Gargantext.Core
import Gargantext.Core.NodeStory (LoopBreakAlgorithm(..), OnLoopDetectedStrategy(..))
import Gargantext.Core.NodeStory.Types (VisitedNode(..))
import Gargantext.Core.Text.Terms.Mono (isSep)
import Gargantext.Core.Text.Terms.WithList
import Gargantext.Core.Types
......@@ -109,6 +112,8 @@ tests = describe "Ngrams" $ do
it "building a complex deep tree works" testBuildNgramsTree_03
it "pruning a simple tree works" testPruningNgramsForest_01
it "pruning a complex tree works" testPruningNgramsForest_02
it "fails on loops if asked to do nothing" testLoopBreaker_01
it "breaks a simple loop (just do it algorithm)" testLoopBreaker_02
prop "destroyForest . buildForest === id" buildDestroyForestRoundtrips
describe "hierarchical grouping" $ do
it "filterNgramsNodes with empty query is identity" testFilterNgramsNodesEmptyQuery
......@@ -140,7 +145,12 @@ instance Show ASCIIForest where
show (ASCIIForest x) = x
buildForestOrFail :: Map NgramsTerm NgramsElement -> Forest NgramsElement
buildForestOrFail mp = case buildForest mp of
buildForestOrFail mp = case buildForest FailOnLoop mp of
Left (BFE_loop_detected treeLoop) -> error (T.unpack $ renderLoop treeLoop)
Right x -> x
buildForestOrBreakLoop :: Map NgramsTerm NgramsElement -> Forest NgramsElement
buildForestOrBreakLoop mp = case buildForest (BreakLoop LBA_just_do_it) mp of
Left (BFE_loop_detected treeLoop) -> error (T.unpack $ renderLoop treeLoop)
Right x -> x
......@@ -292,3 +302,21 @@ testPruningNgramsForest_02 =
`- ford
|]
testLoopBreaker_01 :: Property
testLoopBreaker_01 =
let t1 = Map.fromList [ ( "foo", mkMapTerm "foo" & ne_children .~ mSetFromList ["bar"])
, ( "bar", mkMapTerm "bar" & ne_children .~ mSetFromList ["foo"])
]
in (buildForest FailOnLoop t1) === Left (BFE_loop_detected $ Set.fromList [ VN 0 "foo", VN 1 "bar"])
testLoopBreaker_02 :: Property
testLoopBreaker_02 =
let t1 = Map.fromList [ ( "foo", mkMapTerm "foo" & ne_children .~ mSetFromList ["bar"])
, ( "bar", mkMapTerm "bar" & ne_children .~ mSetFromList ["foo"])
]
in (pruneForest $ buildForestOrBreakLoop t1) `compareForestVisually` [r|
bar
|
`- foo
|]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment