Commit 9c5b373c authored by Fabien Maniere's avatar Fabien Maniere

Merge branch 'adinapoli/issue-513-v2' into 'dev'

Break loops in Ngrams graphs

Closes #513

See merge request !453
parents b3892b96 3735bef1
Pipeline #7951 passed with stages
in 59 minutes and 58 seconds
...@@ -449,8 +449,8 @@ matchingNode listType minSize maxSize searchFn (Node inputNode children) = ...@@ -449,8 +449,8 @@ matchingNode listType minSize maxSize searchFn (Node inputNode children) =
-- | Version of 'buildForest' specialised over the 'NgramsElement' as the values of the tree. -- | Version of 'buildForest' specialised over the 'NgramsElement' as the values of the tree.
-- /IMPORTANT/: This functions returns an error in case we found a loop. -- /IMPORTANT/: This functions returns an error in case we found a loop.
buildForest :: Map NgramsTerm NgramsElement -> Either BuildForestError (Forest NgramsElement) buildForest :: OnLoopDetectedStrategy -> Map NgramsTerm NgramsElement -> Either BuildForestError (Forest NgramsElement)
buildForest = fmap (map (fmap snd)) . NodeStory.buildForest buildForest onLoopStrategy = fmap (map (fmap snd)) . NodeStory.buildForest onLoopStrategy
-- | Folds an Ngrams forest back to a table map. -- | Folds an Ngrams forest back to a table map.
-- This function doesn't aggregate information, but merely just recostructs the original -- This function doesn't aggregate information, but merely just recostructs the original
...@@ -482,7 +482,7 @@ searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement) ...@@ -482,7 +482,7 @@ searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement)
-> Either BuildForestError (VersionedWithCount NgramsTable) -> Either BuildForestError (VersionedWithCount NgramsTable)
searchTableNgrams versionedTableMap NgramsSearchQuery{..} = searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
let tableMap = versionedTableMap ^. v_data let tableMap = versionedTableMap ^. v_data
in case keepRoots <$> buildForest tableMap of in case keepRoots <$> buildForest (BreakLoop LBA_just_do_it) tableMap of
Left err -> Left err Left err -> Left err
Right fs -> Right fs ->
let forestRoots = filterNgramsNodes _nsq_listType _nsq_minSize _nsq_maxSize _nsq_searchQuery $ fs let forestRoots = filterNgramsNodes _nsq_listType _nsq_minSize _nsq_maxSize _nsq_searchQuery $ fs
......
...@@ -111,6 +111,9 @@ mSetFromSet = MSet . Map.fromSet (const ()) ...@@ -111,6 +111,9 @@ mSetFromSet = MSet . Map.fromSet (const ())
mSetFromList :: Ord a => [a] -> MSet a mSetFromList :: Ord a => [a] -> MSet a
mSetFromList = MSet . Map.fromList . map (\x -> (x, ())) mSetFromList = MSet . Map.fromList . map (\x -> (x, ()))
mSetDifference :: Ord a => MSet a -> MSet a -> MSet a
mSetDifference (MSet m1) (MSet m2) = MSet (m1 `Map.difference` m2)
-- mSetToSet :: Ord a => MSet a -> Set a -- mSetToSet :: Ord a => MSet a -> Set a
-- mSetToSet (MSet a) = Set.fromList ( Map.keys a) -- mSetToSet (MSet a) = Set.fromList ( Map.keys a)
mSetToSet :: Ord a => MSet a -> Set a mSetToSet :: Ord a => MSet a -> Set a
......
This diff is collapsed.
...@@ -42,6 +42,7 @@ module Gargantext.Core.NodeStory.Types ...@@ -42,6 +42,7 @@ module Gargantext.Core.NodeStory.Types
, ArchiveState , ArchiveState
, ArchiveStateSet , ArchiveStateSet
, ArchiveStateList , ArchiveStateList
, allVisitedNgramsTerms
-- * Errors -- * Errors
, HasNodeStoryError(..) , HasNodeStoryError(..)
...@@ -197,7 +198,7 @@ data BuildForestError ...@@ -197,7 +198,7 @@ data BuildForestError
= -- We found a loop, something that shouldn't normally happen if the calling = -- We found a loop, something that shouldn't normally happen if the calling
-- code is correct by construction, but if that does happen, the value will -- code is correct by construction, but if that does happen, the value will
-- contain the full path to the cycle. -- contain the full path to the cycle.
BFE_loop_detected !(Set VisitedNode) BFE_loop_detected !(Set VisitedNode)
deriving (Show, Eq) deriving (Show, Eq)
instance ToHumanFriendlyError BuildForestError where instance ToHumanFriendlyError BuildForestError where
...@@ -217,6 +218,9 @@ data VisitedNode = ...@@ -217,6 +218,9 @@ data VisitedNode =
VN { _vn_position :: !Int, _vn_term :: !NgramsTerm } VN { _vn_position :: !Int, _vn_term :: !NgramsTerm }
deriving (Show) deriving (Show)
allVisitedNgramsTerms :: Set VisitedNode -> Set NgramsTerm
allVisitedNgramsTerms = Set.map _vn_term
-- /NOTA BENE/: It's important to use this custom instance for the loop detector -- /NOTA BENE/: It's important to use this custom instance for the loop detector
-- to work correctly. If we stop comparing on the terms the loop detector .. will loop. -- to work correctly. If we stop comparing on the terms the loop detector .. will loop.
instance Eq VisitedNode where instance Eq VisitedNode where
......
...@@ -611,8 +611,8 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do ...@@ -611,8 +611,8 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
liftIO $ exportedNgrams `shouldBe` exportedNgrams2 liftIO $ exportedNgrams `shouldBe` exportedNgrams2
-- We test that if we try to import terms which, when merged with the existing, -- We test that if we try to import terms which, when merged with the existing,
-- would cause a loop, GGTX is capable of rejecting the request. -- would cause a loop but GGTX is capable of breaking them, serving the request.
it "refuses to import terms which will lead to a loop" $ \(SpecContext testEnv port app _) -> do it "allows importing terms which will lead to a loop (because it can break them)" $ \(SpecContext testEnv port app _) -> do
cId <- newCorpusForUser testEnv "alice" cId <- newCorpusForUser testEnv "alice"
let log_cfg = (test_config testEnv) ^. gc_logging let log_cfg = (test_config testEnv) ^. gc_logging
withApplication app $ do withApplication app $ do
...@@ -685,14 +685,14 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do ...@@ -685,14 +685,14 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
} }
ji <- checkEither $ liftIO $ runClientM (add_form_to_list token listId params) clientEnv ji <- checkEither $ liftIO $ runClientM (add_form_to_list token listId params) clientEnv
ji' <- pollUntilWorkFinished log_cfg port ji ji' <- pollUntilWorkFinished log_cfg port ji
-- Unfortunately we don't have a better way then to match on the stringified exception, sigh.
case _scst_events ji' of case _scst_events ji' of
Just [ScraperEvent{..}] Just [ScraperEvent{..}]
| Just msg <- _scev_message | Just msg <- _scev_message
-> liftIO $ msg `shouldSatisfy` \txt -> "Loop detected in terms: foo -> bar -> foo" `T.isInfixOf` txt , "Loop detected in terms: foo -> bar -> foo" `T.isInfixOf` msg
-> fail (T.unpack msg)
| otherwise | otherwise
-> fail "No suitable message in ScraperEvent." -> pure () -- no loop!
_ -> fail "Expected job to fail, but it didn't" _ -> pure () -- no loop!
createDocsList :: FilePath createDocsList :: FilePath
-> TestEnv -> TestEnv
......
...@@ -13,6 +13,7 @@ Portability : POSIX ...@@ -13,6 +13,7 @@ Portability : POSIX
{-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
module Test.Instances where module Test.Instances where
...@@ -52,6 +53,7 @@ import Gargantext.API.Search.Types (SearchQuery(..), SearchResult(..), SearchRes ...@@ -52,6 +53,7 @@ import Gargantext.API.Search.Types (SearchQuery(..), SearchResult(..), SearchRes
import Gargantext.API.Table.Types (TableQuery(..)) import Gargantext.API.Table.Types (TableQuery(..))
import Gargantext.API.Viz.Types (PhyloData) import Gargantext.API.Viz.Types (PhyloData)
import Gargantext.Core (Lang) import Gargantext.Core (Lang)
import Gargantext.Core.NodeStory hiding (buildForest)
import Gargantext.Core.Notifications.CentralExchange.Types qualified as CET import Gargantext.Core.Notifications.CentralExchange.Types qualified as CET
import Gargantext.Core.Notifications.Dispatcher.Types qualified as DET import Gargantext.Core.Notifications.Dispatcher.Types qualified as DET
import Gargantext.Core.Text.Corpus.Query (RawQuery(..)) import Gargantext.Core.Text.Corpus.Query (RawQuery(..))
...@@ -771,9 +773,12 @@ genCorpusWithMatchingElement = do ...@@ -771,9 +773,12 @@ genCorpusWithMatchingElement = do
el <- over ne_children (breakLoop trm) . makeItRoot <$> (resize depth arbitrary) el <- over ne_children (breakLoop trm) . makeItRoot <$> (resize depth arbitrary)
pure (trm, el { _ne_ngrams = trm }) pure (trm, el { _ne_ngrams = trm })
-- Let's build the map first, so that duplicates will be overwritten. -- Let's build the map first, so that duplicates will be overwritten.
fullMap <- (Map.fromList <$> vectorOf depth mkEntry) `suchThat` (\x -> isRight (buildForest x)) -- exclude loops fullMapE <- buildForest (BreakLoop LBA_just_do_it) . Map.fromList <$> vectorOf depth mkEntry
let (hd NE.:| _) = NE.fromList $ Map.elems fullMap case fullMapE of
pure $ AcyclicTableMap fullMap hd Left e -> panicTrace (show e)
Right (destroyForest -> fullMap) -> do
let (hd NE.:| _) = NE.fromList $ map snd fullMap
pure $ AcyclicTableMap (Map.fromList fullMap) hd
where where
breakLoop :: NgramsTerm -> MSet NgramsTerm -> MSet NgramsTerm breakLoop :: NgramsTerm -> MSet NgramsTerm -> MSet NgramsTerm
breakLoop t = mSetFromSet . Set.delete t . mSetToSet breakLoop t = mSetFromSet . Set.delete t . mSetToSet
......
...@@ -235,7 +235,7 @@ testFlat05 = do ...@@ -235,7 +235,7 @@ testFlat05 = do
testForestSearchProp :: Property testForestSearchProp :: Property
testForestSearchProp = forAll arbitrary $ \(AcyclicTableMap ngramsTable el) -> do testForestSearchProp = forAll arbitrary $ \(AcyclicTableMap ngramsTable el) -> do
case searchTableNgrams (Versioned 0 ngramsTable) (searchQuery el) of case searchTableNgrams (Versioned 0 ngramsTable) (searchQuery el) of
Left (BFE_loop_detected err) -> fail (T.unpack $ renderLoop err) Left (BFE_loop_detected err) -> fail (T.unpack $ renderLoop err)
Right res -> res ^. vc_data `shouldSatisfy` (any (containsTerm (_ne_ngrams el)) . getNgramsTable) Right res -> res ^. vc_data `shouldSatisfy` (any (containsTerm (_ne_ngrams el)) . getNgramsTable)
where where
searchQuery term = NgramsSearchQuery { searchQuery term = NgramsSearchQuery {
......
...@@ -11,12 +11,15 @@ import Data.Char (isSpace) ...@@ -11,12 +11,15 @@ import Data.Char (isSpace)
import Data.Char qualified as Char import Data.Char qualified as Char
import Data.Map.Strict (Map) import Data.Map.Strict (Map)
import Data.Map.Strict qualified as Map import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Text qualified as T import Data.Text qualified as T
import Data.Tree import Data.Tree
import Gargantext.API.Ngrams import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
import Gargantext.API.Ngrams.Types qualified as NT import Gargantext.API.Ngrams.Types qualified as NT
import Gargantext.Core import Gargantext.Core
import Gargantext.Core.NodeStory (LoopBreakAlgorithm(..), OnLoopDetectedStrategy(..))
import Gargantext.Core.NodeStory.Types (VisitedNode(..))
import Gargantext.Core.Text.Terms.Mono (isSep) import Gargantext.Core.Text.Terms.Mono (isSep)
import Gargantext.Core.Text.Terms.WithList import Gargantext.Core.Text.Terms.WithList
import Gargantext.Core.Types import Gargantext.Core.Types
...@@ -109,6 +112,8 @@ tests = describe "Ngrams" $ do ...@@ -109,6 +112,8 @@ tests = describe "Ngrams" $ do
it "building a complex deep tree works" testBuildNgramsTree_03 it "building a complex deep tree works" testBuildNgramsTree_03
it "pruning a simple tree works" testPruningNgramsForest_01 it "pruning a simple tree works" testPruningNgramsForest_01
it "pruning a complex tree works" testPruningNgramsForest_02 it "pruning a complex tree works" testPruningNgramsForest_02
it "fails on loops if asked to do nothing" testLoopBreaker_01
it "breaks a simple loop (just do it algorithm)" testLoopBreaker_02
prop "destroyForest . buildForest === id" buildDestroyForestRoundtrips prop "destroyForest . buildForest === id" buildDestroyForestRoundtrips
describe "hierarchical grouping" $ do describe "hierarchical grouping" $ do
it "filterNgramsNodes with empty query is identity" testFilterNgramsNodesEmptyQuery it "filterNgramsNodes with empty query is identity" testFilterNgramsNodesEmptyQuery
...@@ -140,7 +145,12 @@ instance Show ASCIIForest where ...@@ -140,7 +145,12 @@ instance Show ASCIIForest where
show (ASCIIForest x) = x show (ASCIIForest x) = x
buildForestOrFail :: Map NgramsTerm NgramsElement -> Forest NgramsElement buildForestOrFail :: Map NgramsTerm NgramsElement -> Forest NgramsElement
buildForestOrFail mp = case buildForest mp of buildForestOrFail mp = case buildForest FailOnLoop mp of
Left (BFE_loop_detected treeLoop) -> error (T.unpack $ renderLoop treeLoop)
Right x -> x
buildForestOrBreakLoop :: Map NgramsTerm NgramsElement -> Forest NgramsElement
buildForestOrBreakLoop mp = case buildForest (BreakLoop LBA_just_do_it) mp of
Left (BFE_loop_detected treeLoop) -> error (T.unpack $ renderLoop treeLoop) Left (BFE_loop_detected treeLoop) -> error (T.unpack $ renderLoop treeLoop)
Right x -> x Right x -> x
...@@ -292,3 +302,21 @@ testPruningNgramsForest_02 = ...@@ -292,3 +302,21 @@ testPruningNgramsForest_02 =
`- ford `- ford
|] |]
testLoopBreaker_01 :: Property
testLoopBreaker_01 =
let t1 = Map.fromList [ ( "foo", mkMapTerm "foo" & ne_children .~ mSetFromList ["bar"])
, ( "bar", mkMapTerm "bar" & ne_children .~ mSetFromList ["foo"])
]
in (buildForest FailOnLoop t1) === Left (BFE_loop_detected $ Set.fromList [ VN 0 "foo", VN 1 "bar"])
testLoopBreaker_02 :: Property
testLoopBreaker_02 =
let t1 = Map.fromList [ ( "foo", mkMapTerm "foo" & ne_children .~ mSetFromList ["bar"])
, ( "bar", mkMapTerm "bar" & ne_children .~ mSetFromList ["foo"])
]
in (pruneForest $ buildForestOrBreakLoop t1) `compareForestVisually` [r|
bar
|
`- foo
|]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment