Commit 8c1700b1 authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Introduce the OnLoopDetectedStrategy and add (failing) test

This commit introduces the `OnLoopDetectedStrategy` type and does a
slight refactoring so that we can "plug in" algorithms to break loops
during our forest search.

It also introduces a test, which at the moment is failing, that we
can actually break a loop.
parent 150350f9
......@@ -449,8 +449,8 @@ matchingNode listType minSize maxSize searchFn (Node inputNode children) =
-- | Version of 'buildForest' specialised over the 'NgramsElement' as the values of the tree.
-- /IMPORTANT/: This functions returns an error in case we found a loop.
buildForest :: Map NgramsTerm NgramsElement -> Either BuildForestError (Forest NgramsElement)
buildForest = fmap (map (fmap snd)) . NodeStory.buildForest
buildForest :: OnLoopDetectedStrategy -> Map NgramsTerm NgramsElement -> Either BuildForestError (Forest NgramsElement)
buildForest onLoopStrategy = fmap (map (fmap snd)) . NodeStory.buildForest onLoopStrategy
-- | Folds an Ngrams forest back to a table map.
-- This function doesn't aggregate information, but merely just recostructs the original
......@@ -482,7 +482,7 @@ searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement)
-> Either BuildForestError (VersionedWithCount NgramsTable)
searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
let tableMap = versionedTableMap ^. v_data
in case keepRoots <$> buildForest tableMap of
in case keepRoots <$> buildForest (BreakLoop LBA_just_do_it) tableMap of
Left err -> Left err
Right fs ->
let forestRoots = filterNgramsNodes _nsq_listType _nsq_minSize _nsq_maxSize _nsq_searchQuery $ fs
......
......@@ -65,6 +65,8 @@ module Gargantext.Core.NodeStory
, TreeNode
, BuildForestError(..)
, VisitedNode(..)
, OnLoopDetectedStrategy(..)
, LoopBreakAlgorithm(..)
, buildForest
, pruneForest
) where
......@@ -114,16 +116,35 @@ type ArchiveStateForest = ListZipper (Tree (NgramsTerm, NgramsRepoElement))
type TreeNode e = (NgramsTerm, e)
data LoopBreakAlgorithm
= -- | Just break the loop the easiest possible way
LBA_just_do_it
-- | break the loop such that we preserve the longest possible chain of children.
-- (CURRENTLY UNIMPLEMENTED)
| LBA_prefer_longest_children_chain
-- | break the loop such that we preserve the largest occurrences chain (i.e. the score).
-- (CURRENTLY UNIMPLEMENTED)
| LBA_prefer_largest_occurrences_chain
data OnLoopDetectedStrategy
= -- When a loop is detected don't do anything, just fail.
FailOnLoop
| BreakLoop LoopBreakAlgorithm
buildForestsFromArchiveState :: NgramsState'
-> Either BuildForestError (Map Ngrams.NgramsType (Forest (TreeNode NgramsRepoElement)))
buildForestsFromArchiveState = traverse buildForest
buildForestsFromArchiveState = traverse (buildForest (BreakLoop LBA_just_do_it))
destroyArchiveStateForest :: Map Ngrams.NgramsType (Forest (TreeNode NgramsRepoElement)) -> NgramsState'
destroyArchiveStateForest = Map.map destroyForest
-- | Builds an ngrams forest from the input ngrams table map.
buildForest :: forall e. HasNgramChildren e => Map NgramsTerm e -> Either BuildForestError (Forest (TreeNode e))
buildForest mp = unfoldForestM unfoldNode $ Map.toList mp
buildForest :: forall e. HasNgramChildren e
=> OnLoopDetectedStrategy
-- ^ A strategy to apply when a loop is found.
-> Map NgramsTerm e
-> Either BuildForestError (Forest (TreeNode e))
buildForest _onLoopStrategy mp = unfoldForestM unfoldNode $ Map.toList mp
where
unfoldNode :: TreeNode e -> Either BuildForestError (TreeNode e, [TreeNode e])
unfoldNode (n, el) = flip evalState (1 :: Int, mempty) . runExceptT $ do
......
......@@ -52,6 +52,7 @@ import Gargantext.API.Search.Types (SearchQuery(..), SearchResult(..), SearchRes
import Gargantext.API.Table.Types (TableQuery(..))
import Gargantext.API.Viz.Types (PhyloData)
import Gargantext.Core (Lang)
import Gargantext.Core.NodeStory hiding (buildForest)
import Gargantext.Core.Notifications.CentralExchange.Types qualified as CET
import Gargantext.Core.Notifications.Dispatcher.Types qualified as DET
import Gargantext.Core.Text.Corpus.Query (RawQuery(..))
......@@ -771,7 +772,7 @@ genCorpusWithMatchingElement = do
el <- over ne_children (breakLoop trm) . makeItRoot <$> (resize depth arbitrary)
pure (trm, el { _ne_ngrams = trm })
-- Let's build the map first, so that duplicates will be overwritten.
fullMap <- (Map.fromList <$> vectorOf depth mkEntry) `suchThat` (\x -> isRight (buildForest x)) -- exclude loops
fullMap <- (Map.fromList <$> vectorOf depth mkEntry) `suchThat` (\x -> isRight (buildForest (BreakLoop LBA_just_do_it) x)) -- exclude loops
let (hd NE.:| _) = NE.fromList $ Map.elems fullMap
pure $ AcyclicTableMap fullMap hd
where
......
......@@ -11,12 +11,15 @@ import Data.Char (isSpace)
import Data.Char qualified as Char
import Data.Map.Strict (Map)
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Text qualified as T
import Data.Tree
import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.Types
import Gargantext.API.Ngrams.Types qualified as NT
import Gargantext.Core
import Gargantext.Core.NodeStory (LoopBreakAlgorithm(..), OnLoopDetectedStrategy(..))
import Gargantext.Core.NodeStory.Types (VisitedNode(..))
import Gargantext.Core.Text.Terms.Mono (isSep)
import Gargantext.Core.Text.Terms.WithList
import Gargantext.Core.Types
......@@ -109,6 +112,8 @@ tests = describe "Ngrams" $ do
it "building a complex deep tree works" testBuildNgramsTree_03
it "pruning a simple tree works" testPruningNgramsForest_01
it "pruning a complex tree works" testPruningNgramsForest_02
it "fails on loops if asked to do nothing" testLoopBreaker_01
it "breaks a simple loop (just do it algorithm)" testLoopBreaker_02
prop "destroyForest . buildForest === id" buildDestroyForestRoundtrips
describe "hierarchical grouping" $ do
it "filterNgramsNodes with empty query is identity" testFilterNgramsNodesEmptyQuery
......@@ -140,7 +145,12 @@ instance Show ASCIIForest where
show (ASCIIForest x) = x
buildForestOrFail :: Map NgramsTerm NgramsElement -> Forest NgramsElement
buildForestOrFail mp = case buildForest mp of
buildForestOrFail mp = case buildForest FailOnLoop mp of
Left (BFE_loop_detected treeLoop) -> error (T.unpack $ renderLoop treeLoop)
Right x -> x
buildForestOrBreakLoop :: Map NgramsTerm NgramsElement -> Forest NgramsElement
buildForestOrBreakLoop mp = case buildForest (BreakLoop LBA_just_do_it) mp of
Left (BFE_loop_detected treeLoop) -> error (T.unpack $ renderLoop treeLoop)
Right x -> x
......@@ -292,3 +302,21 @@ testPruningNgramsForest_02 =
`- ford
|]
testLoopBreaker_01 :: Property
testLoopBreaker_01 =
let t1 = Map.fromList [ ( "foo", mkMapTerm "foo" & ne_children .~ mSetFromList ["bar"])
, ( "bar", mkMapTerm "bar" & ne_children .~ mSetFromList ["foo"])
]
in (buildForest FailOnLoop t1) === Left (BFE_loop_detected $ Set.fromList [ VN 0 "foo", VN 1 "bar"])
testLoopBreaker_02 :: Property
testLoopBreaker_02 =
let t1 = Map.fromList [ ( "foo", mkMapTerm "foo" & ne_children .~ mSetFromList ["bar"])
, ( "bar", mkMapTerm "bar" & ne_children .~ mSetFromList ["foo"])
]
in (buildForestOrBreakLoop t1) `compareForestVisually` [r|
foo
|
`- bar
|]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment