Commit 8c1700b1 authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Introduce the OnLoopDetectedStrategy and add (failing) test

This commit introduces the `OnLoopDetectedStrategy` type and does a
slight refactoring so that we can "plug in" algorithms to break loops
during our forest search.

It also introduces a test, which at the moment is failing, that we
can actually break a loop.
parent 150350f9
...@@ -449,8 +449,8 @@ matchingNode listType minSize maxSize searchFn (Node inputNode children) = ...@@ -449,8 +449,8 @@ matchingNode listType minSize maxSize searchFn (Node inputNode children) =
-- | Version of 'buildForest' specialised over the 'NgramsElement' as the values of the tree. -- | Version of 'buildForest' specialised over the 'NgramsElement' as the values of the tree.
-- /IMPORTANT/: This functions returns an error in case we found a loop. -- /IMPORTANT/: This functions returns an error in case we found a loop.
buildForest :: Map NgramsTerm NgramsElement -> Either BuildForestError (Forest NgramsElement) buildForest :: OnLoopDetectedStrategy -> Map NgramsTerm NgramsElement -> Either BuildForestError (Forest NgramsElement)
buildForest = fmap (map (fmap snd)) . NodeStory.buildForest buildForest onLoopStrategy = fmap (map (fmap snd)) . NodeStory.buildForest onLoopStrategy
-- | Folds an Ngrams forest back to a table map. -- | Folds an Ngrams forest back to a table map.
-- This function doesn't aggregate information, but merely just recostructs the original -- This function doesn't aggregate information, but merely just recostructs the original
...@@ -482,7 +482,7 @@ searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement) ...@@ -482,7 +482,7 @@ searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement)
-> Either BuildForestError (VersionedWithCount NgramsTable) -> Either BuildForestError (VersionedWithCount NgramsTable)
searchTableNgrams versionedTableMap NgramsSearchQuery{..} = searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
let tableMap = versionedTableMap ^. v_data let tableMap = versionedTableMap ^. v_data
in case keepRoots <$> buildForest tableMap of in case keepRoots <$> buildForest (BreakLoop LBA_just_do_it) tableMap of
Left err -> Left err Left err -> Left err
Right fs -> Right fs ->
let forestRoots = filterNgramsNodes _nsq_listType _nsq_minSize _nsq_maxSize _nsq_searchQuery $ fs let forestRoots = filterNgramsNodes _nsq_listType _nsq_minSize _nsq_maxSize _nsq_searchQuery $ fs
......
...@@ -65,6 +65,8 @@ module Gargantext.Core.NodeStory ...@@ -65,6 +65,8 @@ module Gargantext.Core.NodeStory
, TreeNode , TreeNode
, BuildForestError(..) , BuildForestError(..)
, VisitedNode(..) , VisitedNode(..)
, OnLoopDetectedStrategy(..)
, LoopBreakAlgorithm(..)
, buildForest , buildForest
, pruneForest , pruneForest
) where ) where
...@@ -114,16 +116,35 @@ type ArchiveStateForest = ListZipper (Tree (NgramsTerm, NgramsRepoElement)) ...@@ -114,16 +116,35 @@ type ArchiveStateForest = ListZipper (Tree (NgramsTerm, NgramsRepoElement))
type TreeNode e = (NgramsTerm, e) type TreeNode e = (NgramsTerm, e)
data LoopBreakAlgorithm
= -- | Just break the loop the easiest possible way
LBA_just_do_it
-- | break the loop such that we preserve the longest possible chain of children.
-- (CURRENTLY UNIMPLEMENTED)
| LBA_prefer_longest_children_chain
-- | break the loop such that we preserve the largest occurrences chain (i.e. the score).
-- (CURRENTLY UNIMPLEMENTED)
| LBA_prefer_largest_occurrences_chain
data OnLoopDetectedStrategy
= -- When a loop is detected don't do anything, just fail.
FailOnLoop
| BreakLoop LoopBreakAlgorithm
buildForestsFromArchiveState :: NgramsState' buildForestsFromArchiveState :: NgramsState'
-> Either BuildForestError (Map Ngrams.NgramsType (Forest (TreeNode NgramsRepoElement))) -> Either BuildForestError (Map Ngrams.NgramsType (Forest (TreeNode NgramsRepoElement)))
buildForestsFromArchiveState = traverse buildForest buildForestsFromArchiveState = traverse (buildForest (BreakLoop LBA_just_do_it))
destroyArchiveStateForest :: Map Ngrams.NgramsType (Forest (TreeNode NgramsRepoElement)) -> NgramsState' destroyArchiveStateForest :: Map Ngrams.NgramsType (Forest (TreeNode NgramsRepoElement)) -> NgramsState'
destroyArchiveStateForest = Map.map destroyForest destroyArchiveStateForest = Map.map destroyForest
-- | Builds an ngrams forest from the input ngrams table map. -- | Builds an ngrams forest from the input ngrams table map.
buildForest :: forall e. HasNgramChildren e => Map NgramsTerm e -> Either BuildForestError (Forest (TreeNode e)) buildForest :: forall e. HasNgramChildren e
buildForest mp = unfoldForestM unfoldNode $ Map.toList mp => OnLoopDetectedStrategy
-- ^ A strategy to apply when a loop is found.
-> Map NgramsTerm e
-> Either BuildForestError (Forest (TreeNode e))
buildForest _onLoopStrategy mp = unfoldForestM unfoldNode $ Map.toList mp
where where
unfoldNode :: TreeNode e -> Either BuildForestError (TreeNode e, [TreeNode e]) unfoldNode :: TreeNode e -> Either BuildForestError (TreeNode e, [TreeNode e])
unfoldNode (n, el) = flip evalState (1 :: Int, mempty) . runExceptT $ do unfoldNode (n, el) = flip evalState (1 :: Int, mempty) . runExceptT $ do
......
...@@ -52,6 +52,7 @@ import Gargantext.API.Search.Types (SearchQuery(..), SearchResult(..), SearchRes ...@@ -52,6 +52,7 @@ import Gargantext.API.Search.Types (SearchQuery(..), SearchResult(..), SearchRes
import Gargantext.API.Table.Types (TableQuery(..)) import Gargantext.API.Table.Types (TableQuery(..))
import Gargantext.API.Viz.Types (PhyloData) import Gargantext.API.Viz.Types (PhyloData)
import Gargantext.Core (Lang) import Gargantext.Core (Lang)
import Gargantext.Core.NodeStory hiding (buildForest)
import Gargantext.Core.Notifications.CentralExchange.Types qualified as CET import Gargantext.Core.Notifications.CentralExchange.Types qualified as CET
import Gargantext.Core.Notifications.Dispatcher.Types qualified as DET import Gargantext.Core.Notifications.Dispatcher.Types qualified as DET
import Gargantext.Core.Text.Corpus.Query (RawQuery(..)) import Gargantext.Core.Text.Corpus.Query (RawQuery(..))
...@@ -771,7 +772,7 @@ genCorpusWithMatchingElement = do ...@@ -771,7 +772,7 @@ genCorpusWithMatchingElement = do
el <- over ne_children (breakLoop trm) . makeItRoot <$> (resize depth arbitrary) el <- over ne_children (breakLoop trm) . makeItRoot <$> (resize depth arbitrary)
pure (trm, el { _ne_ngrams = trm }) pure (trm, el { _ne_ngrams = trm })
-- Let's build the map first, so that duplicates will be overwritten. -- Let's build the map first, so that duplicates will be overwritten.
fullMap <- (Map.fromList <$> vectorOf depth mkEntry) `suchThat` (\x -> isRight (buildForest x)) -- exclude loops fullMap <- (Map.fromList <$> vectorOf depth mkEntry) `suchThat` (\x -> isRight (buildForest (BreakLoop LBA_just_do_it) x)) -- exclude loops
let (hd NE.:| _) = NE.fromList $ Map.elems fullMap let (hd NE.:| _) = NE.fromList $ Map.elems fullMap
pure $ AcyclicTableMap fullMap hd pure $ AcyclicTableMap fullMap hd
where where
......
...@@ -11,12 +11,15 @@ import Data.Char (isSpace) ...@@ -11,12 +11,15 @@ import Data.Char (isSpace)
import Data.Char qualified as Char import Data.Char qualified as Char
import Data.Map.Strict (Map) import Data.Map.Strict (Map)
import Data.Map.Strict qualified as Map import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Text qualified as T import Data.Text qualified as T
import Data.Tree import Data.Tree
import Gargantext.API.Ngrams import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
import Gargantext.API.Ngrams.Types qualified as NT import Gargantext.API.Ngrams.Types qualified as NT
import Gargantext.Core import Gargantext.Core
import Gargantext.Core.NodeStory (LoopBreakAlgorithm(..), OnLoopDetectedStrategy(..))
import Gargantext.Core.NodeStory.Types (VisitedNode(..))
import Gargantext.Core.Text.Terms.Mono (isSep) import Gargantext.Core.Text.Terms.Mono (isSep)
import Gargantext.Core.Text.Terms.WithList import Gargantext.Core.Text.Terms.WithList
import Gargantext.Core.Types import Gargantext.Core.Types
...@@ -109,6 +112,8 @@ tests = describe "Ngrams" $ do ...@@ -109,6 +112,8 @@ tests = describe "Ngrams" $ do
it "building a complex deep tree works" testBuildNgramsTree_03 it "building a complex deep tree works" testBuildNgramsTree_03
it "pruning a simple tree works" testPruningNgramsForest_01 it "pruning a simple tree works" testPruningNgramsForest_01
it "pruning a complex tree works" testPruningNgramsForest_02 it "pruning a complex tree works" testPruningNgramsForest_02
it "fails on loops if asked to do nothing" testLoopBreaker_01
it "breaks a simple loop (just do it algorithm)" testLoopBreaker_02
prop "destroyForest . buildForest === id" buildDestroyForestRoundtrips prop "destroyForest . buildForest === id" buildDestroyForestRoundtrips
describe "hierarchical grouping" $ do describe "hierarchical grouping" $ do
it "filterNgramsNodes with empty query is identity" testFilterNgramsNodesEmptyQuery it "filterNgramsNodes with empty query is identity" testFilterNgramsNodesEmptyQuery
...@@ -140,7 +145,12 @@ instance Show ASCIIForest where ...@@ -140,7 +145,12 @@ instance Show ASCIIForest where
show (ASCIIForest x) = x show (ASCIIForest x) = x
buildForestOrFail :: Map NgramsTerm NgramsElement -> Forest NgramsElement buildForestOrFail :: Map NgramsTerm NgramsElement -> Forest NgramsElement
buildForestOrFail mp = case buildForest mp of buildForestOrFail mp = case buildForest FailOnLoop mp of
Left (BFE_loop_detected treeLoop) -> error (T.unpack $ renderLoop treeLoop)
Right x -> x
buildForestOrBreakLoop :: Map NgramsTerm NgramsElement -> Forest NgramsElement
buildForestOrBreakLoop mp = case buildForest (BreakLoop LBA_just_do_it) mp of
Left (BFE_loop_detected treeLoop) -> error (T.unpack $ renderLoop treeLoop) Left (BFE_loop_detected treeLoop) -> error (T.unpack $ renderLoop treeLoop)
Right x -> x Right x -> x
...@@ -292,3 +302,21 @@ testPruningNgramsForest_02 = ...@@ -292,3 +302,21 @@ testPruningNgramsForest_02 =
`- ford `- ford
|] |]
testLoopBreaker_01 :: Property
testLoopBreaker_01 =
let t1 = Map.fromList [ ( "foo", mkMapTerm "foo" & ne_children .~ mSetFromList ["bar"])
, ( "bar", mkMapTerm "bar" & ne_children .~ mSetFromList ["foo"])
]
in (buildForest FailOnLoop t1) === Left (BFE_loop_detected $ Set.fromList [ VN 0 "foo", VN 1 "bar"])
testLoopBreaker_02 :: Property
testLoopBreaker_02 =
let t1 = Map.fromList [ ( "foo", mkMapTerm "foo" & ne_children .~ mSetFromList ["bar"])
, ( "bar", mkMapTerm "bar" & ne_children .~ mSetFromList ["foo"])
]
in (buildForestOrBreakLoop t1) `compareForestVisually` [r|
foo
|
`- bar
|]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment