Commit 63608dd9 authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Initial implementation for the 'just_do_it' strategy

parent aef7d677
......@@ -111,6 +111,9 @@ mSetFromSet = MSet . Map.fromSet (const ())
mSetFromList :: Ord a => [a] -> MSet a
mSetFromList = MSet . Map.fromList . map (\x -> (x, ()))
mSetDifference :: Ord a => MSet a -> MSet a -> MSet a
mSetDifference (MSet m1) (MSet m2) = MSet (m1 `Map.difference` m2)
-- mSetToSet :: Ord a => MSet a -> Set a
-- mSetToSet (MSet a) = Set.fromList ( Map.keys a)
mSetToSet :: Ord a => MSet a -> Set a
......
......@@ -44,6 +44,7 @@ TODO:
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE LambdaCase #-}
module Gargantext.Core.NodeStory
( module Gargantext.Core.NodeStory.Types
......@@ -71,7 +72,7 @@ module Gargantext.Core.NodeStory
, pruneForest
) where
import Control.Lens ((%~), non, _Just, at, over, Lens', (#))
import Control.Lens ((%~), non, _Just, at, over, Lens', (#), to)
import Data.ListZipper
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
......@@ -87,6 +88,7 @@ import Gargantext.Database.Admin.Config ()
import Gargantext.Database.Admin.Types.Node ( ListId, NodeId(..) )
import Gargantext.Database.Prelude
import Gargantext.Prelude hiding (to)
import qualified Data.List as L
class HasNgramChildren e where
ngramsElementChildren :: Lens' e (MSet NgramsTerm)
......@@ -144,35 +146,95 @@ buildForest :: forall e. HasNgramChildren e
-- ^ A strategy to apply when a loop is found.
-> Map NgramsTerm e
-> Either BuildForestError (Forest (TreeNode e))
buildForest _onLoopStrategy mp = unfoldForestM unfoldNode $ Map.toList mp
buildForest onLoopStrategy mp = unfoldForestM unfoldNode $ Map.toList mp
where
unfoldNode :: TreeNode e -> Either BuildForestError (TreeNode e, [TreeNode e])
unfoldNode (n, el) = flip evalState (1 :: Int, mempty) . runExceptT $ do
let initialChildren = getChildren (mSetToList $ el ^. ngramsElementChildren)
go initialChildren *> pure (mkTreeNode (n, el))
where
-- This function is quite simple: the internal 'State' keeps track of the current
-- position of the visit, and if we discover a term we already seen before, we throw
-- an error, otherwise we store it in the state at the current position and carry on.
go :: [ TreeNode e ] -> ExceptT BuildForestError (State (Int, Set VisitedNode)) ()
go [] = pure ()
go (x:xs) = do
(!pos, !visited) <- get
let nt = fst x
case Set.member (VN pos nt) visited of
True -> throwError $ BFE_loop_detected visited
False -> do
put (pos + 1, Set.insert (VN (pos + 1) nt) visited)
go (getChildren (mSetToList $ snd x ^. ngramsElementChildren) <> xs)
mkTreeNode :: TreeNode e -> (TreeNode e, [TreeNode e])
mkTreeNode (k, el) = ((k, el), mapMaybe findChildren $ mSetToList (el ^. ngramsElementChildren))
findChildren :: NgramsTerm -> Maybe (TreeNode e)
findChildren t = Map.lookup t mp <&> \el -> (t, el)
getChildren :: [NgramsTerm] -> [TreeNode e]
getChildren = mapMaybe (\t -> (t,) <$> Map.lookup t mp)
unfoldNode (n, el) = flip evalState (BuildForestState 1 mempty []) . runExceptT $ do
let initialChildren = getChildren mp (mSetToList $ el ^. ngramsElementChildren)
children <- unfold_node onLoopStrategy mp initialChildren
-- Create the final ngram by setting the children in the root node to be
-- the children computed by unfold_node.
pure ((n, el & ngramsElementChildren .~ (mSetFromList $ map fst children)), children)
getChildren :: Map NgramsTerm e -> [NgramsTerm] -> [TreeNode e]
getChildren mp = mapMaybe (\t -> (t,) <$> Map.lookup t mp)
data BuildForestState e
= BuildForestState
{ _bfs_pos :: !Int
, _bfs_visited :: !(Set VisitedNode)
-- | The children we computed for the target root.
, _bfs_children :: [TreeNode e]
}
-- This function is quite simple: the internal 'State' keeps track of the current
-- position of the visit, and if we discover a term we already seen before, we throw
-- an error, otherwise we store it in the state at the current position and carry on.
unfold_node :: HasNgramChildren e
=> OnLoopDetectedStrategy
-> Map NgramsTerm e
-> [ TreeNode e ]
-> ExceptT BuildForestError (State (BuildForestState e)) [TreeNode e]
unfold_node _ _ [] = L.reverse <$> gets _bfs_children
unfold_node onLoopStrategy mp (x:xs) = do
(BuildForestState !pos !visited !children_so_far) <- get
let nt = fst x
case Set.member (VN pos nt) visited of
True -> case onLoopStrategy of
FailOnLoop -> throwError $ BFE_loop_detected visited
BreakLoop algo -> breakLoopByAlgo mp x xs algo
False -> do
put (BuildForestState (pos + 1) (Set.insert (VN (pos + 1) nt) visited) (x : children_so_far))
unfold_node onLoopStrategy mp (getChildren mp (mSetToList $ snd x ^. ngramsElementChildren) <> xs)
breakLoopByAlgo :: HasNgramChildren e
=> Map NgramsTerm e
-> TreeNode e
-> [TreeNode e]
-> LoopBreakAlgorithm
-> ExceptT BuildForestError (State (BuildForestState e)) [TreeNode e]
breakLoopByAlgo mp x xs = \case
LBA_just_do_it -> justDoItLoopBreaker mp x xs
LBA_prefer_longest_children_chain -> preferLongestChildrenLoopBreaker mp x xs
LBA_prefer_largest_occurrences_chain -> preferLargestOccurrencesLoopBreaker mp x xs
justDoItLoopBreaker :: HasNgramChildren e
=> Map NgramsTerm e
-> TreeNode e
-> [ TreeNode e ]
-> ExceptT BuildForestError (State (BuildForestState e)) [TreeNode e]
justDoItLoopBreaker mp (nt, el) xs = do
(BuildForestState !pos !visited !children_so_far) <- get
-- We need to find the edges which are loopy and remove them
let loopyEdges = findLoopyEdges el visited
let el' = el & over ngramsElementChildren (\mchildren -> mchildren `mSetDifference` loopyEdges)
let x' = (nt, el')
put (BuildForestState pos visited (x' : children_so_far))
unfold_node (BreakLoop LBA_just_do_it) mp (getChildren mp (mSetToList $ snd x' ^. ngramsElementChildren) <> xs)
findLoopyEdges :: HasNgramChildren e => e -> Set VisitedNode -> MSet NgramsTerm
findLoopyEdges e vns = mSetFromSet $
(e ^. ngramsElementChildren . to mSetToSet) `Set.intersection` allVisitedNgramsTerms vns
-- FIXME(adinapoli) At the moment this is unimplemented, just an alias for the simplest version.
preferLongestChildrenLoopBreaker :: HasNgramChildren e
=> Map NgramsTerm e
-> TreeNode e
-> [ TreeNode e ]
-> ExceptT BuildForestError (State (BuildForestState e)) [TreeNode e]
preferLongestChildrenLoopBreaker mp x = justDoItLoopBreaker mp x
-- FIXME(adinapoli) At the moment this is unimplemented, just an alias for the simplest version.
preferLargestOccurrencesLoopBreaker :: HasNgramChildren e
=> Map NgramsTerm e
-> TreeNode e
-> [ TreeNode e ]
-> ExceptT BuildForestError (State (BuildForestState e)) [TreeNode e]
preferLargestOccurrencesLoopBreaker mp x = justDoItLoopBreaker mp x
-- | Folds an Ngrams forest back to a table map.
-- This function doesn't aggregate information, but merely just recostructs the original
......
......@@ -42,6 +42,7 @@ module Gargantext.Core.NodeStory.Types
, ArchiveState
, ArchiveStateSet
, ArchiveStateList
, allVisitedNgramsTerms
-- * Errors
, HasNodeStoryError(..)
......@@ -217,6 +218,9 @@ data VisitedNode =
VN { _vn_position :: !Int, _vn_term :: !NgramsTerm }
deriving (Show)
allVisitedNgramsTerms :: Set VisitedNode -> Set NgramsTerm
allVisitedNgramsTerms = Set.map _vn_term
-- /NOTA BENE/: It's important to use this custom instance for the loop detector
-- to work correctly. If we stop comparing on the terms the loop detector .. will loop.
instance Eq VisitedNode where
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment