Commit 9750af2b authored by Alfredo Di Napoli

Rework Node story fixers in terms of forest and trees

This commit generalises the API we have seen for Ngrams and
NgramsElement to Ngrams and NgramsRepoElement, making it suitable for
the NodeStory manipulations.

Furthermore, we use zippers to efficiently traverse forests.

This paves the way for a more disciplined way of handling forests of
ngrams.
parent 3c2dab6d
Pipeline #7763 failed with stages
in 25 minutes and 13 seconds
......@@ -570,6 +570,7 @@ library
, json-stream ^>= 0.4.2.4
, lens >= 5.2.2 && < 5.3
, lens-aeson < 1.3
, list-zipper
, massiv < 1.1
, matrix ^>= 0.3.6.1
, mime-mail >= 0.5.1
......
......@@ -89,6 +89,8 @@ module Gargantext.API.Ngrams
, PatchHistory(..)
, newNgramsFromNgramsStatePatch
, filterNgramsNodes
-- * Operations on a forest
, buildForest
, destroyForest
, pruneForest
......@@ -107,7 +109,8 @@ import Data.Text.Lazy.IO as DTL ( writeFile )
import Data.Tree
import Gargantext.API.Ngrams.Tools (getNodeStory)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory (ArchiveList, HasNodeStory, NgramsStatePatch', a_history, a_state, a_version, currentVersion, NodeStoryEnv, hasNodeArchiveStoryImmediateSaver, hasNodeStoryImmediateSaver, HasNodeStoryEnv (..))
import Gargantext.Core.NodeStory hiding (buildForest)
import Gargantext.Core.NodeStory qualified as NodeStory
import Gargantext.Core.Text.Ngrams (Ngrams, NgramsType)
import Gargantext.Core.Types (ListType(..), NodeId, ListId, TODO, assertValid, ContextId, HasValidationError)
import Gargantext.Core.Types.Query (Limit(..), Offset(..), MinSize(..), MaxSize(..))
......@@ -462,15 +465,12 @@ matchingNode listType minSize maxSize searchQuery inputNode =
&& searchQuery (inputNode ^. ne_ngrams)
&& matchesListType (inputNode ^. ne_list)
-- | Builds an ngrams forest from the input ngrams table map.
-- | Version of 'buildForest' specialised over the 'NgramsElement' as the values of the tree.
-- We can't use a single function to "rule them all" because the 'NgramsRepoElement' that
-- the 'NodeStory' uses does not have an 'ngrams' we can use as the key when building and
-- destroying a forest.
buildForest :: Map NgramsTerm NgramsElement -> Forest NgramsElement
buildForest mp = unfoldForest mkTreeNode (Map.toList mp)
where
mkTreeNode :: (NgramsTerm, NgramsElement) -> (NgramsElement, [(NgramsTerm, NgramsElement)])
mkTreeNode (_, el) = (el, mapMaybe findChildren $ mSetToList (_ne_children el))
findChildren :: NgramsTerm -> Maybe (NgramsTerm, NgramsElement)
findChildren t = Map.lookup t mp <&> \el -> (t, el)
buildForest = map (fmap snd) . NodeStory.buildForest
-- | Folds an Ngrams forest back to a table map.
-- This function doesn't aggregate information, but merely reconstructs the original
......@@ -485,22 +485,13 @@ destroyForest f = Map.fromList . map (foldTree destroyTree) $ f
squashElements :: NgramsElement -> [(NgramsTerm, NgramsElement)] -> NgramsElement
squashElements r _ = r
-- | Prunes the input 'Forest' of 'NgramsElement' so that only genuine roots survive, i.e.
-- the trees whose root element has no parent.
-- /NOTE ON IMPLEMENTATION:/ This is a single pass over the top-level trees that keeps a
-- tree only when the '_ne_parent' of its root is 'Nothing'. It relies on the input
-- 'Forest' having been built with the correct parent/children relationships: the
-- 'children' sets of the other elements are not cross-checked here, so a wrongly built
-- forest will yield wrong results.
pruneForest :: Forest NgramsElement -> Forest NgramsElement
pruneForest forest = [ tree | tree@(Node rootEl _) <- forest, isNothing (_ne_parent rootEl) ]
-- | TODO Errors management
-- TODO: polymorphic for Annuaire or Corpus or ...
-- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
-- TODO: should take only one ListId
-- | /pure/ function to query a 'Map NgramsTerm NgramsElement', according to a
-- search function. Returns a /versioned/ 'NgramsTable' which is paginated and
-- search function. Returns a /versioned/ 'NgramsTable' which is paginated and
-- sorted according to the input 'NgramsSearchQuery', together with the
-- occurrences of the elements.
searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement)
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment