Commit 1964cc3c authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Introduce the pruneForest

This function allows us to keep only the roots, and discard the
redundant children after building a 'Forest'.
parent ca82be78
Pipeline #7748 passed with stages
in 100 minutes and 31 seconds
...@@ -91,6 +91,7 @@ module Gargantext.API.Ngrams ...@@ -91,6 +91,7 @@ module Gargantext.API.Ngrams
, filterNgramsNodes , filterNgramsNodes
, buildForest , buildForest
, destroyForest , destroyForest
, pruneForest
) )
where where
...@@ -484,6 +485,15 @@ destroyForest f = Map.fromList . map (foldTree destroyTree) $ f ...@@ -484,6 +485,15 @@ destroyForest f = Map.fromList . map (foldTree destroyTree) $ f
squashElements :: NgramsElement -> [(NgramsTerm, NgramsElement)] -> NgramsElement squashElements :: NgramsElement -> [(NgramsTerm, NgramsElement)] -> NgramsElement
squashElements r _ = r squashElements r _ = r
-- | Prunes the input 'Forest' of 'NgramsElement' by keeping only the roots, i.e. the
-- nodes which has no children /AND/ they do not appear in any other 'children' relationship.
-- /NOTE ON IMPLEMENTATION:/ The fast way to do this is to simply filter each tree, ensuring
-- that we keep only trees which root has no parent or root (i.e. it's a root itself!) and this
-- will work only under the assumption that the input 'Forest' has been built correctly, i.e.
-- with the correct relationships specified, or this will break.
pruneForest :: Forest NgramsElement -> Forest NgramsElement
pruneForest = filter (\(Node r _) -> isNothing (_ne_parent r))
-- | TODO Errors management -- | TODO Errors management
-- TODO: polymorphic for Annuaire or Corpus or ... -- TODO: polymorphic for Annuaire or Corpus or ...
-- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut). -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
...@@ -500,9 +510,9 @@ searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement) ...@@ -500,9 +510,9 @@ searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement)
searchTableNgrams versionedTableMap NgramsSearchQuery{..} = searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
let tableMap = versionedTableMap ^. v_data let tableMap = versionedTableMap ^. v_data
filteredData = filterNgramsNodes _nsq_listType _nsq_minSize _nsq_maxSize _nsq_searchQuery tableMap filteredData = filterNgramsNodes _nsq_listType _nsq_minSize _nsq_maxSize _nsq_searchQuery tableMap
forestRoots = Set.fromList . Map.elems . destroyForest . buildForest $ filteredData forestRoots = Set.fromList . Map.elems . destroyForest . pruneForest . buildForest $ filteredData
tableMapSorted = versionedTableMap tableMapSorted = versionedTableMap
& v_data .~ (NgramsTable . sortAndPaginate $ forestRoots) & v_data .~ (NgramsTable . sortAndPaginate . withInners tableMap $ forestRoots)
in toVersionedWithCount (Set.size forestRoots) tableMapSorted in toVersionedWithCount (Set.size forestRoots) tableMapSorted
where where
...@@ -524,8 +534,8 @@ searchTableNgrams versionedTableMap NgramsSearchQuery{..} = ...@@ -524,8 +534,8 @@ searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
-- | For each input root, extends its occurrence count with -- | For each input root, extends its occurrence count with
-- the information found in the subitems. -- the information found in the subitems.
_withInners :: Map NgramsTerm NgramsElement -> Set NgramsElement -> Set NgramsElement withInners :: Map NgramsTerm NgramsElement -> Set NgramsElement -> Set NgramsElement
_withInners tblMap roots = Set.map addSubitemsOccurrences roots withInners tblMap roots = Set.map addSubitemsOccurrences roots
where where
addSubitemsOccurrences :: NgramsElement -> NgramsElement addSubitemsOccurrences :: NgramsElement -> NgramsElement
addSubitemsOccurrences e = addSubitemsOccurrences e =
......
...@@ -8,7 +8,7 @@ import Prelude ...@@ -8,7 +8,7 @@ import Prelude
import Control.Lens import Control.Lens
import Data.Map.Strict qualified as Map import Data.Map.Strict qualified as Map
import Data.Text qualified as T import Data.Text qualified as T
import Gargantext.API.Ngrams (filterNgramsNodes, buildForest, destroyForest) import Gargantext.API.Ngrams (filterNgramsNodes, buildForest, destroyForest, pruneForest)
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
import Gargantext.API.Ngrams.Types qualified as NT import Gargantext.API.Ngrams.Types qualified as NT
import Gargantext.Core import Gargantext.Core
...@@ -104,6 +104,8 @@ tests = describe "Ngrams" $ do ...@@ -104,6 +104,8 @@ tests = describe "Ngrams" $ do
describe "ngram forests" $ do describe "ngram forests" $ do
it "building a simple tree works" testBuildNgramsTree_01 it "building a simple tree works" testBuildNgramsTree_01
it "building a complex tree works" testBuildNgramsTree_02 it "building a complex tree works" testBuildNgramsTree_02
it "pruning a simple tree works" testPruningNgramsForest_01
it "pruning a complex tree works" testPruningNgramsForest_02
prop "destroyForest . buildForest === id" buildDestroyForestRoundtrips prop "destroyForest . buildForest === id" buildDestroyForestRoundtrips
describe "hierarchical grouping" $ do describe "hierarchical grouping" $ do
it "filterNgramsNodes with empty query is identity" testFilterNgramsNodesEmptyQuery it "filterNgramsNodes with empty query is identity" testFilterNgramsNodesEmptyQuery
...@@ -204,3 +206,24 @@ instance Arbitrary TableMapLockStep where ...@@ -204,3 +206,24 @@ instance Arbitrary TableMapLockStep where
buildDestroyForestRoundtrips :: TableMapLockStep -> Property buildDestroyForestRoundtrips :: TableMapLockStep -> Property
buildDestroyForestRoundtrips (TableMapLockStep mp) = buildDestroyForestRoundtrips (TableMapLockStep mp) =
(destroyForest . buildForest $ mp) === mp (destroyForest . buildForest $ mp) === mp
testPruningNgramsForest_01 :: Property
testPruningNgramsForest_01 =
let t1 = Map.fromList [ ( "foo", mkMapTerm "foo" & ne_children .~ mSetFromList ["bar"])
, ( "bar", mkMapTerm "bar" & ne_parent .~ Just "foo")
]
in (pruneForest $ buildForest t1) `compareForestVisually` [r|
foo
|
`- bar
|]
testPruningNgramsForest_02 :: Property
testPruningNgramsForest_02 =
(pruneForest $ buildForest hierarchicalTableMap) `compareForestVisually` [r|
vehicle
|
`- car
|
`- ford
|]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment