Try to keep only the roots in searchTableNgrams (breaks tests)

2eb05b13 · Alfredo Di Napoli · 7be89ae5 · 2eb05b13
Commit 2eb05b13 authored Sep 08, 2025 by Alfredo Di Napoli
Show whitespace changes
Inline Side-by-side

Showing with 54 additions and 49 deletions

Ngrams.hs src/Gargantext/API/Ngrams.hs +54 -49

No files found.
--- a/src/Gargantext/API/Ngrams.hs
+++ b/src/Gargantext/API/Ngrams.hs
@@ -114,7 +114,7 @@ import Gargantext.API.Ngrams.Types
 import Gargantext.Core.NodeStory hiding (buildForest)
 import Gargantext.Core.NodeStory qualified as NodeStory
 import Gargantext.Core.Text.Ngrams (Ngrams, NgramsType)
-import Gargantext.Core.Types (ListType(..), NodeId, ListId, TODO, assertValid, ContextId, HasValidationError)
+import Gargantext.Core.Types (ListType(..), NodeId, ListId, TODO, assertValid, HasValidationError)
 import Gargantext.Core.Types.Query (Limit(..), Offset(..), MinSize(..), MaxSize(..))
 import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
 import Gargantext.Database.Prelude
@@ -482,59 +482,64 @@ searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement)
                  -> Either BuildForestError (VersionedWithCount NgramsTable)
 searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
  let tableMap     = versionedTableMap ^. v_data
-  in case buildForest tableMap of
+  in case keepRoots <$> buildForest tableMap of
       Left err -> Left err
       Right fs ->
-         let forestRoots = Set.fromList
+         let forestRoots = filterNgramsNodes _nsq_listType _nsq_minSize _nsq_maxSize _nsq_searchQuery $ fs
-                         . Map.elems
-                         . destroyForest
-                         . filterNgramsNodes _nsq_listType _nsq_minSize _nsq_maxSize _nsq_searchQuery
-                         $ fs
             tableMapSorted = versionedTableMap
-                            & v_data .~ (NgramsTable . sortAndPaginate . withInners tableMap $ forestRoots)
+                            & v_data .~ (NgramsTable . Map.elems
+                                                     . destroyForest
+                                                     . sortAndPaginateForest _nsq_offset _nsq_limit _nsq_orderBy
+                                                     . withInnersForest
+                                                     $ forestRoots
+                                                     )
-         in Right $ toVersionedWithCount (Set.size forestRoots) tableMapSorted
+         in Right $ toVersionedWithCount (length forestRoots) tableMapSorted
-  where
+keepRoots :: Forest NgramsElement -> Forest NgramsElement
+keepRoots = filter (\(Node r _) -> isNothing (_ne_root r) || isNothing (_ne_parent r))
-    -- Sorts the input 'NgramsElement' list.
+-- | For each input root, extends its occurrence count with
-    -- /IMPORTANT/: As we might be sorting ngrams in all sorts of language,
+-- the information found in the subforest.
-    -- some of them might include letters with accents and other unicode symbols,
+withInnersForest :: Forest NgramsElement -> Forest NgramsElement
-    -- but we need to filter those /diacritics/ out so that the sorting would
+withInnersForest = map sumSubitemsOccurrences
-    -- happen in the way users would expect. See ticket #331.
-    sortOnOrder :: Maybe OrderBy -> ([NgramsElement] -> [NgramsElement])
-    sortOnOrder Nothing          = sortOnOrder (Just ScoreDesc)
-    sortOnOrder (Just TermAsc)   = List.sortBy ngramTermsAscSorter
-    sortOnOrder (Just TermDesc)  = List.sortBy ngramTermsDescSorter
-    sortOnOrder (Just ScoreAsc)  = List.sortOn $ view (ne_occurrences . to Set.size)
-    sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view (ne_occurrences . to Set.size)
-    ngramTermsAscSorter  = on unicodeDUCETSorter (unNgramsTerm . view ne_ngrams)
-    ngramTermsDescSorter = on (\n1 n2 -> unicodeDUCETSorter n2 n1) (unNgramsTerm . view ne_ngrams)
-    -- | For each input root, extends its occurrence count with
-    -- the information found in the subitems.
-    withInners :: Map NgramsTerm NgramsElement -> Set NgramsElement -> Set NgramsElement
-    withInners tblMap roots = Set.map addSubitemsOccurrences roots
  where
-        addSubitemsOccurrences :: NgramsElement -> NgramsElement
+    sumSubitemsOccurrences :: Tree NgramsElement -> Tree NgramsElement
-        addSubitemsOccurrences e =
+    sumSubitemsOccurrences (Node root children) =
-          e { _ne_occurrences = foldl' alterOccurrences (e ^. ne_occurrences) (e ^. ne_children) }
+      let children' = withInnersForest children
+          root' = root { _ne_occurrences = (_ne_occurrences root) <> foldMap (_ne_occurrences . rootLabel) children' }
-        alterOccurrences :: Set ContextId -> NgramsTerm -> Set ContextId
+      in Node root' children'
-        alterOccurrences occs t = case Map.lookup t tblMap of
-          Nothing -> occs
+sortAndPaginateForest :: Maybe Offset
-          Just e' -> occs <> e' ^. ne_occurrences
+                      -> Limit
+                      -> Maybe OrderBy
-    -- | Paginate the results
+                      -> Forest NgramsElement
-    sortAndPaginate :: Set NgramsElement -> [NgramsElement]
+                      -> Forest NgramsElement
-    sortAndPaginate xs =
+sortAndPaginateForest mb_offset limit orderBy xs =
-      let offset'  = getOffset $ maybe 0 identity _nsq_offset
+  let offset'  = getOffset $ maybe 0 identity mb_offset
-      in   take (getLimit _nsq_limit)
+  in   take (getLimit limit)
     . drop offset'
-         . sortOnOrder _nsq_orderBy
+     . sortOnOrderForest orderBy
-         . Set.toList
     $ xs
+-- Sorts the input 'NgramsElement' list.
+-- /IMPORTANT/: As we might be sorting ngrams in all sorts of language,
+-- some of them might include letters with accents and other unicode symbols,
+-- but we need to filter those /diacritics/ out so that the sorting would
+-- happen in the way users would expect. See ticket #331.
+sortOnOrderForest :: Maybe OrderBy -> (Forest NgramsElement -> Forest NgramsElement)
+sortOnOrderForest Nothing          = sortOnOrderForest (Just ScoreDesc)
+sortOnOrderForest (Just TermAsc)   = List.sortBy (\(Node t1 _) (Node t2 _) -> ngramTermsAscSorter t1 t2)
+sortOnOrderForest (Just TermDesc)  = List.sortBy (\(Node t1 _) (Node t2 _) -> ngramTermsDescSorter t1 t2)
+sortOnOrderForest (Just ScoreAsc)  = List.sortOn $ \(Node root _) -> root ^. (ne_occurrences . to Set.size)
+sortOnOrderForest (Just ScoreDesc) = List.sortOn $ Down . (\(Node root _) -> root ^. (ne_occurrences . to Set.size))
+ngramTermsAscSorter :: NgramsElement -> NgramsElement -> Ordering
+ngramTermsAscSorter  = on unicodeDUCETSorter (unNgramsTerm . view ne_ngrams)
+ngramTermsDescSorter :: NgramsElement -> NgramsElement -> Ordering
+ngramTermsDescSorter = on (\n1 n2 -> unicodeDUCETSorter n2 n1) (unNgramsTerm . view ne_ngrams)
 -- | This function allows sorting two texts via their unicode sorting
 -- (as opposed as the standard lexicographical sorting) by relying on
 -- the DUCET table, a table that specifies the ordering of all unicode