Commit 2eb05b13 authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Try to keep only the roots in searchTableNgrams (breaks tests)

parent 7be89ae5
...@@ -114,7 +114,7 @@ import Gargantext.API.Ngrams.Types ...@@ -114,7 +114,7 @@ import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory hiding (buildForest) import Gargantext.Core.NodeStory hiding (buildForest)
import Gargantext.Core.NodeStory qualified as NodeStory import Gargantext.Core.NodeStory qualified as NodeStory
import Gargantext.Core.Text.Ngrams (Ngrams, NgramsType) import Gargantext.Core.Text.Ngrams (Ngrams, NgramsType)
import Gargantext.Core.Types (ListType(..), NodeId, ListId, TODO, assertValid, ContextId, HasValidationError) import Gargantext.Core.Types (ListType(..), NodeId, ListId, TODO, assertValid, HasValidationError)
import Gargantext.Core.Types.Query (Limit(..), Offset(..), MinSize(..), MaxSize(..)) import Gargantext.Core.Types.Query (Limit(..), Offset(..), MinSize(..), MaxSize(..))
import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast) import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
import Gargantext.Database.Prelude import Gargantext.Database.Prelude
...@@ -482,59 +482,64 @@ searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement) ...@@ -482,59 +482,64 @@ searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement)
-> Either BuildForestError (VersionedWithCount NgramsTable) -> Either BuildForestError (VersionedWithCount NgramsTable)
searchTableNgrams versionedTableMap NgramsSearchQuery{..} = searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
let tableMap = versionedTableMap ^. v_data let tableMap = versionedTableMap ^. v_data
in case buildForest tableMap of in case keepRoots <$> buildForest tableMap of
Left err -> Left err Left err -> Left err
Right fs -> Right fs ->
let forestRoots = Set.fromList let forestRoots = filterNgramsNodes _nsq_listType _nsq_minSize _nsq_maxSize _nsq_searchQuery $ fs
. Map.elems
. destroyForest
. filterNgramsNodes _nsq_listType _nsq_minSize _nsq_maxSize _nsq_searchQuery
$ fs
tableMapSorted = versionedTableMap tableMapSorted = versionedTableMap
& v_data .~ (NgramsTable . sortAndPaginate . withInners tableMap $ forestRoots) & v_data .~ (NgramsTable . Map.elems
. destroyForest
. sortAndPaginateForest _nsq_offset _nsq_limit _nsq_orderBy
. withInnersForest
$ forestRoots
)
in Right $ toVersionedWithCount (Set.size forestRoots) tableMapSorted in Right $ toVersionedWithCount (length forestRoots) tableMapSorted
where
keepRoots :: Forest NgramsElement -> Forest NgramsElement
keepRoots = filter (\(Node r _) -> isNothing (_ne_root r) || isNothing (_ne_parent r))
-- Sorts the input 'NgramsElement' list. -- | For each input root, extends its occurrence count with
-- /IMPORTANT/: As we might be sorting ngrams in all sorts of language, -- the information found in the subforest.
-- some of them might include letters with accents and other unicode symbols, withInnersForest :: Forest NgramsElement -> Forest NgramsElement
-- but we need to filter those /diacritics/ out so that the sorting would withInnersForest = map sumSubitemsOccurrences
-- happen in the way users would expect. See ticket #331.
sortOnOrder :: Maybe OrderBy -> ([NgramsElement] -> [NgramsElement])
sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
sortOnOrder (Just TermAsc) = List.sortBy ngramTermsAscSorter
sortOnOrder (Just TermDesc) = List.sortBy ngramTermsDescSorter
sortOnOrder (Just ScoreAsc) = List.sortOn $ view (ne_occurrences . to Set.size)
sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view (ne_occurrences . to Set.size)
ngramTermsAscSorter = on unicodeDUCETSorter (unNgramsTerm . view ne_ngrams)
ngramTermsDescSorter = on (\n1 n2 -> unicodeDUCETSorter n2 n1) (unNgramsTerm . view ne_ngrams)
-- | For each input root, extends its occurrence count with
-- the information found in the subitems.
withInners :: Map NgramsTerm NgramsElement -> Set NgramsElement -> Set NgramsElement
withInners tblMap roots = Set.map addSubitemsOccurrences roots
where where
addSubitemsOccurrences :: NgramsElement -> NgramsElement sumSubitemsOccurrences :: Tree NgramsElement -> Tree NgramsElement
addSubitemsOccurrences e = sumSubitemsOccurrences (Node root children) =
e { _ne_occurrences = foldl' alterOccurrences (e ^. ne_occurrences) (e ^. ne_children) } let children' = withInnersForest children
root' = root { _ne_occurrences = (_ne_occurrences root) <> foldMap (_ne_occurrences . rootLabel) children' }
alterOccurrences :: Set ContextId -> NgramsTerm -> Set ContextId in Node root' children'
alterOccurrences occs t = case Map.lookup t tblMap of
Nothing -> occs sortAndPaginateForest :: Maybe Offset
Just e' -> occs <> e' ^. ne_occurrences -> Limit
-> Maybe OrderBy
-- | Paginate the results -> Forest NgramsElement
sortAndPaginate :: Set NgramsElement -> [NgramsElement] -> Forest NgramsElement
sortAndPaginate xs = sortAndPaginateForest mb_offset limit orderBy xs =
let offset' = getOffset $ maybe 0 identity _nsq_offset let offset' = getOffset $ maybe 0 identity mb_offset
in take (getLimit _nsq_limit) in take (getLimit limit)
. drop offset' . drop offset'
. sortOnOrder _nsq_orderBy . sortOnOrderForest orderBy
. Set.toList
$ xs $ xs
-- Sorts the input 'NgramsElement' list.
-- /IMPORTANT/: As we might be sorting ngrams in all sorts of language,
-- some of them might include letters with accents and other unicode symbols,
-- but we need to filter those /diacritics/ out so that the sorting would
-- happen in the way users would expect. See ticket #331.
sortOnOrderForest :: Maybe OrderBy -> (Forest NgramsElement -> Forest NgramsElement)
sortOnOrderForest Nothing = sortOnOrderForest (Just ScoreDesc)
sortOnOrderForest (Just TermAsc) = List.sortBy (\(Node t1 _) (Node t2 _) -> ngramTermsAscSorter t1 t2)
sortOnOrderForest (Just TermDesc) = List.sortBy (\(Node t1 _) (Node t2 _) -> ngramTermsDescSorter t1 t2)
sortOnOrderForest (Just ScoreAsc) = List.sortOn $ \(Node root _) -> root ^. (ne_occurrences . to Set.size)
sortOnOrderForest (Just ScoreDesc) = List.sortOn $ Down . (\(Node root _) -> root ^. (ne_occurrences . to Set.size))
ngramTermsAscSorter :: NgramsElement -> NgramsElement -> Ordering
ngramTermsAscSorter = on unicodeDUCETSorter (unNgramsTerm . view ne_ngrams)
ngramTermsDescSorter :: NgramsElement -> NgramsElement -> Ordering
ngramTermsDescSorter = on (\n1 n2 -> unicodeDUCETSorter n2 n1) (unNgramsTerm . view ne_ngrams)
-- | This function allows sorting two texts via their unicode sorting -- | This function allows sorting two texts via their unicode sorting
-- (as opposed as the standard lexicographical sorting) by relying on -- (as opposed as the standard lexicographical sorting) by relying on
-- the DUCET table, a table that specifies the ordering of all unicode -- the DUCET table, a table that specifies the ordering of all unicode
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment