Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
191
Issues
191
List
Board
Labels
Milestones
Merge Requests
8
Merge Requests
8
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
4b0e60f2
Commit
4b0e60f2
authored
Jul 14, 2025
by
Alfredo Di Napoli
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Try introducing buildForest and destroyForest in Gargantext.API.Ngrams
parent
fca2b73e
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
118 additions
and
56 deletions
+118
-56
Ngrams.hs
src/Gargantext/API/Ngrams.hs
+43
-45
Ngrams.hs
test/Test/Offline/Ngrams.hs
+75
-11
No files found.
src/Gargantext/API/Ngrams.hs
View file @
4b0e60f2
...
...
@@ -84,17 +84,17 @@ module Gargantext.API.Ngrams
-- * Handlers to be used when serving top-level API requests
,
getTableNgramsCorpusHandler
-- * Internals
,
for testing
-- * Internals for testing
,
compute_new_state_patches
,
PatchHistory
(
..
)
,
newNgramsFromNgramsStatePatch
,
filterNgramsNodes
,
rootOfNgramsElemen
t
,
matchingNode
,
buildFores
t
,
destroyForest
)
where
import
Control.Lens
(
view
,
(
^..
),
(
+~
),
(
%~
),
msumOf
,
at
,
ix
,
_Just
,
Each
(
..
),
(
%%~
),
ifolded
,
to
,
withIndex
,
over
)
import
Control.Lens
(
view
,
(
^..
),
(
+~
),
(
%~
),
msumOf
,
at
,
ix
,
_Just
,
Each
(
..
),
(
%%~
),
ifolded
,
to
,
withIndex
)
import
Data.Aeson.Text
qualified
as
DAT
import
Data.List
qualified
as
List
import
Data.Map.Strict
qualified
as
Map
...
...
@@ -103,6 +103,7 @@ import Data.Patch.Class (Action(act), Transformable(..), ours)
import
Data.Set
qualified
as
Set
import
Data.Text
(
isInfixOf
,
toLower
,
unpack
)
import
Data.Text.Lazy.IO
as
DTL
(
writeFile
)
import
Data.Tree
import
Gargantext.API.Ngrams.Tools
(
getNodeStory
)
import
Gargantext.API.Ngrams.Types
import
Gargantext.Core.NodeStory
(
ArchiveList
,
HasNodeStory
,
NgramsStatePatch
'
,
a_history
,
a_state
,
a_version
,
currentVersion
,
NodeStoryEnv
,
hasNodeArchiveStoryImmediateSaver
,
hasNodeStoryImmediateSaver
,
HasNodeStoryEnv
(
..
))
...
...
@@ -426,49 +427,20 @@ dumpJsonTableMap fpath nodeId ngramsType = do
pure
()
-- | Filters the given `tableMap` with the search criteria. It returns
-- the input map, where each bucket indexed by a 'NgramsTerm' has been
-- filtered via the given predicate. Removes the key from the map if
-- the filtering would result in the empty set.
filterNgramsNodes
::
Maybe
ListType
->
Maybe
MinSize
->
Maybe
MaxSize
->
(
NgramsTerm
->
Bool
)
->
Map
NgramsTerm
NgramsElement
->
Map
NgramsTerm
NgramsElement
->
Set
NgramsElement
filterNgramsNodes
listTy
minSize
maxSize
searchFn
tblMap
=
Set
.
fromList
$
concatMap
(
findRootPath
tblMap
)
selectedNodes
where
allNodes
=
Set
.
fromList
$
Map
.
elems
tblMap
selectedNodes
=
Set
.
filter
(
matchingNode
listTy
minSize
maxSize
searchFn
)
allNodes
-- | Given the full forest of ngrams and the current element we are iterating on,
-- returns the full path of ngrams between this node and its /root/. This informs
-- us of all the nodes we have to keep in the final result set, because if we were
-- to filter them, we would be omitting important hierarchical information and this
-- will cause ngrams to not be displayed correctly on the frontend.
findRootPath
::
Map
NgramsTerm
NgramsElement
->
NgramsElement
->
[
NgramsElement
]
findRootPath
tblMap
node
=
go
node
[]
where
go
current
!
acc
=
case
_ne_parent
current
>>=
(`
Map
.
lookup
`
tblMap
)
of
Nothing
->
current
:
acc
Just
parentNode
->
go
parentNode
(
current
:
acc
)
-- | Returns the \"root\" of the 'NgramsElement', or it falls back to the input
-- 'NgramsElement' itself, if no root can be found.
-- /CAREFUL/: The root we select might /not/ have the same 'listType' we are
-- filtering for, in which case we have to change its type to match, if needed.
->
Map
NgramsTerm
NgramsElement
->
Map
NgramsTerm
NgramsElement
->
NgramsElement
->
NgramsElement
rootOfNgramsElement
listType
tblMap
ne
=
case
ne
^.
ne_root
of
Nothing
->
ne
Just
rootKey
|
Just
r
<-
tblMap
^.
at
rootKey
-- NOTE(adinapoli) It's unclear what is the correct behaviour here: should
-- we override the type or we filter out the node altogether?
->
over
ne_list
(
\
oldList
->
fromMaybe
oldList
listType
)
r
|
otherwise
filterNgramsNodes
listTy
minSize
maxSize
searchFn
tblMap
=
flip
Map
.
mapMaybe
tblMap
$
\
e
->
case
matchingNode
listTy
minSize
maxSize
searchFn
e
of
False
->
Nothing
True
->
Just
e
-- | Returns 'True' if the input 'NgramsElement' satisfies the search criteria
-- mandated by 'NgramsSearchQuery'.
...
...
@@ -489,7 +461,32 @@ matchingNode listType minSize maxSize searchQuery inputNode =
&&
searchQuery
(
inputNode
^.
ne_ngrams
)
&&
matchesListType
(
inputNode
^.
ne_list
)
-- | Builds an ngrams forest from the input ngrams table map.
buildForest
::
Map
NgramsTerm
NgramsElement
->
Forest
NgramsElement
buildForest
mp
=
unfoldForest
mkTreeNode
(
Map
.
toList
mp
)
where
mkTreeNode
::
(
NgramsTerm
,
NgramsElement
)
->
(
NgramsElement
,
[(
NgramsTerm
,
NgramsElement
)])
mkTreeNode
(
_
,
el
)
=
(
el
,
mapMaybe
findChildren
$
mSetToList
(
_ne_children
el
))
findChildren
::
NgramsTerm
->
Maybe
(
NgramsTerm
,
NgramsElement
)
findChildren
t
=
Map
.
lookup
t
mp
<&>
\
el
->
(
t
,
el
)
-- | Folds an Ngrams forest back to a table map.
-- FIXME(adn) propagate the root information.
destroyForest
::
Forest
NgramsElement
->
Map
NgramsTerm
NgramsElement
destroyForest
f
=
Map
.
fromList
.
map
(
foldTree
destroyTree
)
$
f
where
destroyTree
::
NgramsElement
->
[(
NgramsTerm
,
NgramsElement
)]
->
(
NgramsTerm
,
NgramsElement
)
destroyTree
rootEl
childrenEl
=
(
_ne_ngrams
rootEl
,
squashElements
rootEl
childrenEl
)
-- Given a list of children, generate a single node that has as the parent
-- the children, as the score the sum of the individual elements.
squashElements
::
NgramsElement
->
[(
NgramsTerm
,
NgramsElement
)]
->
NgramsElement
squashElements
r
c
=
r
{
_ne_size
=
_ne_size
r
<>
sum
(
map
(
_ne_size
.
snd
)
c
)
,
_ne_occurrences
=
_ne_occurrences
r
<>
(
mconcat
$
map
(
_ne_occurrences
.
snd
)
c
)
,
_ne_children
=
mSetFromList
$
map
fst
c
}
-- | TODO Errors management
-- TODO: polymorphic for Annuaire or Corpus or ...
...
...
@@ -507,10 +504,11 @@ searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement)
searchTableNgrams
versionedTableMap
NgramsSearchQuery
{
..
}
=
let
tableMap
=
versionedTableMap
^.
v_data
filteredData
=
filterNgramsNodes
_nsq_listType
_nsq_minSize
_nsq_maxSize
_nsq_searchQuery
tableMap
forestRoots
=
Set
.
fromList
.
Map
.
elems
.
destroyForest
.
buildForest
$
filteredData
tableMapSorted
=
versionedTableMap
&
v_data
.~
(
NgramsTable
.
sortAndPaginate
.
withInners
tableMap
$
filteredData
)
&
v_data
.~
(
NgramsTable
.
sortAndPaginate
$
forestRoots
)
in
toVersionedWithCount
(
Set
.
size
f
ilteredData
)
tableMapSorted
in
toVersionedWithCount
(
Set
.
size
f
orestRoots
)
tableMapSorted
where
-- Sorts the input 'NgramsElement' list.
...
...
@@ -530,8 +528,8 @@ searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
-- | For each input root, extends its occurrence count with
-- the information found in the subitems.
withInners
::
Map
NgramsTerm
NgramsElement
->
Set
NgramsElement
->
Set
NgramsElement
withInners
tblMap
roots
=
Set
.
map
addSubitemsOccurrences
roots
_
withInners
::
Map
NgramsTerm
NgramsElement
->
Set
NgramsElement
->
Set
NgramsElement
_
withInners
tblMap
roots
=
Set
.
map
addSubitemsOccurrences
roots
where
addSubitemsOccurrences
::
NgramsElement
->
NgramsElement
addSubitemsOccurrences
e
=
...
...
test/Test/Offline/Ngrams.hs
View file @
4b0e60f2
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE ViewPatterns #-}
module
Test.Offline.Ngrams
(
tests
)
where
import
Prelude
import
Control.Lens
import
Data.Map.Strict
qualified
as
Map
import
Data.Set
qualified
as
Set
import
Data.Text
qualified
as
T
import
Gargantext.API.Ngrams
(
filterNgramsNodes
)
import
Gargantext.API.Ngrams
(
filterNgramsNodes
,
buildForest
)
import
Gargantext.API.Ngrams.Types
import
Gargantext.API.Ngrams.Types
qualified
as
NT
import
Gargantext.Core
...
...
@@ -23,6 +24,10 @@ import Test.Instances ()
import
Test.Ngrams.Query
(
mkMapTerm
)
import
Test.QuickCheck
import
Test.QuickCheck
qualified
as
QC
import
Data.Tree
import
Text.RawString.QQ
(
r
)
import
Data.Char
(
isSpace
)
import
Data.Map.Strict
(
Map
)
genScientificText
::
Gen
T
.
Text
...
...
@@ -95,13 +100,14 @@ tests = describe "Ngrams" $ do
it
"return results for non-empty input terms"
$
property
testBuildPatternsNonEmpty
describe
"docNgrams"
$
do
it
"always matches if the input text contains any of the terms"
$
property
testDocNgramsOKMatch
describe
"ngram forests"
$
do
it
"building a simple tree works"
testBuildNgramsTree_01
it
"building a complex tree works"
testBuildNgramsTree_02
describe
"hierarchical grouping"
$
do
it
"filterNgramsNodes with empty query is identity"
testFilterNgramsNodesEmptyQuery
testFilterNgramsNodesEmptyQuery
::
Assertion
testFilterNgramsNodesEmptyQuery
=
do
let
input
=
Map
.
fromList
[
hierarchicalTableMap
::
Map
NgramsTerm
NgramsElement
hierarchicalTableMap
=
Map
.
fromList
[
(
"vehicle"
,
mkMapTerm
"vehicle"
&
ne_children
.~
mSetFromList
[
"car"
])
,
(
"car"
,
mkMapTerm
"car"
&
ne_root
.~
Just
"vehicle"
&
ne_parent
.~
Just
"vehicle"
...
...
@@ -109,8 +115,12 @@ testFilterNgramsNodesEmptyQuery = do
,
(
"ford"
,
mkMapTerm
"ford"
&
ne_root
.~
Just
"vehicle"
&
ne_parent
.~
Just
"car"
)
]
testFilterNgramsNodesEmptyQuery
::
Assertion
testFilterNgramsNodesEmptyQuery
=
do
let
input
=
hierarchicalTableMap
let
actual
=
filterNgramsNodes
(
Just
MapTerm
)
Nothing
Nothing
(
const
True
)
input
actual
@?=
(
Set
.
fromList
$
Map
.
elems
input
)
actual
@?=
input
testDocNgramsOKMatch
::
Lang
->
DocumentWithMatches
->
Property
testDocNgramsOKMatch
lang
(
DocumentWithMatches
ts
doc
)
=
...
...
@@ -125,3 +135,57 @@ testBuildPatternsNonEmpty :: Lang -> NonEmptyList NgramsTermNonEmpty -> Property
testBuildPatternsNonEmpty
lang
ts
=
let
ts'
=
map
(
NT
.
NgramsTerm
.
unNgramsTermNonEmpty
)
$
getNonEmpty
ts
in
counterexample
"buildPatterns returned no results"
$
length
(
buildPatternsWith
lang
ts'
)
>
0
newtype
ASCIIForest
=
ASCIIForest
String
deriving
Eq
instance
Show
ASCIIForest
where
show
(
ASCIIForest
x
)
=
x
compareForestVisually
::
Forest
NgramsElement
->
String
->
Property
compareForestVisually
f
expected
=
let
actual
=
init
$
drawForest
(
map
(
fmap
renderEl
)
f
)
outermostIndentation
=
T
.
length
.
T
.
takeWhile
isSpace
.
T
.
dropWhile
(
==
'
\n
'
)
.
T
.
pack
$
expected
in
ASCIIForest
actual
===
ASCIIForest
(
sanitiseDrawing
outermostIndentation
expected
)
where
renderEl
::
NgramsElement
->
String
renderEl
=
T
.
unpack
.
unNgramsTerm
.
_ne_ngrams
toTextPaths
::
String
->
[
T
.
Text
]
toTextPaths
=
T
.
splitOn
"
\n
"
.
T
.
strip
.
T
.
pack
sanitiseDrawing
::
Int
->
String
->
String
sanitiseDrawing
outermostIndentation
=
let
dropLayout
t
=
case
T
.
uncons
t
of
Just
(
' '
,
_
)
->
T
.
drop
outermostIndentation
t
_
->
t
-- leave it be
in
T
.
unpack
.
T
.
unlines
.
map
dropLayout
.
toTextPaths
testBuildNgramsTree_01
::
Property
testBuildNgramsTree_01
=
let
t1
=
Map
.
fromList
[
(
"foo"
,
mkMapTerm
"foo"
&
ne_children
.~
mSetFromList
[
"bar"
])
,
(
"bar"
,
mkMapTerm
"bar"
&
ne_parent
.~
Just
"foo"
)
]
in
(
buildForest
t1
)
`
compareForestVisually
`
[
r
|
bar
foo
|
`- bar
|]
testBuildNgramsTree_02
::
Property
testBuildNgramsTree_02
=
buildForest
hierarchicalTableMap
`
compareForestVisually
`
[
r
|
car
|
`- ford
ford
vehicle
|
`- car
|
`- ford
|]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment