Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
197
Issues
197
List
Board
Labels
Milestones
Merge Requests
11
Merge Requests
11
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
f036a436
Commit
f036a436
authored
Jul 28, 2025
by
Alfredo Di Napoli
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Allow terms to be searched even if they appear nested
parent
204c2052
Changes
4
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
140 additions
and
56 deletions
+140
-56
Ngrams.hs
src/Gargantext/API/Ngrams.hs
+78
-18
Instances.hs
test/Test/Instances.hs
+3
-3
Query.hs
test/Test/Ngrams/Query.hs
+47
-28
Ngrams.hs
test/Test/Offline/Ngrams.hs
+12
-7
No files found.
src/Gargantext/API/Ngrams.hs
View file @
f036a436
...
...
@@ -25,6 +25,7 @@ add get
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TupleSections #-}
module
Gargantext.API.Ngrams
...
...
@@ -91,6 +92,8 @@ module Gargantext.API.Ngrams
,
filterNgramsNodes
-- * Operations on a forest
,
BuildForestError
(
..
)
,
renderLoop
,
buildForest
,
destroyForest
,
pruneForest
...
...
@@ -105,12 +108,12 @@ import Data.Map.Strict.Patch qualified as PM
import
Data.Patch.Class
(
Action
(
act
),
Transformable
(
..
),
ours
)
import
Data.Set
qualified
as
Set
import
Data.Text
(
isInfixOf
,
toLower
,
unpack
)
import
Data.Text
qualified
as
T
import
Data.Text.Lazy.IO
as
DTL
(
writeFile
)
import
Data.Tree
import
Gargantext.API.Ngrams.Tools
(
getNodeStory
)
import
Gargantext.API.Ngrams.Types
import
Gargantext.Core.NodeStory
hiding
(
buildForest
)
import
Gargantext.Core.NodeStory
qualified
as
NodeStory
import
Gargantext.Core.Text.Ngrams
(
Ngrams
,
NgramsType
)
import
Gargantext.Core.Types
(
ListType
(
..
),
NodeId
,
ListId
,
TODO
,
assertValid
,
ContextId
,
HasValidationError
)
import
Gargantext.Core.Types.Query
(
Limit
(
..
),
Offset
(
..
),
MinSize
(
..
),
MaxSize
(
..
))
...
...
@@ -451,7 +454,7 @@ matchingNode :: Maybe ListType
->
(
NgramsTerm
->
Bool
)
->
Tree
NgramsElement
->
Bool
matchingNode
listType
minSize
maxSize
searchQuery
(
Node
inputNode
_
children
)
=
matchingNode
listType
minSize
maxSize
searchQuery
(
Node
inputNode
children
)
=
let
nodeSize
=
inputNode
^.
ne_size
matchesListType
=
maybe
(
const
True
)
(
==
)
listType
respectsMinSize
=
maybe
(
const
True
)
((
<=
)
.
getMinSize
)
minSize
...
...
@@ -459,15 +462,67 @@ matchingNode listType minSize maxSize searchQuery (Node inputNode _children) =
in
respectsMinSize
nodeSize
&&
respectsMaxSize
nodeSize
&&
searchQuery
(
inputNode
^.
ne_ngrams
)
-- Search for the query either in the root or in the children.
&&
(
searchQuery
(
inputNode
^.
ne_ngrams
)
||
any
(
matchingNode
listType
minSize
maxSize
searchQuery
)
children
)
&&
matchesListType
(
inputNode
^.
ne_list
)
-- | Errors returned by 'buildForest'.
data
BuildForestError
=
-- We found a loop, something that shouldn't normally happen if the calling
-- code is correct by construction, but if that does happen, the value will
-- contain the full path to the cycle.
BFE_loop_detected
!
(
Set
VisitedNode
)
deriving
(
Show
,
Eq
)
renderLoop
::
Set
VisitedNode
->
T
.
Text
renderLoop
=
T
.
intercalate
" -> "
.
map
(
unNgramsTerm
.
_vn_term
)
.
Set
.
toAscList
-- | Keeps track of the relative order in which visited a node, to be able to print cycles.
data
VisitedNode
=
VN
{
_vn_position
::
!
Int
,
_vn_term
::
!
NgramsTerm
}
deriving
(
Show
)
instance
Eq
VisitedNode
where
(
VN
_
t1
)
==
(
VN
_
t2
)
=
t1
==
t2
instance
Ord
VisitedNode
where
compare
(
VN
_
t1
)
(
VN
_
t2
)
=
t1
`
compare
`
t2
type
TreeNode
=
(
NgramsTerm
,
NgramsElement
)
-- | Version of 'buildForest' specialised over the 'NgramsElement' as the values of the tree.
-- We can't use a single function to \"rule them all\" because the 'NgramsRepoElement', that
-- the 'NodeStory' uses does not have an 'ngrams' we can use as the key when building and
-- destroying a forest.
buildForest
::
Map
NgramsTerm
NgramsElement
->
Forest
NgramsElement
buildForest
=
map
(
fmap
snd
)
.
NodeStory
.
buildForest
-- /IMPORTANT/: This functions returns an error in case we found a loop.
buildForest
::
Map
NgramsTerm
NgramsElement
->
Either
BuildForestError
(
Forest
NgramsElement
)
buildForest
mp
=
fmap
(
map
(
fmap
snd
))
.
unfoldForestM
unfoldNode
$
Map
.
toList
mp
where
unfoldNode
::
TreeNode
->
Either
BuildForestError
(
TreeNode
,
[
TreeNode
])
unfoldNode
(
n
,
el
)
=
flip
evalState
(
1
::
Int
,
mempty
)
.
runExceptT
$
do
let
initialChildren
=
getChildren
(
mSetToList
$
_ne_children
el
)
go
initialChildren
*>
pure
(
mkTreeNode
(
n
,
el
))
where
go
::
[
NgramsElement
]
->
ExceptT
BuildForestError
(
State
(
Int
,
Set
VisitedNode
))
()
go
[]
=
pure
()
go
(
x
:
xs
)
=
do
(
pos
,
visited
)
<-
get
let
nt
=
_ne_ngrams
x
case
Set
.
member
(
VN
pos
nt
)
visited
of
True
->
throwError
$
BFE_loop_detected
visited
False
->
do
put
(
pos
+
1
,
Set
.
insert
(
VN
(
pos
+
1
)
nt
)
visited
)
go
(
getChildren
(
mSetToList
$
_ne_children
x
)
<>
xs
)
mkTreeNode
::
TreeNode
->
(
TreeNode
,
[
TreeNode
])
mkTreeNode
(
k
,
el
)
=
((
k
,
el
),
mapMaybe
findChildren
$
mSetToList
(
el
^.
ne_children
))
findChildren
::
NgramsTerm
->
Maybe
TreeNode
findChildren
t
=
Map
.
lookup
t
mp
<&>
\
el
->
(
t
,
el
)
getChildren
::
[
NgramsTerm
]
->
[
NgramsElement
]
getChildren
=
mapMaybe
(`
Map
.
lookup
`
mp
)
-- | Folds an Ngrams forest back to a table map.
-- This function doesn't aggregate information, but merely just recostructs the original
...
...
@@ -494,19 +549,21 @@ destroyForest f = Map.fromList . map (foldTree destroyTree) $ f
searchTableNgrams
::
Versioned
(
Map
NgramsTerm
NgramsElement
)
->
NgramsSearchQuery
-- ^ The search query on the retrieved data
->
VersionedWithCount
NgramsTable
->
Either
BuildForestError
(
VersionedWithCount
NgramsTable
)
searchTableNgrams
versionedTableMap
NgramsSearchQuery
{
..
}
=
let
tableMap
=
versionedTableMap
^.
v_data
forestRoots
=
Set
.
fromList
.
Map
.
elems
.
destroyForest
.
filterNgramsNodes
_nsq_listType
_nsq_minSize
_nsq_maxSize
_nsq_searchQuery
.
buildForest
$
tableMap
tableMapSorted
=
versionedTableMap
&
v_data
.~
(
NgramsTable
.
sortAndPaginate
.
withInners
tableMap
$
forestRoots
)
in
toVersionedWithCount
(
Set
.
size
forestRoots
)
tableMapSorted
in
case
buildForest
tableMap
of
Left
err
->
Left
err
Right
fs
->
let
forestRoots
=
Set
.
fromList
.
Map
.
elems
.
destroyForest
.
filterNgramsNodes
_nsq_listType
_nsq_minSize
_nsq_maxSize
_nsq_searchQuery
$
fs
tableMapSorted
=
versionedTableMap
&
v_data
.~
(
NgramsTable
.
sortAndPaginate
.
withInners
tableMap
$
forestRoots
)
in
Right
$
toVersionedWithCount
(
Set
.
size
forestRoots
)
tableMapSorted
where
-- Sorts the input 'NgramsElement' list.
...
...
@@ -565,8 +622,11 @@ getTableNgrams :: NodeStoryEnv err
getTableNgrams
env
nodeId
listId
tabType
searchQuery
=
do
let
ngramsType
=
ngramsTypeFromTabType
tabType
versionedInput
<-
getNgramsTable'
env
nodeId
listId
ngramsType
pure
$
searchTableNgrams
versionedInput
searchQuery
-- FIXME(adn) In case of a loop at the moment we just return the
-- empty result set, but we should probably bubble the error upstream.
pure
$
case
searchTableNgrams
versionedInput
searchQuery
of
Left
_err
->
VersionedWithCount
0
0
(
NgramsTable
mempty
)
Right
x
->
x
-- | Helper function to get the ngrams table with scores.
getNgramsTable'
::
NodeStoryEnv
err
...
...
test/Test/Instances.hs
View file @
f036a436
...
...
@@ -500,14 +500,14 @@ instance Arbitrary Ngrams.NgramsElement where
-- because we still want to simulate potential hierarchies, i.e. forests of ngrams.
-- so we sample the ngrams terms from a selection, and we restrict the number of max
-- children for each 'NgramsElement' to the size parameter to not have very large trees.
arbitrary
=
do
arbitrary
=
sized
$
\
n
->
do
_ne_ngrams
<-
arbitrary
_ne_size
<-
getPositive
<$>
arbitrary
-- it doesn't make sense to have a negative size
_ne_list
<-
arbitrary
_ne_occurrences
<-
arbitrary
_ne_occurrences
<-
resize
n
arbitrary
_ne_root
<-
arbitrary
`
suchThat
`
(
maybe
True
(
\
x
->
x
/=
_ne_ngrams
))
-- can't be root of itself
_ne_parent
<-
arbitrary
`
suchThat
`
(
maybe
True
(
\
x
->
x
/=
_ne_ngrams
))
-- can't be parent of itself
_ne_children
<-
Ngrams
.
mSetFromList
<$>
(
sized
(
\
n
->
vectorOf
n
arbitrary
`
suchThat
`
(
\
x
->
_ne_ngrams
`
notElem
`
x
)
))
-- can't be cyclic
_ne_children
<-
Ngrams
.
mSetFromList
<$>
(
vectorOf
n
arbitrary
`
suchThat
`
(
\
x
->
_ne_ngrams
`
notElem
`
x
))
-- can't be cyclic
pure
Ngrams
.
NgramsElement
{
..
}
instance
Arbitrary
Ngrams
.
NgramsTable
where
...
...
test/Test/Ngrams/Query.hs
View file @
f036a436
This diff is collapsed.
Click to expand it.
test/Test/Offline/Ngrams.hs
View file @
f036a436
...
...
@@ -125,7 +125,7 @@ hierarchicalTableMap = Map.fromList [
testFilterNgramsNodesEmptyQuery
::
Assertion
testFilterNgramsNodesEmptyQuery
=
do
let
input
=
buildForest
hierarchicalTableMap
let
input
=
buildForest
OrFail
hierarchicalTableMap
let
actual
=
filterNgramsNodes
(
Just
MapTerm
)
Nothing
Nothing
(
const
True
)
input
actual
@?=
input
...
...
@@ -149,6 +149,11 @@ newtype ASCIIForest = ASCIIForest String
instance
Show
ASCIIForest
where
show
(
ASCIIForest
x
)
=
x
buildForestOrFail
::
Map
NgramsTerm
NgramsElement
->
Forest
NgramsElement
buildForestOrFail
mp
=
case
buildForest
mp
of
Left
err
->
error
(
show
err
)
Right
x
->
x
compareForestVisually
::
Forest
NgramsElement
->
String
->
Property
compareForestVisually
f
expected
=
let
actual
=
init
$
drawForest
(
map
(
fmap
renderEl
)
f
)
...
...
@@ -173,7 +178,7 @@ testBuildNgramsTree_01 =
let
t1
=
Map
.
fromList
[
(
"foo"
,
mkMapTerm
"foo"
&
ne_children
.~
mSetFromList
[
"bar"
])
,
(
"bar"
,
mkMapTerm
"bar"
&
ne_parent
.~
Just
"foo"
)
]
in
(
buildForest
t1
)
`
compareForestVisually
`
[
r
|
in
(
buildForest
OrFail
t1
)
`
compareForestVisually
`
[
r
|
bar
foo
...
...
@@ -183,7 +188,7 @@ testBuildNgramsTree_01 =
testBuildNgramsTree_02
::
Property
testBuildNgramsTree_02
=
buildForest
hierarchicalTableMap
`
compareForestVisually
`
[
r
|
buildForest
OrFail
hierarchicalTableMap
`
compareForestVisually
`
[
r
|
car
|
`- ford
...
...
@@ -246,7 +251,7 @@ testBuildNgramsTree_03 =
)
]
in
pruneForest
(
buildForest
input
)
`
compareForestVisually
`
[
r
|
in
pruneForest
(
buildForest
OrFail
input
)
`
compareForestVisually
`
[
r
|
animalia
|
`- chordata
...
...
@@ -282,14 +287,14 @@ instance Arbitrary TableMapLockStep where
-- /PRECONDITION/: The '_ne_ngrams' field always matches the 'NgramsTerm', key of the map.
buildDestroyForestRoundtrips
::
TableMapLockStep
->
Property
buildDestroyForestRoundtrips
(
TableMapLockStep
mp
)
=
(
destroyForest
.
buildForest
$
mp
)
===
mp
(
destroyForest
.
buildForest
OrFail
$
mp
)
===
mp
testPruningNgramsForest_01
::
Property
testPruningNgramsForest_01
=
let
t1
=
Map
.
fromList
[
(
"foo"
,
mkMapTerm
"foo"
&
ne_children
.~
mSetFromList
[
"bar"
])
,
(
"bar"
,
mkMapTerm
"bar"
&
ne_parent
.~
Just
"foo"
)
]
in
(
pruneForest
$
buildForest
t1
)
`
compareForestVisually
`
[
r
|
in
(
pruneForest
$
buildForest
OrFail
t1
)
`
compareForestVisually
`
[
r
|
foo
|
`- bar
...
...
@@ -297,7 +302,7 @@ testPruningNgramsForest_01 =
testPruningNgramsForest_02
::
Property
testPruningNgramsForest_02
=
(
pruneForest
$
buildForest
hierarchicalTableMap
)
`
compareForestVisually
`
[
r
|
(
pruneForest
$
buildForest
OrFail
hierarchicalTableMap
)
`
compareForestVisually
`
[
r
|
vehicle
|
`- car
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment