Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
191
Issues
191
List
Board
Labels
Milestones
Merge Requests
8
Merge Requests
8
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
d38dbd35
Commit
d38dbd35
authored
Sep 23, 2025
by
Fabien Maniere
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Revert "Merge branch 'revert-
97441f3d
' into 'dev'"
This reverts merge request
!449
parent
c767088d
Pipeline
#7924
passed with stages
in 41 minutes and 41 seconds
Changes
5
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
114 additions
and
90 deletions
+114
-90
Ngrams.hs
src/Gargantext/API/Ngrams.hs
+64
-59
UpdateList.hs
test/Test/API/UpdateList.hs
+26
-25
Instances.hs
test/Test/Instances.hs
+5
-1
Query.hs
test/Test/Ngrams/Query.hs
+18
-4
Ngrams.hs
test/Test/Offline/Ngrams.hs
+1
-1
No files found.
src/Gargantext/API/Ngrams.hs
View file @
d38dbd35
...
...
@@ -17,14 +17,12 @@ add get
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE IncoherentInstances #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.API.Ngrams
(
...
...
@@ -114,7 +112,7 @@ import Gargantext.API.Ngrams.Types
import
Gargantext.Core.NodeStory
hiding
(
buildForest
)
import
Gargantext.Core.NodeStory
qualified
as
NodeStory
import
Gargantext.Core.Text.Ngrams
(
Ngrams
,
NgramsType
)
import
Gargantext.Core.Types
(
ListType
(
..
),
NodeId
,
ListId
,
TODO
,
assertValid
,
ContextId
,
HasValidationError
)
import
Gargantext.Core.Types
(
ListType
(
..
),
NodeId
,
ListId
,
TODO
,
assertValid
,
HasValidationError
)
import
Gargantext.Core.Types.Query
(
Limit
(
..
),
Offset
(
..
),
MinSize
(
..
),
MaxSize
(
..
))
import
Gargantext.Database.Action.Metrics.NgramsByContext
(
getOccByNgramsOnlyFast
)
import
Gargantext.Database.Prelude
...
...
@@ -458,14 +456,16 @@ buildForest = fmap (map (fmap snd)) . NodeStory.buildForest
-- This function doesn't aggregate information, but merely just recostructs the original
-- map without loss of information. To perform operations on the forest, use the appropriate
-- functions.
destroyForest
::
Forest
NgramsElement
->
Map
NgramsTerm
NgramsElement
destroyForest
f
=
Map
.
fromList
.
map
(
foldTree
destroyTree
)
$
f
-- /NOTA BENE:/ We return a list and not a Map because we might have sorted the forest, and
-- converting into a map would trash the carefully-constructed sorting.
destroyForest
::
Forest
NgramsElement
->
[(
NgramsTerm
,
NgramsElement
)]
destroyForest
f
=
concatMap
(
map
(
\
el
->
(
_ne_ngrams
el
,
el
))
.
flatten
)
$
f
where
destroyTree
::
NgramsElement
->
[(
NgramsTerm
,
NgramsElement
)]
->
(
NgramsTerm
,
NgramsElement
)
destroyTree
rootEl
childrenEl
=
(
_ne_ngrams
rootEl
,
squashElements
rootEl
childrenEl
)
-- _destroyTree :: NgramsElement -> [(NgramsTerm, [NgramsElement])] -> (NgramsTerm, [NgramsElement]
)
-- _destroyTree rootEl childrenEl = (_ne_ngrams rootEl,
childrenEl)
squashElements
::
NgramsElement
->
[(
NgramsTerm
,
NgramsElement
)]
->
NgramsElement
squashElements
r
_
=
r
-- _
squashElements :: NgramsElement -> [(NgramsTerm, NgramsElement)] -> NgramsElement
-- _
squashElements r _ = r
-- | TODO Errors management
-- TODO: polymorphic for Annuaire or Corpus or ...
...
...
@@ -482,59 +482,64 @@ searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement)
->
Either
BuildForestError
(
VersionedWithCount
NgramsTable
)
searchTableNgrams
versionedTableMap
NgramsSearchQuery
{
..
}
=
let
tableMap
=
versionedTableMap
^.
v_data
in
case
buildForest
tableMap
of
in
case
keepRoots
<$>
buildForest
tableMap
of
Left
err
->
Left
err
Right
fs
->
let
forestRoots
=
Set
.
fromList
.
Map
.
elems
.
destroyForest
.
filterNgramsNodes
_nsq_listType
_nsq_minSize
_nsq_maxSize
_nsq_searchQuery
$
fs
let
forestRoots
=
filterNgramsNodes
_nsq_listType
_nsq_minSize
_nsq_maxSize
_nsq_searchQuery
$
fs
tableMapSorted
=
versionedTableMap
&
v_data
.~
(
NgramsTable
.
sortAndPaginate
.
withInners
tableMap
$
forestRoots
)
&
v_data
.~
(
NgramsTable
.
map
snd
.
destroyForest
.
sortAndPaginateForest
_nsq_offset
_nsq_limit
_nsq_orderBy
.
withInnersForest
$
forestRoots
)
in
Right
$
toVersionedWithCount
(
Set
.
size
forestRoots
)
tableMapSorted
where
in
Right
$
toVersionedWithCount
(
length
forestRoots
)
tableMapSorted
keepRoots
::
Forest
NgramsElement
->
Forest
NgramsElement
keepRoots
=
filter
(
\
(
Node
r
_
)
->
isNothing
(
_ne_root
r
)
||
isNothing
(
_ne_parent
r
))
-- Sorts the input 'NgramsElement' list.
-- /IMPORTANT/: As we might be sorting ngrams in all sorts of language,
-- some of them might include letters with accents and other unicode symbols,
-- but we need to filter those /diacritics/ out so that the sorting would
-- happen in the way users would expect. See ticket #331.
sortOnOrder
::
Maybe
OrderBy
->
([
NgramsElement
]
->
[
NgramsElement
])
sortOnOrder
Nothing
=
sortOnOrder
(
Just
ScoreDesc
)
sortOnOrder
(
Just
TermAsc
)
=
List
.
sortBy
ngramTermsAscSorter
sortOnOrder
(
Just
TermDesc
)
=
List
.
sortBy
ngramTermsDescSorter
sortOnOrder
(
Just
ScoreAsc
)
=
List
.
sortOn
$
view
(
ne_occurrences
.
to
Set
.
size
)
sortOnOrder
(
Just
ScoreDesc
)
=
List
.
sortOn
$
Down
.
view
(
ne_occurrences
.
to
Set
.
size
)
ngramTermsAscSorter
=
on
unicodeDUCETSorter
(
unNgramsTerm
.
view
ne_ngrams
)
ngramTermsDescSorter
=
on
(
\
n1
n2
->
unicodeDUCETSorter
n2
n1
)
(
unNgramsTerm
.
view
ne_ngrams
)
-- | For each input root, extends its occurrence count with
-- the information found in the subitems.
withInners
::
Map
NgramsTerm
NgramsElement
->
Set
NgramsElement
->
Set
NgramsElement
withInners
tblMap
roots
=
Set
.
map
addSubitemsOccurrences
roots
-- | For each input root, extends its occurrence count with
-- the information found in the subforest.
withInnersForest
::
Forest
NgramsElement
->
Forest
NgramsElement
withInnersForest
=
map
sumSubitemsOccurrences
where
addSubitemsOccurrences
::
NgramsElement
->
NgramsElement
addSubitemsOccurrences
e
=
e
{
_ne_occurrences
=
foldl'
alterOccurrences
(
e
^.
ne_occurrences
)
(
e
^.
ne_children
)
}
alterOccurrences
::
Set
ContextId
->
NgramsTerm
->
Set
ContextId
alterOccurrences
occs
t
=
case
Map
.
lookup
t
tblMap
of
Nothing
->
occs
Just
e'
->
occs
<>
e'
^.
ne_occurrences
-- | Paginate the results
sortAndPaginate
::
Set
NgramsElement
->
[
NgramsElement
]
sortAndPaginate
xs
=
let
offset'
=
getOffset
$
maybe
0
identity
_nsq
_offset
in
take
(
getLimit
_nsq_
limit
)
sumSubitemsOccurrences
::
Tree
NgramsElement
->
Tree
NgramsElement
sumSubitemsOccurrences
(
Node
root
children
)
=
let
children'
=
withInnersForest
children
root'
=
root
{
_ne_occurrences
=
(
_ne_occurrences
root
)
<>
foldMap
(
_ne_occurrences
.
rootLabel
)
children'
}
in
Node
root'
children'
sortAndPaginateForest
::
Maybe
Offset
->
Limit
->
Maybe
OrderBy
->
Forest
NgramsElement
->
Forest
NgramsElement
sortAndPaginateForest
mb_offset
limit
orderBy
xs
=
let
offset'
=
getOffset
$
maybe
0
identity
mb
_offset
in
take
(
getLimit
limit
)
.
drop
offset'
.
sortOnOrder
_nsq_orderBy
.
Set
.
toList
.
sortOnOrderForest
orderBy
$
xs
-- Sorts the input 'NgramsElement' list.
-- /IMPORTANT/: As we might be sorting ngrams in all sorts of language,
-- some of them might include letters with accents and other unicode symbols,
-- but we need to filter those /diacritics/ out so that the sorting would
-- happen in the way users would expect. See ticket #331.
sortOnOrderForest
::
Maybe
OrderBy
->
(
Forest
NgramsElement
->
Forest
NgramsElement
)
sortOnOrderForest
Nothing
=
sortOnOrderForest
(
Just
ScoreDesc
)
sortOnOrderForest
(
Just
TermAsc
)
=
List
.
sortBy
(
\
(
Node
t1
_
)
(
Node
t2
_
)
->
ngramTermsAscSorter
t1
t2
)
sortOnOrderForest
(
Just
TermDesc
)
=
List
.
sortBy
(
\
(
Node
t1
_
)
(
Node
t2
_
)
->
ngramTermsDescSorter
t1
t2
)
sortOnOrderForest
(
Just
ScoreAsc
)
=
List
.
sortOn
$
\
(
Node
root
_
)
->
root
^.
(
ne_occurrences
.
to
Set
.
size
)
sortOnOrderForest
(
Just
ScoreDesc
)
=
List
.
sortOn
$
Down
.
(
\
(
Node
root
_
)
->
root
^.
(
ne_occurrences
.
to
Set
.
size
))
ngramTermsAscSorter
::
NgramsElement
->
NgramsElement
->
Ordering
ngramTermsAscSorter
=
on
unicodeDUCETSorter
(
unNgramsTerm
.
view
ne_ngrams
)
ngramTermsDescSorter
::
NgramsElement
->
NgramsElement
->
Ordering
ngramTermsDescSorter
=
on
(
\
n1
n2
->
unicodeDUCETSorter
n2
n1
)
(
unNgramsTerm
.
view
ne_ngrams
)
-- | This function allows sorting two texts via their unicode sorting
-- (as opposed as the standard lexicographical sorting) by relying on
-- the DUCET table, a table that specifies the ordering of all unicode
...
...
test/Test/API/UpdateList.hs
View file @
d38dbd35
...
...
@@ -214,22 +214,23 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
eRes
<-
runClientM
(
get_table_ngrams
token
cId
APINgrams
.
Terms
listId
50
Nothing
(
Just
MapTerm
)
Nothing
Nothing
Nothing
Nothing
)
clientEnv
eRes
`
shouldSatisfy
`
isRight
let
(
Right
res
)
=
eRes
-- /NOTA BENE/ The count is 1 because the count applies to roots only.
Just
res
`
shouldBe
`
JSON
.
decode
[
json
|
{"version":5
,"count":
3
,"count":
1
,"data":[
{"ngrams":"
guitar pedal
s"
{"ngrams":"
overdrive
s"
,"size":1
,"list":"MapTerm"
,"root":"overdrives"
,"parent":"overdrives"
,"occurrences":[]
,"children":["
tube screamer
s"]
,"children":["
guitar pedal
s"]
},
{"ngrams":"
overdrive
s"
{"ngrams":"
guitar pedal
s"
,"size":1
,"list":"MapTerm"
,"root":"overdrives"
,"parent":"overdrives"
,"occurrences":[]
,"children":["
guitar pedal
s"]
,"children":["
tube screamer
s"]
},
{"ngrams":"tube screamers"
,"size":1
...
...
@@ -309,16 +310,8 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
-- check that new term is parent of old one
checkNgrams
getNgrams
[
json
|
{"version": 2
,"count":
2
,"count":
1
,"data":[
{"ngrams":"abelian group"
,"size":2
,"list":"MapTerm"
,"root": "new abelian group"
,"parent": "new abelian group"
,"occurrences":[]
,"children":[]
},
{"ngrams":"new abelian group"
,"size":1
,"list":"MapTerm"
...
...
@@ -326,6 +319,14 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
,"parent":null
,"occurrences":[]
,"children":["abelian group"]
},
{"ngrams":"abelian group"
,"size":2
,"list":"MapTerm"
,"root": "new abelian group"
,"parent": "new abelian group"
,"occurrences":[]
,"children":[]
}
]
}
...
...
@@ -341,16 +342,8 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
-- In essence, this JSON needs to be exactly the same as the previous one,
-- i.e. important doesn't change the topology.
checkNgrams
getNgrams
[
json
|
{"version": 2
,"count":
2
,"count":
1
,"data":[
{"ngrams":"abelian group"
,"size":2
,"list":"MapTerm"
,"root": "new abelian group"
,"parent": "new abelian group"
,"occurrences":[]
,"children":[]
},
{"ngrams":"new abelian group"
,"size":1
,"list":"MapTerm"
...
...
@@ -358,6 +351,14 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
,"parent":null
,"occurrences":[]
,"children":["abelian group"]
},
{"ngrams":"abelian group"
,"size":2
,"list":"MapTerm"
,"root": "new abelian group"
,"parent": "new abelian group"
,"occurrences":[]
,"children":[]
}
]
}
...
...
test/Test/Instances.hs
View file @
d38dbd35
...
...
@@ -768,7 +768,7 @@ genCorpusWithMatchingElement = do
depth
<-
choose
(
1
,
5
)
let
mkEntry
=
do
trm
<-
arbitrary
el
<-
over
ne_children
(
breakLoop
trm
)
<$>
(
resize
depth
arbitrary
)
el
<-
over
ne_children
(
breakLoop
trm
)
.
makeItRoot
<$>
(
resize
depth
arbitrary
)
pure
(
trm
,
el
{
_ne_ngrams
=
trm
})
-- Let's build the map first, so that duplicates will be overwritten.
fullMap
<-
(
Map
.
fromList
<$>
vectorOf
depth
mkEntry
)
`
suchThat
`
(
\
x
->
isRight
(
buildForest
x
))
-- exclude loops
...
...
@@ -778,6 +778,10 @@ genCorpusWithMatchingElement = do
breakLoop
::
NgramsTerm
->
MSet
NgramsTerm
->
MSet
NgramsTerm
breakLoop
t
=
mSetFromSet
.
Set
.
delete
t
.
mSetToSet
makeItRoot
::
NgramsElement
->
NgramsElement
makeItRoot
ne
=
ne
&
ne_root
.~
Nothing
&
ne_parent
.~
Nothing
instance
Arbitrary
AcyclicTableMap
where
arbitrary
=
genCorpusWithMatchingElement
shrink
=
shrinkTree
...
...
test/Test/Ngrams/Query.hs
View file @
d38dbd35
...
...
@@ -236,7 +236,7 @@ testForestSearchProp :: Property
testForestSearchProp
=
forAll
arbitrary
$
\
(
AcyclicTableMap
ngramsTable
el
)
->
do
case
searchTableNgrams
(
Versioned
0
ngramsTable
)
(
searchQuery
el
)
of
Left
(
BFE_loop_detected
err
)
->
fail
(
T
.
unpack
$
renderLoop
err
)
Right
res
->
res
^.
vc_data
`
shouldSatisfy
`
(
elem
(
_ne_ngrams
el
)
.
map
_ne_ngrams
.
getNgramsTable
)
Right
res
->
res
^.
vc_data
`
shouldSatisfy
`
(
any
(
containsTerm
(
_ne_ngrams
el
))
.
getNgramsTable
)
where
searchQuery
term
=
NgramsSearchQuery
{
_nsq_limit
=
Limit
5
...
...
@@ -255,7 +255,9 @@ testSearchNestedTerms :: Assertion
testSearchNestedTerms
=
do
case
searchTableNgrams
(
Versioned
0
hierarchicalTableMap
)
searchQuery
of
Left
(
BFE_loop_detected
err
)
->
fail
(
T
.
unpack
$
renderLoop
err
)
Right
res
->
res
^.
vc_data
`
shouldSatisfy
`
(
elem
"ford"
.
map
_ne_ngrams
.
getNgramsTable
)
Right
res
->
-- it should appear at the top level or as one of the children.
res
^.
vc_data
`
shouldSatisfy
`
(
any
(
containsTerm
"ford"
)
.
getNgramsTable
)
where
searchQuery
=
NgramsSearchQuery
{
_nsq_limit
=
Limit
5
...
...
@@ -267,6 +269,11 @@ testSearchNestedTerms = do
,
_nsq_searchQuery
=
mockQueryFn
(
Just
"ford"
)
}
-- | Returns True if the input 'NgramsElement' contains (either in the root or in the children)
-- the input term.
containsTerm
::
NgramsTerm
->
NgramsElement
->
Bool
containsTerm
t
(
NgramsElement
{
..
})
=
_ne_ngrams
==
t
||
any
((
==
)
t
)
(
mSetToList
_ne_children
)
-- Pagination tests
test_pagination_allTerms
::
Assertion
...
...
@@ -390,7 +397,7 @@ test_paginationQuantum = do
Left
err
->
fail
(
show
err
)
Right
res
->
do
let
elems
=
coerce
@
NgramsTable
@
[
NgramsElement
]
$
_vc_data
res
length
elems
@?=
10
countRoots
elems
@?=
10
forM_
elems
$
\
term
->
assertBool
(
"found "
<>
show
(
_ne_list
term
)
<>
" in: "
<>
show
elems
)
(
_ne_list
term
==
MapTerm
)
where
...
...
@@ -404,13 +411,20 @@ test_paginationQuantum = do
,
_nsq_searchQuery
=
mockQueryFn
Nothing
}
countRoots
::
[
NgramsElement
]
->
Int
countRoots
[]
=
0
countRoots
(
x
:
xs
)
=
if
isNothing
(
_ne_root
x
)
||
isNothing
(
_ne_parent
x
)
then
1
+
countRoots
xs
else
countRoots
xs
test_paginationQuantum_02
::
Assertion
test_paginationQuantum_02
=
do
case
searchTableNgrams
quantumComputingCorpus
searchQuery
of
Left
err
->
fail
(
show
err
)
Right
res
->
do
let
elems
=
coerce
@
NgramsTable
@
[
NgramsElement
]
$
_vc_data
res
assertBool
(
"found only "
<>
show
(
length
elems
)
<>
" in: "
<>
show
elems
)
(
length
elems
==
10
)
assertBool
(
"found only "
<>
show
(
length
elems
)
<>
" in: "
<>
show
elems
)
(
countRoots
elems
==
10
)
where
searchQuery
=
NgramsSearchQuery
{
_nsq_limit
=
Limit
10
...
...
test/Test/Offline/Ngrams.hs
View file @
d38dbd35
...
...
@@ -269,7 +269,7 @@ testBuildNgramsTree_03 =
-- /PRECONDITION/: The '_ne_ngrams' field always matches the 'NgramsTerm', key of the map.
buildDestroyForestRoundtrips
::
AcyclicTableMap
->
Property
buildDestroyForestRoundtrips
(
AcyclicTableMap
mp
_
)
=
(
destroyForest
.
buildForestOrFail
$
mp
)
===
mp
(
Map
.
fromList
.
destroyForest
.
buildForestOrFail
$
mp
)
===
mp
testPruningNgramsForest_01
::
Property
testPruningNgramsForest_01
=
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment