Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
195
Issues
195
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
781b74dc
Commit
781b74dc
authored
Sep 23, 2025
by
Fabien Maniere
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Revert "Merge branch 'adinapoli/issue-504' into 'dev'"
This reverts merge request
!439
parent
ca7f0f26
Pipeline
#7922
passed with stages
in 49 minutes and 8 seconds
Changes
5
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
90 additions
and
114 deletions
+90
-114
Ngrams.hs
src/Gargantext/API/Ngrams.hs
+59
-64
UpdateList.hs
test/Test/API/UpdateList.hs
+25
-26
Instances.hs
test/Test/Instances.hs
+1
-5
Query.hs
test/Test/Ngrams/Query.hs
+4
-18
Ngrams.hs
test/Test/Offline/Ngrams.hs
+1
-1
No files found.
src/Gargantext/API/Ngrams.hs
View file @
781b74dc
...
...
@@ -17,12 +17,14 @@ add get
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE IncoherentInstances #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.API.Ngrams
(
...
...
@@ -112,7 +114,7 @@ import Gargantext.API.Ngrams.Types
import
Gargantext.Core.NodeStory
hiding
(
buildForest
)
import
Gargantext.Core.NodeStory
qualified
as
NodeStory
import
Gargantext.Core.Text.Ngrams
(
Ngrams
,
NgramsType
)
import
Gargantext.Core.Types
(
ListType
(
..
),
NodeId
,
ListId
,
TODO
,
assertValid
,
HasValidationError
)
import
Gargantext.Core.Types
(
ListType
(
..
),
NodeId
,
ListId
,
TODO
,
assertValid
,
ContextId
,
HasValidationError
)
import
Gargantext.Core.Types.Query
(
Limit
(
..
),
Offset
(
..
),
MinSize
(
..
),
MaxSize
(
..
))
import
Gargantext.Database.Action.Metrics.NgramsByContext
(
getOccByNgramsOnlyFast
)
import
Gargantext.Database.Prelude
...
...
@@ -456,16 +458,14 @@ buildForest = fmap (map (fmap snd)) . NodeStory.buildForest
-- This function doesn't aggregate information, but merely just recostructs the original
-- map without loss of information. To perform operations on the forest, use the appropriate
-- functions.
-- /NOTA BENE:/ We return a list and not a Map because we might have sorted the forest, and
-- converting into a map would trash the carefully-constructed sorting.
destroyForest
::
Forest
NgramsElement
->
[(
NgramsTerm
,
NgramsElement
)]
destroyForest
f
=
concatMap
(
map
(
\
el
->
(
_ne_ngrams
el
,
el
))
.
flatten
)
$
f
destroyForest
::
Forest
NgramsElement
->
Map
NgramsTerm
NgramsElement
destroyForest
f
=
Map
.
fromList
.
map
(
foldTree
destroyTree
)
$
f
where
-- _destroyTree :: NgramsElement -> [(NgramsTerm, [NgramsElement])] -> (NgramsTerm, [NgramsElement]
)
-- _destroyTree rootEl childrenEl = (_ne_ngrams rootEl,
childrenEl)
destroyTree
::
NgramsElement
->
[(
NgramsTerm
,
NgramsElement
)]
->
(
NgramsTerm
,
NgramsElement
)
destroyTree
rootEl
childrenEl
=
(
_ne_ngrams
rootEl
,
squashElements
rootEl
childrenEl
)
-- _
squashElements :: NgramsElement -> [(NgramsTerm, NgramsElement)] -> NgramsElement
-- _
squashElements r _ = r
squashElements
::
NgramsElement
->
[(
NgramsTerm
,
NgramsElement
)]
->
NgramsElement
squashElements
r
_
=
r
-- | TODO Errors management
-- TODO: polymorphic for Annuaire or Corpus or ...
...
...
@@ -482,64 +482,59 @@ searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement)
->
Either
BuildForestError
(
VersionedWithCount
NgramsTable
)
searchTableNgrams
versionedTableMap
NgramsSearchQuery
{
..
}
=
let
tableMap
=
versionedTableMap
^.
v_data
in
case
keepRoots
<$>
buildForest
tableMap
of
in
case
buildForest
tableMap
of
Left
err
->
Left
err
Right
fs
->
let
forestRoots
=
filterNgramsNodes
_nsq_listType
_nsq_minSize
_nsq_maxSize
_nsq_searchQuery
$
fs
tableMapSorted
=
versionedTableMap
&
v_data
.~
(
NgramsTable
.
map
snd
let
forestRoots
=
Set
.
fromList
.
Map
.
elems
.
destroyForest
.
sortAndPaginateForest
_nsq_offset
_nsq_limit
_nsq_orderBy
.
withInnersForest
$
forestRoots
)
in
Right
$
toVersionedWithCount
(
length
forestRoots
)
tableMapSorted
keepRoots
::
Forest
NgramsElement
->
Forest
NgramsElement
keepRoots
=
filter
(
\
(
Node
r
_
)
->
isNothing
(
_ne_root
r
)
||
isNothing
(
_ne_parent
r
))
.
filterNgramsNodes
_nsq_listType
_nsq_minSize
_nsq_maxSize
_nsq_searchQuery
$
fs
tableMapSorted
=
versionedTableMap
&
v_data
.~
(
NgramsTable
.
sortAndPaginate
.
withInners
tableMap
$
forestRoots
)
-- | For each input root, extends its occurrence count with
-- the information found in the subforest.
withInnersForest
::
Forest
NgramsElement
->
Forest
NgramsElement
withInnersForest
=
map
sumSubitemsOccurrences
in
Right
$
toVersionedWithCount
(
Set
.
size
forestRoots
)
tableMapSorted
where
sumSubitemsOccurrences
::
Tree
NgramsElement
->
Tree
NgramsElement
sumSubitemsOccurrences
(
Node
root
children
)
=
let
children'
=
withInnersForest
children
root'
=
root
{
_ne_occurrences
=
(
_ne_occurrences
root
)
<>
foldMap
(
_ne_occurrences
.
rootLabel
)
children'
}
in
Node
root'
children'
sortAndPaginateForest
::
Maybe
Offset
->
Limit
->
Maybe
OrderBy
->
Forest
NgramsElement
->
Forest
NgramsElement
sortAndPaginateForest
mb_offset
limit
orderBy
xs
=
let
offset'
=
getOffset
$
maybe
0
identity
mb_offset
in
take
(
getLimit
limit
)
-- Sorts the input 'NgramsElement' list.
-- /IMPORTANT/: As we might be sorting ngrams in all sorts of language,
-- some of them might include letters with accents and other unicode symbols,
-- but we need to filter those /diacritics/ out so that the sorting would
-- happen in the way users would expect. See ticket #331.
sortOnOrder
::
Maybe
OrderBy
->
([
NgramsElement
]
->
[
NgramsElement
])
sortOnOrder
Nothing
=
sortOnOrder
(
Just
ScoreDesc
)
sortOnOrder
(
Just
TermAsc
)
=
List
.
sortBy
ngramTermsAscSorter
sortOnOrder
(
Just
TermDesc
)
=
List
.
sortBy
ngramTermsDescSorter
sortOnOrder
(
Just
ScoreAsc
)
=
List
.
sortOn
$
view
(
ne_occurrences
.
to
Set
.
size
)
sortOnOrder
(
Just
ScoreDesc
)
=
List
.
sortOn
$
Down
.
view
(
ne_occurrences
.
to
Set
.
size
)
ngramTermsAscSorter
=
on
unicodeDUCETSorter
(
unNgramsTerm
.
view
ne_ngrams
)
ngramTermsDescSorter
=
on
(
\
n1
n2
->
unicodeDUCETSorter
n2
n1
)
(
unNgramsTerm
.
view
ne_ngrams
)
-- | For each input root, extends its occurrence count with
-- the information found in the subitems.
withInners
::
Map
NgramsTerm
NgramsElement
->
Set
NgramsElement
->
Set
NgramsElement
withInners
tblMap
roots
=
Set
.
map
addSubitemsOccurrences
roots
where
addSubitemsOccurrences
::
NgramsElement
->
NgramsElement
addSubitemsOccurrences
e
=
e
{
_ne_occurrences
=
foldl'
alterOccurrences
(
e
^.
ne_occurrences
)
(
e
^.
ne_children
)
}
alterOccurrences
::
Set
ContextId
->
NgramsTerm
->
Set
ContextId
alterOccurrences
occs
t
=
case
Map
.
lookup
t
tblMap
of
Nothing
->
occs
Just
e'
->
occs
<>
e'
^.
ne_occurrences
-- | Paginate the results
sortAndPaginate
::
Set
NgramsElement
->
[
NgramsElement
]
sortAndPaginate
xs
=
let
offset'
=
getOffset
$
maybe
0
identity
_nsq_offset
in
take
(
getLimit
_nsq_limit
)
.
drop
offset'
.
sortOnOrderForest
orderBy
.
sortOnOrder
_nsq_orderBy
.
Set
.
toList
$
xs
-- Sorts the input 'NgramsElement' list.
-- /IMPORTANT/: As we might be sorting ngrams in all sorts of language,
-- some of them might include letters with accents and other unicode symbols,
-- but we need to filter those /diacritics/ out so that the sorting would
-- happen in the way users would expect. See ticket #331.
sortOnOrderForest
::
Maybe
OrderBy
->
(
Forest
NgramsElement
->
Forest
NgramsElement
)
sortOnOrderForest
Nothing
=
sortOnOrderForest
(
Just
ScoreDesc
)
sortOnOrderForest
(
Just
TermAsc
)
=
List
.
sortBy
(
\
(
Node
t1
_
)
(
Node
t2
_
)
->
ngramTermsAscSorter
t1
t2
)
sortOnOrderForest
(
Just
TermDesc
)
=
List
.
sortBy
(
\
(
Node
t1
_
)
(
Node
t2
_
)
->
ngramTermsDescSorter
t1
t2
)
sortOnOrderForest
(
Just
ScoreAsc
)
=
List
.
sortOn
$
\
(
Node
root
_
)
->
root
^.
(
ne_occurrences
.
to
Set
.
size
)
sortOnOrderForest
(
Just
ScoreDesc
)
=
List
.
sortOn
$
Down
.
(
\
(
Node
root
_
)
->
root
^.
(
ne_occurrences
.
to
Set
.
size
))
ngramTermsAscSorter
::
NgramsElement
->
NgramsElement
->
Ordering
ngramTermsAscSorter
=
on
unicodeDUCETSorter
(
unNgramsTerm
.
view
ne_ngrams
)
ngramTermsDescSorter
::
NgramsElement
->
NgramsElement
->
Ordering
ngramTermsDescSorter
=
on
(
\
n1
n2
->
unicodeDUCETSorter
n2
n1
)
(
unNgramsTerm
.
view
ne_ngrams
)
-- | This function allows sorting two texts via their unicode sorting
-- (as opposed as the standard lexicographical sorting) by relying on
-- the DUCET table, a table that specifies the ordering of all unicode
...
...
test/Test/API/UpdateList.hs
View file @
781b74dc
...
...
@@ -214,16 +214,9 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
eRes
<-
runClientM
(
get_table_ngrams
token
cId
APINgrams
.
Terms
listId
50
Nothing
(
Just
MapTerm
)
Nothing
Nothing
Nothing
Nothing
)
clientEnv
eRes
`
shouldSatisfy
`
isRight
let
(
Right
res
)
=
eRes
-- /NOTA BENE/ The count is 1 because the count applies to roots only.
Just
res
`
shouldBe
`
JSON
.
decode
[
json
|
{"version":5
,"count":
1
,"count":
3
,"data":[
{"ngrams":"overdrives"
,"size":1
,"list":"MapTerm"
,"occurrences":[]
,"children":["guitar pedals"]
},
{"ngrams":"guitar pedals"
,"size":1
,"list":"MapTerm"
...
...
@@ -232,6 +225,12 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
,"occurrences":[]
,"children":["tube screamers"]
},
{"ngrams":"overdrives"
,"size":1
,"list":"MapTerm"
,"occurrences":[]
,"children":["guitar pedals"]
},
{"ngrams":"tube screamers"
,"size":1
,"list":"MapTerm"
...
...
@@ -310,16 +309,8 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
-- check that new term is parent of old one
checkNgrams
getNgrams
[
json
|
{"version": 2
,"count":
1
,"count":
2
,"data":[
{"ngrams":"new abelian group"
,"size":1
,"list":"MapTerm"
,"root":null
,"parent":null
,"occurrences":[]
,"children":["abelian group"]
},
{"ngrams":"abelian group"
,"size":2
,"list":"MapTerm"
...
...
@@ -327,6 +318,14 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
,"parent": "new abelian group"
,"occurrences":[]
,"children":[]
},
{"ngrams":"new abelian group"
,"size":1
,"list":"MapTerm"
,"root":null
,"parent":null
,"occurrences":[]
,"children":["abelian group"]
}
]
}
...
...
@@ -342,16 +341,8 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
-- In essence, this JSON needs to be exactly the same as the previous one,
-- i.e. important doesn't change the topology.
checkNgrams
getNgrams
[
json
|
{"version": 2
,"count":
1
,"count":
2
,"data":[
{"ngrams":"new abelian group"
,"size":1
,"list":"MapTerm"
,"root":null
,"parent":null
,"occurrences":[]
,"children":["abelian group"]
},
{"ngrams":"abelian group"
,"size":2
,"list":"MapTerm"
...
...
@@ -359,6 +350,14 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
,"parent": "new abelian group"
,"occurrences":[]
,"children":[]
},
{"ngrams":"new abelian group"
,"size":1
,"list":"MapTerm"
,"root":null
,"parent":null
,"occurrences":[]
,"children":["abelian group"]
}
]
}
...
...
test/Test/Instances.hs
View file @
781b74dc
...
...
@@ -768,7 +768,7 @@ genCorpusWithMatchingElement = do
depth
<-
choose
(
1
,
5
)
let
mkEntry
=
do
trm
<-
arbitrary
el
<-
over
ne_children
(
breakLoop
trm
)
.
makeItRoot
<$>
(
resize
depth
arbitrary
)
el
<-
over
ne_children
(
breakLoop
trm
)
<$>
(
resize
depth
arbitrary
)
pure
(
trm
,
el
{
_ne_ngrams
=
trm
})
-- Let's build the map first, so that duplicates will be overwritten.
fullMap
<-
(
Map
.
fromList
<$>
vectorOf
depth
mkEntry
)
`
suchThat
`
(
\
x
->
isRight
(
buildForest
x
))
-- exclude loops
...
...
@@ -778,10 +778,6 @@ genCorpusWithMatchingElement = do
breakLoop
::
NgramsTerm
->
MSet
NgramsTerm
->
MSet
NgramsTerm
breakLoop
t
=
mSetFromSet
.
Set
.
delete
t
.
mSetToSet
makeItRoot
::
NgramsElement
->
NgramsElement
makeItRoot
ne
=
ne
&
ne_root
.~
Nothing
&
ne_parent
.~
Nothing
instance
Arbitrary
AcyclicTableMap
where
arbitrary
=
genCorpusWithMatchingElement
shrink
=
shrinkTree
...
...
test/Test/Ngrams/Query.hs
View file @
781b74dc
...
...
@@ -236,7 +236,7 @@ testForestSearchProp :: Property
testForestSearchProp
=
forAll
arbitrary
$
\
(
AcyclicTableMap
ngramsTable
el
)
->
do
case
searchTableNgrams
(
Versioned
0
ngramsTable
)
(
searchQuery
el
)
of
Left
(
BFE_loop_detected
err
)
->
fail
(
T
.
unpack
$
renderLoop
err
)
Right
res
->
res
^.
vc_data
`
shouldSatisfy
`
(
any
(
containsTerm
(
_ne_ngrams
el
))
.
getNgramsTable
)
Right
res
->
res
^.
vc_data
`
shouldSatisfy
`
(
elem
(
_ne_ngrams
el
)
.
map
_ne_ngrams
.
getNgramsTable
)
where
searchQuery
term
=
NgramsSearchQuery
{
_nsq_limit
=
Limit
5
...
...
@@ -255,9 +255,7 @@ testSearchNestedTerms :: Assertion
testSearchNestedTerms
=
do
case
searchTableNgrams
(
Versioned
0
hierarchicalTableMap
)
searchQuery
of
Left
(
BFE_loop_detected
err
)
->
fail
(
T
.
unpack
$
renderLoop
err
)
Right
res
->
-- it should appear at the top level or as one of the children.
res
^.
vc_data
`
shouldSatisfy
`
(
any
(
containsTerm
"ford"
)
.
getNgramsTable
)
Right
res
->
res
^.
vc_data
`
shouldSatisfy
`
(
elem
"ford"
.
map
_ne_ngrams
.
getNgramsTable
)
where
searchQuery
=
NgramsSearchQuery
{
_nsq_limit
=
Limit
5
...
...
@@ -269,11 +267,6 @@ testSearchNestedTerms = do
,
_nsq_searchQuery
=
mockQueryFn
(
Just
"ford"
)
}
-- | Returns True if the input 'NgramsElement' contains (either in the root or in the children)
-- the input term.
containsTerm
::
NgramsTerm
->
NgramsElement
->
Bool
containsTerm
t
(
NgramsElement
{
..
})
=
_ne_ngrams
==
t
||
any
((
==
)
t
)
(
mSetToList
_ne_children
)
-- Pagination tests
test_pagination_allTerms
::
Assertion
...
...
@@ -397,7 +390,7 @@ test_paginationQuantum = do
Left
err
->
fail
(
show
err
)
Right
res
->
do
let
elems
=
coerce
@
NgramsTable
@
[
NgramsElement
]
$
_vc_data
res
countRoots
elems
@?=
10
length
elems
@?=
10
forM_
elems
$
\
term
->
assertBool
(
"found "
<>
show
(
_ne_list
term
)
<>
" in: "
<>
show
elems
)
(
_ne_list
term
==
MapTerm
)
where
...
...
@@ -411,20 +404,13 @@ test_paginationQuantum = do
,
_nsq_searchQuery
=
mockQueryFn
Nothing
}
countRoots
::
[
NgramsElement
]
->
Int
countRoots
[]
=
0
countRoots
(
x
:
xs
)
=
if
isNothing
(
_ne_root
x
)
||
isNothing
(
_ne_parent
x
)
then
1
+
countRoots
xs
else
countRoots
xs
test_paginationQuantum_02
::
Assertion
test_paginationQuantum_02
=
do
case
searchTableNgrams
quantumComputingCorpus
searchQuery
of
Left
err
->
fail
(
show
err
)
Right
res
->
do
let
elems
=
coerce
@
NgramsTable
@
[
NgramsElement
]
$
_vc_data
res
assertBool
(
"found only "
<>
show
(
length
elems
)
<>
" in: "
<>
show
elems
)
(
countRoots
elems
==
10
)
assertBool
(
"found only "
<>
show
(
length
elems
)
<>
" in: "
<>
show
elems
)
(
length
elems
==
10
)
where
searchQuery
=
NgramsSearchQuery
{
_nsq_limit
=
Limit
10
...
...
test/Test/Offline/Ngrams.hs
View file @
781b74dc
...
...
@@ -269,7 +269,7 @@ testBuildNgramsTree_03 =
-- /PRECONDITION/: The '_ne_ngrams' field always matches the 'NgramsTerm', key of the map.
buildDestroyForestRoundtrips
::
AcyclicTableMap
->
Property
buildDestroyForestRoundtrips
(
AcyclicTableMap
mp
_
)
=
(
Map
.
fromList
.
destroyForest
.
buildForestOrFail
$
mp
)
===
mp
(
destroyForest
.
buildForestOrFail
$
mp
)
===
mp
testPruningNgramsForest_01
::
Property
testPruningNgramsForest_01
=
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment