Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Christian Merten
haskell-gargantext
Commits
37979721
Commit
37979721
authored
Apr 08, 2024
by
Alfredo Di Napoli
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Use DUCET sorting for ngrams elements
parent
1259eabd
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
56 additions
and
5 deletions
+56
-5
gargantext.cabal
gargantext.cabal
+2
-0
Ngrams.hs
src/Gargantext/API/Ngrams.hs
+18
-2
Query.hs
test/Test/Ngrams/Query.hs
+36
-3
No files found.
gargantext.cabal
View file @
37979721
...
...
@@ -628,6 +628,7 @@ library
, tomland >= 1.3.3.2
, tuple ^>= 0.3.0.2
, unordered-containers ^>= 0.2.16.0
, unicode-collation >= 0.1.3.6
, uri-encode ^>= 1.5.0.7
, utf8-string ^>= 1.0.2
, uuid ^>= 1.3.15
...
...
@@ -959,6 +960,7 @@ test-suite garg-test-tasty
, tmp-postgres >= 1.34.1 && < 1.35
, tree-diff
, unordered-containers ^>= 0.2.16.0
, unicode-collation >= 0.1.3.6
, validity ^>= 0.11.0.1
, vector ^>= 0.12.3.0
, wai
...
...
src/Gargantext/API/Ngrams.hs
View file @
37979721
...
...
@@ -120,6 +120,7 @@ import Gargantext.Prelude hiding (log, to, toLower, (%), isInfixOf)
import
Gargantext.Prelude.Clock
(
hasTime
,
getTime
)
import
Gargantext.Utils.Jobs
(
serveJobsAPI
,
MonadJobStatus
(
..
))
import
Servant
hiding
(
Patch
)
import
Text.Collate
qualified
as
Unicode
{-
-- TODO sequences of modifications (Patchs)
...
...
@@ -556,13 +557,21 @@ searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
&&
_nsq_searchQuery
(
inputNode
^.
ne_ngrams
)
&&
matchesListType
(
inputNode
^.
ne_list
)
-- Sorts the input 'NgramsElement' list.
-- /IMPORTANT/: As we might be sorting ngrams in all sorts of language,
-- some of them might include letters with accents and other unicode symbols,
-- but we need to filter those /diacritics/ out so that the sorting would
-- happen in the way users would expect. See ticket #331.
sortOnOrder
::
Maybe
OrderBy
->
([
NgramsElement
]
->
[
NgramsElement
])
sortOnOrder
Nothing
=
sortOnOrder
(
Just
ScoreDesc
)
sortOnOrder
(
Just
TermAsc
)
=
List
.
sort
On
$
view
ne_ngrams
sortOnOrder
(
Just
TermDesc
)
=
List
.
sort
On
$
Down
.
view
ne_ngrams
sortOnOrder
(
Just
TermAsc
)
=
List
.
sort
By
ngramTermsAscSorter
sortOnOrder
(
Just
TermDesc
)
=
List
.
sort
By
ngramTermsDescSorter
sortOnOrder
(
Just
ScoreAsc
)
=
List
.
sortOn
$
view
(
ne_occurrences
.
to
Set
.
size
)
sortOnOrder
(
Just
ScoreDesc
)
=
List
.
sortOn
$
Down
.
view
(
ne_occurrences
.
to
Set
.
size
)
ngramTermsAscSorter
=
on
unicodeDUCETSorter
(
unNgramsTerm
.
view
ne_ngrams
)
ngramTermsDescSorter
=
on
(
\
n1
n2
->
unicodeDUCETSorter
n2
n1
)
(
unNgramsTerm
.
view
ne_ngrams
)
-- | Filters the given `tableMap` with the search criteria. It returns
-- a set of 'NgramsElement' all matching the input 'NGramsSearchQuery'.
filterNodes
::
Map
NgramsTerm
NgramsElement
->
Set
NgramsElement
...
...
@@ -595,6 +604,13 @@ searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
.
Set
.
toList
$
xs
-- | This function allows sorting two texts via their unicode sorting
-- (as opposed as the standard lexicographical sorting) by relying on
-- the DUCET table, a table that specifies the ordering of all unicode
-- characters. This is enough for mimicking the \"natural sort\" effect
-- that users would expect.
unicodeDUCETSorter
::
Text
->
Text
->
Ordering
unicodeDUCETSorter
=
Unicode
.
collate
Unicode
.
rootCollator
getTableNgrams
::
forall
env
err
m
.
(
HasNodeStory
env
err
m
...
...
test/Test/Ngrams/Query.hs
View file @
37979721
...
...
@@ -10,6 +10,7 @@ import Data.Patch.Class qualified as Patch
import
Data.String
import
Data.Text
qualified
as
T
import
Data.Validity
qualified
as
Validity
import
Text.Collate
qualified
as
Unicode
import
Gargantext.API.Ngrams
import
Gargantext.API.Ngrams.Types
import
Gargantext.Core.Types.Main
...
...
@@ -48,7 +49,9 @@ unitTests = testGroup "Query tests"
[
-- Sorting
testCase
"Simple query mockFlatCorpus"
testFlat01
,
testCase
"Simple query (desc sorting)"
testFlat02
,
testCase
"[#331] Sort must ignore diacritics"
testSortDiacritics
,
testCase
"[#331] sorting via DUCET works"
testSortDiacriticsDucet
,
testCase
"[#331] Natural sort ascending works"
testNaturalSortAsceding
,
testCase
"[#331] Natural sort descending works"
testNaturalSortDescending
-- -- Filtering
,
testCase
"Simple query (listType = MapTerm)"
testFlat03
,
testCase
"Simple query (listType = StopTerm)"
testFlat04
...
...
@@ -102,10 +105,40 @@ testFlat02 = do
,
_nsq_searchQuery
=
mockQueryFn
Nothing
}
testSortDiacritics
::
Assertion
testSortDiacritics
=
do
testSortDiacriticsDucet
::
Assertion
testSortDiacriticsDucet
=
do
let
inputData
=
[
"étude"
,
"âge"
,
"vue"
,
"période"
]
let
expected
=
[
"âge"
,
"étude"
,
"période"
,
"vue"
]
expected
@??=
sortBy
(
Unicode
.
collate
Unicode
.
rootCollator
)
inputData
testNaturalSortAsceding
::
Assertion
testNaturalSortAsceding
=
do
let
res
=
searchTableNgrams
frenchCorpus
searchQuery
res
@??=
VersionedWithCount
0
4
(
NgramsTable
$
map
mkMapTerm
[
"âge"
,
"étude"
,
"période"
,
"vue"
])
where
frenchCorpus
::
Versioned
(
Map
NgramsTerm
NgramsElement
)
frenchCorpus
=
Versioned
0
$
Map
.
fromList
[
(
"doc_01"
,
mkMapTerm
"période"
)
,
(
"doc_02"
,
mkMapTerm
"vue"
)
,
(
"doc_03"
,
mkMapTerm
"âge"
)
,
(
"doc_04"
,
mkMapTerm
"étude"
)
]
searchQuery
=
NgramsSearchQuery
{
_nsq_limit
=
Limit
10
,
_nsq_offset
=
Nothing
,
_nsq_listType
=
Nothing
,
_nsq_minSize
=
Nothing
,
_nsq_maxSize
=
Nothing
,
_nsq_orderBy
=
Just
TermAsc
,
_nsq_searchQuery
=
mockQueryFn
Nothing
}
testNaturalSortDescending
::
Assertion
testNaturalSortDescending
=
do
let
res
=
searchTableNgrams
frenchCorpus
searchQuery
res
@??=
VersionedWithCount
0
4
(
NgramsTable
$
map
mkMapTerm
[
"vue"
,
"période"
,
"étude"
,
"âge"
])
where
frenchCorpus
::
Versioned
(
Map
NgramsTerm
NgramsElement
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment