Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Grégoire Locqueville
haskell-gargantext
Commits
37979721
Commit
37979721
authored
1 year ago
by
Alfredo Di Napoli
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Use DUCET sorting for ngrams elements
parent
1259eabd
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
56 additions
and
5 deletions
+56
-5
gargantext.cabal
gargantext.cabal
+2
-0
Ngrams.hs
src/Gargantext/API/Ngrams.hs
+18
-2
Query.hs
test/Test/Ngrams/Query.hs
+36
-3
No files found.
gargantext.cabal
View file @
37979721
...
...
@@ -628,6 +628,7 @@ library
, tomland >= 1.3.3.2
, tuple ^>= 0.3.0.2
, unordered-containers ^>= 0.2.16.0
, unicode-collation >= 0.1.3.6
, uri-encode ^>= 1.5.0.7
, utf8-string ^>= 1.0.2
, uuid ^>= 1.3.15
...
...
@@ -959,6 +960,7 @@ test-suite garg-test-tasty
, tmp-postgres >= 1.34.1 && < 1.35
, tree-diff
, unordered-containers ^>= 0.2.16.0
, unicode-collation >= 0.1.3.6
, validity ^>= 0.11.0.1
, vector ^>= 0.12.3.0
, wai
...
...
This diff is collapsed.
Click to expand it.
src/Gargantext/API/Ngrams.hs
View file @
37979721
...
...
@@ -120,6 +120,7 @@ import Gargantext.Prelude hiding (log, to, toLower, (%), isInfixOf)
import
Gargantext.Prelude.Clock
(
hasTime
,
getTime
)
import
Gargantext.Utils.Jobs
(
serveJobsAPI
,
MonadJobStatus
(
..
))
import
Servant
hiding
(
Patch
)
import
Text.Collate
qualified
as
Unicode
{-
-- TODO sequences of modifications (Patchs)
...
...
@@ -556,13 +557,21 @@ searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
&&
_nsq_searchQuery
(
inputNode
^.
ne_ngrams
)
&&
matchesListType
(
inputNode
^.
ne_list
)
-- Sorts the input 'NgramsElement' list.
-- /IMPORTANT/: As we might be sorting ngrams in all sorts of language,
-- some of them might include letters with accents and other unicode symbols,
-- but we need to filter those /diacritics/ out so that the sorting would
-- happen in the way users would expect. See ticket #331.
sortOnOrder
::
Maybe
OrderBy
->
([
NgramsElement
]
->
[
NgramsElement
])
sortOnOrder
Nothing
=
sortOnOrder
(
Just
ScoreDesc
)
sortOnOrder
(
Just
TermAsc
)
=
List
.
sort
On
$
view
ne_ngrams
sortOnOrder
(
Just
TermDesc
)
=
List
.
sort
On
$
Down
.
view
ne_ngrams
sortOnOrder
(
Just
TermAsc
)
=
List
.
sort
By
ngramTermsAscSorter
sortOnOrder
(
Just
TermDesc
)
=
List
.
sort
By
ngramTermsDescSorter
sortOnOrder
(
Just
ScoreAsc
)
=
List
.
sortOn
$
view
(
ne_occurrences
.
to
Set
.
size
)
sortOnOrder
(
Just
ScoreDesc
)
=
List
.
sortOn
$
Down
.
view
(
ne_occurrences
.
to
Set
.
size
)
ngramTermsAscSorter
=
on
unicodeDUCETSorter
(
unNgramsTerm
.
view
ne_ngrams
)
ngramTermsDescSorter
=
on
(
\
n1
n2
->
unicodeDUCETSorter
n2
n1
)
(
unNgramsTerm
.
view
ne_ngrams
)
-- | Filters the given `tableMap` with the search criteria. It returns
-- a set of 'NgramsElement' all matching the input 'NGramsSearchQuery'.
filterNodes
::
Map
NgramsTerm
NgramsElement
->
Set
NgramsElement
...
...
@@ -595,6 +604,13 @@ searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
.
Set
.
toList
$
xs
-- | This function allows sorting two texts via their unicode sorting
-- (as opposed as the standard lexicographical sorting) by relying on
-- the DUCET table, a table that specifies the ordering of all unicode
-- characters. This is enough for mimicking the \"natural sort\" effect
-- that users would expect.
unicodeDUCETSorter
::
Text
->
Text
->
Ordering
unicodeDUCETSorter
=
Unicode
.
collate
Unicode
.
rootCollator
getTableNgrams
::
forall
env
err
m
.
(
HasNodeStory
env
err
m
...
...
This diff is collapsed.
Click to expand it.
test/Test/Ngrams/Query.hs
View file @
37979721
...
...
@@ -10,6 +10,7 @@ import Data.Patch.Class qualified as Patch
import
Data.String
import
Data.Text
qualified
as
T
import
Data.Validity
qualified
as
Validity
import
Text.Collate
qualified
as
Unicode
import
Gargantext.API.Ngrams
import
Gargantext.API.Ngrams.Types
import
Gargantext.Core.Types.Main
...
...
@@ -48,7 +49,9 @@ unitTests = testGroup "Query tests"
[
-- Sorting
testCase
"Simple query mockFlatCorpus"
testFlat01
,
testCase
"Simple query (desc sorting)"
testFlat02
,
testCase
"[#331] Sort must ignore diacritics"
testSortDiacritics
,
testCase
"[#331] sorting via DUCET works"
testSortDiacriticsDucet
,
testCase
"[#331] Natural sort ascending works"
testNaturalSortAsceding
,
testCase
"[#331] Natural sort descending works"
testNaturalSortDescending
-- -- Filtering
,
testCase
"Simple query (listType = MapTerm)"
testFlat03
,
testCase
"Simple query (listType = StopTerm)"
testFlat04
...
...
@@ -102,10 +105,40 @@ testFlat02 = do
,
_nsq_searchQuery
=
mockQueryFn
Nothing
}
testSortDiacritics
::
Assertion
testSortDiacritics
=
do
testSortDiacriticsDucet
::
Assertion
testSortDiacriticsDucet
=
do
let
inputData
=
[
"étude"
,
"âge"
,
"vue"
,
"période"
]
let
expected
=
[
"âge"
,
"étude"
,
"période"
,
"vue"
]
expected
@??=
sortBy
(
Unicode
.
collate
Unicode
.
rootCollator
)
inputData
testNaturalSortAsceding
::
Assertion
testNaturalSortAsceding
=
do
let
res
=
searchTableNgrams
frenchCorpus
searchQuery
res
@??=
VersionedWithCount
0
4
(
NgramsTable
$
map
mkMapTerm
[
"âge"
,
"étude"
,
"période"
,
"vue"
])
where
frenchCorpus
::
Versioned
(
Map
NgramsTerm
NgramsElement
)
frenchCorpus
=
Versioned
0
$
Map
.
fromList
[
(
"doc_01"
,
mkMapTerm
"période"
)
,
(
"doc_02"
,
mkMapTerm
"vue"
)
,
(
"doc_03"
,
mkMapTerm
"âge"
)
,
(
"doc_04"
,
mkMapTerm
"étude"
)
]
searchQuery
=
NgramsSearchQuery
{
_nsq_limit
=
Limit
10
,
_nsq_offset
=
Nothing
,
_nsq_listType
=
Nothing
,
_nsq_minSize
=
Nothing
,
_nsq_maxSize
=
Nothing
,
_nsq_orderBy
=
Just
TermAsc
,
_nsq_searchQuery
=
mockQueryFn
Nothing
}
testNaturalSortDescending
::
Assertion
testNaturalSortDescending
=
do
let
res
=
searchTableNgrams
frenchCorpus
searchQuery
res
@??=
VersionedWithCount
0
4
(
NgramsTable
$
map
mkMapTerm
[
"vue"
,
"période"
,
"étude"
,
"âge"
])
where
frenchCorpus
::
Versioned
(
Map
NgramsTerm
NgramsElement
)
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment