Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
132
Issues
132
List
Board
Labels
Milestones
Merge Requests
4
Merge Requests
4
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
77fe2ea6
Commit
77fe2ea6
authored
Mar 04, 2024
by
Alfredo Di Napoli
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Test vectors and tests for Lancaster stemming
parent
396fbd52
Pipeline
#5699
failed with stages
in 27 minutes and 10 seconds
Changes
6
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
294 additions
and
13 deletions
+294
-13
Main.hs
bin/gargantext-golden-file-diff/Main.hs
+27
-0
gargantext.cabal
gargantext.cabal
+18
-0
Lancaster.hs
src/Gargantext/Core/Text/Terms/Mono/Stem/Lancaster.hs
+2
-2
lancaster.txt
test-data/stemming/lancaster.txt
+103
-0
Lancaster.hs
test/Test/Offline/Stemming/Lancaster.hs
+131
-0
Main.hs
test/drivers/tasty/Main.hs
+13
-11
No files found.
bin/gargantext-golden-file-diff/Main.hs
0 → 100644
View file @
77fe2ea6
module
Main
where
import
Prelude
import
Data.TreeDiff.Class
import
Data.TreeDiff.Pretty
import
qualified
Data.Text
as
T
import
qualified
Data.Text.IO
as
TIO
import
System.Environment
(
getArgs
)
import
System.Exit
(
exitFailure
)
import
Control.Monad
(
unless
)
import
qualified
Data.List
as
L
-- | Renders in a pretty way the content of two golden files. The
-- first file should contain the expected output, the second the
-- actual data generated by the test suite.
--
-- Usage: @garg-golden-file-diff REFERENCE ACTUAL@ (any further
-- arguments are ignored).
--
-- The process exits successfully and prints nothing when both files
-- contain the same lines. Otherwise it prints the differing lines,
-- rendered with 'ansiWlEditExpr', and exits with a failure status.
main :: IO ()
main = do
  args <- getArgs
  case args of
    (refPath : newPath : _) -> diffGoldenFiles refPath newPath
    _ -> do
      -- Fewer than two arguments: report how to call the tool instead
      -- of dying on an opaque pattern-match failure.
      putStrLn "Usage: garg-golden-file-diff <reference-file> <actual-file>"
      exitFailure

-- | Compares the two files line by line and reports every mismatch,
-- including lines that are present in only one of the two files.
diffGoldenFiles :: FilePath -> FilePath -> IO ()
diffGoldenFiles refPath newPath = do
  ref <- T.lines <$> TIO.readFile refPath
  new <- T.lines <$> TIO.readFile newPath
  let paired      = zip ref new
      differences = filter (uncurry (/=)) paired
      -- 'zip' stops at the shorter input, so lines that exist in only
      -- one file have to be collected separately; otherwise a truncated
      -- (or over-long) output would be reported as identical.
      shared      = length paired
      expected    = map fst differences ++ drop shared ref
      actual      = map snd differences ++ drop shared new
  unless (L.null expected && L.null actual) $ do
    putStrLn $ show $ ansiWlEditExpr $ ediff' expected actual
    exitFailure
gargantext.cabal
View file @
77fe2ea6
...
...
@@ -38,6 +38,7 @@ data-files:
test-data/phylo/bpa_phylo_test.json
test-data/phylo/open_science.json
test-data/phylo/issue-290-small.golden.json
test-data/stemming/lancaster.txt
test-data/test_config.ini
gargantext-cors-settings.toml
.clippy.dhall
...
...
@@ -862,6 +863,7 @@ test-suite garg-test-tasty
Test.Offline.Errors
Test.Offline.JSON
Test.Offline.Phylo
Test.Offline.Stemming.Lancaster
Test.Parsers.Date
Test.Parsers.Types
Test.Parsers.WOS
...
...
@@ -907,6 +909,7 @@ test-suite garg-test-tasty
, patches-map ^>= 0.1.0.1
, postgres-options >= 0.2 && < 0.3
, postgresql-simple >= 0.6.4 && < 0.7
, pretty
, process ^>= 1.6.13.2
, quickcheck-instances ^>= 0.3.25.2
, raw-strings-qq
...
...
@@ -921,6 +924,7 @@ test-suite garg-test-tasty
, shelly >= 1.9 && < 2
, stm ^>= 2.5.0.1
, tasty ^>= 1.4.2.1
, tasty-golden
, tasty-hspec
, tasty-hunit
, tasty-quickcheck
...
...
@@ -929,6 +933,7 @@ test-suite garg-test-tasty
, text ^>= 1.2.4.1
, time ^>= 1.9.3
, tmp-postgres >= 1.34.1 && < 1.35
, tree-diff
, unordered-containers ^>= 0.2.16.0
, validity ^>= 0.11.0.1
, wai
...
...
@@ -1059,3 +1064,16 @@ executable gargantext-phylo-profile
, vector
, directory
default-language: Haskell2010
-- Helper executable built from bin/gargantext-golden-file-diff/Main.hs.
-- It prints the differences between a reference golden file and the
-- freshly generated output; the Lancaster stemming golden test invokes
-- it through `cabal v2-run -v0 garg-golden-file-diff -- REF NEW`.
executable garg-golden-file-diff
import:
defaults
, optimized
main-is: Main.hs
hs-source-dirs:
bin/gargantext-golden-file-diff
build-depends:
base
, text
, tree-diff
default-language: Haskell2010
src/Gargantext/Core/Text/Terms/Mono/Stem/Lancaster.hs
View file @
77fe2ea6
...
...
@@ -38,7 +38,7 @@ rulesPaper =
[
(
'a'
,
[
Rule
"ia"
""
intact
,
Rule
"a"
""
intact
])
,
(
'b'
,
[
Rule
"bb"
"b"
stop
])
,
(
'c'
,
[
Rule
"ytic"
"ys"
stop
,
Rule
"ic"
""
cont
,
Rule
"nc"
"nt"
cont
])
,
(
'd'
,
[
Rule
"dd"
"d"
stop
,
Rule
"ied"
"
y"
cont
,
Rule
"ceed"
"cess"
stop
,
Rule
"eed"
"ee"
stop
,
(
'd'
,
[
Rule
"dd"
"d"
stop
,
Rule
"ied"
"
i"
stop
,
Rule
"ceed"
"cess"
stop
,
Rule
"eed"
"ee"
stop
,
Rule
"ed"
""
cont
,
Rule
"hood"
""
cont
])
,
(
'e'
,
[
Rule
"e"
""
cont
])
,
(
'f'
,
[
Rule
"lief"
"liev"
stop
,
Rule
"if"
""
cont
])
...
...
@@ -121,7 +121,7 @@ applyRules value isIntact rules =
then
Just
$
applyRules
next
False
rules
else
Just
next
-- |
A stem is acceptable if
-- |
Returns 'True' if a stem is acceptable.
acceptable
::
Text
->
Bool
acceptable
val
|
T
.
null
val
=
False
...
...
test-data/stemming/lancaster.txt
0 → 100644
View file @
77fe2ea6
1,collab
2,postpart
3,cat
4,cat
5,dog
6,dog
7,run
8,run
9,run
10,jump
11,jump
12,jump
13,swim
14,swim
15,swim
16,fish
17,fish
18,fish
19,eat
20,eat
21,eat
22,talk
23,talk
24,talk
25,walk
26,walk
27,walk
28,dant
29,dant
30,dant
31,sing
32,sing
33,sing
34,play
35,play
36,play
37,work
38,work
39,work
40,teach
41,teach
42,teach
43,learn
44,learn
45,learn
46,read
47,read
48,read
49,writ
50,writ
51,writ
52,paint
53,paint
54,paint
55,draw
56,draw
57,draw
58,speak
59,speak
60,speak
61,think
62,think
63,think
64,see
65,see
66,seen
67,hear
68,hear
69,heard
70,touch
71,touch
72,touch
73,smel
74,smel
75,smel
76,tast
77,tast
78,tast
79,laugh
80,laugh
81,laugh
82,cry
83,cry
84,cri
85,smil
86,smil
87,smil
88,frown
89,frown
90,frown
91,happy
92,happy
93,happiest
94,sad
95,sad
96,saddest
97,angry
98,angry
99,angriest
100,calm
101,calm
102,calmest
103,corrob
test/Test/Offline/Stemming/Lancaster.hs
0 → 100644
View file @
77fe2ea6
module
Test.Offline.Stemming.Lancaster
where
import
Prelude
import
Data.ByteString.Char8
qualified
as
C8
import
Data.Text
qualified
as
T
import
Gargantext.Core.Text.Terms.Mono.Stem.Lancaster
(
stemIt
)
import
Gargantext.Prelude
(
toS
)
import
Test.Tasty
import
Test.Tasty.Golden
(
goldenVsStringDiff
)
import
qualified
Data.ByteString.Lazy
as
BL
import
qualified
Data.Text.Encoding
as
TE
-- | Golden test for the Lancaster stemmer: the output of
-- 'mkTestVector' is compared against the checked-in reference file,
-- and mismatches are rendered by the @garg-golden-file-diff@
-- executable, which is launched through @cabal v2-run@.
tests :: TestTree
tests =
  testGroup "Lancaster"
    [ goldenVsStringDiff "test vector works" diffCommand goldenFile mkTestVector
    ]
  where
    -- Reference file holding the expected stems.
    goldenFile :: FilePath
    goldenFile = "test-data/stemming/lancaster.txt"

    -- Command line used to diff the reference file against the new output.
    diffCommand :: FilePath -> FilePath -> [String]
    diffCommand expectedPath actualPath =
      [ "cabal"
      , "v2-run"
      , "-v0"
      , "garg-golden-file-diff"
      , "--"
      , expectedPath
      , actualPath
      ]
-- | List of /unstemmed/ test words, each paired with its 1-based
-- position. The position is the index written in front of the
-- corresponding stem in the golden file.
testWords :: [(Int, T.Text)]
testWords = zip [1 ..]
  [ "collaboration"
  , "postpartum"
  , "cat", "cats"
  , "dog", "dogs"
  , "run", "running", "runner"
  , "jump", "jumped", "jumping"
  , "swim", "swimming", "swimmer"
  , "fish", "fishing", "fisher"
  , "eat", "eating", "eater"
  , "talk", "talking", "talks"
  , "walk", "walking", "walker"
  , "dance", "dancing", "dancer"
  , "sing", "singing", "singer"
  , "play", "playing", "player"
  , "work", "working", "worker"
  , "teach", "teaching", "teacher"
  , "learn", "learning", "learner"
  , "read", "reading", "reader"
  , "write", "writing", "writer"
  , "paint", "painting", "painter"
  , "draw", "drawing", "drawer"
  , "speak", "speaking", "speaker"
  , "think", "thinking", "thinker"
  , "see", "seeing", "seen"
  , "hear", "hearing", "heard"
  , "touch", "touching", "touched"
  , "smell", "smelling", "smelled"
  , "taste", "tasting", "tasted"
  , "laugh", "laughing", "laughed"
  , "cry", "crying", "cried"
  , "smile", "smiling", "smiled"
  , "frown", "frowning", "frowned"
  , "happy", "happier", "happiest"
  , "sad", "sadder", "saddest"
  , "angry", "angrier", "angriest"
  , "calm", "calmer", "calmest"
  , "corroborate"
  ]
-- | Builds the actual test vector: one @index,stem@ line for every
-- entry of 'testWords', where the stem is produced by 'stemIt' and
-- encoded as UTF-8. The lines are joined with 'C8.unlines' and the
-- result is converted to a lazy 'BL.ByteString' via 'toS'.
mkTestVector :: IO BL.ByteString
mkTestVector = pure (toS (C8.unlines renderedLines))
  where
    renderedLines =
      [ renderLine position unstemmed | (position, unstemmed) <- testWords ]

    -- A single @index,stem@ line, without the trailing newline.
    renderLine position unstemmed =
      C8.pack (show position) <> "," <> TE.encodeUtf8 (stemIt unstemmed)
test/drivers/tasty/Main.hs
View file @
77fe2ea6
...
...
@@ -12,17 +12,18 @@ module Main where
import
Gargantext.Prelude
import
qualified
Test.Core.Text.Corpus.Query
as
CorpusQuery
import
qualified
Test.Core.Utils
as
Utils
import
qualified
Test.Graph.Clustering
as
Graph
import
qualified
Test.Ngrams.NLP
as
NLP
import
qualified
Test.Ngrams.Query
as
NgramsQuery
import
qualified
Test.Offline.JSON
as
JSON
import
qualified
Test.Offline.Errors
as
Errors
import
qualified
Test.Offline.Phylo
as
Phylo
import
qualified
Test.Parsers.Date
as
PD
import
qualified
Test.Utils.Crypto
as
Crypto
import
qualified
Test.Utils.Jobs
as
Jobs
import
qualified
Test.Core.Text.Corpus.Query
as
CorpusQuery
import
qualified
Test.Core.Utils
as
Utils
import
qualified
Test.Graph.Clustering
as
Graph
import
qualified
Test.Ngrams.NLP
as
NLP
import
qualified
Test.Ngrams.Query
as
NgramsQuery
import
qualified
Test.Offline.JSON
as
JSON
import
qualified
Test.Offline.Errors
as
Errors
import
qualified
Test.Offline.Phylo
as
Phylo
import
qualified
Test.Offline.Stemming.Lancaster
as
Lancaster
import
qualified
Test.Parsers.Date
as
PD
import
qualified
Test.Utils.Crypto
as
Crypto
import
qualified
Test.Utils.Jobs
as
Jobs
import
Test.Tasty
import
Test.Tasty.Hspec
...
...
@@ -50,4 +51,5 @@ main = do
,
JSON
.
tests
,
Errors
.
tests
,
Phylo
.
tests
,
testGroup
"Stemming"
[
Lancaster
.
tests
]
]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment