Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
160
Issues
160
List
Board
Labels
Milestones
Merge Requests
14
Merge Requests
14
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
05e98637
Commit
05e98637
authored
Jul 02, 2024
by
Loïc Chapron
1
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add test to TSV import error message
parent
c5671e08
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
210 additions
and
0 deletions
+210
-0
gargantext.cabal
gargantext.cabal
+3
-0
TSV.hs
test/Test/Core/Text/Corpus/TSV.hs
+205
-0
Main.hs
test/drivers/tasty/Main.hs
+2
-0
No files found.
gargantext.cabal
View file @
05e98637
...
...
@@ -782,6 +782,7 @@ test-suite garg-test-tasty
Test.Core.Similarity
Test.Core.Text
Test.Core.Text.Corpus.Query
Test.Core.Text.Corpus.TSV
Test.Core.Text.Examples
Test.Core.Text.Flow
Test.Core.Utils
...
...
@@ -855,6 +856,7 @@ test-suite garg-test-tasty
, postgresql-simple >= 0.6.4 && < 0.7
, pretty
, process ^>= 1.6.13.2
, protolude ^>= 0.3.3
, quickcheck-instances ^>= 0.3.25.2
, raw-strings-qq
, recover-rtti >= 0.4 && < 0.5
...
...
@@ -882,6 +884,7 @@ test-suite garg-test-tasty
, tree-diff
, unordered-containers ^>= 0.2.16.0
, unicode-collation >= 0.1.3.6
, utf8-string ^>= 1.0.2
, validity ^>= 0.11.0.1
, vector ^>= 0.12.3.0
, wai
...
...
test/Test/Core/Text/Corpus/TSV.hs
0 → 100644
View file @
05e98637
module
Test.Core.Text.Corpus.TSV
(
tests
)
where
import
Gargantext.Core.Text.Corpus.Parsers.TSV
import
Test.QuickCheck
import
Test.QuickCheck.Instances
()
import
Data.ByteString.Lazy.UTF8
as
BLU
import
Data.ByteString.Lazy
as
BL
import
Data.Char
(
ord
)
import
Data.Text
as
DT
(
Text
,
pack
,
null
,
elem
)
import
Data.Text.Encoding
as
DT
import
Prelude
import
Test.Tasty
import
Test.Tasty.HUnit
import
Test.Tasty.QuickCheck
hiding
(
Positive
,
Negative
)
-- | All QuickCheck properties of this module, grouped under "TSV Parser".
tests :: TestTree
tests =
  testGroup "TSV Parser" (Prelude.map (uncurry testProperty) namedProperties)
  where
    -- Label / property pairs, in the order they are reported.
    namedProperties :: [(String, Property)]
    namedProperties =
      [ ("Parses 'Valid Text'", testValidText)
      , ("Parses 'Valid Number'", testValidNumber)
      , ("Parses 'Error Per Line On A File'", testTestErrorPerLine)
      , ("Parses 'Correct File'", testTestCorrectFile)
      , ("Parses 'Correct File With New Line In Last Header'", testTestCorrectFileWithNewLine)
      ]
-- | The separator string for a 'Delimiter', as a lazy 'BL.ByteString'.
delimiterBS :: Delimiter -> BL.ByteString
delimiterBS d =
  BLU.fromString $ case d of
    Tab   -> "\t"
    Comma -> ","
    Line  -> "\n"
-- | A randomly generated input file: the delimiter used between fields and
-- the rows it contains.
data File = File
  { fDelimiter :: Delimiter       -- ^ separator placed between the fields of a row
  , allCorpus  :: [RandomCorpus]  -- ^ one entry per data row
  }
  deriving (Show)
-- | One randomly generated row: four text fields and three numeric date
-- components.
data RandomCorpus = RandomCorpus
  { abstract :: Text
  , title    :: Text
  , authors  :: Text
  , source   :: Text
  , day      :: Int
  , month    :: Int
  , years    :: Int
  }
  deriving (Show)
-- | Files are generated through 'arbitrarySizedFile', driven by QuickCheck's
-- size parameter.
instance Arbitrary File where
  arbitrary = sized arbitrarySizedFile
-- | Generate a 'File' with exactly the requested number of rows.
--
-- The delimiter is picked from 'Tab' and 'Comma' only.
arbitrarySizedFile :: Int -> Gen File
arbitrarySizedFile rowCount = do
  chosenDelimiter <- elements [Tab, Comma]
  rows            <- vectorOf rowCount generateRandomCorpus
  pure (File chosenDelimiter rows)
-- | The separator string for a 'Delimiter', as strict 'Text'.
delimiterToText :: Delimiter -> Text
delimiterToText d =
  DT.pack $ case d of
    Tab   -> "\t"
    Comma -> ","
    Line  -> "\n"
-- | Encode a strict 'Text' as UTF-8 and expose the bytes as a lazy
-- 'BL.ByteString'.
textToBL :: Text -> BL.ByteString
textToBL = BL.fromStrict . DT.encodeUtf8
-- | Generate a row whose text fields and numeric fields are all unconstrained
-- arbitrary values.
generateRandomCorpus :: Gen RandomCorpus
generateRandomCorpus =
  RandomCorpus
    <$> generateString  -- abstract
    <*> generateString  -- title
    <*> generateString  -- authors
    <*> generateString  -- source
    <*> generateNumber  -- day
    <*> generateNumber  -- month
    <*> generateNumber  -- years
-- | Generator for 'File', taken from its 'Arbitrary' instance.
generateFile :: Gen File
generateFile = arbitrary
-- | Generator for arbitrary 'Text' values.
generateString :: Gen Text
generateString = arbitrary
-- | Generator for arbitrary 'Int' values (may be zero or negative).
generateNumber :: Gen Int
generateNumber = arbitrary
-- TODO add delimiter
-- NOTE(review): the delimiter is already taken as the second argument, so the
-- TODO above may be stale — confirm before removing it.

-- | Render one 'RandomCorpus' as a single delimited, UTF-8 encoded line.
--
-- Every field is wrapped in double quotes and the fields are written in the
-- order day, month, year, authors, title, source, abstract.  Text fields are
-- inserted verbatim: quotes or separators they contain are not escaped.
createLineFromCorpus :: RandomCorpus -> Delimiter -> BL.ByteString
createLineFromCorpus corpus delD =
  textToBL (joinCols columns)
  where
    dquote = DT.pack "\""
    quoted body = dquote <> body <> dquote
    quotedShow n = quoted (DT.pack (show n))
    sepText = delimiterToText delD

    columns =
      [ quotedShow (day corpus)
      , quotedShow (month corpus)
      , quotedShow (years corpus)
      , quoted (authors corpus)
      , quoted (title corpus)
      , quoted (source corpus)
      , quoted (abstract corpus)
      ]

    -- Concatenate the columns with the separator between neighbours.
    joinCols []       = mempty
    joinCols (c : cs) = c <> foldMap (sepText <>) cs
-- | Render one 'RandomCorpus' as a delimited, UTF-8 encoded record whose last
-- field spans two lines.
--
-- Identical in layout to a plain row (day, month, year, authors, title,
-- source, abstract, each wrapped in double quotes), except that the abstract
-- column contains the abstract text twice, separated by a newline, inside a
-- single pair of quotes.
createLineFromCorpusWithNewLine :: RandomCorpus -> Delimiter -> BL.ByteString
createLineFromCorpusWithNewLine corpus delD =
  textToBL (joinCols columns)
  where
    dquote = DT.pack "\""
    quoted body = dquote <> body <> dquote
    quotedShow n = quoted (DT.pack (show n))
    sepText = delimiterToText delD

    -- The abstract repeated on two lines within one quoted field.
    twoLineAbstract = abstract corpus <> DT.pack "\n" <> abstract corpus

    columns =
      [ quotedShow (day corpus)
      , quotedShow (month corpus)
      , quotedShow (years corpus)
      , quoted (authors corpus)
      , quoted (title corpus)
      , quoted (source corpus)
      , quoted twoLineAbstract
      ]

    -- Concatenate the columns with the separator between neighbours.
    joinCols []       = mempty
    joinCols (c : cs) = c <> foldMap (sepText <>) cs
-- | Render a 'File' as bytes: a header row followed by one plain row per
-- corpus entry (see 'createLineFromCorpus').
--
-- Fix: this function previously rendered its rows with
-- 'createLineFromCorpusWithNewLine' while 'createFileWithNewLine' used the
-- plain renderer, i.e. the two were swapped relative to their names.
createFile :: File -> BL.ByteString
createFile = assembleFile createLineFromCorpus

-- | Render a 'File' as bytes: a header row followed by one row per corpus
-- entry, each with a two-line abstract field
-- (see 'createLineFromCorpusWithNewLine').
createFileWithNewLine :: File -> BL.ByteString
createFileWithNewLine = assembleFile createLineFromCorpusWithNewLine

-- | Shared layout of both file builders: header row, a newline, then the rows
-- produced by the given renderer joined with newlines.  No trailing newline
-- is added after the last row.
assembleFile
  :: (RandomCorpus -> Delimiter -> BL.ByteString)  -- ^ row renderer
  -> File
  -> BL.ByteString
assembleFile renderRow file =
  headerRow <> newline <> body
  where
    sep     = fDelimiter file
    newline = BLU.fromString "\n"

    headerRow =
      BL.intercalate (delimiterBS sep) $
        Prelude.map
          BLU.fromString
          [ "Publication Day"
          , "Publication Month"
          , "Publication Year"
          , "Authors"
          , "Title"
          , "Source"
          , "Abstract"
          ]

    body =
      BL.intercalate newline $
        Prelude.map (\row -> renderRow row sep) (allCorpus file)
-- | Decide whether a parser rejection of the given row is considered
-- legitimate by the properties in this module.
--
-- Returns 'True' when any of the following holds, checked in this order:
--
--   * the rendered line contains more than 3 separator bytes;
--   * one of abstract, title, authors or source is empty;
--   * one of abstract, title, authors or source contains a double quote.
--
-- NOTE(review): 'createLineFromCorpus' always joins seven fields, so the
-- rendered line presumably holds at least six separator bytes and the first
-- condition would then always be 'True', making this predicate constant.
-- Confirm against 'delimiter' and decide whether the threshold should be 6.
validRandomCorpus :: RandomCorpus -> Delimiter -> Bool
validRandomCorpus tsv del =
     separatorCount > 3
  || Prelude.any DT.null textFields
  || Prelude.any (DT.elem '\"') textFields
  where
    separatorCount =
      BL.length (BL.filter (== delimiter del) (createLineFromCorpus tsv del))
    textFields = [abstract tsv, title tsv, authors tsv, source tsv]
-- | Property for 'validNumber': the decimal rendering of a generated 'Int'
-- must yield 'Right', unless the encoded input is empty or the number is
-- below 1, in which case a 'Left' is tolerated.
--
-- The literal @1@ is passed as the third argument of 'validNumber'
-- (presumably a line number — confirm against the parser).
testValidNumber :: Property
testValidNumber = forAll generateNumber numberIsAcceptable
  where
    numberIsAcceptable :: Int -> Bool
    numberIsAcceptable n =
      case validNumber encoded rendered 1 of
        Right _ -> True
        Left _  -> (BL.empty == encoded) || (n < 1)
      where
        rendered = DT.pack (show n)
        encoded  = textToBL rendered
-- | Property for 'validTextField': a generated text must yield 'Right',
-- unless its UTF-8 encoding is empty or contains a double-quote byte, in
-- which case a 'Left' is tolerated.
--
-- The literal @1@ is passed as the third argument of 'validTextField'
-- (presumably a line number — confirm against the parser).
testValidText :: Property
testValidText = forAll generateString textIsAcceptable
  where
    textIsAcceptable :: Text -> Bool
    textIsAcceptable txt =
      case validTextField encoded txt 1 of
        Right _ -> True
        Left _  ->
          (BL.empty == encoded)
            || (fromIntegral (ord '\"') `BL.elem` encoded)
      where
        encoded = textToBL txt
-- | Property for 'testErrorPerLine' on a single tab-separated row.
--
-- The row is rendered with 'createLineFromCorpus', split on the tab
-- delimiter byte and handed to 'testErrorPerLine' together with the fixed
-- column names.  A 'Right' passes; a 'Left' passes only when
-- 'validRandomCorpus' accepts the rejection.
testTestErrorPerLine :: Property
testTestErrorPerLine = forAll generateRandomCorpus rowIsAcceptable
  where
    tabDelim = Tab

    columnNames =
      Prelude.map
        DT.pack
        [ "Publication Day"
        , "Publication Month"
        , "Publication Year"
        , "Authors"
        , "Title"
        , "Source"
        , "Abstract"
        ]

    rowIsAcceptable :: RandomCorpus -> Bool
    rowIsAcceptable row =
      case testErrorPerLine cells tabDelim columnNames 1 of
        Right _ -> True
        Left _  -> validRandomCorpus row tabDelim
      where
        cells =
          BL.splitWith (== delimiter tabDelim) (createLineFromCorpus row tabDelim)
--check :
-- True Del
-- False Error
-- | Property for 'testCorrectFile' on a file built by 'createFile'.
--
-- On 'Right' the returned delimiter must equal the one the file was generated
-- with.  On 'Left' every row of the file is re-checked individually: it is
-- rendered with 'createLineFromCorpus', split on the delimiter byte and
-- passed to 'testErrorPerLine'; a row-level 'Left' is tolerated only when
-- 'validRandomCorpus' accepts it.
testTestCorrectFile :: Property
testTestCorrectFile = forAll generateFile fileIsAcceptable
  where
    columnNames =
      Prelude.map
        DT.pack
        [ "Publication Day"
        , "Publication Month"
        , "Publication Year"
        , "Authors"
        , "Title"
        , "Source"
        , "Abstract"
        ]

    fileIsAcceptable :: File -> Bool
    fileIsAcceptable file =
      case testCorrectFile (createFile file) of
        Right detected -> detected == fileDelim
        Left _         -> Prelude.all rowIsAcceptable (allCorpus file)
      where
        fileDelim = fDelimiter file

        rowIsAcceptable row =
          let cells =
                BL.splitWith (== delimiter fileDelim) $
                  createLineFromCorpus row fileDelim
          in case testErrorPerLine cells fileDelim columnNames 1 of
               Right _ -> True
               Left _  -> validRandomCorpus row fileDelim
-- | Property for 'testCorrectFile' on a file built by 'createFileWithNewLine'.
--
-- Any 'Right' passes (the detected delimiter is not compared).  On 'Left'
-- every row of the file is re-checked individually: it is rendered with
-- 'createLineFromCorpus', split on the delimiter byte and passed to
-- 'testErrorPerLine'; a row-level 'Left' is tolerated only when
-- 'validRandomCorpus' accepts it.
testTestCorrectFileWithNewLine :: Property
testTestCorrectFileWithNewLine = forAll generateFile fileIsAcceptable
  where
    columnNames =
      Prelude.map
        DT.pack
        [ "Publication Day"
        , "Publication Month"
        , "Publication Year"
        , "Authors"
        , "Title"
        , "Source"
        , "Abstract"
        ]

    fileIsAcceptable :: File -> Bool
    fileIsAcceptable file =
      case testCorrectFile (createFileWithNewLine file) of
        Right _ -> True
        Left _  -> Prelude.all rowIsAcceptable (allCorpus file)
      where
        fileDelim = fDelimiter file

        rowIsAcceptable row =
          let cells =
                BL.splitWith (== delimiter fileDelim) $
                  createLineFromCorpus row fileDelim
          in case testErrorPerLine cells fileDelim columnNames 1 of
               Right _ -> True
               Left _  -> validRandomCorpus row fileDelim
test/drivers/tasty/Main.hs
View file @
05e98637
...
...
@@ -13,6 +13,7 @@ module Main where
import
Gargantext.Prelude
import
qualified
Test.Core.Text.Corpus.Query
as
CorpusQuery
import
qualified
Test.Core.Text.Corpus.TSV
as
TSVParser
import
qualified
Test.Core.Utils
as
Utils
import
qualified
Test.Graph.Clustering
as
Graph
import
qualified
Test.Ngrams.NLP
as
NLP
...
...
@@ -48,6 +49,7 @@ main = do
,
jobsSpec
,
NgramsQuery
.
tests
,
CorpusQuery
.
tests
,
TSVParser
.
tests
,
JSON
.
tests
,
Errors
.
tests
,
similaritySpec
...
...
Przemyslaw Kaminski
@cgenie
mentioned in commit
5660aec0
·
Oct 08, 2024
mentioned in commit
5660aec0
mentioned in commit 5660aec07ec5a0a0a5468f440092c1a8f57a864e
Toggle commit list
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment