Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
145
Issues
145
List
Board
Labels
Milestones
Merge Requests
5
Merge Requests
5
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
946448be
Commit
946448be
authored
Mar 13, 2025
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Plain Diff
Merge remote-tracking branch 'origin/433-dev-ignore-tsv-errors' into dev
parents
3bf63d30
a5f299c5
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
36 additions
and
25 deletions
+36
-25
Import.hs
bin/gargantext-cli/CLI/Import.hs
+1
-0
update-project-dependencies
bin/update-project-dependencies
+2
-2
gargantext.cabal
gargantext.cabal
+1
-0
List.hs
src/Gargantext/API/Ngrams/List.hs
+21
-20
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+1
-0
433-utf-encoding-issue.tsv
test-data/ngrams/433-utf-encoding-issue.tsv
+0
-0
Notifications.hs
test/Test/API/Notifications.hs
+4
-2
Setup.hs
test/Test/API/Setup.hs
+1
-1
UpdateList.hs
test/Test/API/UpdateList.hs
+5
-0
No files found.
bin/gargantext-cli/CLI/Import.hs
View file @
946448be
...
@@ -75,6 +75,7 @@ import_p = fmap CCMD_import $ ImportArgs
...
@@ -75,6 +75,7 @@ import_p = fmap CCMD_import $ ImportArgs
<*>
(
option
str
(
long
"user"
)
)
<*>
(
option
str
(
long
"user"
)
)
<*>
(
option
str
(
long
"name"
)
)
<*>
(
option
str
(
long
"name"
)
)
<*>
settings_p
<*>
settings_p
-- <*> (fmap Limit ( option auto ( long "limit" <> metavar "INT" <> help "The limit for the query") ))
<*>
(
option
str
(
long
"corpus-path"
<>
help
"Path to corpus file"
)
)
<*>
(
option
str
(
long
"corpus-path"
<>
help
"Path to corpus file"
)
)
function_p
::
String
->
Either
String
ImportFunction
function_p
::
String
->
Either
String
ImportFunction
...
...
bin/update-project-dependencies
View file @
946448be
...
@@ -18,8 +18,8 @@ fi
...
@@ -18,8 +18,8 @@ fi
# with the `sha256sum` result calculated on the `cabal.project` and
# with the `sha256sum` result calculated on the `cabal.project` and
# `cabal.project.freeze`. This ensures the files stay deterministic so that CI
# `cabal.project.freeze`. This ensures the files stay deterministic so that CI
# cache can kick in.
# cache can kick in.
expected_cabal_project_hash
=
"37352ca36ca5e69d9945da11439be4c3909297b338242855fa588dffdf1ba02b"
expected_cabal_project_hash
=
"37352ca36ca5e69d9945da11439be4c3909297b338242855fa588dffdf1ba02b"
expected_cabal_project_freeze_hash
=
"cd52143d3a9d285360b59c6371d3e258552c1bc115bd612024db3de1f7593ff7"
expected_cabal_project_freeze_hash
=
"cd52143d3a9d285360b59c6371d3e258552c1bc115bd612024db3de1f7593ff7"
cabal
--store-dir
=
$STORE_DIR
v2-build
--dry-run
cabal
--store-dir
=
$STORE_DIR
v2-build
--dry-run
...
...
gargantext.cabal
View file @
946448be
...
@@ -32,6 +32,7 @@ data-files:
...
@@ -32,6 +32,7 @@ data-files:
ekg-assets/bootstrap-1.4.0.min.css
ekg-assets/bootstrap-1.4.0.min.css
ekg-assets/chart_line_add.png
ekg-assets/chart_line_add.png
ekg-assets/cross.png
ekg-assets/cross.png
test-data/ngrams/433-utf-encoding-issue.tsv
test-data/ngrams/GarganText_DocsList-nodeId-177.json
test-data/ngrams/GarganText_DocsList-nodeId-177.json
test-data/ngrams/GarganText_NgramsTerms-nodeId-177.json
test-data/ngrams/GarganText_NgramsTerms-nodeId-177.json
test-data/ngrams/GarganText_NgramsTerms-QuantumComputing.json
test-data/ngrams/GarganText_NgramsTerms-QuantumComputing.json
...
...
src/Gargantext/API/Ngrams/List.hs
View file @
946448be
...
@@ -178,35 +178,36 @@ ngramsListFromTSVData tsvData = case decodeTsv of
...
@@ -178,35 +178,36 @@ ngramsListFromTSVData tsvData = case decodeTsv of
binaryData
=
BSL
.
fromStrict
$
P
.
encodeUtf8
tsvData
binaryData
=
BSL
.
fromStrict
$
P
.
encodeUtf8
tsvData
decodeTsv
::
Either
Prelude
.
String
(
Vector
NgramsTableMap
)
decodeTsv
::
Either
Prelude
.
String
(
Vector
NgramsTableMap
)
decodeTsv
=
Tsv
.
decodeWithP
tsvToNgramsTableMap
decodeTsv
=
Vec
.
catMaybes
<$>
(
Tsv
.
defaultDecodeOptions
{
Tsv
.
decDelimiter
=
fromIntegral
(
P
.
ord
'
\t
'
)
})
Tsv
.
decodeWithP
tsvToNgramsTableMap
Tsv
.
HasHeader
(
Tsv
.
defaultDecodeOptions
{
Tsv
.
decDelimiter
=
fromIntegral
(
P
.
ord
'
\t
'
)
})
binaryData
Tsv
.
HasHeader
binaryData
-- | Converts a plain TSV 'Record' into an NgramsTableMap
-- | Converts a plain TSV 'Record' into an NgramsTableMap
tsvToNgramsTableMap
::
Tsv
.
Record
->
Tsv
.
Parser
NgramsTableMap
tsvToNgramsTableMap
::
Tsv
.
Record
->
Tsv
.
Parser
(
Maybe
NgramsTableMap
)
tsvToNgramsTableMap
record
=
case
Vec
.
toList
record
of
tsvToNgramsTableMap
record
=
case
Vec
.
toList
record
of
(
map
P
.
decodeUtf8
->
[
status
,
label
,
forms
])
(
map
P
.
decodeUtf8
->
[
status
,
label
,
forms
])
->
pure
$
conv
status
label
forms
->
pure
$
Just
$
conv
status
label
forms
_
->
Prelude
.
fail
"tsvToNgramsTableMap failed"
-- WARNING: This silently ignores errors (#433)
_
->
pure
Nothing
where
where
conv
::
Text
->
Text
->
Text
->
NgramsTableMap
conv
::
Text
->
Text
->
Text
->
NgramsTableMap
conv
status
label
forms
=
Map
.
singleton
(
NgramsTerm
label
)
conv
status
label
forms
=
Map
.
singleton
(
NgramsTerm
label
)
$
NgramsRepoElement
{
_nre_size
=
1
$
NgramsRepoElement
{
_nre_size
=
1
,
_nre_list
=
case
status
==
"map"
of
,
_nre_list
=
case
status
of
True
->
MapTerm
"map"
->
MapTerm
False
->
case
status
==
"main"
of
"main"
->
CandidateTerm
True
->
CandidateTerm
_
->
StopTerm
False
->
StopTerm
,
_nre_root
=
Nothing
,
_nre_root
=
Nothing
,
_nre_parent
=
Nothing
,
_nre_parent
=
Nothing
,
_nre_children
=
MSet
,
_nre_children
=
MSet
$
Map
.
fromList
$
Map
.
fromList
$
map
(
\
form
->
(
NgramsTerm
form
,
()
))
$
map
(
\
form
->
(
NgramsTerm
form
,
()
))
$
filter
(
\
w
->
w
/=
""
&&
w
/=
label
)
$
filter
(
\
w
->
w
/=
""
&&
w
/=
label
)
$
splitOn
"|&|"
forms
$
splitOn
"|&|"
forms
}
}
------------------------------------------------------------------------
------------------------------------------------------------------------
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
946448be
...
@@ -418,6 +418,7 @@ insertMasterDocs ncs c lang hs = do
...
@@ -418,6 +418,7 @@ insertMasterDocs ncs c lang hs = do
-- add documents to the corpus (create node_node link)
-- add documents to the corpus (create node_node link)
-- this will enable global database monitoring
-- this will enable global database monitoring
-- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
mapNgramsDocs'
::
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
(
Map
NodeId
(
TermsWeight
,
TermsCount
)))
mapNgramsDocs'
::
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
(
Map
NodeId
(
TermsWeight
,
TermsCount
)))
<-
mapNodeIdNgrams
<-
mapNodeIdNgrams
<$>
documentIdWithNgrams
<$>
documentIdWithNgrams
...
...
test-data/ngrams/433-utf-encoding-issue.tsv
0 → 100644
View file @
946448be
File added
test/Test/API/Notifications.hs
View file @
946448be
...
@@ -23,8 +23,8 @@ import Control.Concurrent.STM.TChan
...
@@ -23,8 +23,8 @@ import Control.Concurrent.STM.TChan
import
Control.Concurrent.STM.TSem
(
newTSem
,
signalTSem
,
TSem
)
import
Control.Concurrent.STM.TSem
(
newTSem
,
signalTSem
,
TSem
)
import
Control.Concurrent
(
threadDelay
)
import
Control.Concurrent
(
threadDelay
)
import
Control.Lens
((
^.
))
import
Control.Lens
((
^.
))
import
Control.Monad.STM
(
atomically
)
import
Control.Monad
(
void
)
import
Control.Monad
(
void
)
import
Control.Monad.STM
(
atomically
)
import
Data.Aeson
qualified
as
Aeson
import
Data.Aeson
qualified
as
Aeson
import
Data.ByteString
qualified
as
BS
import
Data.ByteString
qualified
as
BS
import
Data.Text.Encoding
qualified
as
TE
import
Data.Text.Encoding
qualified
as
TE
...
@@ -47,12 +47,14 @@ import Test.Database.Types (test_config)
...
@@ -47,12 +47,14 @@ import Test.Database.Types (test_config)
import
Test.Hspec
import
Test.Hspec
import
Test.Hspec.Wai.Internal
(
withApplication
)
import
Test.Hspec.Wai.Internal
(
withApplication
)
import
Test.Instances
()
import
Test.Instances
()
import
Test.Utils
(
waitForTChanValue
,
waitForTSem
)
import
Test.Utils.Notifications
(
withAsyncWSConnection
)
import
Test.Utils.Notifications
(
withAsyncWSConnection
)
import
Test.Utils
(
protected
,
w
aitForTChanValue
,
waitForTSem
,
w
ithValidLoginA
)
import
Test.Utils
(
protected
,
withValidLoginA
)
import
Text.RawString.QQ
(
r
)
import
Text.RawString.QQ
(
r
)
tests
::
Spec
tests
::
Spec
tests
=
sequential
$
around
withTestDBAndPort
$
do
tests
=
sequential
$
around
withTestDBAndPort
$
do
describe
"Notifications"
$
do
describe
"Notifications"
$
do
...
...
test/Test/API/Setup.hs
View file @
946448be
...
@@ -190,7 +190,7 @@ dbEnvSetup ctx = do
...
@@ -190,7 +190,7 @@ dbEnvSetup ctx = do
_
<-
createAliceAndBob
testEnv
_
<-
createAliceAndBob
testEnv
pure
ctx
pure
ctx
-- show the full exceptions during testing, rather than shallowing them under a generic
-- show the full exceptions during testing, rather than shallowing them under a generic
-- "Something went wrong".
-- "Something went wrong".
showDebugExceptions
::
SomeException
->
Wai
.
Response
showDebugExceptions
::
SomeException
->
Wai
.
Response
...
...
test/Test/API/UpdateList.hs
View file @
946448be
...
@@ -213,6 +213,11 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
...
@@ -213,6 +213,11 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
,
(
NgramsTerm
"brazorf"
,
NgramsRepoElement
1
StopTerm
Nothing
Nothing
(
MSet
mempty
))
,
(
NgramsTerm
"brazorf"
,
NgramsRepoElement
1
StopTerm
Nothing
Nothing
(
MSet
mempty
))
])])
])])
it
"parses TSV with UTF-8 issues"
$
\
(
SpecContext
_testEnv
_port
_app
_
)
->
do
simpleNgrams
<-
liftIO
(
TIO
.
readFile
=<<
getDataFileName
"test-data/ngrams/433-utf-encoding-issue.tsv"
)
-- we don't care about the output, only that the file was parsed without errors (this file is garbage)
ngramsListFromTSVData
simpleNgrams
`
shouldSatisfy
`
isRight
it
"allows uploading a CSV ngrams file"
$
\
(
SpecContext
testEnv
port
app
_
)
->
do
it
"allows uploading a CSV ngrams file"
$
\
(
SpecContext
testEnv
port
app
_
)
->
do
cId
<-
newCorpusForUser
testEnv
"alice"
cId
<-
newCorpusForUser
testEnv
"alice"
let
log_cfg
=
(
test_config
testEnv
)
^.
gc_logging
let
log_cfg
=
(
test_config
testEnv
)
^.
gc_logging
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment