Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
145
Issues
145
List
Board
Labels
Milestones
Merge Requests
6
Merge Requests
6
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
946448be
Commit
946448be
authored
Mar 13, 2025
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Plain Diff
Merge remote-tracking branch 'origin/433-dev-ignore-tsv-errors' into dev
parents
3bf63d30
a5f299c5
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
36 additions
and
25 deletions
+36
-25
Import.hs
bin/gargantext-cli/CLI/Import.hs
+1
-0
update-project-dependencies
bin/update-project-dependencies
+2
-2
gargantext.cabal
gargantext.cabal
+1
-0
List.hs
src/Gargantext/API/Ngrams/List.hs
+21
-20
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+1
-0
433-utf-encoding-issue.tsv
test-data/ngrams/433-utf-encoding-issue.tsv
+0
-0
Notifications.hs
test/Test/API/Notifications.hs
+4
-2
Setup.hs
test/Test/API/Setup.hs
+1
-1
UpdateList.hs
test/Test/API/UpdateList.hs
+5
-0
No files found.
bin/gargantext-cli/CLI/Import.hs
View file @
946448be
...
...
@@ -75,6 +75,7 @@ import_p = fmap CCMD_import $ ImportArgs
<*>
(
option
str
(
long
"user"
)
)
<*>
(
option
str
(
long
"name"
)
)
<*>
settings_p
-- <*> (fmap Limit ( option auto ( long "limit" <> metavar "INT" <> help "The limit for the query") ))
<*>
(
option
str
(
long
"corpus-path"
<>
help
"Path to corpus file"
)
)
function_p
::
String
->
Either
String
ImportFunction
...
...
bin/update-project-dependencies
View file @
946448be
...
...
@@ -18,8 +18,8 @@ fi
# with the `sha256sum` result calculated on the `cabal.project` and
# `cabal.project.freeze`. This ensures the files stay deterministic so that CI
# cache can kick in.
expected_cabal_project_hash
=
"37352ca36ca5e69d9945da11439be4c3909297b338242855fa588dffdf1ba02b"
expected_cabal_project_freeze_hash
=
"cd52143d3a9d285360b59c6371d3e258552c1bc115bd612024db3de1f7593ff7"
expected_cabal_project_hash
=
"37352ca36ca5e69d9945da11439be4c3909297b338242855fa588dffdf1ba02b"
expected_cabal_project_freeze_hash
=
"cd52143d3a9d285360b59c6371d3e258552c1bc115bd612024db3de1f7593ff7"
cabal
--store-dir
=
$STORE_DIR
v2-build
--dry-run
...
...
gargantext.cabal
View file @
946448be
...
...
@@ -32,6 +32,7 @@ data-files:
ekg-assets/bootstrap-1.4.0.min.css
ekg-assets/chart_line_add.png
ekg-assets/cross.png
test-data/ngrams/433-utf-encoding-issue.tsv
test-data/ngrams/GarganText_DocsList-nodeId-177.json
test-data/ngrams/GarganText_NgramsTerms-nodeId-177.json
test-data/ngrams/GarganText_NgramsTerms-QuantumComputing.json
...
...
src/Gargantext/API/Ngrams/List.hs
View file @
946448be
...
...
@@ -178,35 +178,36 @@ ngramsListFromTSVData tsvData = case decodeTsv of
binaryData
=
BSL
.
fromStrict
$
P
.
encodeUtf8
tsvData
decodeTsv
::
Either
Prelude
.
String
(
Vector
NgramsTableMap
)
decodeTsv
=
Tsv
.
decodeWithP
tsvToNgramsTableMap
(
Tsv
.
defaultDecodeOptions
{
Tsv
.
decDelimiter
=
fromIntegral
(
P
.
ord
'
\t
'
)
})
Tsv
.
HasHeader
binaryData
decodeTsv
=
Vec
.
catMaybes
<$>
Tsv
.
decodeWithP
tsvToNgramsTableMap
(
Tsv
.
defaultDecodeOptions
{
Tsv
.
decDelimiter
=
fromIntegral
(
P
.
ord
'
\t
'
)
})
Tsv
.
HasHeader
binaryData
-- | Converts a plain TSV 'Record' into an NgramsTableMap
tsvToNgramsTableMap
::
Tsv
.
Record
->
Tsv
.
Parser
NgramsTableMap
tsvToNgramsTableMap
::
Tsv
.
Record
->
Tsv
.
Parser
(
Maybe
NgramsTableMap
)
tsvToNgramsTableMap
record
=
case
Vec
.
toList
record
of
(
map
P
.
decodeUtf8
->
[
status
,
label
,
forms
])
->
pure
$
conv
status
label
forms
_
->
Prelude
.
fail
"tsvToNgramsTableMap failed"
->
pure
$
Just
$
conv
status
label
forms
-- WARNING: This silently ignores errors (#433)
_
->
pure
Nothing
where
conv
::
Text
->
Text
->
Text
->
NgramsTableMap
conv
status
label
forms
=
Map
.
singleton
(
NgramsTerm
label
)
$
NgramsRepoElement
{
_nre_size
=
1
,
_nre_list
=
case
status
==
"map"
of
True
->
MapTerm
False
->
case
status
==
"main"
of
True
->
CandidateTerm
False
->
StopTerm
,
_nre_root
=
Nothing
,
_nre_parent
=
Nothing
,
_nre_children
=
MSet
$
Map
.
fromList
$
map
(
\
form
->
(
NgramsTerm
form
,
()
))
$
filter
(
\
w
->
w
/=
""
&&
w
/=
label
)
$
splitOn
"|&|"
forms
}
,
_nre_list
=
case
status
of
"map"
->
MapTerm
"main"
->
CandidateTerm
_
->
StopTerm
,
_nre_root
=
Nothing
,
_nre_parent
=
Nothing
,
_nre_children
=
MSet
$
Map
.
fromList
$
map
(
\
form
->
(
NgramsTerm
form
,
()
))
$
filter
(
\
w
->
w
/=
""
&&
w
/=
label
)
$
splitOn
"|&|"
forms
}
------------------------------------------------------------------------
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
946448be
...
...
@@ -418,6 +418,7 @@ insertMasterDocs ncs c lang hs = do
-- add documents to the corpus (create node_node link)
-- this will enable global database monitoring
-- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
mapNgramsDocs'
::
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
(
Map
NodeId
(
TermsWeight
,
TermsCount
)))
<-
mapNodeIdNgrams
<$>
documentIdWithNgrams
...
...
test-data/ngrams/433-utf-encoding-issue.tsv
0 → 100644
View file @
946448be
File added
test/Test/API/Notifications.hs
View file @
946448be
...
...
@@ -23,8 +23,8 @@ import Control.Concurrent.STM.TChan
import
Control.Concurrent.STM.TSem
(
newTSem
,
signalTSem
,
TSem
)
import
Control.Concurrent
(
threadDelay
)
import
Control.Lens
((
^.
))
import
Control.Monad.STM
(
atomically
)
import
Control.Monad
(
void
)
import
Control.Monad.STM
(
atomically
)
import
Data.Aeson
qualified
as
Aeson
import
Data.ByteString
qualified
as
BS
import
Data.Text.Encoding
qualified
as
TE
...
...
@@ -47,12 +47,14 @@ import Test.Database.Types (test_config)
import
Test.Hspec
import
Test.Hspec.Wai.Internal
(
withApplication
)
import
Test.Instances
()
import
Test.Utils
(
waitForTChanValue
,
waitForTSem
)
import
Test.Utils.Notifications
(
withAsyncWSConnection
)
import
Test.Utils
(
protected
,
w
aitForTChanValue
,
waitForTSem
,
w
ithValidLoginA
)
import
Test.Utils
(
protected
,
withValidLoginA
)
import
Text.RawString.QQ
(
r
)
tests
::
Spec
tests
=
sequential
$
around
withTestDBAndPort
$
do
describe
"Notifications"
$
do
...
...
test/Test/API/Setup.hs
View file @
946448be
...
...
@@ -190,7 +190,7 @@ dbEnvSetup ctx = do
_
<-
createAliceAndBob
testEnv
pure
ctx
-- show the full exceptions during testing, rather than shallowing them under a generic
-- "Something went wrong".
showDebugExceptions
::
SomeException
->
Wai
.
Response
...
...
test/Test/API/UpdateList.hs
View file @
946448be
...
...
@@ -213,6 +213,11 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
,
(
NgramsTerm
"brazorf"
,
NgramsRepoElement
1
StopTerm
Nothing
Nothing
(
MSet
mempty
))
])])
it
"parses TSV with UTF-8 issues"
$
\
(
SpecContext
_testEnv
_port
_app
_
)
->
do
simpleNgrams
<-
liftIO
(
TIO
.
readFile
=<<
getDataFileName
"test-data/ngrams/433-utf-encoding-issue.tsv"
)
-- we don't care about the output, only that the file was parsed without errors (this file is garbage)
ngramsListFromTSVData
simpleNgrams
`
shouldSatisfy
`
isRight
it
"allows uploading a CSV ngrams file"
$
\
(
SpecContext
testEnv
port
app
_
)
->
do
cId
<-
newCorpusForUser
testEnv
"alice"
let
log_cfg
=
(
test_config
testEnv
)
^.
gc_logging
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment