Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
159
Issues
159
List
Board
Labels
Milestones
Merge Requests
7
Merge Requests
7
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
ff0a77cd
Commit
ff0a77cd
authored
Jun 07, 2024
by
Loïc Chapron
1
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Change CSV into TSV
parent
49946361
Pipeline
#6205
failed with stages
in 162 minutes and 52 seconds
Changes
36
Pipelines
1
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
36 changed files
with
791 additions
and
204 deletions
+791
-204
CleanCsvCorpus.hs
bin/gargantext-cli/CleanCsvCorpus.hs
+11
-11
Main.hs
bin/gargantext-cli/Main.hs
+5
-5
Main.hs
bin/gargantext-import/Main.hs
+5
-5
Main.hs
bin/gargantext-phylo-profile/Main.hs
+1
-1
Main.hs
bin/gargantext-phylo/Main.hs
+3
-3
Common.hs
bin/gargantext-phylo/Phylo/Common.hs
+22
-22
gargantext.cabal
gargantext.cabal
+2
-2
EnvTypes.hs
src/Gargantext/API/Admin/EnvTypes.hs
+1
-1
List.hs
src/Gargantext/API/Ngrams/List.hs
+30
-30
Types.hs
src/Gargantext/API/Ngrams/Types.hs
+7
-7
New.hs
src/Gargantext/API/Node/Corpus/New.hs
+2
-2
Types.hs
src/Gargantext/API/Node/Corpus/New/Types.hs
+5
-5
Export.hs
src/Gargantext/API/Node/Document/Export.hs
+4
-4
FrameCalcUpload.hs
src/Gargantext/API/Node/FrameCalcUpload.hs
+1
-1
Document.hs
src/Gargantext/API/Routes/Named/Document.hs
+1
-1
List.hs
src/Gargantext/API/Routes/Named/List.hs
+4
-4
Private.hs
src/Gargantext/API/Routes/Named/Private.hs
+1
-1
Private.hs
src/Gargantext/API/Server/Named/Private.hs
+1
-1
IMT.hs
src/Gargantext/Core/Ext/IMT.hs
+6
-6
IMTUser.hs
src/Gargantext/Core/Ext/IMTUser.hs
+12
-12
Convert.hs
src/Gargantext/Core/Text/Convert.hs
+5
-5
Isidore.hs
src/Gargantext/Core/Text/Corpus/API/Isidore.hs
+5
-5
Parsers.hs
src/Gargantext/Core/Text/Corpus/Parsers.hs
+11
-11
Book.hs
src/Gargantext/Core/Text/Corpus/Parsers/Book.hs
+4
-4
Json2Csv.hs
src/Gargantext/Core/Text/Corpus/Parsers/Json2Csv.hs
+16
-16
TSV.hs
src/Gargantext/Core/Text/Corpus/Parsers/TSV.hs
+489
-0
TSV.hs
src/Gargantext/Core/Text/List/Formats/TSV.hs
+98
-0
Search.hs
src/Gargantext/Core/Text/Search.hs
+5
-5
Upload.hs
src/Gargantext/Core/Text/Upload.hs
+14
-14
Phylo.hs
src/Gargantext/Core/Viz/Phylo.hs
+5
-5
PhyloMaker.hs
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
+1
-1
Frame.hs
src/Gargantext/Database/Admin/Types/Hyperdata/Frame.hs
+2
-2
Jobs.hs
src/Gargantext/Utils/Jobs.hs
+1
-1
Servant.hs
src/Gargantext/Utils/Servant.hs
+6
-6
UpdateList.hs
test/Test/API/UpdateList.hs
+2
-2
Phylo.hs
test/Test/Offline/Phylo.hs
+3
-3
No files found.
bin/gargantext-cli/CleanCsvCorpus.hs
View file @
ff0a77cd
...
...
@@ -19,15 +19,15 @@ import Data.Set qualified as S
import
Data.Text
(
pack
)
import
Data.Vector
(
Vector
)
import
Data.Vector
qualified
as
V
import
Gargantext.Core.Text.Corpus.Parsers.
CSV
qualified
as
C
SV
import
Gargantext.Core.Text.Corpus.Parsers.
TSV
qualified
as
T
SV
import
Gargantext.Core.Text.Search
import
Gargantext.Prelude
------------------------------------------------------------------------
type
Query
=
[
S
.
Term
]
filterDocs
::
[
DocId
]
->
Vector
CSV
.
CsvGargV3
->
Vector
CSV
.
C
svGargV3
filterDocs
docIds
=
V
.
filter
(
\
doc
->
S
.
member
(
C
SV
.
d_docId
doc
)
$
S
.
fromList
docIds
)
filterDocs
::
[
DocId
]
->
Vector
TSV
.
TsvGargV3
->
Vector
TSV
.
T
svGargV3
filterDocs
docIds
=
V
.
filter
(
\
doc
->
S
.
member
(
T
SV
.
d_docId
doc
)
$
S
.
fromList
docIds
)
main
::
IO
()
...
...
@@ -37,19 +37,19 @@ main = do
--let q = ["water", "scarcity", "morocco", "shortage","flood"]
let
q
=
[
"gratuit"
,
"gratuité"
,
"culture"
,
"culturel"
]
eDocs
<-
CSV
.
readC
SVFile
rPath
eDocs
<-
TSV
.
readT
SVFile
rPath
case
eDocs
of
Right
(
h
,
c
svDocs
)
->
do
putStrLn
(
"Number of documents before:"
<>
show
(
V
.
length
c
svDocs
)
::
Text
)
putStrLn
(
"Mean size of docs:"
<>
show
(
CSV
.
docsSize
c
svDocs
)
::
Text
)
Right
(
h
,
t
svDocs
)
->
do
putStrLn
(
"Number of documents before:"
<>
show
(
V
.
length
t
svDocs
)
::
Text
)
putStrLn
(
"Mean size of docs:"
<>
show
(
TSV
.
docsSize
t
svDocs
)
::
Text
)
let
docs
=
CSV
.
toDocs
c
svDocs
let
docs
=
TSV
.
toDocs
t
svDocs
let
engine
=
S
.
insertDocs
docs
initialDocSearchEngine
let
docIds
=
S
.
query
engine
(
map
pack
q
)
let
docs'
=
C
SV
.
fromDocs
$
filterDocs
docIds
(
V
.
fromList
docs
)
let
docs'
=
T
SV
.
fromDocs
$
filterDocs
docIds
(
V
.
fromList
docs
)
putStrLn
(
"Number of documents after:"
<>
show
(
V
.
length
docs'
)
::
Text
)
putStrLn
(
"Mean size of docs:"
<>
show
(
C
SV
.
docsSize
docs'
)
::
Text
)
putStrLn
(
"Mean size of docs:"
<>
show
(
T
SV
.
docsSize
docs'
)
::
Text
)
C
SV
.
writeFile
wPath
(
h
,
docs'
)
T
SV
.
writeFile
wPath
(
h
,
docs'
)
Left
e
->
panicTrace
$
"Error: "
<>
e
bin/gargantext-cli/Main.hs
View file @
ff0a77cd
...
...
@@ -26,8 +26,8 @@ import Data.Text.Lazy.Encoding qualified as TLE
import
Data.Tuple.Extra
(
both
)
import
Data.Vector
qualified
as
DV
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.Corpus.Parsers.
CSV
(
readCSVFile
,
csv_title
,
csv_abstract
,
c
sv_publication_year
,
fromMIntOrDec
,
defaultYear
)
import
Gargantext.Core.Text.List.Formats.
CSV
(
c
svMapTermList
)
import
Gargantext.Core.Text.Corpus.Parsers.
TSV
(
readTSVFile
,
tsv_title
,
tsv_abstract
,
t
sv_publication_year
,
fromMIntOrDec
,
defaultYear
)
import
Gargantext.Core.Text.List.Formats.
TSV
(
t
svMapTermList
)
import
Gargantext.Core.Text.Metrics.Count
(
coocOnContexts
,
Coocs
)
import
Gargantext.Core.Text.Terms.WithList
(
Patterns
,
buildPatterns
,
extractTermsWithList
)
import
Gargantext.Prelude
...
...
@@ -72,16 +72,16 @@ main = do
[
corpusFile
,
termListFile
,
outputFile
]
<-
getArgs
--corpus :: IO (DM.IntMap [[Text]])
eCorpusFile
<-
read
C
SVFile
corpusFile
eCorpusFile
<-
read
T
SVFile
corpusFile
case
eCorpusFile
of
Right
cf
->
do
let
corpus
=
DM
.
fromListWith
(
<>
)
.
DV
.
toList
.
DV
.
map
(
\
n
->
(
fromMIntOrDec
defaultYear
$
csv_publication_year
n
,
[(
csv_title
n
)
<>
" "
<>
(
c
sv_abstract
n
)]))
.
DV
.
map
(
\
n
->
(
fromMIntOrDec
defaultYear
$
tsv_publication_year
n
,
[(
tsv_title
n
)
<>
" "
<>
(
t
sv_abstract
n
)]))
.
snd
$
cf
-- termListMap :: [Text]
termList
<-
c
svMapTermList
termListFile
termList
<-
t
svMapTermList
termListFile
putText
$
show
$
length
termList
...
...
bin/gargantext-import/Main.hs
View file @
ff0a77cd
...
...
@@ -41,7 +41,7 @@ main = do
let
--tt = (Unsupervised EN 6 0 Nothing)
tt
=
(
Multi
EN
)
format
=
CsvGargV3
-- C
svHal --WOS
format
=
TsvGargV3
-- T
svHal --WOS
limit'
=
case
(
readMaybe
limit
::
Maybe
Limit
)
of
Nothing
->
panicTrace
$
"Cannot read limit: "
<>
(
Text
.
pack
limit
)
Just
l
->
l
...
...
@@ -49,8 +49,8 @@ main = do
mkCorpusUser
=
MkCorpusUserNormalCorpusName
(
UserName
$
cs
user
)
(
cs
name
::
Text
)
corpus
=
flowCorpusFile
mkCorpusUser
limit'
tt
format
Plain
corpusPath
Nothing
DevJobHandle
corpus
C
svHal
::
forall
m
.
(
FlowCmdM
DevEnv
BackendInternalError
m
,
MonadJobStatus
m
,
JobHandle
m
~
DevJobHandle
)
=>
m
CorpusId
corpus
CsvHal
=
flowCorpusFile
mkCorpusUser
limit'
tt
C
svHal
Plain
corpusPath
Nothing
DevJobHandle
corpus
T
svHal
::
forall
m
.
(
FlowCmdM
DevEnv
BackendInternalError
m
,
MonadJobStatus
m
,
JobHandle
m
~
DevJobHandle
)
=>
m
CorpusId
corpus
TsvHal
=
flowCorpusFile
mkCorpusUser
limit'
tt
T
svHal
Plain
corpusPath
Nothing
DevJobHandle
annuaire
::
forall
m
.
(
FlowCmdM
DevEnv
BackendInternalError
m
,
MonadJobStatus
m
,
JobHandle
m
~
DevJobHandle
)
=>
m
CorpusId
annuaire
=
flowAnnuaire
(
MkCorpusUserNormalCorpusName
(
UserName
$
cs
user
)
"Annuaire"
)
(
Multi
EN
)
corpusPath
DevJobHandle
...
...
@@ -71,8 +71,8 @@ main = do
then
runCmdGargDev
env
corpus
else
pure
0
--(cs "false")
_
<-
if
fun
==
"corpus
C
svHal"
then
runCmdGargDev
env
corpus
C
svHal
_
<-
if
fun
==
"corpus
T
svHal"
then
runCmdGargDev
env
corpus
T
svHal
else
pure
0
--(cs "false")
_
<-
if
fun
==
"annuaire"
...
...
bin/gargantext-phylo-profile/Main.hs
View file @
ff0a77cd
...
...
@@ -26,7 +26,7 @@ phyloConfig outdir = PhyloConfig {
corpusPath
=
"corpus.csv"
,
listPath
=
"list.csv"
,
outputPath
=
outdir
,
corpusParser
=
Csv
{
_c
sv_limit
=
150000
}
,
corpusParser
=
Tsv
{
_t
sv_limit
=
150000
}
,
listParser
=
V4
,
phyloName
=
"phylo_profile_test"
,
phyloScale
=
2
...
...
bin/gargantext-phylo/Main.hs
View file @
ff0a77cd
...
...
@@ -31,9 +31,9 @@ import Gargantext.API.Ngrams.Prelude (toTermList)
import
Gargantext.API.Ngrams.Types
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.Corpus.Parsers
(
FileFormat
(
..
),
FileType
(
..
),
parseFile
)
import
Gargantext.Core.Text.Corpus.Parsers.
CSV
(
csv_title
,
csv_abstract
,
csv_publication_year
,
csv_publication_month
,
csv_publication_day
,
csv'_source
,
csv'_title
,
csv'_abstract
,
csv'_publication_year
,
csv'_publication_month
,
csv'_publication_day
,
c
sv'_weight
)
import
Gargantext.Core.Text.Corpus.Parsers.
CSV
qualified
as
C
sv
import
Gargantext.Core.Text.List.Formats.
CSV
(
c
svMapTermList
)
import
Gargantext.Core.Text.Corpus.Parsers.
TSV
(
tsv_title
,
tsv_abstract
,
tsv_publication_year
,
tsv_publication_month
,
tsv_publication_day
,
tsv'_source
,
tsv'_title
,
tsv'_abstract
,
tsv'_publication_year
,
tsv'_publication_month
,
tsv'_publication_day
,
t
sv'_weight
)
import
Gargantext.Core.Text.Corpus.Parsers.
TSV
qualified
as
T
sv
import
Gargantext.Core.Text.List.Formats.
TSV
(
t
svMapTermList
)
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Text.Terms.WithList
(
Patterns
,
buildPatterns
,
extractTermsWithList
)
import
Gargantext.Core.Types.Main
(
ListType
(
..
))
...
...
bin/gargantext-phylo/Phylo/Common.hs
View file @
ff0a77cd
...
...
@@ -15,9 +15,9 @@ import Gargantext.API.Ngrams.Prelude (toTermList)
import
Gargantext.API.Ngrams.Types
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.Corpus.Parsers
(
FileFormat
(
..
),
FileType
(
..
),
parseFile
)
import
Gargantext.Core.Text.Corpus.Parsers.
CSV
(
csv_title
,
csv_abstract
,
csv_publication_year
,
csv_publication_month
,
csv_publication_day
,
csv'_source
,
csv'_title
,
csv'_abstract
,
csv'_publication_year
,
csv'_publication_month
,
csv'_publication_day
,
c
sv'_weight
)
import
Gargantext.Core.Text.Corpus.Parsers.
CSV
qualified
as
C
sv
import
Gargantext.Core.Text.List.Formats.
CSV
(
c
svMapTermList
)
import
Gargantext.Core.Text.Corpus.Parsers.
TSV
(
tsv_title
,
tsv_abstract
,
tsv_publication_year
,
tsv_publication_month
,
tsv_publication_day
,
tsv'_source
,
tsv'_title
,
tsv'_abstract
,
tsv'_publication_year
,
tsv'_publication_month
,
tsv'_publication_day
,
t
sv'_weight
)
import
Gargantext.Core.Text.Corpus.Parsers.
TSV
qualified
as
T
sv
import
Gargantext.Core.Text.List.Formats.
TSV
(
t
svMapTermList
)
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Text.Terms.WithList
(
Patterns
,
buildPatterns
,
extractTermsWithList
)
import
Gargantext.Core.Types.Main
(
ListType
(
..
))
...
...
@@ -76,29 +76,29 @@ wosToDocs limit patterns time path = do
<$>
fromRight
[]
<$>
parseFile
WOS
Plain
(
path
<>
file
)
)
files
-- To transform a
C
sv file into a list of Document
c
svToDocs
::
CorpusParser
->
Patterns
->
TimeUnit
->
FilePath
->
IO
[
Document
]
c
svToDocs
parser
patterns
time
path
=
-- To transform a
T
sv file into a list of Document
t
svToDocs
::
CorpusParser
->
Patterns
->
TimeUnit
->
FilePath
->
IO
[
Document
]
t
svToDocs
parser
patterns
time
path
=
case
parser
of
Wos
_
->
Prelude
.
error
"
c
svToDocs: unimplemented"
C
sv
limit
->
Vector
.
toList
Wos
_
->
Prelude
.
error
"
t
svToDocs: unimplemented"
T
sv
limit
->
Vector
.
toList
<$>
Vector
.
take
limit
<$>
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
Csv
.
fromMIntOrDec
Csv
.
defaultYear
$
csv_publication_year
row
)
(
fromMaybe
Csv
.
defaultMonth
$
csv_publication_month
row
)
(
fromMaybe
Csv
.
defaultDay
$
c
sv_publication_day
row
)
time
)
(
toPhyloDate'
(
Csv
.
fromMIntOrDec
Csv
.
defaultYear
$
csv_publication_year
row
)
(
fromMaybe
Csv
.
defaultMonth
$
csv_publication_month
row
)
(
fromMaybe
Csv
.
defaultDay
$
c
sv_publication_day
row
)
time
)
(
termsInText
patterns
$
(
csv_title
row
)
<>
" "
<>
(
c
sv_abstract
row
))
<$>
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
Tsv
.
fromMIntOrDec
Tsv
.
defaultYear
$
tsv_publication_year
row
)
(
fromMaybe
Tsv
.
defaultMonth
$
tsv_publication_month
row
)
(
fromMaybe
Tsv
.
defaultDay
$
t
sv_publication_day
row
)
time
)
(
toPhyloDate'
(
Tsv
.
fromMIntOrDec
Tsv
.
defaultYear
$
tsv_publication_year
row
)
(
fromMaybe
Tsv
.
defaultMonth
$
tsv_publication_month
row
)
(
fromMaybe
Tsv
.
defaultDay
$
t
sv_publication_day
row
)
time
)
(
termsInText
patterns
$
(
tsv_title
row
)
<>
" "
<>
(
t
sv_abstract
row
))
Nothing
[]
time
)
<$>
snd
<$>
either
(
\
err
->
panicTrace
$
"
CSV error"
<>
(
show
err
))
identity
<$>
Csv
.
readC
SVFile
path
C
sv'
limit
->
Vector
.
toList
)
<$>
snd
<$>
either
(
\
err
->
panicTrace
$
"
TSV error"
<>
(
show
err
))
identity
<$>
Tsv
.
readT
SVFile
path
T
sv'
limit
->
Vector
.
toList
<$>
Vector
.
take
limit
<$>
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
csv'_publication_year
row
)
(
csv'_publication_month
row
)
(
c
sv'_publication_day
row
)
time
)
(
toPhyloDate'
(
csv'_publication_year
row
)
(
csv'_publication_month
row
)
(
c
sv'_publication_day
row
)
time
)
(
termsInText
patterns
$
(
csv'_title
row
)
<>
" "
<>
(
c
sv'_abstract
row
))
(
Just
$
c
sv'_weight
row
)
(
map
(
T
.
strip
.
pack
)
$
splitOn
";"
(
unpack
$
(
c
sv'_source
row
)))
<$>
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
tsv'_publication_year
row
)
(
tsv'_publication_month
row
)
(
t
sv'_publication_day
row
)
time
)
(
toPhyloDate'
(
tsv'_publication_year
row
)
(
tsv'_publication_month
row
)
(
t
sv'_publication_day
row
)
time
)
(
termsInText
patterns
$
(
tsv'_title
row
)
<>
" "
<>
(
t
sv'_abstract
row
))
(
Just
$
t
sv'_weight
row
)
(
map
(
T
.
strip
.
pack
)
$
splitOn
";"
(
unpack
$
(
t
sv'_source
row
)))
time
)
<$>
snd
<$>
Csv
.
readWeightedC
sv
path
)
<$>
snd
<$>
Tsv
.
readWeightedT
sv
path
-- To parse a file into a list of Document
...
...
@@ -107,8 +107,8 @@ fileToDocsAdvanced parser path time lst = do
let
patterns
=
buildPatterns
lst
case
parser
of
Wos
limit
->
wosToDocs
limit
patterns
time
path
Csv
_
->
c
svToDocs
parser
patterns
time
path
Csv'
_
->
c
svToDocs
parser
patterns
time
path
Tsv
_
->
t
svToDocs
parser
patterns
time
path
Tsv'
_
->
t
svToDocs
parser
patterns
time
path
fileToDocsDefault
::
CorpusParser
->
FilePath
->
[
TimeUnit
]
->
TermList
->
IO
[
Document
]
fileToDocsDefault
parser
path
timeUnits
lst
=
...
...
@@ -140,7 +140,7 @@ readListV4 path = do
fileToList
::
ListParser
->
FilePath
->
IO
TermList
fileToList
parser
path
=
case
parser
of
V3
->
c
svMapTermList
path
V3
->
t
svMapTermList
path
V4
->
fromJust
<$>
toTermList
MapTerm
NgramsTerms
<$>
readListV4
path
...
...
gargantext.cabal
View file @
ff0a77cd
...
...
@@ -181,13 +181,13 @@ library
Gargantext.Core.Text.Corpus.API.OpenAlex
Gargantext.Core.Text.Corpus.API.Pubmed
Gargantext.Core.Text.Corpus.Parsers
Gargantext.Core.Text.Corpus.Parsers.
C
SV
Gargantext.Core.Text.Corpus.Parsers.
T
SV
Gargantext.Core.Text.Corpus.Parsers.Date
Gargantext.Core.Text.Corpus.Parsers.Date.Parsec
Gargantext.Core.Text.Corpus.Query
Gargantext.Core.Text.List
Gargantext.Core.Text.List.Group.WithStem
Gargantext.Core.Text.List.Formats.
C
SV
Gargantext.Core.Text.List.Formats.
T
SV
Gargantext.Core.Text.Metrics
Gargantext.Core.Text.Metrics.CharByChar
Gargantext.Core.Text.Metrics.Count
...
...
src/Gargantext/API/Admin/EnvTypes.hs
View file @
ff0a77cd
...
...
@@ -91,7 +91,7 @@ data GargJob
=
TableNgramsJob
|
ForgotPasswordJob
|
UpdateNgramsListJobJSON
|
UpdateNgramsListJob
C
SV
|
UpdateNgramsListJob
T
SV
|
AddContactJob
|
AddFileJob
|
DocumentFromWriteNodeJob
...
...
src/Gargantext/API/Ngrams/List.hs
View file @
ff0a77cd
...
...
@@ -18,7 +18,7 @@ module Gargantext.API.Ngrams.List
where
import
Data.ByteString.Lazy
qualified
as
BSL
import
Data.Csv
qualified
as
C
sv
import
Data.Csv
qualified
as
T
sv
import
Data.HashMap.Strict
(
HashMap
)
import
Data.HashMap.Strict
qualified
as
HashMap
import
Data.Map.Strict
(
toList
)
...
...
@@ -57,7 +57,7 @@ getAPI :: Named.GETAPI (AsServerT (GargM Env BackendInternalError))
getAPI
=
Named
.
GETAPI
$
\
listId
->
Named
.
ListEndpoints
{
listJSONEp
=
getJson
listId
,
listJSONZipEp
=
getJsonZip
listId
,
list
CSVEp
=
getC
sv
listId
,
list
TSVEp
=
getT
sv
listId
}
--
...
...
@@ -91,17 +91,17 @@ getJsonZip lId = do
]
)
nlz
get
C
sv
::
HasNodeStory
env
err
m
get
T
sv
::
HasNodeStory
env
err
m
=>
ListId
->
m
(
Headers
'[
H
eader
"Content-Disposition"
Text
]
NgramsTableMap
)
get
C
sv
lId
=
do
get
T
sv
lId
=
do
lst
<-
getNgramsList
lId
pure
$
case
Map
.
lookup
NgramsTerms
lst
of
Nothing
->
noHeader
Map
.
empty
Just
(
Versioned
{
_v_data
})
->
addHeader
(
concat
[
"attachment; filename=GarganText_NgramsList-"
,
pack
$
show
(
_NodeId
lId
)
,
".
c
sv"
,
".
t
sv"
]
)
_v_data
...
...
@@ -140,43 +140,43 @@ postAsyncJSON l ngramsList jobHandle = do
--
--
C
SV API
--
T
SV API
--
csvAPI
::
Named
.
C
SVAPI
(
AsServerT
(
GargM
Env
BackendInternalError
))
csvAPI
=
c
svPostAsync
tsvAPI
::
Named
.
T
SVAPI
(
AsServerT
(
GargM
Env
BackendInternalError
))
tsvAPI
=
t
svPostAsync
------------------------------------------------------------------------
csvPostAsync
::
Named
.
C
SVAPI
(
AsServerT
(
GargM
Env
BackendInternalError
))
csvPostAsync
=
Named
.
C
SVAPI
$
\
lId
->
AsyncJobs
$
serveJobsAPI
UpdateNgramsListJob
C
SV
$
\
jHandle
f
->
do
case
ngramsListFrom
C
SVData
(
_wtf_data
f
)
of
tsvPostAsync
::
Named
.
T
SVAPI
(
AsServerT
(
GargM
Env
BackendInternalError
))
tsvPostAsync
=
Named
.
T
SVAPI
$
\
lId
->
AsyncJobs
$
serveJobsAPI
UpdateNgramsListJob
T
SV
$
\
jHandle
f
->
do
case
ngramsListFrom
T
SVData
(
_wtf_data
f
)
of
Left
err
->
serverError
$
err500
{
errReasonPhrase
=
err
}
Right
ngramsList
->
postAsyncJSON
lId
ngramsList
jHandle
-- | Tries converting a text file into an 'NgramList', so that we can reuse the
-- existing JSON endpoint for the
C
SV upload.
ngramsListFrom
C
SVData
::
Text
->
Either
Prelude
.
String
NgramsList
ngramsListFrom
CSVData
csvData
=
case
decodeC
sv
of
-- /NOTE/ The legacy
C
SV data only supports terms in imports and exports, so this is
-- existing JSON endpoint for the
T
SV upload.
ngramsListFrom
T
SVData
::
Text
->
Either
Prelude
.
String
NgramsList
ngramsListFrom
TSVData
tsvData
=
case
decodeT
sv
of
-- /NOTE/ The legacy
T
SV data only supports terms in imports and exports, so this is
-- all we care about.
Left
err
->
Left
$
"Invalid
CSV found in ngramsListFromC
SVData: "
<>
err
Left
err
->
Left
$
"Invalid
TSV found in ngramsListFromT
SVData: "
<>
err
Right
terms
->
pure
$
Map
.
fromList
[
(
NgramsTerms
,
Versioned
0
$
mconcat
.
Vec
.
toList
$
terms
)
]
where
binaryData
=
BSL
.
fromStrict
$
P
.
encodeUtf8
c
svData
binaryData
=
BSL
.
fromStrict
$
P
.
encodeUtf8
t
svData
decode
C
sv
::
Either
Prelude
.
String
(
Vector
NgramsTableMap
)
decode
Csv
=
Csv
.
decodeWithP
c
svToNgramsTableMap
(
Csv
.
defaultDecodeOptions
{
C
sv
.
decDelimiter
=
fromIntegral
(
P
.
ord
'
\t
'
)
})
C
sv
.
HasHeader
decode
T
sv
::
Either
Prelude
.
String
(
Vector
NgramsTableMap
)
decode
Tsv
=
Tsv
.
decodeWithP
t
svToNgramsTableMap
(
Tsv
.
defaultDecodeOptions
{
T
sv
.
decDelimiter
=
fromIntegral
(
P
.
ord
'
\t
'
)
})
T
sv
.
HasHeader
binaryData
-- | Converts a plain
C
SV 'Record' into an NgramsTableMap
csvToNgramsTableMap
::
Csv
.
Record
->
C
sv
.
Parser
NgramsTableMap
c
svToNgramsTableMap
record
=
case
Vec
.
toList
record
of
-- | Converts a plain
T
SV 'Record' into an NgramsTableMap
tsvToNgramsTableMap
::
Tsv
.
Record
->
T
sv
.
Parser
NgramsTableMap
t
svToNgramsTableMap
record
=
case
Vec
.
toList
record
of
(
map
P
.
decodeUtf8
->
[
status
,
label
,
forms
])
->
pure
$
conv
status
label
forms
_
->
Prelude
.
fail
"
c
svToNgramsTableMap failed"
_
->
Prelude
.
fail
"
t
svToNgramsTableMap failed"
where
conv
::
Text
->
Text
->
Text
->
NgramsTableMap
...
...
@@ -199,12 +199,12 @@ csvToNgramsTableMap record = case Vec.toList record of
------------------------------------------------------------------------
-- | This is for debugging the
C
SV parser in the REPL
import
C
svFile
::
forall
env
err
m
.
(
HasNodeStory
env
err
m
,
HasServerError
err
,
MonadJobStatus
m
)
-- | This is for debugging the
T
SV parser in the REPL
import
T
svFile
::
forall
env
err
m
.
(
HasNodeStory
env
err
m
,
HasServerError
err
,
MonadJobStatus
m
)
=>
ListId
->
P
.
FilePath
->
m
()
import
C
svFile
lId
fp
=
do
import
T
svFile
lId
fp
=
do
contents
<-
liftBase
$
P
.
readFile
fp
case
ngramsListFrom
C
SVData
contents
of
case
ngramsListFrom
T
SVData
contents
of
Left
err
->
serverError
$
err500
{
errReasonPhrase
=
err
}
Right
ngramsList
->
postAsyncJSON
lId
ngramsList
(
noJobHandle
@
m
Proxy
)
...
...
src/Gargantext/API/Ngrams/Types.hs
View file @
ff0a77cd
...
...
@@ -24,7 +24,7 @@ import Control.Category ((>>>))
import
Control.Lens
(
makePrisms
,
Iso
'
,
iso
,
from
,
(
.=
),
(
?=
),
(
#
),
to
,
folded
,
{-withIndex, ifolded,-}
view
,
use
,
(
^?
),
(
%~
),
(
%=
),
at
,
_Just
,
Each
(
..
),
itraverse_
,
both
,
forOf_
,
(
?~
),
over
)
import
Data.Aeson
hiding
((
.=
))
import
Data.Csv
(
defaultEncodeOptions
,
encodeByNameWith
,
header
,
namedRecord
,
EncodeOptions
(
..
),
NamedRecord
,
Quoting
(
QuoteNone
))
import
Data.Csv
qualified
as
C
sv
import
Data.Csv
qualified
as
T
sv
import
Data.HashMap.Strict.InsOrd
qualified
as
InsOrdHashMap
import
Data.List
qualified
as
List
import
Data.Map.Strict
qualified
as
Map
...
...
@@ -47,7 +47,7 @@ import Gargantext.Database.Admin.Types.Node (ContextId)
import
Gargantext.Database.Prelude
(
fromField'
,
HasConnectionPool
,
HasConfig
,
CmdM
'
)
import
Gargantext.Prelude
hiding
(
IsString
,
hash
,
from
,
replace
,
to
)
import
Gargantext.Prelude.Crypto.Hash
(
IsHashable
(
..
))
import
Gargantext.Utils.Servant
(
C
SV
,
ZIP
)
import
Gargantext.Utils.Servant
(
T
SV
,
ZIP
)
import
Gargantext.Utils.Zip
(
zipContentsPure
)
import
Servant
(
FromHttpApiData
(
parseUrlPiece
),
ToHttpApiData
(
toUrlPiece
),
Required
,
Strict
,
QueryParam
'
,
MimeRender
(
..
),
MimeUnrender
(
..
))
import
Servant.Job.Utils
(
jsonOptions
)
...
...
@@ -299,10 +299,10 @@ data NgramsSearchQuery = NgramsSearchQuery
type
NgramsTableMap
=
Map
NgramsTerm
NgramsRepoElement
--
C
SV:
--
T
SV:
-- header: status\tlabel\tforms
-- item: map\taccountability\taccounting|&|accoutns|&|account
instance
MimeRender
C
SV
NgramsTableMap
where
instance
MimeRender
T
SV
NgramsTableMap
where
-- mimeRender _ _val = encode ([] :: [(Text, Text)])
mimeRender
_
val
=
encodeByNameWith
encOptions
(
header
[
"status"
,
"label"
,
"forms"
])
$
fn
<$>
Map
.
toList
val
where
...
...
@@ -310,9 +310,9 @@ instance MimeRender CSV NgramsTableMap where
,
encQuoting
=
QuoteNone
}
fn
::
(
NgramsTerm
,
NgramsRepoElement
)
->
NamedRecord
fn
(
NgramsTerm
term
,
NgramsRepoElement
{
_nre_list
,
_nre_children
})
=
namedRecord
[
"status"
C
sv
..=
toText
_nre_list
,
"label"
C
sv
..=
term
,
"forms"
C
sv
..=
T
.
intercalate
"|&|"
(
unNgramsTerm
<$>
mSetToList
_nre_children
)]
namedRecord
[
"status"
T
sv
..=
toText
_nre_list
,
"label"
T
sv
..=
term
,
"forms"
T
sv
..=
T
.
intercalate
"|&|"
(
unNgramsTerm
<$>
mSetToList
_nre_children
)]
toText
::
ListType
->
Text
toText
CandidateTerm
=
"candidate"
toText
MapTerm
=
"map"
...
...
src/Gargantext/API/Node/Corpus/New.hs
View file @
ff0a77cd
...
...
@@ -240,8 +240,8 @@ addToCorpusWithForm user cid nwf jobHandle = do
let
limit
=
fromIntegral
limit'
::
Integer
let
parseC
=
case
(
nwf
^.
wf_filetype
)
of
CSV
->
Parser
.
parseFormatC
Parser
.
C
svGargV3
CSV_HAL
->
Parser
.
parseFormatC
Parser
.
C
svHal
TSV
->
Parser
.
parseFormatC
Parser
.
T
svGargV3
TSV_HAL
->
Parser
.
parseFormatC
Parser
.
T
svHal
Iramuteq
->
Parser
.
parseFormatC
Parser
.
Iramuteq
Istex
->
Parser
.
parseFormatC
Parser
.
Istex
JSON
->
Parser
.
parseFormatC
Parser
.
JSON
...
...
src/Gargantext/API/Node/Corpus/New/Types.hs
View file @
ff0a77cd
...
...
@@ -8,8 +8,8 @@ import Servant
import
Test.QuickCheck
(
elements
)
import
Test.QuickCheck.Arbitrary
(
Arbitrary
,
arbitrary
)
data
FileType
=
C
SV
|
C
SV_HAL
data
FileType
=
T
SV
|
T
SV_HAL
|
Istex
|
PresseRIS
|
WOS
...
...
@@ -17,14 +17,14 @@ data FileType = CSV
|
JSON
deriving
(
Eq
,
Show
,
Generic
)
instance
ToSchema
FileType
instance
Arbitrary
FileType
where
arbitrary
=
elements
[
C
SV
,
PresseRIS
]
instance
Arbitrary
FileType
where
arbitrary
=
elements
[
T
SV
,
PresseRIS
]
instance
ToParamSchema
FileType
instance
FromJSON
FileType
instance
ToJSON
FileType
instance
FromHttpApiData
FileType
where
parseUrlPiece
"
CSV"
=
pure
C
SV
parseUrlPiece
"
CSV_HAL"
=
pure
C
SV_HAL
parseUrlPiece
"
TSV"
=
pure
T
SV
parseUrlPiece
"
TSV_HAL"
=
pure
T
SV_HAL
parseUrlPiece
"Istex"
=
pure
Istex
parseUrlPiece
"PresseRis"
=
pure
PresseRIS
parseUrlPiece
"WOS"
=
pure
WOS
...
...
src/Gargantext/API/Node/Document/Export.hs
View file @
ff0a77cd
...
...
@@ -41,7 +41,7 @@ documentExportAPI :: IsGargServer env err m
documentExportAPI
userNodeId
dId
=
Named
.
DocumentExportAPI
$
Named
.
DocumentExportEndpoints
{
exportJSONEp
=
getDocumentsJSON
userNodeId
dId
,
exportJSONZipEp
=
getDocumentsJSONZip
userNodeId
dId
,
export
CSVEp
=
getDocumentsC
SV
userNodeId
dId
,
export
TSVEp
=
getDocumentsT
SV
userNodeId
dId
}
--------------------------------------------------
...
...
@@ -98,16 +98,16 @@ getDocumentsJSONZip userNodeId pId = do
,
dezFileName
dexpz
,
".zip"
])
dexpz
getDocuments
C
SV
::
NodeId
getDocuments
T
SV
::
NodeId
-- ^ The Node ID of the target user
->
DocId
->
GargNoServer
(
Headers
'[
H
eader
"Content-Disposition"
T
.
Text
]
T
.
Text
)
-- [Document]
getDocuments
C
SV
userNodeId
pId
=
do
getDocuments
T
SV
userNodeId
pId
=
do
dJSON
<-
getDocumentsJSON
userNodeId
pId
let
DocumentExport
{
_de_documents
}
=
getResponse
dJSON
let
ret
=
TE
.
decodeUtf8
$
BSC
.
toStrict
$
encodeDefaultOrderedByNameWith
(
defaultEncodeOptions
{
encDelimiter
=
fromIntegral
$
ord
'
\t
'
,
encQuoting
=
QuoteAll
})
_de_documents
pure
$
addHeader
(
T
.
concat
[
"attachment; filename=GarganText_DocsList-"
,
T
.
pack
$
show
pId
,
".
c
sv"
])
,
".
t
sv"
])
ret
src/Gargantext/API/Node/FrameCalcUpload.hs
View file @
ff0a77cd
...
...
@@ -89,7 +89,7 @@ frameCalcUploadAsync authenticatedUser nId (FrameCalcUpload _wf_lang _wf_selecti
-- FIXME(adn) Audit this conversion.
addToCorpusWithForm
(
RootId
userNodeId
)
cId
(
NewWithForm
{
_wf_filetype
=
C
SV
(
NewWithForm
{
_wf_filetype
=
T
SV
,
_wf_fileformat
=
Plain
,
_wf_data
=
body
,
_wf_lang
...
...
src/Gargantext/API/Routes/Named/Document.hs
View file @
ff0a77cd
...
...
@@ -31,7 +31,7 @@ data DocumentExportEndpoints mode = DocumentExportEndpoints
:>
Get
'[
J
SON
]
(
Headers
'[
S
ervant
.
Header
"Content-Disposition"
Text
]
DocumentExport
)
,
exportJSONZipEp
::
mode
:-
"json.zip"
:>
Get
'[
Z
IP
]
(
Headers
'[
S
ervant
.
Header
"Content-Disposition"
Text
]
DocumentExportZIP
)
,
export
C
SVEp
::
mode
:-
"csv"
,
export
T
SVEp
::
mode
:-
"csv"
:>
Get
'[
P
lainText
]
(
Headers
'[
S
ervant
.
Header
"Content-Disposition"
Text
]
Text
)
}
deriving
Generic
...
...
src/Gargantext/API/Routes/Named/List.hs
View file @
ff0a77cd
...
...
@@ -5,7 +5,7 @@ module Gargantext.API.Routes.Named.List (
GETAPI
(
..
)
,
ListEndpoints
(
..
)
,
JSONAPI
(
..
)
,
C
SVAPI
(
..
)
,
T
SVAPI
(
..
)
)
where
import
Data.Text
(
Text
)
...
...
@@ -30,7 +30,7 @@ data ListEndpoints mode = ListEndpoints
:>
Get
'[
J
SON
,
HTML
]
(
Headers
'[
H
eader
"Content-Disposition"
Text
]
NgramsList
)
,
listJSONZipEp
::
mode
:-
"json.zip"
:>
Get
'[
G
US
.
ZIP
]
(
Headers
'[
H
eader
"Content-Disposition"
Text
]
NgramsListZIP
)
,
list
CSVEp
::
mode
:-
"csv"
:>
Get
'[
G
US
.
C
SV
]
(
Headers
'[
H
eader
"Content-Disposition"
Text
]
NgramsTableMap
)
,
list
TSVEp
::
mode
:-
"csv"
:>
Get
'[
G
US
.
T
SV
]
(
Headers
'[
H
eader
"Content-Disposition"
Text
]
NgramsTableMap
)
}
deriving
Generic
newtype
JSONAPI
mode
=
JSONAPI
...
...
@@ -44,8 +44,8 @@ newtype JSONAPI mode = JSONAPI
}
deriving
Generic
newtype
CSVAPI
mode
=
C
SVAPI
{
updateList
CSVEp
::
mode
:-
Summary
"Update List (legacy v3 C
SV)"
newtype
TSVAPI
mode
=
T
SVAPI
{
updateList
TSVEp
::
mode
:-
Summary
"Update List (legacy v3 T
SV)"
:>
"lists"
:>
Capture
"listId"
ListId
:>
"csv"
...
...
src/Gargantext/API/Routes/Named/Private.hs
View file @
ff0a77cd
...
...
@@ -91,7 +91,7 @@ data GargPrivateAPI' mode = GargPrivateAPI'
,
addWithQueryEp
::
mode
:-
NamedRoutes
AddWithQuery
,
listGetAPI
::
mode
:-
NamedRoutes
List
.
GETAPI
,
listJsonAPI
::
mode
:-
NamedRoutes
List
.
JSONAPI
,
list
CsvAPI
::
mode
:-
NamedRoutes
List
.
C
SVAPI
,
list
TsvAPI
::
mode
:-
NamedRoutes
List
.
T
SVAPI
,
shareUrlEp
::
mode
:-
"shareurl"
:>
NamedRoutes
ShareURL
}
deriving
Generic
...
...
src/Gargantext/API/Server/Named/Private.hs
View file @
ff0a77cd
...
...
@@ -63,6 +63,6 @@ serverPrivateGargAPI' authenticatedUser@(AuthenticatedUser userNodeId userId)
,
addWithQueryEp
=
addCorpusWithQuery
(
RootId
userNodeId
)
,
listGetAPI
=
List
.
getAPI
,
listJsonAPI
=
List
.
jsonAPI
,
list
CsvAPI
=
List
.
c
svAPI
,
list
TsvAPI
=
List
.
t
svAPI
,
shareUrlEp
=
shareURL
}
src/Gargantext/Core/Ext/IMT.hs
View file @
ff0a77cd
...
...
@@ -19,7 +19,7 @@ import Data.Morpheus.Types (GQLType)
import
Data.Set
qualified
as
S
import
Data.Text
(
splitOn
)
import
Data.Vector
qualified
as
DV
import
Gargantext.Core.Text.Corpus.Parsers.
CSV
as
C
SV
import
Gargantext.Core.Text.Corpus.Parsers.
TSV
as
T
SV
import
Gargantext.Core.Text.Metrics.Utils
as
Utils
import
Gargantext.Prelude
...
...
@@ -104,9 +104,9 @@ mapIdSchool :: Map Text Text
mapIdSchool
=
M
.
fromList
$
Gargantext
.
Prelude
.
map
(
\
(
School
{
school_shortName
,
school_id
})
->
(
school_id
,
school_shortName
))
schools
hal_data
::
IO
(
Either
Text
(
DV
.
Vector
C
svHal
))
hal_data
::
IO
(
Either
Text
(
DV
.
Vector
T
svHal
))
hal_data
=
do
r
<-
CSV
.
readC
svHal
"doc/corpus_imt/Gargantext_Corpus.csv"
r
<-
TSV
.
readT
svHal
"doc/corpus_imt/Gargantext_Corpus.csv"
pure
$
snd
<$>
r
names
::
S
.
Set
Text
...
...
@@ -117,7 +117,7 @@ toSchoolName t = case M.lookup t mapIdSchool of
Nothing
->
t
Just
t'
->
t'
publisBySchool
::
DV
.
Vector
C
svHal
->
[(
Maybe
Text
,
Int
)]
publisBySchool
::
DV
.
Vector
T
svHal
->
[(
Maybe
Text
,
Int
)]
publisBySchool
hal_data'
=
Gargantext
.
Prelude
.
map
(
\
(
i
,
n
)
->
(
M
.
lookup
i
mapIdSchool
,
n
))
$
DL
.
filter
(
\
i
->
S
.
member
(
fst
i
)
names
)
$
DL
.
reverse
...
...
@@ -126,5 +126,5 @@ publisBySchool hal_data' = Gargantext.Prelude.map (\(i,n) -> (M.lookup i mapIdSc
$
Utils
.
freq
$
DL
.
concat
$
DV
.
toList
$
DV
.
map
(
\
n
->
splitOn
(
", "
)
(
c
svHal_instStructId_i
n
)
)
$
DV
.
filter
(
\
n
->
c
svHal_publication_year
n
==
2017
)
hal_data'
$
DV
.
map
(
\
n
->
splitOn
(
", "
)
(
t
svHal_instStructId_i
n
)
)
$
DV
.
filter
(
\
n
->
t
svHal_publication_year
n
==
2017
)
hal_data'
src/Gargantext/Core/Ext/IMTUser.hs
View file @
ff0a77cd
...
...
@@ -22,7 +22,7 @@ import Data.Csv ( (.:), header, decodeByNameWith, FromNamedRecord(..), Header )
import
Data.Text
qualified
as
T
import
Data.Vector
(
Vector
)
import
Data.Vector
qualified
as
Vector
import
Gargantext.Core.Text.Corpus.Parsers.
CSV
(
c
svDecodeOptions
,
Delimiter
(
Tab
)
)
import
Gargantext.Core.Text.Corpus.Parsers.
TSV
(
t
svDecodeOptions
,
Delimiter
(
Tab
)
)
import
Gargantext.Database.Admin.Types.Hyperdata.Contact
import
Gargantext.Prelude
import
System.FilePath.Posix
(
takeExtension
)
...
...
@@ -30,7 +30,7 @@ import System.FilePath.Posix (takeExtension)
------------------------------------------------------------------------
readFile_Annuaire
::
FilePath
->
IO
[
HyperdataContact
]
readFile_Annuaire
fp
=
case
takeExtension
fp
of
".csv"
->
read
C
SVFile_Annuaire
fp
".csv"
->
read
T
SVFile_Annuaire
fp
".data"
->
deserialiseImtUsersFromFile
fp
unknownExt
->
panicTrace
$
"[G.C.E.I.readFile_Annuaire] extension unknown: "
<>
T
.
pack
unknownExt
...
...
@@ -69,7 +69,7 @@ data IMTUser = IMTUser
,
date_modification
::
Maybe
Text
}
deriving
(
Eq
,
Show
,
Generic
)
-- |
C
SV instance
-- |
T
SV instance
instance
FromNamedRecord
IMTUser
where
parseNamedRecord
r
=
do
id
<-
r
.:
"id"
...
...
@@ -105,21 +105,21 @@ instance FromNamedRecord IMTUser where
date_modification
<-
r
.:
"date_modification"
pure
$
IMTUser
{
..
}
header
C
SVannuaire
::
Header
header
C
SVannuaire
=
header
T
SVannuaire
::
Header
header
T
SVannuaire
=
header
[
"id"
,
"entite"
,
"mail"
,
"nom"
,
"prenom"
,
"fonction"
,
"fonction2"
,
"tel"
,
"fax"
,
"service"
,
"groupe"
,
"entite2"
,
"service2"
,
"groupe2"
,
"bureau"
,
"url"
,
"pservice"
,
"pfonction"
,
"afonction"
,
"afonction2"
,
"grprech"
,
"appellation"
,
"lieu"
,
"aprecision"
,
"atel"
,
"sexe"
,
"statut"
,
"idutilentite"
,
"actif"
,
"idutilsiecoles"
,
"date_modification"
]
read
C
SVFile_Annuaire
::
FilePath
->
IO
[
HyperdataContact
]
read
C
SVFile_Annuaire
fp
=
do
users
<-
snd
<$>
read
C
SVFile_Annuaire'
fp
read
T
SVFile_Annuaire
::
FilePath
->
IO
[
HyperdataContact
]
read
T
SVFile_Annuaire
fp
=
do
users
<-
snd
<$>
read
T
SVFile_Annuaire'
fp
pure
$
map
imtUser2gargContact
$
Vector
.
toList
users
read
C
SVFile_Annuaire'
::
FilePath
->
IO
(
Header
,
Vector
IMTUser
)
read
CSVFile_Annuaire'
=
fmap
readC
svHalLazyBS'
.
BL
.
readFile
read
T
SVFile_Annuaire'
::
FilePath
->
IO
(
Header
,
Vector
IMTUser
)
read
TSVFile_Annuaire'
=
fmap
readT
svHalLazyBS'
.
BL
.
readFile
where
read
C
svHalLazyBS'
::
BL
.
ByteString
->
(
Header
,
Vector
IMTUser
)
read
CsvHalLazyBS'
bs
=
case
decodeByNameWith
(
c
svDecodeOptions
Tab
)
bs
of
read
T
svHalLazyBS'
::
BL
.
ByteString
->
(
Header
,
Vector
IMTUser
)
read
TsvHalLazyBS'
bs
=
case
decodeByNameWith
(
t
svDecodeOptions
Tab
)
bs
of
Left
e
->
panicTrace
(
cs
e
)
Right
rows
->
rows
...
...
src/Gargantext/Core/Text/Convert.hs
View file @
ff0a77cd
...
...
@@ -13,19 +13,19 @@ Format Converter.
{-# LANGUAGE PackageImports #-}
module
Gargantext.Core.Text.Convert
(
risPress2
c
svWrite
)
module
Gargantext.Core.Text.Convert
(
risPress2
t
svWrite
)
where
import
Gargantext.Core.Text.Corpus.Parsers
(
parseFile
,
FileFormat
(
..
),
FileType
(
..
))
import
Gargantext.Core.Text.Corpus.Parsers.
CSV
(
writeDocs2C
sv
)
import
Gargantext.Core.Text.Corpus.Parsers.
TSV
(
writeDocs2T
sv
)
import
Gargantext.Prelude
risPress2
c
svWrite
::
FilePath
->
IO
()
risPress2
c
svWrite
f
=
do
risPress2
t
svWrite
::
FilePath
->
IO
()
risPress2
t
svWrite
f
=
do
eContents
<-
parseFile
RisPresse
Plain
(
f
<>
".ris"
)
case
eContents
of
Right
contents
->
writeDocs2
C
sv
(
f
<>
".csv"
)
contents
Right
contents
->
writeDocs2
T
sv
(
f
<>
".csv"
)
contents
Left
e
->
panicTrace
$
"Error: "
<>
e
...
...
src/Gargantext/Core/Text/Corpus/API/Isidore.hs
View file @
ff0a77cd
...
...
@@ -15,13 +15,13 @@ module Gargantext.Core.Text.Corpus.API.Isidore (
get
-- * Internals (possibly unused?)
,
isidore2
c
svFile
,
isidore2
t
svFile
)
where
import
Data.Text
qualified
as
Text
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Text.Corpus.Parsers
(
cleanText
)
import
Gargantext.Core.Text.Corpus.Parsers.
CSV
(
writeDocs2C
sv
)
import
Gargantext.Core.Text.Corpus.Parsers.
TSV
(
writeDocs2T
sv
)
import
Gargantext.Core.Text.Corpus.Parsers.Date
qualified
as
Date
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
..
)
)
import
Gargantext.Defaults
qualified
as
Defaults
...
...
@@ -50,12 +50,12 @@ get lang l q a = do
hDocs
<-
mapM
(
isidoreToDoc
lang
)
(
toIsidoreDocs
iDocs
)
pure
hDocs
isidore2
c
svFile
::
FilePath
->
Lang
->
Maybe
Isidore
.
Limit
isidore2
t
svFile
::
FilePath
->
Lang
->
Maybe
Isidore
.
Limit
->
Maybe
Isidore
.
TextQuery
->
Maybe
Isidore
.
AuthorQuery
->
IO
()
isidore2
c
svFile
fp
lang
li
tq
aq
=
do
isidore2
t
svFile
fp
lang
li
tq
aq
=
do
hdocs
<-
get
lang
li
tq
aq
writeDocs2
C
sv
fp
hdocs
writeDocs2
T
sv
fp
hdocs
isidoreToDoc
::
Lang
->
IsidoreDoc
->
IO
HyperdataDocument
isidoreToDoc
lang
(
IsidoreDoc
t
a
d
u
s
as
)
=
do
...
...
src/Gargantext/Core/Text/Corpus/Parsers.hs
View file @
ff0a77cd
...
...
@@ -47,7 +47,7 @@ import Data.Text qualified as DT
import
Data.Tuple.Extra
(
both
)
-- , first, second)
import
Gargantext.API.Node.Corpus.New.Types
(
FileFormat
(
..
))
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Text.Corpus.Parsers.
CSV
(
parseHal
,
parseCsv
,
parseC
svC
)
import
Gargantext.Core.Text.Corpus.Parsers.
TSV
(
parseHal
,
parseTsv
,
parseT
svC
)
import
Gargantext.Core.Text.Corpus.Parsers.Date
qualified
as
Date
import
Gargantext.Core.Text.Corpus.Parsers.FrameWrite
(
text2titleParagraphs
)
import
Gargantext.Core.Text.Corpus.Parsers.Iramuteq
qualified
as
Iramuteq
...
...
@@ -78,8 +78,8 @@ type ParseError = Text
data
FileType
=
WOS
|
RIS
|
RisPresse
|
C
svGargV3
|
C
svHal
|
T
svGargV3
|
T
svHal
|
Iramuteq
|
JSON
|
Istex
...
...
@@ -109,11 +109,11 @@ parseFormatC ft ff bs0 = first ParseFormatError <$> do_parse ft ff bs0
->
FileFormat
->
DB
.
ByteString
->
m
(
Either
DT
.
Text
(
Integer
,
ConduitT
()
HyperdataDocument
IO
()
))
do_parse
C
svGargV3
Plain
bs
=
do
let
eParsedC
=
parse
C
svC
$
DBL
.
fromStrict
bs
do_parse
T
svGargV3
Plain
bs
=
do
let
eParsedC
=
parse
T
svC
$
DBL
.
fromStrict
bs
pure
(
second
(
transPipe
(
pure
.
runIdentity
))
<$>
eParsedC
)
do_parse
C
svHal
Plain
bs
=
do
let
eParsedC
=
parse
C
svC
$
DBL
.
fromStrict
bs
do_parse
T
svHal
Plain
bs
=
do
let
eParsedC
=
parse
T
svC
$
DBL
.
fromStrict
bs
pure
(
second
(
transPipe
(
pure
.
runIdentity
))
<$>
eParsedC
)
do_parse
Istex
Plain
bs
=
do
ep
<-
liftBase
$
parseIstex
EN
$
DBL
.
fromStrict
bs
...
...
@@ -188,8 +188,8 @@ etale = concatMap etale'
-- parseFormat :: FileType -> DB.ByteString -> IO (Either Prelude.String [HyperdataDocument])
-- parseFormat
CsvGargV3 bs = pure $ parseC
sv' $ DBL.fromStrict bs
-- parseFormat
C
svHal bs = pure $ parseHal' $ DBL.fromStrict bs
-- parseFormat
TsvGargV3 bs = pure $ parseT
sv' $ DBL.fromStrict bs
-- parseFormat
T
svHal bs = pure $ parseHal' $ DBL.fromStrict bs
-- parseFormat RisPresse bs = do
-- docs <- mapM (toDoc RIS)
-- <$> snd
...
...
@@ -220,8 +220,8 @@ parseFile :: FileType
->
FileFormat
->
FilePath
->
IO
(
Either
Text
[
HyperdataDocument
])
parseFile
CsvGargV3
Plain
p
=
parseC
sv
p
parseFile
C
svHal
Plain
p
=
parseHal
p
parseFile
TsvGargV3
Plain
p
=
parseT
sv
p
parseFile
T
svHal
Plain
p
=
parseHal
p
parseFile
RisPresse
Plain
p
=
do
docs
<-
join
$
mapM
(
toDoc
RIS
)
<$>
snd
<$>
enrichWith
RisPresse
<$>
readFileWith
RIS
p
pure
$
Right
docs
...
...
src/Gargantext/Core/Text/Corpus/Parsers/Book.hs
View file @
ff0a77cd
...
...
@@ -18,7 +18,7 @@ import Data.ByteString.Lazy qualified as DBL
import
Data.List
qualified
as
List
import
Data.Text
qualified
as
DT
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Text.Corpus.Parsers.
CSV
(
hyperdataDocument2c
sv
)
import
Gargantext.Core.Text.Corpus.Parsers.
TSV
(
hyperdataDocument2t
sv
)
import
Gargantext.Core.Text.Corpus.Parsers.FrameWrite
(
text2titleParagraphs
)
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Prelude
...
...
@@ -29,13 +29,13 @@ import System.Directory -- (getDirectoryContents)
type
FileOut
=
FilePath
book2
c
sv
::
Int
->
FileDir
->
FileOut
->
IO
()
book2
c
sv
n
f_in
f_out
=
do
book2
t
sv
::
Int
->
FileDir
->
FileOut
->
IO
()
book2
t
sv
n
f_in
f_out
=
do
files
<-
filesOf
f_in
texts
<-
readPublis
f_in
files
let
publis
=
concatMap
(
file2publi
n
)
texts
let
docs
=
zipWith
publiToHyperdata
[
1
..
]
publis
DBL
.
writeFile
f_out
(
hyperdataDocument2
c
sv
docs
)
DBL
.
writeFile
f_out
(
hyperdataDocument2
t
sv
docs
)
filesOf
::
FileDir
->
IO
[
FilePath
]
filesOf
fd
=
List
.
sort
-- sort by filename
...
...
src/Gargantext/Core/Text/Corpus/Parsers/Json2Csv.hs
View file @
ff0a77cd
...
...
@@ -14,14 +14,14 @@ Json parser to export towoard CSV GargV3 format.
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Core.Text.Corpus.Parsers.Json2Csv
(
json2
c
sv
,
readPatents
)
module
Gargantext.Core.Text.Corpus.Parsers.Json2Csv
(
json2
t
sv
,
readPatents
)
where
import
Data.Aeson
(
decode
)
import
Data.ByteString.Lazy
(
readFile
)
import
Data.Text
(
unpack
)
import
Data.Vector
(
fromList
)
import
Gargantext.Core.Text.Corpus.Parsers.
CSV
(
CsvDoc
(
..
),
writeFile
,
headerC
svGargV3
)
import
Gargantext.Core.Text.Corpus.Parsers.
TSV
(
TsvDoc
(
..
),
writeFile
,
headerT
svGargV3
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
import
Gargantext.Defaults
qualified
as
Defaults
import
Gargantext.Prelude
hiding
(
readFile
,
writeFile
)
...
...
@@ -41,20 +41,20 @@ readPatents fp = decode <$> readFile fp
type
FilePathIn
=
FilePath
type
FilePathOut
=
FilePath
json2
c
sv
::
FilePathIn
->
FilePathOut
->
IO
()
json2
c
sv
fin
fout
=
do
patents
<-
maybe
(
panicTrace
"json2
c
sv error"
)
identity
<$>
readPatents
fin
writeFile
fout
(
header
CsvGargV3
,
fromList
$
map
patent2c
svDoc
patents
)
patent2
csvDoc
::
Patent
->
C
svDoc
patent2
c
svDoc
(
Patent
{
..
})
=
CsvDoc
{
c
sv_title
=
_patent_title
,
c
sv_source
=
"Source"
,
c
sv_publication_year
=
Just
$
read
(
unpack
_patent_year
)
,
c
sv_publication_month
=
Just
$
Defaults
.
month
,
c
sv_publication_day
=
Just
$
Defaults
.
day
,
c
sv_abstract
=
_patent_abstract
,
c
sv_authors
=
"Authors"
}
json2
t
sv
::
FilePathIn
->
FilePathOut
->
IO
()
json2
t
sv
fin
fout
=
do
patents
<-
maybe
(
panicTrace
"json2
t
sv error"
)
identity
<$>
readPatents
fin
writeFile
fout
(
header
TsvGargV3
,
fromList
$
map
patent2t
svDoc
patents
)
patent2
tsvDoc
::
Patent
->
T
svDoc
patent2
t
svDoc
(
Patent
{
..
})
=
TsvDoc
{
t
sv_title
=
_patent_title
,
t
sv_source
=
"Source"
,
t
sv_publication_year
=
Just
$
read
(
unpack
_patent_year
)
,
t
sv_publication_month
=
Just
$
Defaults
.
month
,
t
sv_publication_day
=
Just
$
Defaults
.
day
,
t
sv_abstract
=
_patent_abstract
,
t
sv_authors
=
"Authors"
}
...
...
src/Gargantext/Core/Text/Corpus/Parsers/
C
SV.hs
→
src/Gargantext/Core/Text/Corpus/Parsers/
T
SV.hs
View file @
ff0a77cd
This diff is collapsed.
Click to expand it.
src/Gargantext/Core/Text/List/Formats/
C
SV.hs
→
src/Gargantext/Core/Text/List/Formats/
T
SV.hs
View file @
ff0a77cd
{-|
Module : Gargantext.Core.Text.List.Formats.
C
SV
Module : Gargantext.Core.Text.List.Formats.
T
SV
Description :
Copyright : (c) CNRS, 2018-Present
License : AGPL + CECILL v3
...
...
@@ -7,12 +7,12 @@ Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
C
SV parser for Gargantext corpus files.
T
SV parser for Gargantext corpus files.
-}
module
Gargantext.Core.Text.List.Formats.
C
SV
where
module
Gargantext.Core.Text.List.Formats.
T
SV
where
import
Control.Applicative
import
Data.ByteString.Lazy
qualified
as
BL
...
...
@@ -25,74 +25,74 @@ import Gargantext.Core.Text.Context
import
Gargantext.Prelude
hiding
(
length
)
------------------------------------------------------------------------
c
svMapTermList
::
FilePath
->
IO
TermList
csvMapTermList
fp
=
csv2list
CsvMap
<$>
snd
<$>
fromC
svListFile
fp
t
svMapTermList
::
FilePath
->
IO
TermList
tsvMapTermList
fp
=
tsv2list
TsvMap
<$>
snd
<$>
fromT
svListFile
fp
csv2list
::
CsvListType
->
Vector
C
svList
->
TermList
csv2list
lt
vs
=
V
.
toList
$
V
.
map
(
\
(
C
svList
_
label
forms
)
->
(
DT
.
words
label
,
[
DT
.
words
label
]
<>
(
filter
(
not
.
null
)
.
map
DT
.
words
$
DT
.
splitOn
c
svListFormsDelimiter
forms
)))
$
V
.
filter
(
\
l
->
c
svList_status
l
==
lt
)
vs
tsv2list
::
TsvListType
->
Vector
T
svList
->
TermList
tsv2list
lt
vs
=
V
.
toList
$
V
.
map
(
\
(
T
svList
_
label
forms
)
->
(
DT
.
words
label
,
[
DT
.
words
label
]
<>
(
filter
(
not
.
null
)
.
map
DT
.
words
$
DT
.
splitOn
t
svListFormsDelimiter
forms
)))
$
V
.
filter
(
\
l
->
t
svList_status
l
==
lt
)
vs
------------------------------------------------------------------------
data
CsvListType
=
CsvMap
|
CsvStop
|
C
svCandidate
data
TsvListType
=
TsvMap
|
TsvStop
|
T
svCandidate
deriving
(
Read
,
Show
,
Eq
)
------------------------------------------------------------------------
--
C
SV List Main Configuration
c
svListFieldDelimiter
::
Char
c
svListFieldDelimiter
=
'
\t
'
--
T
SV List Main Configuration
t
svListFieldDelimiter
::
Char
t
svListFieldDelimiter
=
'
\t
'
c
svListFormsDelimiter
::
Text
c
svListFormsDelimiter
=
"|&|"
t
svListFormsDelimiter
::
Text
t
svListFormsDelimiter
=
"|&|"
------------------------------------------------------------------------
data
CsvList
=
C
svList
{
csvList_status
::
!
C
svListType
,
c
svList_label
::
!
Text
,
c
svList_forms
::
!
Text
data
TsvList
=
T
svList
{
tsvList_status
::
!
T
svListType
,
t
svList_label
::
!
Text
,
t
svList_forms
::
!
Text
}
deriving
(
Show
)
------------------------------------------------------------------------
instance
FromNamedRecord
C
svList
where
parseNamedRecord
r
=
C
svList
<$>
r
.:
"status"
instance
FromNamedRecord
T
svList
where
parseNamedRecord
r
=
T
svList
<$>
r
.:
"status"
<*>
r
.:
"label"
<*>
r
.:
"forms"
instance
ToNamedRecord
C
svList
where
toNamedRecord
(
C
svList
s
l
f
)
=
instance
ToNamedRecord
T
svList
where
toNamedRecord
(
T
svList
s
l
f
)
=
namedRecord
[
"status"
.=
s
,
"label"
.=
l
,
"forms"
.=
f
]
------------------------------------------------------------------------
instance
FromField
C
svListType
where
parseField
"map"
=
pure
C
svMap
parseField
"main"
=
pure
C
svCandidate
parseField
"candidate"
=
pure
C
svCandidate
-- backward compat
parseField
"stop"
=
pure
C
svStop
instance
FromField
T
svListType
where
parseField
"map"
=
pure
T
svMap
parseField
"main"
=
pure
T
svCandidate
parseField
"candidate"
=
pure
T
svCandidate
-- backward compat
parseField
"stop"
=
pure
T
svStop
parseField
_
=
mzero
instance
ToField
C
svListType
where
toField
C
svMap
=
"map"
toField
C
svCandidate
=
"main"
toField
C
svStop
=
"stop"
instance
ToField
T
svListType
where
toField
T
svMap
=
"map"
toField
T
svCandidate
=
"main"
toField
T
svStop
=
"stop"
------------------------------------------------------------------------
c
svDecodeOptions
::
DecodeOptions
c
svDecodeOptions
=
(
defaultDecodeOptions
{
decDelimiter
=
fromIntegral
$
ord
c
svListFieldDelimiter
}
t
svDecodeOptions
::
DecodeOptions
t
svDecodeOptions
=
(
defaultDecodeOptions
{
decDelimiter
=
fromIntegral
$
ord
t
svListFieldDelimiter
}
)
c
svEncodeOptions
::
EncodeOptions
c
svEncodeOptions
=
(
defaultEncodeOptions
{
encDelimiter
=
fromIntegral
$
ord
c
svListFieldDelimiter
}
t
svEncodeOptions
::
EncodeOptions
t
svEncodeOptions
=
(
defaultEncodeOptions
{
encDelimiter
=
fromIntegral
$
ord
t
svListFieldDelimiter
}
)
------------------------------------------------------------------------
from
CsvListFile
::
FilePath
->
IO
(
Header
,
Vector
C
svList
)
from
C
svListFile
fp
=
do
c
svData
<-
BL
.
readFile
fp
case
decodeByNameWith
csvDecodeOptions
c
svData
of
from
TsvListFile
::
FilePath
->
IO
(
Header
,
Vector
T
svList
)
from
T
svListFile
fp
=
do
t
svData
<-
BL
.
readFile
fp
case
decodeByNameWith
tsvDecodeOptions
t
svData
of
Left
e
->
panicTrace
(
pack
e
)
Right
csvList
->
pure
c
svList
Right
tsvList
->
pure
t
svList
------------------------------------------------------------------------
to
CsvListFile
::
FilePath
->
(
Header
,
Vector
C
svList
)
->
IO
()
to
C
svListFile
fp
(
h
,
vs
)
=
BL
.
writeFile
fp
$
encodeByNameWith
c
svEncodeOptions
h
(
V
.
toList
vs
)
to
TsvListFile
::
FilePath
->
(
Header
,
Vector
T
svList
)
->
IO
()
to
T
svListFile
fp
(
h
,
vs
)
=
BL
.
writeFile
fp
$
encodeByNameWith
t
svEncodeOptions
h
(
V
.
toList
vs
)
------------------------------------------------------------------------
src/Gargantext/Core/Text/Search.hs
View file @
ff0a77cd
...
...
@@ -7,7 +7,7 @@ Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
This search Engine is first made to clean
C
SV file according to a query.
This search Engine is first made to clean
T
SV file according to a query.
Starting from this model, a specific Gargantext engine will be made
(using more metrics scores/features).
...
...
@@ -17,7 +17,7 @@ module Gargantext.Core.Text.Search where
import
Data.Ix
import
Data.SearchEngine
import
Gargantext.Core.Text.Corpus.Parsers.
C
SV
import
Gargantext.Core.Text.Corpus.Parsers.
T
SV
import
Gargantext.Core.Text.Terms.Mono
(
monoTexts
)
import
Gargantext.Core.Text.Terms.Mono.Stem
as
ST
import
Gargantext.Prelude
...
...
@@ -29,7 +29,7 @@ import Gargantext.Prelude
type
DocId
=
Int
type
DocSearchEngine
=
SearchEngine
C
svGargV3
T
svGargV3
DocId
DocField
NoFeatures
...
...
@@ -42,7 +42,7 @@ initialDocSearchEngine :: DocSearchEngine
initialDocSearchEngine
=
initSearchEngine
docSearchConfig
defaultSearchRankParameters
docSearchConfig
::
SearchConfig
C
svGargV3
DocId
DocField
NoFeatures
docSearchConfig
::
SearchConfig
T
svGargV3
DocId
DocField
NoFeatures
docSearchConfig
=
SearchConfig
{
documentKey
=
d_docId
,
...
...
@@ -51,7 +51,7 @@ docSearchConfig =
documentFeatureValue
=
const
noFeatures
}
where
extractTerms
::
C
svGargV3
->
DocField
->
[
Text
]
extractTerms
::
T
svGargV3
->
DocField
->
[
Text
]
extractTerms
doc
TitleField
=
monoTexts
(
d_title
doc
)
extractTerms
doc
AbstractField
=
monoTexts
(
d_abstract
doc
)
...
...
src/Gargantext/Core/Text/Upload.hs
View file @
ff0a77cd
...
...
@@ -13,7 +13,7 @@ where
import
qualified
Data.Map.Strict
as
Map
import
qualified
Data.Text
as
T
import
qualified
Data.Text.Encoding
as
TE
import
Gargantext.Utils.Servant
(
C
SV
,
Markdown
)
import
Gargantext.Utils.Servant
(
T
SV
,
Markdown
)
import
Network.HTTP.Client
(
newManager
,
Request
(
..
))
import
Network.HTTP.Client.TLS
(
tlsManagerSettings
)
import
Protolude
...
...
@@ -26,10 +26,10 @@ newtype DocId = DocId { fromDocId :: Text }
newtype
Data
=
Data
{
fromData
::
Text
}
data
ContentType
a
=
CTPlain
a
|
CT
C
SV
a
|
CT
T
SV
a
-- TODO SocialCalc, Excel XML ?
instance
MimeRender
C
SV
Data
where
instance
MimeRender
T
SV
Data
where
mimeRender
p
(
Data
d
)
=
mimeRender
p
d
instance
MimeRender
PlainText
Data
where
mimeRender
p
(
Data
d
)
=
mimeRender
p
d
...
...
@@ -49,14 +49,14 @@ type EthercalcAPI =
:>
ReqBody
'[
P
lainText
]
Data
:>
Put
'[
P
lainText
]
Text
--
c
sv
--
t
sv
:<|>
ReqBody
'[
C
SV
]
Data
:>
Post
'[
P
lainText
,
C
SV
]
Text
ReqBody
'[
T
SV
]
Data
:>
Post
'[
P
lainText
,
T
SV
]
Text
:<|>
Capture
"docId"
DocId
:>
ReqBody
'[
C
SV
]
Data
:>
Put
'[
P
lainText
,
C
SV
]
Text
:>
ReqBody
'[
T
SV
]
Data
:>
Put
'[
P
lainText
,
T
SV
]
Text
)
ethercalcAPI
::
Proxy
EthercalcAPI
...
...
@@ -64,16 +64,16 @@ ethercalcAPI = Proxy
ethercalcNewPlain
::
Data
->
ClientM
Text
ethercalcUpdatePlain
::
DocId
->
Data
->
ClientM
Text
ethercalcNew
C
SV
::
Data
->
ClientM
Text
ethercalcUpdate
C
SV
::
DocId
->
Data
->
ClientM
Text
ethercalcNew
T
SV
::
Data
->
ClientM
Text
ethercalcUpdate
T
SV
::
DocId
->
Data
->
ClientM
Text
ethercalcNewPlain
:<|>
ethercalcUpdatePlain
:<|>
ethercalcNew
CSV
:<|>
ethercalcUpdateC
SV
=
client
ethercalcAPI
:<|>
ethercalcNew
TSV
:<|>
ethercalcUpdateT
SV
=
client
ethercalcAPI
------------------------------
-- | Create new or update existing Ethercalc document (depending on
-- `Maybe DocId` constructor). `Data` can be in various formats (
C
SV,
-- `Maybe DocId` constructor). `Data` can be in various formats (
T
SV,
-- etc).
ethercalc
::
Host
->
Maybe
DocId
->
ContentType
Data
->
IO
(
Either
ClientError
Text
)
ethercalc
(
Host
host
)
mDocId
ctD
=
do
...
...
@@ -81,9 +81,9 @@ ethercalc (Host host) mDocId ctD = do
let
env
=
mkClientEnv
manager'
(
BaseUrl
Https
(
T
.
unpack
host
)
443
""
)
case
(
mDocId
,
ctD
)
of
(
Nothing
,
CTPlain
d
)
->
runClientM
(
ethercalcNewPlain
d
)
env
(
Nothing
,
CT
CSV
d
)
->
runClientM
(
ethercalcNewC
SV
d
)
env
(
Nothing
,
CT
TSV
d
)
->
runClientM
(
ethercalcNewT
SV
d
)
env
(
Just
docId
,
CTPlain
d
)
->
runClientM
(
ethercalcUpdatePlain
docId
d
)
env
(
Just
docId
,
CT
CSV
d
)
->
runClientM
(
ethercalcUpdateC
SV
docId
d
)
env
(
Just
docId
,
CT
TSV
d
)
->
runClientM
(
ethercalcUpdateT
SV
docId
d
)
env
-----------------------------------
...
...
src/Gargantext/Core/Viz/Phylo.hs
View file @
ff0a77cd
...
...
@@ -43,8 +43,8 @@ import Test.QuickCheck.Instances.Vector()
data
CorpusParser
=
Wos
{
_wos_limit
::
Int
}
|
Csv
{
_c
sv_limit
::
Int
}
|
Csv'
{
_c
sv'_limit
::
Int
}
|
Tsv
{
_t
sv_limit
::
Int
}
|
Tsv'
{
_t
sv'_limit
::
Int
}
deriving
(
Show
,
Generic
,
Eq
,
ToExpr
)
instance
ToSchema
CorpusParser
where
...
...
@@ -223,7 +223,7 @@ defaultConfig =
PhyloConfig
{
corpusPath
=
"corpus.csv"
-- useful for commandline only
,
listPath
=
"list.csv"
-- useful for commandline only
,
outputPath
=
"data/"
,
corpusParser
=
C
sv
150000
,
corpusParser
=
T
sv
150000
,
listParser
=
V4
,
phyloName
=
pack
"Phylo Name"
,
phyloScale
=
2
...
...
@@ -725,8 +725,8 @@ instance Arbitrary PhyloConfig where
instance
Arbitrary
CorpusParser
where
arbitrary
=
oneof
[
Wos
<$>
arbitrary
,
C
sv
<$>
arbitrary
,
C
sv'
<$>
arbitrary
,
T
sv
<$>
arbitrary
,
T
sv'
<$>
arbitrary
]
instance
Arbitrary
ListParser
where
...
...
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
View file @
ff0a77cd
...
...
@@ -344,7 +344,7 @@ toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of
Fis
_
_
->
let
fis
=
parMap
rpar
(
\
(
prd
,
docs
)
->
case
(
corpusParser
$
getConfig
phylo
)
of
C
sv'
_
->
let
lst
=
toList
T
sv'
_
->
let
lst
=
toList
$
fisWithSizePolyMap'
(
Segment
1
20
)
1
(
map
(
\
d
->
(
ngramsToIdx
(
text
d
)
(
getRoots
phylo
),
(
weight
d
,
(
sourcesToIdx
(
sources
d
)
(
getSources
phylo
)))))
docs
)
in
(
prd
,
map
(
\
f
->
Clustering
(
Set
.
toList
$
fst
f
)
((
fst
.
snd
)
f
)
prd
((
fst
.
snd
.
snd
)
f
)
(((
snd
.
snd
.
snd
)
f
)))
lst
)
_
->
let
lst
=
toList
...
...
src/Gargantext/Database/Admin/Types/Hyperdata/Frame.hs
View file @
ff0a77cd
...
...
@@ -70,8 +70,8 @@ getHyperdataFrameContents (HyperdataFrame { _hf_base, _hf_frame_id }) = do
r
<-
Wreq
.
get
$
T
.
unpack
path
pure
$
decodeUtf8
$
toStrict
$
r
^.
Wreq
.
responseBody
getHyperdataFrame
C
SV
::
HyperdataFrame
->
IO
Text
getHyperdataFrame
C
SV
(
HyperdataFrame
{
_hf_base
,
_hf_frame_id
})
=
do
getHyperdataFrame
T
SV
::
HyperdataFrame
->
IO
Text
getHyperdataFrame
T
SV
(
HyperdataFrame
{
_hf_base
,
_hf_frame_id
})
=
do
let
path
=
T
.
concat
[
_hf_base
,
"/"
,
_hf_frame_id
,
".csv"
]
r
<-
Wreq
.
get
$
T
.
unpack
path
pure
$
decodeUtf8
$
toStrict
$
r
^.
Wreq
.
responseBody
src/Gargantext/Utils/Jobs.hs
View file @
ff0a77cd
...
...
@@ -69,7 +69,7 @@ parseGargJob s = case s of
"tablengrams"
->
Just
TableNgramsJob
"forgotpassword"
->
Just
ForgotPasswordJob
"updatengramslistjson"
->
Just
UpdateNgramsListJobJSON
"updatengramslist
csv"
->
Just
UpdateNgramsListJobC
SV
"updatengramslist
tsv"
->
Just
UpdateNgramsListJobT
SV
"addcontact"
->
Just
AddContactJob
"addfile"
->
Just
AddFileJob
"documentfromwritenode"
->
Just
DocumentFromWriteNodeJob
...
...
src/Gargantext/Utils/Servant.hs
View file @
ff0a77cd
...
...
@@ -21,18 +21,18 @@ import Protolude.Partial (read)
import
Servant
(
Accept
(
contentType
),
MimeRender
(
..
),
MimeUnrender
(
mimeUnrender
)
)
data
CSV
=
C
SV
data
TSV
=
T
SV
instance
Accept
C
SV
where
instance
Accept
T
SV
where
contentType
_
=
"text"
//
"csv"
/:
(
"charset"
,
"utf-8"
)
instance
(
DefaultOrdered
a
,
ToNamedRecord
a
)
=>
MimeRender
C
SV
[
a
]
where
instance
(
DefaultOrdered
a
,
ToNamedRecord
a
)
=>
MimeRender
T
SV
[
a
]
where
mimeRender
_
=
encodeDefaultOrderedByName
instance
MimeRender
C
SV
T
.
Text
where
instance
MimeRender
T
SV
T
.
Text
where
mimeRender
_
=
BSC
.
fromStrict
.
TE
.
encodeUtf8
instance
Read
a
=>
MimeUnrender
C
SV
a
where
instance
Read
a
=>
MimeUnrender
T
SV
a
where
mimeUnrender
_
bs
=
case
BSC
.
take
len
bs
of
"text/csv"
->
pure
.
read
.
BSC
.
unpack
$
BSC
.
drop
len
bs
_
->
Left
"didn't start with the magic incantation"
...
...
@@ -40,7 +40,7 @@ instance Read a => MimeUnrender CSV a where
len
::
Int64
len
=
fromIntegral
$
length
(
"text/csv"
::
Prelude
.
String
)
--instance ToNamedRecord a => MimeRender
C
SV [a] where
--instance ToNamedRecord a => MimeRender
T
SV [a] where
-- mimeRender _ val = encode val
----------------------------
...
...
test/Test/API/UpdateList.hs
View file @
ff0a77cd
...
...
@@ -25,7 +25,7 @@ import Data.Text.IO qualified as TIO
import
Fmt
import
Gargantext.API.Admin.Auth.Types
(
Token
)
import
Gargantext.API.Ngrams
qualified
as
APINgrams
import
Gargantext.API.Ngrams.List
(
ngramsListFrom
C
SVData
)
import
Gargantext.API.Ngrams.List
(
ngramsListFrom
T
SVData
)
import
Gargantext.API.Ngrams.Types
(
MSet
(
..
),
NgramsPatch
(
..
),
NgramsRepoElement
(
..
),
NgramsTablePatch
(
..
),
NgramsTerm
(
..
),
Versioned
(
..
),
mSetToList
,
toNgramsPatch
,
ne_children
,
ne_ngrams
,
vc_data
,
_NgramsTable
)
import
Gargantext.Core.Text.Ngrams
import
Gargantext.Core.Types
(
CorpusId
,
ListId
,
ListType
(
..
),
NodeId
,
_NodeId
)
...
...
@@ -190,7 +190,7 @@ tests = sequential $ aroundAll withTestDBAndPort $ do
it
"parses CSV via ngramsListFromCSVData"
$
\
((
_testEnv
,
_port
),
_app
)
->
do
simpleNgrams
<-
liftIO
(
TIO
.
readFile
=<<
getDataFileName
"test-data/ngrams/simple.csv"
)
ngramsListFrom
C
SVData
simpleNgrams
`
shouldBe
`
ngramsListFrom
T
SVData
simpleNgrams
`
shouldBe
`
Right
(
Map
.
fromList
[
(
NgramsTerms
,
Versioned
0
$
Map
.
fromList
[
(
NgramsTerm
"abelian group"
,
NgramsRepoElement
1
MapTerm
Nothing
Nothing
(
MSet
mempty
))
,
(
NgramsTerm
"brazorf"
,
NgramsRepoElement
1
StopTerm
Nothing
Nothing
(
MSet
mempty
))
...
...
test/Test/Offline/Phylo.hs
View file @
ff0a77cd
...
...
@@ -12,7 +12,7 @@ import Data.GraphViz.Attributes.Complete qualified as Graphviz
import
Data.Text.Lazy
as
TL
import
Data.TreeDiff
import
Data.Vector
qualified
as
V
import
Gargantext.Core.Text.List.Formats.
C
SV
import
Gargantext.Core.Text.List.Formats.
T
SV
import
Gargantext.Core.Types.Phylo
import
Gargantext.Core.Viz.Phylo
hiding
(
EdgeType
(
..
))
import
Gargantext.Core.Viz.Phylo.API.Tools
(
readPhylo
,
phylo2dot2json
)
...
...
@@ -30,7 +30,7 @@ phyloTestConfig = PhyloConfig {
corpusPath
=
"corpus.csv"
,
listPath
=
"list.csv"
,
outputPath
=
"data/"
,
corpusParser
=
Csv
{
_c
sv_limit
=
150000
}
,
corpusParser
=
Tsv
{
_t
sv_limit
=
150000
}
,
listParser
=
V4
,
phyloName
=
"Phylo Name"
,
phyloScale
=
2
...
...
@@ -240,7 +240,7 @@ testToPhyloDeterminism = do
,
listPath
=
listPath'
,
listParser
=
V3
}
mapList
<-
c
svMapTermList
(
listPath
config
)
mapList
<-
t
svMapTermList
(
listPath
config
)
corpus
<-
fileToDocsDefault
(
corpusParser
config
)
(
corpusPath
config
)
[
Year
3
1
5
,
Month
3
1
5
,
Week
4
2
5
]
...
...
Przemyslaw Kaminski
@cgenie
mentioned in commit
5660aec0
·
Oct 08, 2024
mentioned in commit
5660aec0
mentioned in commit 5660aec07ec5a0a0a5468f440092c1a8f57a864e
Toggle commit list
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment