gargantext / haskell-gargantext · Commits

Commit ff0a77cd · authored Jun 07, 2024 by Loïc Chapron
Change CSV into TSV
Parent: 49946361
Pipeline #6205 failed with stages in 162 minutes and 52 seconds
Showing 36 changed files with 791 additions and 204 deletions (+791 -204)
bin/gargantext-cli/CleanCsvCorpus.hs                     +11 -11
bin/gargantext-cli/Main.hs                               +5 -5
bin/gargantext-import/Main.hs                            +5 -5
bin/gargantext-phylo-profile/Main.hs                     +1 -1
bin/gargantext-phylo/Main.hs                             +3 -3
bin/gargantext-phylo/Phylo/Common.hs                     +22 -22
gargantext.cabal                                         +2 -2
src/Gargantext/API/Admin/EnvTypes.hs                     +1 -1
src/Gargantext/API/Ngrams/List.hs                        +30 -30
src/Gargantext/API/Ngrams/Types.hs                       +7 -7
src/Gargantext/API/Node/Corpus/New.hs                    +2 -2
src/Gargantext/API/Node/Corpus/New/Types.hs              +5 -5
src/Gargantext/API/Node/Document/Export.hs               +4 -4
src/Gargantext/API/Node/FrameCalcUpload.hs               +1 -1
src/Gargantext/API/Routes/Named/Document.hs              +1 -1
src/Gargantext/API/Routes/Named/List.hs                  +4 -4
src/Gargantext/API/Routes/Named/Private.hs               +1 -1
src/Gargantext/API/Server/Named/Private.hs               +1 -1
src/Gargantext/Core/Ext/IMT.hs                           +6 -6
src/Gargantext/Core/Ext/IMTUser.hs                       +12 -12
src/Gargantext/Core/Text/Convert.hs                      +5 -5
src/Gargantext/Core/Text/Corpus/API/Isidore.hs           +5 -5
src/Gargantext/Core/Text/Corpus/Parsers.hs               +11 -11
src/Gargantext/Core/Text/Corpus/Parsers/Book.hs          +4 -4
src/Gargantext/Core/Text/Corpus/Parsers/Json2Csv.hs      +16 -16
src/Gargantext/Core/Text/Corpus/Parsers/TSV.hs           +489 -0
src/Gargantext/Core/Text/List/Formats/TSV.hs             +98 -0
src/Gargantext/Core/Text/Search.hs                       +5 -5
src/Gargantext/Core/Text/Upload.hs                       +14 -14
src/Gargantext/Core/Viz/Phylo.hs                         +5 -5
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs              +1 -1
src/Gargantext/Database/Admin/Types/Hyperdata/Frame.hs   +2 -2
src/Gargantext/Utils/Jobs.hs                             +1 -1
src/Gargantext/Utils/Servant.hs                          +6 -6
test/Test/API/UpdateList.hs                              +2 -2
test/Test/Offline/Phylo.hs                               +3 -3
bin/gargantext-cli/CleanCsvCorpus.hs

@@ -19,15 +19,15 @@ import Data.Set qualified as S
 import Data.Text (pack)
 import Data.Vector (Vector)
 import Data.Vector qualified as V
-import Gargantext.Core.Text.Corpus.Parsers.CSV qualified as CSV
+import Gargantext.Core.Text.Corpus.Parsers.TSV qualified as TSV
 import Gargantext.Core.Text.Search
 import Gargantext.Prelude
 ------------------------------------------------------------------------
 type Query = [S.Term]
-filterDocs :: [DocId] -> Vector CSV.CsvGargV3 -> Vector CSV.CsvGargV3
-filterDocs docIds = V.filter (\doc -> S.member (CSV.d_docId doc) $ S.fromList docIds)
+filterDocs :: [DocId] -> Vector TSV.TsvGargV3 -> Vector TSV.TsvGargV3
+filterDocs docIds = V.filter (\doc -> S.member (TSV.d_docId doc) $ S.fromList docIds)
 main :: IO ()
@@ -37,19 +37,19 @@ main = do
   --let q = ["water", "scarcity", "morocco", "shortage","flood"]
   let q = ["gratuit", "gratuité", "culture", "culturel"]
-  eDocs <- CSV.readCSVFile rPath
+  eDocs <- TSV.readTSVFile rPath
   case eDocs of
-    Right (h, csvDocs) -> do
-      putStrLn ("Number of documents before:" <> show (V.length csvDocs) :: Text)
-      putStrLn ("Mean size of docs:" <> show (CSV.docsSize csvDocs) :: Text)
+    Right (h, tsvDocs) -> do
+      putStrLn ("Number of documents before:" <> show (V.length tsvDocs) :: Text)
+      putStrLn ("Mean size of docs:" <> show (TSV.docsSize tsvDocs) :: Text)
-      let docs = CSV.toDocs csvDocs
+      let docs = TSV.toDocs tsvDocs
       let engine = S.insertDocs docs initialDocSearchEngine
       let docIds = S.query engine (map pack q)
-      let docs' = CSV.fromDocs $ filterDocs docIds (V.fromList docs)
+      let docs' = TSV.fromDocs $ filterDocs docIds (V.fromList docs)
       putStrLn ("Number of documents after:" <> show (V.length docs') :: Text)
-      putStrLn ("Mean size of docs:" <> show (CSV.docsSize docs') :: Text)
+      putStrLn ("Mean size of docs:" <> show (TSV.docsSize docs') :: Text)
-      CSV.writeFile wPath (h, docs')
+      TSV.writeFile wPath (h, docs')
     Left e -> panicTrace $ "Error: " <> e
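
For reference, the filterDocs pattern above (keep only the rows whose identifier appears in the search result) can be reproduced outside Gargantext with just the vector and containers packages. This is a minimal standalone sketch, not the project's code: DocRow and docId are hypothetical stand-ins for TSV.TsvGargV3 and TSV.d_docId, and the id set is hoisted into a where clause instead of being rebuilt inside the lambda as in the diff.

{-# LANGUAGE ImportQualifiedPost #-}
module Main where

import Data.Set qualified as S
import Data.Vector qualified as V

-- Hypothetical stand-in for a parsed TSV document row.
data DocRow = DocRow { docId :: Int, docTitle :: String }
  deriving Show

-- Keep only the rows whose id belongs to the query result; the id set is
-- built once and shared by every membership test.
filterRows :: [Int] -> V.Vector DocRow -> V.Vector DocRow
filterRows ids = V.filter (\doc -> S.member (docId doc) idSet)
  where
    idSet = S.fromList ids

main :: IO ()
main = print $ filterRows [2] (V.fromList [DocRow 1 "a", DocRow 2 "b"])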
bin/gargantext-cli/Main.hs

@@ -26,8 +26,8 @@ import Data.Text.Lazy.Encoding qualified as TLE
 import Data.Tuple.Extra (both)
 import Data.Vector qualified as DV
 import Gargantext.Core.Text.Context (TermList)
-import Gargantext.Core.Text.Corpus.Parsers.CSV (readCSVFile, csv_title, csv_abstract, csv_publication_year, fromMIntOrDec, defaultYear)
-import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
+import Gargantext.Core.Text.Corpus.Parsers.TSV (readTSVFile, tsv_title, tsv_abstract, tsv_publication_year, fromMIntOrDec, defaultYear)
+import Gargantext.Core.Text.List.Formats.TSV (tsvMapTermList)
 import Gargantext.Core.Text.Metrics.Count (coocOnContexts, Coocs)
 import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
 import Gargantext.Prelude
@@ -72,16 +72,16 @@ main = do
   [corpusFile, termListFile, outputFile] <- getArgs
   --corpus :: IO (DM.IntMap [[Text]])
-  eCorpusFile <- readCSVFile corpusFile
+  eCorpusFile <- readTSVFile corpusFile
   case eCorpusFile of
     Right cf -> do
       let corpus = DM.fromListWith (<>)
                  . DV.toList
-                 . DV.map (\n -> (fromMIntOrDec defaultYear $ csv_publication_year n, [(csv_title n) <> " " <> (csv_abstract n)]))
+                 . DV.map (\n -> (fromMIntOrDec defaultYear $ tsv_publication_year n, [(tsv_title n) <> " " <> (tsv_abstract n)]))
                  . snd $ cf
       -- termListMap :: [Text]
-      termList <- csvMapTermList termListFile
+      termList <- tsvMapTermList termListFile
       putText $ show $ length termList
bin/gargantext-import/Main.hs

@@ -41,7 +41,7 @@ main = do
   let
     --tt = (Unsupervised EN 6 0 Nothing)
     tt = (Multi EN)
-    format = CsvGargV3 -- CsvHal --WOS
+    format = TsvGargV3 -- TsvHal --WOS
     limit' = case (readMaybe limit :: Maybe Limit) of
       Nothing -> panicTrace $ "Cannot read limit: " <> (Text.pack limit)
      Just l -> l
@@ -49,8 +49,8 @@ main = do
     mkCorpusUser = MkCorpusUserNormalCorpusName (UserName $ cs user) (cs name :: Text)
     corpus = flowCorpusFile mkCorpusUser limit' tt format Plain corpusPath Nothing DevJobHandle
-    corpusCsvHal :: forall m. (FlowCmdM DevEnv BackendInternalError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId
-    corpusCsvHal = flowCorpusFile mkCorpusUser limit' tt CsvHal Plain corpusPath Nothing DevJobHandle
+    corpusTsvHal :: forall m. (FlowCmdM DevEnv BackendInternalError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId
+    corpusTsvHal = flowCorpusFile mkCorpusUser limit' tt TsvHal Plain corpusPath Nothing DevJobHandle
     annuaire :: forall m. (FlowCmdM DevEnv BackendInternalError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId
     annuaire = flowAnnuaire (MkCorpusUserNormalCorpusName (UserName $ cs user) "Annuaire") (Multi EN) corpusPath DevJobHandle
@@ -71,8 +71,8 @@ main = do
         then runCmdGargDev env corpus
         else pure 0 --(cs "false")
-  _ <- if fun == "corpusCsvHal"
-        then runCmdGargDev env corpusCsvHal
+  _ <- if fun == "corpusTsvHal"
+        then runCmdGargDev env corpusTsvHal
         else pure 0 --(cs "false")
   _ <- if fun == "annuaire"
bin/gargantext-phylo-profile/Main.hs

@@ -26,7 +26,7 @@ phyloConfig outdir = PhyloConfig {
     corpusPath = "corpus.csv"
   , listPath = "list.csv"
   , outputPath = outdir
-  , corpusParser = Csv {_csv_limit = 150000}
+  , corpusParser = Tsv {_tsv_limit = 150000}
   , listParser = V4
   , phyloName = "phylo_profile_test"
   , phyloScale = 2
bin/gargantext-phylo/Main.hs

@@ -31,9 +31,9 @@ import Gargantext.API.Ngrams.Prelude (toTermList)
 import Gargantext.API.Ngrams.Types
 import Gargantext.Core.Text.Context (TermList)
 import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), FileType(..), parseFile)
-import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year, csv_publication_month, csv_publication_day, csv'_source, csv'_title, csv'_abstract, csv'_publication_year, csv'_publication_month, csv'_publication_day, csv'_weight)
-import Gargantext.Core.Text.Corpus.Parsers.CSV qualified as Csv
-import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
+import Gargantext.Core.Text.Corpus.Parsers.TSV (tsv_title, tsv_abstract, tsv_publication_year, tsv_publication_month, tsv_publication_day, tsv'_source, tsv'_title, tsv'_abstract, tsv'_publication_year, tsv'_publication_month, tsv'_publication_day, tsv'_weight)
+import Gargantext.Core.Text.Corpus.Parsers.TSV qualified as Tsv
+import Gargantext.Core.Text.List.Formats.TSV (tsvMapTermList)
 import Gargantext.Core.Text.Ngrams (NgramsType(..))
 import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
 import Gargantext.Core.Types.Main (ListType(..))
bin/gargantext-phylo/Phylo/Common.hs

@@ -15,9 +15,9 @@ import Gargantext.API.Ngrams.Prelude (toTermList)
 import Gargantext.API.Ngrams.Types
 import Gargantext.Core.Text.Context (TermList)
 import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), FileType(..), parseFile)
-import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year, csv_publication_month, csv_publication_day, csv'_source, csv'_title, csv'_abstract, csv'_publication_year, csv'_publication_month, csv'_publication_day, csv'_weight)
-import Gargantext.Core.Text.Corpus.Parsers.CSV qualified as Csv
-import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
+import Gargantext.Core.Text.Corpus.Parsers.TSV (tsv_title, tsv_abstract, tsv_publication_year, tsv_publication_month, tsv_publication_day, tsv'_source, tsv'_title, tsv'_abstract, tsv'_publication_year, tsv'_publication_month, tsv'_publication_day, tsv'_weight)
+import Gargantext.Core.Text.Corpus.Parsers.TSV qualified as Tsv
+import Gargantext.Core.Text.List.Formats.TSV (tsvMapTermList)
 import Gargantext.Core.Text.Ngrams (NgramsType(..))
 import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
 import Gargantext.Core.Types.Main (ListType(..))
@@ -76,29 +76,29 @@ wosToDocs limit patterns time path = do
             <$> fromRight [] <$> parseFile WOS Plain (path <> file)) files
--- To transform a Csv file into a list of Document
-csvToDocs :: CorpusParser -> Patterns -> TimeUnit -> FilePath -> IO [Document]
-csvToDocs parser patterns time path =
+-- To transform a Tsv file into a list of Document
+tsvToDocs :: CorpusParser -> Patterns -> TimeUnit -> FilePath -> IO [Document]
+tsvToDocs parser patterns time path =
   case parser of
-    Wos _ -> Prelude.error "csvToDocs: unimplemented"
-    Csv limit -> Vector.toList
+    Wos _ -> Prelude.error "tsvToDocs: unimplemented"
+    Tsv limit -> Vector.toList
       <$> Vector.take limit
-      <$> Vector.map (\row -> Document (toPhyloDate (Csv.fromMIntOrDec Csv.defaultYear $ csv_publication_year row) (fromMaybe Csv.defaultMonth $ csv_publication_month row) (fromMaybe Csv.defaultDay $ csv_publication_day row) time)
-                               (toPhyloDate' (Csv.fromMIntOrDec Csv.defaultYear $ csv_publication_year row) (fromMaybe Csv.defaultMonth $ csv_publication_month row) (fromMaybe Csv.defaultDay $ csv_publication_day row) time)
-                               (termsInText patterns $ (csv_title row) <> " " <> (csv_abstract row))
+      <$> Vector.map (\row -> Document (toPhyloDate (Tsv.fromMIntOrDec Tsv.defaultYear $ tsv_publication_year row) (fromMaybe Tsv.defaultMonth $ tsv_publication_month row) (fromMaybe Tsv.defaultDay $ tsv_publication_day row) time)
+                               (toPhyloDate' (Tsv.fromMIntOrDec Tsv.defaultYear $ tsv_publication_year row) (fromMaybe Tsv.defaultMonth $ tsv_publication_month row) (fromMaybe Tsv.defaultDay $ tsv_publication_day row) time)
+                               (termsInText patterns $ (tsv_title row) <> " " <> (tsv_abstract row))
                                Nothing
                                []
                                time
-                    ) <$> snd <$> either (\err -> panicTrace $ "CSV error" <> (show err)) identity <$> Csv.readCSVFile path
-    Csv' limit -> Vector.toList
+                    ) <$> snd <$> either (\err -> panicTrace $ "TSV error" <> (show err)) identity <$> Tsv.readTSVFile path
+    Tsv' limit -> Vector.toList
       <$> Vector.take limit
-      <$> Vector.map (\row -> Document (toPhyloDate (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row) time)
-                               (toPhyloDate' (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row) time)
-                               (termsInText patterns $ (csv'_title row) <> " " <> (csv'_abstract row))
-                               (Just $ csv'_weight row)
-                               (map (T.strip . pack) $ splitOn ";" (unpack $ (csv'_source row)))
+      <$> Vector.map (\row -> Document (toPhyloDate (tsv'_publication_year row) (tsv'_publication_month row) (tsv'_publication_day row) time)
+                               (toPhyloDate' (tsv'_publication_year row) (tsv'_publication_month row) (tsv'_publication_day row) time)
+                               (termsInText patterns $ (tsv'_title row) <> " " <> (tsv'_abstract row))
+                               (Just $ tsv'_weight row)
+                               (map (T.strip . pack) $ splitOn ";" (unpack $ (tsv'_source row)))
                                time
-                    ) <$> snd <$> Csv.readWeightedCsv path
+                    ) <$> snd <$> Tsv.readWeightedTsv path
 -- To parse a file into a list of Document
@@ -107,8 +107,8 @@ fileToDocsAdvanced parser path time lst = do
   let patterns = buildPatterns lst
   case parser of
     Wos limit -> wosToDocs limit patterns time path
-    Csv _ -> csvToDocs parser patterns time path
-    Csv' _ -> csvToDocs parser patterns time path
+    Tsv _ -> tsvToDocs parser patterns time path
+    Tsv' _ -> tsvToDocs parser patterns time path
 fileToDocsDefault :: CorpusParser -> FilePath -> [TimeUnit] -> TermList -> IO [Document]
 fileToDocsDefault parser path timeUnits lst =
@@ -140,7 +140,7 @@ readListV4 path = do
 fileToList :: ListParser -> FilePath -> IO TermList
 fileToList parser path =
   case parser of
-    V3 -> csvMapTermList path
+    V3 -> tsvMapTermList path
     V4 -> fromJust <$> toTermList MapTerm NgramsTerms <$> readListV4 path
gargantext.cabal

@@ -181,13 +181,13 @@ library
        Gargantext.Core.Text.Corpus.API.OpenAlex
        Gargantext.Core.Text.Corpus.API.Pubmed
        Gargantext.Core.Text.Corpus.Parsers
-       Gargantext.Core.Text.Corpus.Parsers.CSV
+       Gargantext.Core.Text.Corpus.Parsers.TSV
        Gargantext.Core.Text.Corpus.Parsers.Date
        Gargantext.Core.Text.Corpus.Parsers.Date.Parsec
        Gargantext.Core.Text.Corpus.Query
        Gargantext.Core.Text.List
        Gargantext.Core.Text.List.Group.WithStem
-       Gargantext.Core.Text.List.Formats.CSV
+       Gargantext.Core.Text.List.Formats.TSV
        Gargantext.Core.Text.Metrics
        Gargantext.Core.Text.Metrics.CharByChar
        Gargantext.Core.Text.Metrics.Count
src/Gargantext/API/Admin/EnvTypes.hs

@@ -91,7 +91,7 @@ data GargJob
   = TableNgramsJob
   | ForgotPasswordJob
   | UpdateNgramsListJobJSON
-  | UpdateNgramsListJobCSV
+  | UpdateNgramsListJobTSV
   | AddContactJob
   | AddFileJob
   | DocumentFromWriteNodeJob
src/Gargantext/API/Ngrams/List.hs

@@ -18,7 +18,7 @@ module Gargantext.API.Ngrams.List
   where
 import Data.ByteString.Lazy qualified as BSL
-import Data.Csv qualified as Csv
+import Data.Csv qualified as Tsv
 import Data.HashMap.Strict (HashMap)
 import Data.HashMap.Strict qualified as HashMap
 import Data.Map.Strict (toList)
@@ -57,7 +57,7 @@ getAPI :: Named.GETAPI (AsServerT (GargM Env BackendInternalError))
 getAPI = Named.GETAPI $ \listId -> Named.ListEndpoints
   { listJSONEp = getJson listId
   , listJSONZipEp = getJsonZip listId
-  , listCSVEp = getCsv listId
+  , listTSVEp = getTsv listId
   }
 --
@@ -91,17 +91,17 @@ getJsonZip lId = do
                 ]) nlz
-getCsv :: HasNodeStory env err m
+getTsv :: HasNodeStory env err m
        => ListId
       -> m (Headers '[Header "Content-Disposition" Text] NgramsTableMap)
-getCsv lId = do
+getTsv lId = do
   lst <- getNgramsList lId
   pure $ case Map.lookup NgramsTerms lst of
     Nothing -> noHeader Map.empty
     Just (Versioned { _v_data }) ->
       addHeader (concat [ "attachment; filename=GarganText_NgramsList-"
                         , pack $ show (_NodeId lId)
-                        , ".csv"
+                        , ".tsv"
                         ]
                 ) _v_data
@@ -140,43 +140,43 @@ postAsyncJSON l ngramsList jobHandle = do
 --
--- CSV API
+-- TSV API
 --
-csvAPI :: Named.CSVAPI (AsServerT (GargM Env BackendInternalError))
-csvAPI = csvPostAsync
+tsvAPI :: Named.TSVAPI (AsServerT (GargM Env BackendInternalError))
+tsvAPI = tsvPostAsync
 ------------------------------------------------------------------------
-csvPostAsync :: Named.CSVAPI (AsServerT (GargM Env BackendInternalError))
-csvPostAsync = Named.CSVAPI $ \lId -> AsyncJobs $
-  serveJobsAPI UpdateNgramsListJobCSV $ \jHandle f -> do
-    case ngramsListFromCSVData (_wtf_data f) of
+tsvPostAsync :: Named.TSVAPI (AsServerT (GargM Env BackendInternalError))
+tsvPostAsync = Named.TSVAPI $ \lId -> AsyncJobs $
+  serveJobsAPI UpdateNgramsListJobTSV $ \jHandle f -> do
+    case ngramsListFromTSVData (_wtf_data f) of
       Left err -> serverError $ err500 { errReasonPhrase = err }
      Right ngramsList -> postAsyncJSON lId ngramsList jHandle
 -- | Tries converting a text file into an 'NgramList', so that we can reuse the
--- existing JSON endpoint for the CSV upload.
-ngramsListFromCSVData :: Text -> Either Prelude.String NgramsList
-ngramsListFromCSVData csvData = case decodeCsv of
-  -- /NOTE/ The legacy CSV data only supports terms in imports and exports, so this is
+-- existing JSON endpoint for the TSV upload.
+ngramsListFromTSVData :: Text -> Either Prelude.String NgramsList
+ngramsListFromTSVData tsvData = case decodeTsv of
+  -- /NOTE/ The legacy TSV data only supports terms in imports and exports, so this is
   -- all we care about.
-  Left err -> Left $ "Invalid CSV found in ngramsListFromCSVData: " <> err
+  Left err -> Left $ "Invalid TSV found in ngramsListFromTSVData: " <> err
   Right terms -> pure $ Map.fromList [ (NgramsTerms, Versioned 0 $ mconcat . Vec.toList $ terms) ]
   where
-    binaryData = BSL.fromStrict $ P.encodeUtf8 csvData
+    binaryData = BSL.fromStrict $ P.encodeUtf8 tsvData
-    decodeCsv :: Either Prelude.String (Vector NgramsTableMap)
-    decodeCsv = Csv.decodeWithP csvToNgramsTableMap
-                                (Csv.defaultDecodeOptions { Csv.decDelimiter = fromIntegral (P.ord '\t') })
-                                Csv.HasHeader
+    decodeTsv :: Either Prelude.String (Vector NgramsTableMap)
+    decodeTsv = Tsv.decodeWithP tsvToNgramsTableMap
+                                (Tsv.defaultDecodeOptions { Tsv.decDelimiter = fromIntegral (P.ord '\t') })
+                                Tsv.HasHeader
                                 binaryData
--- | Converts a plain CSV 'Record' into an NgramsTableMap
-csvToNgramsTableMap :: Csv.Record -> Csv.Parser NgramsTableMap
-csvToNgramsTableMap record = case Vec.toList record of
+-- | Converts a plain TSV 'Record' into an NgramsTableMap
+tsvToNgramsTableMap :: Tsv.Record -> Tsv.Parser NgramsTableMap
+tsvToNgramsTableMap record = case Vec.toList record of
   (map P.decodeUtf8 -> [status, label, forms])
       -> pure $ conv status label forms
-  _ -> Prelude.fail "csvToNgramsTableMap failed"
+  _ -> Prelude.fail "tsvToNgramsTableMap failed"
   where
     conv :: Text -> Text -> Text -> NgramsTableMap
@@ -199,12 +199,12 @@ csvToNgramsTableMap record = case Vec.toList record of
 ------------------------------------------------------------------------
--- | This is for debugging the CSV parser in the REPL
-importCsvFile :: forall env err m. (HasNodeStory env err m, HasServerError err, MonadJobStatus m)
+-- | This is for debugging the TSV parser in the REPL
+importTsvFile :: forall env err m. (HasNodeStory env err m, HasServerError err, MonadJobStatus m)
              => ListId -> P.FilePath -> m ()
-importCsvFile lId fp = do
+importTsvFile lId fp = do
   contents <- liftBase $ P.readFile fp
-  case ngramsListFromCSVData contents of
+  case ngramsListFromTSVData contents of
     Left err -> serverError $ err500 { errReasonPhrase = err }
     Right ngramsList -> postAsyncJSON lId ngramsList (noJobHandle @m Proxy)
src/Gargantext/API/Ngrams/Types.hs

@@ -24,7 +24,7 @@ import Control.Category ((>>>))
 import Control.Lens (makePrisms, Iso', iso, from, (.=), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^?), (%~), (%=), at, _Just, Each(..), itraverse_, both, forOf_, (?~), over)
 import Data.Aeson hiding ((.=))
 import Data.Csv (defaultEncodeOptions, encodeByNameWith, header, namedRecord, EncodeOptions(..), NamedRecord, Quoting(QuoteNone))
-import Data.Csv qualified as Csv
+import Data.Csv qualified as Tsv
 import Data.HashMap.Strict.InsOrd qualified as InsOrdHashMap
 import Data.List qualified as List
 import Data.Map.Strict qualified as Map
@@ -47,7 +47,7 @@ import Gargantext.Database.Admin.Types.Node (ContextId)
 import Gargantext.Database.Prelude (fromField', HasConnectionPool, HasConfig, CmdM')
 import Gargantext.Prelude hiding (IsString, hash, from, replace, to)
 import Gargantext.Prelude.Crypto.Hash (IsHashable(..))
-import Gargantext.Utils.Servant (CSV, ZIP)
+import Gargantext.Utils.Servant (TSV, ZIP)
 import Gargantext.Utils.Zip (zipContentsPure)
 import Servant (FromHttpApiData(parseUrlPiece), ToHttpApiData(toUrlPiece), Required, Strict, QueryParam', MimeRender(..), MimeUnrender(..))
 import Servant.Job.Utils (jsonOptions)
@@ -299,10 +299,10 @@ data NgramsSearchQuery = NgramsSearchQuery
 type NgramsTableMap = Map NgramsTerm NgramsRepoElement
--- CSV:
+-- TSV:
 -- header: status\tlabel\tforms
 -- item: map\taccountability\taccounting|&|accoutns|&|account
-instance MimeRender CSV NgramsTableMap where
+instance MimeRender TSV NgramsTableMap where
   -- mimeRender _ _val = encode ([] :: [(Text, Text)])
   mimeRender _ val = encodeByNameWith encOptions (header ["status", "label", "forms"]) $ fn <$> Map.toList val
     where
@@ -310,9 +310,9 @@ instance MimeRender CSV NgramsTableMap where
                      , encQuoting = QuoteNone }
       fn :: (NgramsTerm, NgramsRepoElement) -> NamedRecord
      fn (NgramsTerm term, NgramsRepoElement { _nre_list, _nre_children }) =
-        namedRecord [ "status" Csv..= toText _nre_list
-                    , "label" Csv..= term
-                    , "forms" Csv..= T.intercalate "|&|" (unNgramsTerm <$> mSetToList _nre_children)]
+        namedRecord [ "status" Tsv..= toText _nre_list
+                    , "label" Tsv..= term
+                    , "forms" Tsv..= T.intercalate "|&|" (unNgramsTerm <$> mSetToList _nre_children)]
       toText :: ListType -> Text
       toText CandidateTerm = "candidate"
       toText MapTerm = "map"
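
The MimeRender TSV NgramsTableMap instance above writes a tab-separated table with a status/label/forms header and no quoting. A minimal standalone sketch of that encoding style with cassava follows; the Row type and the example values are made up for illustration, and the tab delimiter is assumed to match the encOptions left unchanged by the diff.

{-# LANGUAGE ImportQualifiedPost #-}
{-# LANGUAGE OverloadedStrings #-}
module Main where

import Data.ByteString.Lazy.Char8 qualified as BL
import Data.Char (ord)
import Data.Csv ( EncodeOptions(..), Quoting(QuoteNone), ToNamedRecord(..)
                , defaultEncodeOptions, encodeByNameWith, header, namedRecord, (.=) )

-- Hypothetical row mirroring the status\tlabel\tforms layout.
data Row = Row { status :: String, label :: String, forms :: String }

instance ToNamedRecord Row where
  toNamedRecord (Row s l f) = namedRecord ["status" .= s, "label" .= l, "forms" .= f]

-- Tab as field separator, no quoting (as in the instance above).
encOptions :: EncodeOptions
encOptions = defaultEncodeOptions { encDelimiter = fromIntegral (ord '\t')
                                  , encQuoting   = QuoteNone }

main :: IO ()
main = BL.putStr $ encodeByNameWith encOptions (header ["status", "label", "forms"])
                     [ Row "map" "accountability" "accounting|&|accounts" ]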
src/Gargantext/API/Node/Corpus/New.hs

@@ -240,8 +240,8 @@ addToCorpusWithForm user cid nwf jobHandle = do
       let limit = fromIntegral limit' :: Integer
       let
         parseC = case (nwf ^. wf_filetype) of
-          CSV -> Parser.parseFormatC Parser.CsvGargV3
-          CSV_HAL -> Parser.parseFormatC Parser.CsvHal
+          TSV -> Parser.parseFormatC Parser.TsvGargV3
+          TSV_HAL -> Parser.parseFormatC Parser.TsvHal
           Iramuteq -> Parser.parseFormatC Parser.Iramuteq
           Istex -> Parser.parseFormatC Parser.Istex
           JSON -> Parser.parseFormatC Parser.JSON
src/Gargantext/API/Node/Corpus/New/Types.hs

@@ -8,8 +8,8 @@ import Servant
 import Test.QuickCheck (elements)
 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
-data FileType = CSV
-              | CSV_HAL
+data FileType = TSV
+              | TSV_HAL
               | Istex
               | PresseRIS
               | WOS
@@ -17,14 +17,14 @@ data FileType = CSV
               | JSON
   deriving (Eq, Show, Generic)
 instance ToSchema FileType
-instance Arbitrary FileType where arbitrary = elements [CSV, PresseRIS]
+instance Arbitrary FileType where arbitrary = elements [TSV, PresseRIS]
 instance ToParamSchema FileType
 instance FromJSON FileType
 instance ToJSON FileType
 instance FromHttpApiData FileType where
-  parseUrlPiece "CSV" = pure CSV
-  parseUrlPiece "CSV_HAL" = pure CSV_HAL
+  parseUrlPiece "TSV" = pure TSV
+  parseUrlPiece "TSV_HAL" = pure TSV_HAL
   parseUrlPiece "Istex" = pure Istex
   parseUrlPiece "PresseRis" = pure PresseRIS
   parseUrlPiece "WOS" = pure WOS
src/Gargantext/API/Node/Document/Export.hs

@@ -41,7 +41,7 @@ documentExportAPI :: IsGargServer env err m
 documentExportAPI userNodeId dId = Named.DocumentExportAPI $ Named.DocumentExportEndpoints
   { exportJSONEp = getDocumentsJSON userNodeId dId
   , exportJSONZipEp = getDocumentsJSONZip userNodeId dId
-  , exportCSVEp = getDocumentsCSV userNodeId dId }
+  , exportTSVEp = getDocumentsTSV userNodeId dId }
 --------------------------------------------------
@@ -98,16 +98,16 @@ getDocumentsJSONZip userNodeId pId = do
                     , dezFileName dexpz
                     , ".zip" ]) dexpz
-getDocumentsCSV :: NodeId
+getDocumentsTSV :: NodeId
                 -- ^ The Node ID of the target user
                 -> DocId
                 -> GargNoServer (Headers '[Header "Content-Disposition" T.Text] T.Text) -- [Document]
-getDocumentsCSV userNodeId pId = do
+getDocumentsTSV userNodeId pId = do
   dJSON <- getDocumentsJSON userNodeId pId
   let DocumentExport { _de_documents } = getResponse dJSON
   let ret = TE.decodeUtf8 $ BSC.toStrict $ encodeDefaultOrderedByNameWith (defaultEncodeOptions { encDelimiter = fromIntegral $ ord '\t', encQuoting = QuoteAll }) _de_documents
   pure $ addHeader (T.concat [ "attachment; filename=GarganText_DocsList-"
                              , T.pack $ show pId
-                             , ".csv" ])
+                             , ".tsv" ])
                    ret
src/Gargantext/API/Node/FrameCalcUpload.hs

@@ -89,7 +89,7 @@ frameCalcUploadAsync authenticatedUser nId (FrameCalcUpload _wf_lang _wf_selecti
   -- FIXME(adn) Audit this conversion.
   addToCorpusWithForm (RootId userNodeId)
                       cId
-                      (NewWithForm { _wf_filetype = CSV
+                      (NewWithForm { _wf_filetype = TSV
                                    , _wf_fileformat = Plain
                                    , _wf_data = body
                                    , _wf_lang
src/Gargantext/API/Routes/Named/Document.hs

@@ -31,7 +31,7 @@ data DocumentExportEndpoints mode = DocumentExportEndpoints
                       :> Get '[JSON] (Headers '[Servant.Header "Content-Disposition" Text] DocumentExport)
   , exportJSONZipEp :: mode :- "json.zip"
                       :> Get '[ZIP] (Headers '[Servant.Header "Content-Disposition" Text] DocumentExportZIP)
-  , exportCSVEp :: mode :- "csv"
+  , exportTSVEp :: mode :- "csv"
                   :> Get '[PlainText] (Headers '[Servant.Header "Content-Disposition" Text] Text)
   } deriving Generic
src/Gargantext/API/Routes/Named/List.hs

@@ -5,7 +5,7 @@ module Gargantext.API.Routes.Named.List (
     GETAPI(..)
   , ListEndpoints(..)
   , JSONAPI(..)
-  , CSVAPI(..)
+  , TSVAPI(..)
   ) where
 import Data.Text (Text)
@@ -30,7 +30,7 @@ data ListEndpoints mode = ListEndpoints
                  :> Get '[JSON, HTML] (Headers '[Header "Content-Disposition" Text] NgramsList)
   , listJSONZipEp :: mode :- "json.zip"
                     :> Get '[GUS.ZIP] (Headers '[Header "Content-Disposition" Text] NgramsListZIP)
-  , listCSVEp :: mode :- "csv"
-                :> Get '[GUS.CSV] (Headers '[Header "Content-Disposition" Text] NgramsTableMap)
+  , listTSVEp :: mode :- "csv"
+                :> Get '[GUS.TSV] (Headers '[Header "Content-Disposition" Text] NgramsTableMap)
   } deriving Generic
 newtype JSONAPI mode = JSONAPI
@@ -44,8 +44,8 @@ newtype JSONAPI mode = JSONAPI
   } deriving Generic
-newtype CSVAPI mode = CSVAPI
-  { updateListCSVEp :: mode :- Summary "Update List (legacy v3 CSV)"
+newtype TSVAPI mode = TSVAPI
+  { updateListTSVEp :: mode :- Summary "Update List (legacy v3 TSV)"
                        :> "lists"
                       :> Capture "listId" ListId
                       :> "csv"
src/Gargantext/API/Routes/Named/Private.hs

@@ -91,7 +91,7 @@ data GargPrivateAPI' mode = GargPrivateAPI'
   , addWithQueryEp :: mode :- NamedRoutes AddWithQuery
   , listGetAPI :: mode :- NamedRoutes List.GETAPI
   , listJsonAPI :: mode :- NamedRoutes List.JSONAPI
-  , listCsvAPI :: mode :- NamedRoutes List.CSVAPI
+  , listTsvAPI :: mode :- NamedRoutes List.TSVAPI
   , shareUrlEp :: mode :- "shareurl" :> NamedRoutes ShareURL
   } deriving Generic
src/Gargantext/API/Server/Named/Private.hs

@@ -63,6 +63,6 @@ serverPrivateGargAPI' authenticatedUser@(AuthenticatedUser userNodeId userId)
   , addWithQueryEp = addCorpusWithQuery (RootId userNodeId)
   , listGetAPI = List.getAPI
   , listJsonAPI = List.jsonAPI
-  , listCsvAPI = List.csvAPI
+  , listTsvAPI = List.tsvAPI
   , shareUrlEp = shareURL
   }
src/Gargantext/Core/Ext/IMT.hs

@@ -19,7 +19,7 @@ import Data.Morpheus.Types (GQLType)
 import Data.Set qualified as S
 import Data.Text (splitOn)
 import Data.Vector qualified as DV
-import Gargantext.Core.Text.Corpus.Parsers.CSV as CSV
+import Gargantext.Core.Text.Corpus.Parsers.TSV as TSV
 import Gargantext.Core.Text.Metrics.Utils as Utils
 import Gargantext.Prelude
@@ -104,9 +104,9 @@ mapIdSchool :: Map Text Text
 mapIdSchool = M.fromList $ Gargantext.Prelude.map
                (\(School { school_shortName, school_id }) -> (school_id, school_shortName)) schools
-hal_data :: IO (Either Text (DV.Vector CsvHal))
+hal_data :: IO (Either Text (DV.Vector TsvHal))
 hal_data = do
-  r <- CSV.readCsvHal "doc/corpus_imt/Gargantext_Corpus.csv"
+  r <- TSV.readTsvHal "doc/corpus_imt/Gargantext_Corpus.csv"
   pure $ snd <$> r
 names :: S.Set Text
@@ -117,7 +117,7 @@ toSchoolName t = case M.lookup t mapIdSchool of
   Nothing -> t
   Just t' -> t'
-publisBySchool :: DV.Vector CsvHal -> [(Maybe Text, Int)]
+publisBySchool :: DV.Vector TsvHal -> [(Maybe Text, Int)]
 publisBySchool hal_data' = Gargantext.Prelude.map (\(i,n) -> (M.lookup i mapIdSchool, n))
                          $ DL.filter (\i -> S.member (fst i) names)
                          $ DL.reverse
@@ -126,5 +126,5 @@ publisBySchool hal_data' = Gargantext.Prelude.map (\(i,n) -> (M.lookup i mapIdSc
                          $ Utils.freq
                          $ DL.concat
                         $ DV.toList
-                         $ DV.map (\n -> splitOn (", ") (csvHal_instStructId_i n))
-                         $ DV.filter (\n -> csvHal_publication_year n == 2017) hal_data'
+                         $ DV.map (\n -> splitOn (", ") (tsvHal_instStructId_i n))
+                         $ DV.filter (\n -> tsvHal_publication_year n == 2017) hal_data'
src/Gargantext/Core/Ext/IMTUser.hs

@@ -22,7 +22,7 @@ import Data.Csv ( (.:), header, decodeByNameWith, FromNamedRecord(..), Header )
 import Data.Text qualified as T
 import Data.Vector (Vector)
 import Data.Vector qualified as Vector
-import Gargantext.Core.Text.Corpus.Parsers.CSV (csvDecodeOptions, Delimiter(Tab))
+import Gargantext.Core.Text.Corpus.Parsers.TSV (tsvDecodeOptions, Delimiter(Tab))
 import Gargantext.Database.Admin.Types.Hyperdata.Contact
 import Gargantext.Prelude
 import System.FilePath.Posix (takeExtension)
@@ -30,7 +30,7 @@ import System.FilePath.Posix (takeExtension)
 ------------------------------------------------------------------------
 readFile_Annuaire :: FilePath -> IO [HyperdataContact]
 readFile_Annuaire fp = case takeExtension fp of
-  ".csv" -> readCSVFile_Annuaire fp
+  ".csv" -> readTSVFile_Annuaire fp
   ".data" -> deserialiseImtUsersFromFile fp
   unknownExt -> panicTrace $ "[G.C.E.I.readFile_Annuaire] extension unknown: " <> T.pack unknownExt
@@ -69,7 +69,7 @@ data IMTUser = IMTUser
   , date_modification :: Maybe Text
   } deriving (Eq, Show, Generic)
--- | CSV instance
+-- | TSV instance
 instance FromNamedRecord IMTUser where
   parseNamedRecord r = do
     id <- r .: "id"
@@ -105,21 +105,21 @@ instance FromNamedRecord IMTUser where
     date_modification <- r .: "date_modification"
     pure $ IMTUser { .. }
-headerCSVannuaire :: Header
-headerCSVannuaire =
+headerTSVannuaire :: Header
+headerTSVannuaire =
   header ["id", "entite", "mail", "nom", "prenom", "fonction", "fonction2", "tel", "fax", "service", "groupe", "entite2", "service2", "groupe2", "bureau", "url", "pservice", "pfonction", "afonction", "afonction2", "grprech", "appellation", "lieu", "aprecision", "atel", "sexe", "statut", "idutilentite", "actif", "idutilsiecoles", "date_modification"]
-readCSVFile_Annuaire :: FilePath -> IO [HyperdataContact]
-readCSVFile_Annuaire fp = do
-  users <- snd <$> readCSVFile_Annuaire' fp
+readTSVFile_Annuaire :: FilePath -> IO [HyperdataContact]
+readTSVFile_Annuaire fp = do
+  users <- snd <$> readTSVFile_Annuaire' fp
   pure $ map imtUser2gargContact $ Vector.toList users
-readCSVFile_Annuaire' :: FilePath -> IO (Header, Vector IMTUser)
-readCSVFile_Annuaire' = fmap readCsvHalLazyBS' . BL.readFile
+readTSVFile_Annuaire' :: FilePath -> IO (Header, Vector IMTUser)
+readTSVFile_Annuaire' = fmap readTsvHalLazyBS' . BL.readFile
   where
-    readCsvHalLazyBS' :: BL.ByteString -> (Header, Vector IMTUser)
-    readCsvHalLazyBS' bs = case decodeByNameWith (csvDecodeOptions Tab) bs of
+    readTsvHalLazyBS' :: BL.ByteString -> (Header, Vector IMTUser)
+    readTsvHalLazyBS' bs = case decodeByNameWith (tsvDecodeOptions Tab) bs of
      Left e -> panicTrace (cs e)
      Right rows -> rows
src/Gargantext/Core/Text/Convert.hs

@@ -13,19 +13,19 @@ Format Converter.
 {-# LANGUAGE PackageImports #-}
-module Gargantext.Core.Text.Convert (risPress2csvWrite)
+module Gargantext.Core.Text.Convert (risPress2tsvWrite)
   where
 import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat(..), FileType(..))
-import Gargantext.Core.Text.Corpus.Parsers.CSV (writeDocs2Csv)
+import Gargantext.Core.Text.Corpus.Parsers.TSV (writeDocs2Tsv)
 import Gargantext.Prelude
-risPress2csvWrite :: FilePath -> IO ()
-risPress2csvWrite f = do
+risPress2tsvWrite :: FilePath -> IO ()
+risPress2tsvWrite f = do
   eContents <- parseFile RisPresse Plain (f <> ".ris")
   case eContents of
-    Right contents -> writeDocs2Csv (f <> ".csv") contents
+    Right contents -> writeDocs2Tsv (f <> ".csv") contents
    Left e -> panicTrace $ "Error: " <> e
src/Gargantext/Core/Text/Corpus/API/Isidore.hs

@@ -15,13 +15,13 @@ module Gargantext.Core.Text.Corpus.API.Isidore (
     get
     -- * Internals (possibly unused?)
-  , isidore2csvFile
+  , isidore2tsvFile
   ) where
 import Data.Text qualified as Text
 import Gargantext.Core (Lang(..))
 import Gargantext.Core.Text.Corpus.Parsers (cleanText)
-import Gargantext.Core.Text.Corpus.Parsers.CSV (writeDocs2Csv)
+import Gargantext.Core.Text.Corpus.Parsers.TSV (writeDocs2Tsv)
 import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date
 import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
 import Gargantext.Defaults qualified as Defaults
@@ -50,12 +50,12 @@ get lang l q a = do
   hDocs <- mapM (isidoreToDoc lang) (toIsidoreDocs iDocs)
   pure hDocs
-isidore2csvFile :: FilePath -> Lang -> Maybe Isidore.Limit
+isidore2tsvFile :: FilePath -> Lang -> Maybe Isidore.Limit
                 -> Maybe Isidore.TextQuery -> Maybe Isidore.AuthorQuery
                 -> IO ()
-isidore2csvFile fp lang li tq aq = do
+isidore2tsvFile fp lang li tq aq = do
   hdocs <- get lang li tq aq
-  writeDocs2Csv fp hdocs
+  writeDocs2Tsv fp hdocs
 isidoreToDoc :: Lang -> IsidoreDoc -> IO HyperdataDocument
 isidoreToDoc lang (IsidoreDoc t a d u s as) = do
src/Gargantext/Core/Text/Corpus/Parsers.hs

@@ -47,7 +47,7 @@ import Data.Text qualified as DT
 import Data.Tuple.Extra (both) -- , first, second)
 import Gargantext.API.Node.Corpus.New.Types (FileFormat(..))
 import Gargantext.Core (Lang(..))
-import Gargantext.Core.Text.Corpus.Parsers.CSV (parseHal, parseCsv, parseCsvC)
+import Gargantext.Core.Text.Corpus.Parsers.TSV (parseHal, parseTsv, parseTsvC)
 import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date
 import Gargantext.Core.Text.Corpus.Parsers.FrameWrite (text2titleParagraphs)
 import Gargantext.Core.Text.Corpus.Parsers.Iramuteq qualified as Iramuteq
@@ -78,8 +78,8 @@ type ParseError = Text
 data FileType = WOS
               | RIS
               | RisPresse
-              | CsvGargV3
-              | CsvHal
+              | TsvGargV3
+              | TsvHal
               | Iramuteq
               | JSON
               | Istex
@@ -109,11 +109,11 @@ parseFormatC ft ff bs0 = first ParseFormatError <$> do_parse ft ff bs0
              -> FileFormat
              -> DB.ByteString
             -> m (Either DT.Text (Integer, ConduitT () HyperdataDocument IO ()))
-    do_parse CsvGargV3 Plain bs = do
-      let eParsedC = parseCsvC $ DBL.fromStrict bs
+    do_parse TsvGargV3 Plain bs = do
+      let eParsedC = parseTsvC $ DBL.fromStrict bs
       pure (second (transPipe (pure . runIdentity)) <$> eParsedC)
-    do_parse CsvHal Plain bs = do
-      let eParsedC = parseCsvC $ DBL.fromStrict bs
+    do_parse TsvHal Plain bs = do
+      let eParsedC = parseTsvC $ DBL.fromStrict bs
       pure (second (transPipe (pure . runIdentity)) <$> eParsedC)
     do_parse Istex Plain bs = do
       ep <- liftBase $ parseIstex EN $ DBL.fromStrict bs
@@ -188,8 +188,8 @@ etale = concatMap etale'
 -- parseFormat :: FileType -> DB.ByteString -> IO (Either Prelude.String [HyperdataDocument])
--- parseFormat CsvGargV3 bs = pure $ parseCsv' $ DBL.fromStrict bs
--- parseFormat CsvHal bs = pure $ parseHal' $ DBL.fromStrict bs
+-- parseFormat TsvGargV3 bs = pure $ parseTsv' $ DBL.fromStrict bs
+-- parseFormat TsvHal bs = pure $ parseHal' $ DBL.fromStrict bs
 -- parseFormat RisPresse bs = do
 --   docs <- mapM (toDoc RIS)
 --           <$> snd
@@ -220,8 +220,8 @@ parseFile :: FileType
           -> FileFormat
           -> FilePath
          -> IO (Either Text [HyperdataDocument])
-parseFile CsvGargV3 Plain p = parseCsv p
-parseFile CsvHal Plain p = parseHal p
+parseFile TsvGargV3 Plain p = parseTsv p
+parseFile TsvHal Plain p = parseHal p
 parseFile RisPresse Plain p = do
   docs <- join $ mapM (toDoc RIS) <$> snd <$> enrichWith RisPresse <$> readFileWith RIS p
   pure $ Right docs
src/Gargantext/Core/Text/Corpus/Parsers/Book.hs

@@ -18,7 +18,7 @@ import Data.ByteString.Lazy qualified as DBL
 import Data.List qualified as List
 import Data.Text qualified as DT
 import Gargantext.Core (Lang(..))
-import Gargantext.Core.Text.Corpus.Parsers.CSV (hyperdataDocument2csv)
+import Gargantext.Core.Text.Corpus.Parsers.TSV (hyperdataDocument2tsv)
 import Gargantext.Core.Text.Corpus.Parsers.FrameWrite (text2titleParagraphs)
 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
 import Gargantext.Prelude
@@ -29,13 +29,13 @@ import System.Directory -- (getDirectoryContents)
 type FileOut = FilePath
-book2csv :: Int -> FileDir -> FileOut -> IO ()
-book2csv n f_in f_out = do
+book2tsv :: Int -> FileDir -> FileOut -> IO ()
+book2tsv n f_in f_out = do
   files <- filesOf f_in
   texts <- readPublis f_in files
   let publis = concatMap (file2publi n) texts
   let docs = zipWith publiToHyperdata [1..] publis
-  DBL.writeFile f_out (hyperdataDocument2csv docs)
+  DBL.writeFile f_out (hyperdataDocument2tsv docs)
 filesOf :: FileDir -> IO [FilePath]
 filesOf fd = List.sort -- sort by filename
src/Gargantext/Core/Text/Corpus/Parsers/Json2Csv.hs

@@ -14,14 +14,14 @@ Json parser to export towoard CSV GargV3 format.
 {-# LANGUAGE TemplateHaskell #-}
-module Gargantext.Core.Text.Corpus.Parsers.Json2Csv (json2csv, readPatents)
+module Gargantext.Core.Text.Corpus.Parsers.Json2Csv (json2tsv, readPatents)
   where
 import Data.Aeson (decode)
 import Data.ByteString.Lazy (readFile)
 import Data.Text (unpack)
 import Data.Vector (fromList)
-import Gargantext.Core.Text.Corpus.Parsers.CSV (CsvDoc(..), writeFile, headerCsvGargV3)
+import Gargantext.Core.Text.Corpus.Parsers.TSV (TsvDoc(..), writeFile, headerTsvGargV3)
 import Gargantext.Core.Utils.Prefix (unPrefix)
 import Gargantext.Defaults qualified as Defaults
 import Gargantext.Prelude hiding (readFile, writeFile)
@@ -41,20 +41,20 @@ readPatents fp = decode <$> readFile fp
 type FilePathIn = FilePath
 type FilePathOut = FilePath
-json2csv :: FilePathIn -> FilePathOut -> IO ()
-json2csv fin fout = do
-  patents <- maybe (panicTrace "json2csv error") identity <$> readPatents fin
-  writeFile fout (headerCsvGargV3, fromList $ map patent2csvDoc patents)
-
-patent2csvDoc :: Patent -> CsvDoc
-patent2csvDoc (Patent { .. }) =
-  CsvDoc { csv_title = _patent_title
-         , csv_source = "Source"
-         , csv_publication_year = Just $ read (unpack _patent_year)
-         , csv_publication_month = Just $ Defaults.month
-         , csv_publication_day = Just $ Defaults.day
-         , csv_abstract = _patent_abstract
-         , csv_authors = "Authors"
-         }
+json2tsv :: FilePathIn -> FilePathOut -> IO ()
+json2tsv fin fout = do
+  patents <- maybe (panicTrace "json2tsv error") identity <$> readPatents fin
+  writeFile fout (headerTsvGargV3, fromList $ map patent2tsvDoc patents)
+
+patent2tsvDoc :: Patent -> TsvDoc
+patent2tsvDoc (Patent { .. }) =
+  TsvDoc { tsv_title = _patent_title
+         , tsv_source = "Source"
+         , tsv_publication_year = Just $ read (unpack _patent_year)
+         , tsv_publication_month = Just $ Defaults.month
+         , tsv_publication_day = Just $ Defaults.day
+         , tsv_abstract = _patent_abstract
+         , tsv_authors = "Authors"
+         }
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs → src/Gargantext/Core/Text/Corpus/Parsers/TSV.hs
(This diff is collapsed.)
src/Gargantext/Core/Text/List/Formats/CSV.hs → src/Gargantext/Core/Text/List/Formats/TSV.hs

 {-|
-Module : Gargantext.Core.Text.List.Formats.CSV
+Module : Gargantext.Core.Text.List.Formats.TSV
 Description :
 Copyright : (c) CNRS, 2018-Present
 License : AGPL + CECILL v3
@@ -7,12 +7,12 @@ Maintainer : team@gargantext.org
 Stability : experimental
 Portability : POSIX
-CSV parser for Gargantext corpus files.
+TSV parser for Gargantext corpus files.
 -}
-module Gargantext.Core.Text.List.Formats.CSV
+module Gargantext.Core.Text.List.Formats.TSV
   where
 import Control.Applicative
 import Data.ByteString.Lazy qualified as BL
@@ -25,74 +25,74 @@ import Gargantext.Core.Text.Context
 import Gargantext.Prelude hiding (length)
 ------------------------------------------------------------------------
-csvMapTermList :: FilePath -> IO TermList
-csvMapTermList fp = csv2list CsvMap <$> snd <$> fromCsvListFile fp
+tsvMapTermList :: FilePath -> IO TermList
+tsvMapTermList fp = tsv2list TsvMap <$> snd <$> fromTsvListFile fp
-csv2list :: CsvListType -> Vector CsvList -> TermList
-csv2list lt vs = V.toList $ V.map (\(CsvList _ label forms)
-                    -> (DT.words label, [DT.words label] <> (filter (not . null) . map DT.words $ DT.splitOn csvListFormsDelimiter forms)))
-                          $ V.filter (\l -> csvList_status l == lt) vs
+tsv2list :: TsvListType -> Vector TsvList -> TermList
+tsv2list lt vs = V.toList $ V.map (\(TsvList _ label forms)
+                    -> (DT.words label, [DT.words label] <> (filter (not . null) . map DT.words $ DT.splitOn tsvListFormsDelimiter forms)))
+                          $ V.filter (\l -> tsvList_status l == lt) vs
 ------------------------------------------------------------------------
-data CsvListType = CsvMap | CsvStop | CsvCandidate
+data TsvListType = TsvMap | TsvStop | TsvCandidate
   deriving (Read, Show, Eq)
 ------------------------------------------------------------------------
--- CSV List Main Configuration
-csvListFieldDelimiter :: Char
-csvListFieldDelimiter = '\t'
+-- TSV List Main Configuration
+tsvListFieldDelimiter :: Char
+tsvListFieldDelimiter = '\t'
-csvListFormsDelimiter :: Text
-csvListFormsDelimiter = "|&|"
+tsvListFormsDelimiter :: Text
+tsvListFormsDelimiter = "|&|"
 ------------------------------------------------------------------------
-data CsvList = CsvList
-  { csvList_status :: !CsvListType
-  , csvList_label  :: !Text
-  , csvList_forms  :: !Text
+data TsvList = TsvList
+  { tsvList_status :: !TsvListType
+  , tsvList_label  :: !Text
+  , tsvList_forms  :: !Text
   }
   deriving (Show)
 ------------------------------------------------------------------------
-instance FromNamedRecord CsvList where
-  parseNamedRecord r = CsvList <$> r .: "status"
+instance FromNamedRecord TsvList where
+  parseNamedRecord r = TsvList <$> r .: "status"
                                <*> r .: "label"
                               <*> r .: "forms"
-instance ToNamedRecord CsvList where
-  toNamedRecord (CsvList s l f) =
+instance ToNamedRecord TsvList where
+  toNamedRecord (TsvList s l f) =
    namedRecord [ "status" .= s
               , "label"  .= l
               , "forms"  .= f
               ]
 ------------------------------------------------------------------------
-instance FromField CsvListType where
-  parseField "map"       = pure CsvMap
-  parseField "main"      = pure CsvCandidate
-  parseField "candidate" = pure CsvCandidate -- backward compat
-  parseField "stop"      = pure CsvStop
+instance FromField TsvListType where
+  parseField "map"       = pure TsvMap
+  parseField "main"      = pure TsvCandidate
+  parseField "candidate" = pure TsvCandidate -- backward compat
+  parseField "stop"      = pure TsvStop
   parseField _           = mzero
-instance ToField CsvListType where
-  toField CsvMap       = "map"
-  toField CsvCandidate = "main"
-  toField CsvStop      = "stop"
+instance ToField TsvListType where
+  toField TsvMap       = "map"
+  toField TsvCandidate = "main"
+  toField TsvStop      = "stop"
 ------------------------------------------------------------------------
-csvDecodeOptions :: DecodeOptions
-csvDecodeOptions = (defaultDecodeOptions
                      {decDelimiter = fromIntegral $ ord csvListFieldDelimiter}
+tsvDecodeOptions :: DecodeOptions
+tsvDecodeOptions = (defaultDecodeOptions
                      {decDelimiter = fromIntegral $ ord tsvListFieldDelimiter}
                   )
-csvEncodeOptions :: EncodeOptions
-csvEncodeOptions = ( defaultEncodeOptions
                      {encDelimiter = fromIntegral $ ord csvListFieldDelimiter}
+tsvEncodeOptions :: EncodeOptions
+tsvEncodeOptions = ( defaultEncodeOptions
                      {encDelimiter = fromIntegral $ ord tsvListFieldDelimiter}
                   )
 ------------------------------------------------------------------------
-fromCsvListFile :: FilePath -> IO (Header, Vector CsvList)
-fromCsvListFile fp = do
-  csvData <- BL.readFile fp
-  case decodeByNameWith csvDecodeOptions csvData of
+fromTsvListFile :: FilePath -> IO (Header, Vector TsvList)
+fromTsvListFile fp = do
+  tsvData <- BL.readFile fp
+  case decodeByNameWith tsvDecodeOptions tsvData of
    Left e -> panicTrace (pack e)
-    Right csvList -> pure csvList
+    Right tsvList -> pure tsvList
 ------------------------------------------------------------------------
-toCsvListFile :: FilePath -> (Header, Vector CsvList) -> IO ()
-toCsvListFile fp (h, vs) = BL.writeFile fp $
                          encodeByNameWith csvEncodeOptions h (V.toList vs)
+toTsvListFile :: FilePath -> (Header, Vector TsvList) -> IO ()
+toTsvListFile fp (h, vs) = BL.writeFile fp $
                          encodeByNameWith tsvEncodeOptions h (V.toList vs)
 ------------------------------------------------------------------------
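
The renamed module keeps cassava's named-record machinery and only sets the field delimiter to a tab (tsvDecodeOptions / tsvEncodeOptions). Below is a minimal standalone sketch of that decoding setup, assuming only the cassava and vector packages; the Entry type and the inline sample data are made up for illustration.

{-# LANGUAGE ImportQualifiedPost #-}
{-# LANGUAGE OverloadedStrings #-}
module Main where

import Data.Char (ord)
import Data.Csv ( DecodeOptions(..), FromNamedRecord(..)
                , decodeByNameWith, defaultDecodeOptions, (.:) )
import Data.Vector qualified as V

-- Hypothetical record mirroring the status/label/forms columns.
data Entry = Entry { eStatus :: String, eLabel :: String, eForms :: String }
  deriving Show

instance FromNamedRecord Entry where
  parseNamedRecord r = Entry <$> r .: "status" <*> r .: "label" <*> r .: "forms"

-- Same idea as tsvDecodeOptions: cassava's CSV machinery with '\t' as delimiter.
tabOptions :: DecodeOptions
tabOptions = defaultDecodeOptions { decDelimiter = fromIntegral (ord '\t') }

main :: IO ()
main =
  case decodeByNameWith tabOptions "status\tlabel\tforms\nmap\tterm\tform1|&|form2\n" of
    Left err         -> putStrLn err
    Right (_h, rows) -> mapM_ print (V.toList (rows :: V.Vector Entry))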
src/Gargantext/Core/Text/Search.hs

@@ -7,7 +7,7 @@ Maintainer : team@gargantext.org
 Stability : experimental
 Portability : POSIX
-This search Engine is first made to clean CSV file according to a query.
+This search Engine is first made to clean TSV file according to a query.
 Starting from this model, a specific Gargantext engine will be made
 (using more metrics scores/features).
@@ -17,7 +17,7 @@ module Gargantext.Core.Text.Search where
 import Data.Ix
 import Data.SearchEngine
-import Gargantext.Core.Text.Corpus.Parsers.CSV
+import Gargantext.Core.Text.Corpus.Parsers.TSV
 import Gargantext.Core.Text.Terms.Mono (monoTexts)
 import Gargantext.Core.Text.Terms.Mono.Stem as ST
 import Gargantext.Prelude
@@ -29,7 +29,7 @@ import Gargantext.Prelude
 type DocId = Int
 type DocSearchEngine = SearchEngine
-                         CsvGargV3
+                         TsvGargV3
                          DocId
                          DocField
                          NoFeatures
@@ -42,7 +42,7 @@ initialDocSearchEngine :: DocSearchEngine
 initialDocSearchEngine = initSearchEngine docSearchConfig defaultSearchRankParameters
-docSearchConfig :: SearchConfig CsvGargV3 DocId DocField NoFeatures
+docSearchConfig :: SearchConfig TsvGargV3 DocId DocField NoFeatures
 docSearchConfig =
   SearchConfig {
       documentKey = d_docId,
@@ -51,7 +51,7 @@ docSearchConfig =
      documentFeatureValue = const noFeatures
  }
  where
-   extractTerms :: CsvGargV3 -> DocField -> [Text]
+   extractTerms :: TsvGargV3 -> DocField -> [Text]
    extractTerms doc TitleField = monoTexts (d_title doc)
    extractTerms doc AbstractField = monoTexts (d_abstract doc)
View file @
ff0a77cd
...
...
@@ -13,7 +13,7 @@ where
import
qualified
Data.Map.Strict
as
Map
import
qualified
Data.Text
as
T
import
qualified
Data.Text.Encoding
as
TE
import
Gargantext.Utils.Servant
(
C
SV
,
Markdown
)
import
Gargantext.Utils.Servant
(
T
SV
,
Markdown
)
import
Network.HTTP.Client
(
newManager
,
Request
(
..
))
import
Network.HTTP.Client.TLS
(
tlsManagerSettings
)
import
Protolude
...
...
@@ -26,10 +26,10 @@ newtype DocId = DocId { fromDocId :: Text }
newtype
Data
=
Data
{
fromData
::
Text
}
data
ContentType
a
=
CTPlain
a
|
CT
C
SV
a
|
CT
T
SV
a
-- TODO SocialCalc, Excel XML ?
instance
MimeRender
C
SV
Data
where
instance
MimeRender
T
SV
Data
where
mimeRender
p
(
Data
d
)
=
mimeRender
p
d
instance
MimeRender
PlainText
Data
where
mimeRender
p
(
Data
d
)
=
mimeRender
p
d
...
...
@@ -49,14 +49,14 @@ type EthercalcAPI =
:>
ReqBody
'[
P
lainText
]
Data
:>
Put
'[
P
lainText
]
Text
--
c
sv
--
t
sv
:<|>
ReqBody
'[
C
SV
]
Data
:>
Post
'[
P
lainText
,
C
SV
]
Text
ReqBody
'[
T
SV
]
Data
:>
Post
'[
P
lainText
,
T
SV
]
Text
:<|>
Capture
"docId"
DocId
:>
ReqBody
'[
C
SV
]
Data
:>
Put
'[
P
lainText
,
C
SV
]
Text
:>
ReqBody
'[
T
SV
]
Data
:>
Put
'[
P
lainText
,
T
SV
]
Text
)
ethercalcAPI
::
Proxy
EthercalcAPI
...
...
@@ -64,16 +64,16 @@ ethercalcAPI = Proxy
ethercalcNewPlain
::
Data
->
ClientM
Text
ethercalcUpdatePlain
::
DocId
->
Data
->
ClientM
Text
ethercalcNew
C
SV
::
Data
->
ClientM
Text
ethercalcUpdate
C
SV
::
DocId
->
Data
->
ClientM
Text
ethercalcNew
T
SV
::
Data
->
ClientM
Text
ethercalcUpdate
T
SV
::
DocId
->
Data
->
ClientM
Text
ethercalcNewPlain
:<|>
ethercalcUpdatePlain
:<|>
ethercalcNew
CSV
:<|>
ethercalcUpdateC
SV
=
client
ethercalcAPI
:<|>
ethercalcNew
TSV
:<|>
ethercalcUpdateT
SV
=
client
ethercalcAPI
------------------------------
-- | Create new or update existing Ethercalc document (depending on
-- `Maybe DocId` constructor). `Data` can be in various formats (
C
SV,
-- `Maybe DocId` constructor). `Data` can be in various formats (
T
SV,
-- etc).
ethercalc
::
Host
->
Maybe
DocId
->
ContentType
Data
->
IO
(
Either
ClientError
Text
)
ethercalc
(
Host
host
)
mDocId
ctD
=
do
...
...
@@ -81,9 +81,9 @@ ethercalc (Host host) mDocId ctD = do
let
env
=
mkClientEnv
manager'
(
BaseUrl
Https
(
T
.
unpack
host
)
443
""
)
case
(
mDocId
,
ctD
)
of
(
Nothing
,
CTPlain
d
)
->
runClientM
(
ethercalcNewPlain
d
)
env
(
Nothing
,
CT
CSV
d
)
->
runClientM
(
ethercalcNewC
SV
d
)
env
(
Nothing
,
CT
TSV
d
)
->
runClientM
(
ethercalcNewT
SV
d
)
env
(
Just
docId
,
CTPlain
d
)
->
runClientM
(
ethercalcUpdatePlain
docId
d
)
env
(
Just
docId
,
CT
CSV
d
)
->
runClientM
(
ethercalcUpdateC
SV
docId
d
)
env
(
Just
docId
,
CT
TSV
d
)
->
runClientM
(
ethercalcUpdateT
SV
docId
d
)
env
-----------------------------------
...
...
src/Gargantext/Core/Viz/Phylo.hs

@@ -43,8 +43,8 @@ import Test.QuickCheck.Instances.Vector()
 data CorpusParser =
       Wos  {_wos_limit  :: Int}
-    | Csv  {_csv_limit  :: Int}
-    | Csv' {_csv'_limit :: Int}
+    | Tsv  {_tsv_limit  :: Int}
+    | Tsv' {_tsv'_limit :: Int}
     deriving (Show, Generic, Eq, ToExpr)
 instance ToSchema CorpusParser where
@@ -223,7 +223,7 @@ defaultConfig =
     PhyloConfig { corpusPath = "corpus.csv" -- useful for commandline only
                 , listPath = "list.csv" -- useful for commandline only
                 , outputPath = "data/"
-                , corpusParser = Csv 150000
+                , corpusParser = Tsv 150000
                 , listParser = V4
                 , phyloName = pack "Phylo Name"
                 , phyloScale = 2
@@ -725,8 +725,8 @@ instance Arbitrary PhyloConfig where
 instance Arbitrary CorpusParser where
     arbitrary = oneof [ Wos  <$> arbitrary
-                      , Csv  <$> arbitrary
-                      , Csv' <$> arbitrary
+                      , Tsv  <$> arbitrary
+                      , Tsv' <$> arbitrary
                       ]
 instance Arbitrary ListParser where
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs

@@ -344,7 +344,7 @@ toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of
     Fis _ _ ->
       let fis = parMap rpar (\(prd, docs) ->
                   case (corpusParser $ getConfig phylo) of
-                    Csv' _ -> let lst = toList
+                    Tsv' _ -> let lst = toList
                                       $ fisWithSizePolyMap' (Segment 1 20) 1 (map (\d -> (ngramsToIdx (text d) (getRoots phylo), (weight d, (sourcesToIdx (sources d) (getSources phylo))))) docs)
                               in (prd, map (\f -> Clustering (Set.toList $ fst f) ((fst . snd) f) prd ((fst . snd . snd) f) (((snd . snd . snd) f))) lst)
                    _ -> let lst = toList
src/Gargantext/Database/Admin/Types/Hyperdata/Frame.hs

@@ -70,8 +70,8 @@ getHyperdataFrameContents (HyperdataFrame { _hf_base, _hf_frame_id }) = do
   r <- Wreq.get $ T.unpack path
   pure $ decodeUtf8 $ toStrict $ r ^. Wreq.responseBody
-getHyperdataFrameCSV :: HyperdataFrame -> IO Text
-getHyperdataFrameCSV (HyperdataFrame { _hf_base, _hf_frame_id }) = do
+getHyperdataFrameTSV :: HyperdataFrame -> IO Text
+getHyperdataFrameTSV (HyperdataFrame { _hf_base, _hf_frame_id }) = do
   let path = T.concat [_hf_base, "/", _hf_frame_id, ".csv"]
   r <- Wreq.get $ T.unpack path
   pure $ decodeUtf8 $ toStrict $ r ^. Wreq.responseBody
src/Gargantext/Utils/Jobs.hs

@@ -69,7 +69,7 @@ parseGargJob s = case s of
   "tablengrams" -> Just TableNgramsJob
   "forgotpassword" -> Just ForgotPasswordJob
   "updatengramslistjson" -> Just UpdateNgramsListJobJSON
-  "updatengramslistcsv" -> Just UpdateNgramsListJobCSV
+  "updatengramslisttsv" -> Just UpdateNgramsListJobTSV
   "addcontact" -> Just AddContactJob
   "addfile" -> Just AddFileJob
   "documentfromwritenode" -> Just DocumentFromWriteNodeJob
src/Gargantext/Utils/Servant.hs

@@ -21,18 +21,18 @@ import Protolude.Partial (read)
 import Servant ( Accept(contentType), MimeRender(..), MimeUnrender(mimeUnrender) )
-data CSV = CSV
+data TSV = TSV
-instance Accept CSV where
+instance Accept TSV where
   contentType _ = "text" // "csv" /: ("charset", "utf-8")
-instance (DefaultOrdered a, ToNamedRecord a) => MimeRender CSV [a] where
+instance (DefaultOrdered a, ToNamedRecord a) => MimeRender TSV [a] where
   mimeRender _ = encodeDefaultOrderedByName
-instance MimeRender CSV T.Text where
+instance MimeRender TSV T.Text where
   mimeRender _ = BSC.fromStrict . TE.encodeUtf8
-instance Read a => MimeUnrender CSV a where
+instance Read a => MimeUnrender TSV a where
   mimeUnrender _ bs = case BSC.take len bs of
     "text/csv" -> pure . read . BSC.unpack $ BSC.drop len bs
    _ -> Left "didn't start with the magic incantation"
@@ -40,7 +40,7 @@ instance Read a => MimeUnrender CSV a where
     len :: Int64
     len = fromIntegral $ length ("text/csv" :: Prelude.String)
---instance ToNamedRecord a => MimeRender CSV [a] where
+--instance ToNamedRecord a => MimeRender TSV [a] where
 --  mimeRender _ val = encode val
 ----------------------------
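
Gargantext.Utils.Servant above only renames the content-type tag from CSV to TSV; the advertised media type stays "text/csv". For reference, a custom Servant content type is declared with an Accept instance plus MimeRender instances, as in this minimal standalone sketch (servant and http-media assumed). The "text/tab-separated-values" media type here only illustrates the mechanism; it is not what the renamed module uses.

{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE ImportQualifiedPost #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
module Main where

import Data.ByteString.Lazy.Char8 qualified as BL
import Data.Proxy (Proxy(..))
import Data.Text qualified as T
import Data.Text.Encoding qualified as TE
import Network.HTTP.Media ((//), (/:))
import Servant (Accept(..), MimeRender(..))

-- Hypothetical content-type tag; the gargantext TSV type keeps "text/csv".
data TabSeparated = TabSeparated

instance Accept TabSeparated where
  contentType _ = "text" // "tab-separated-values" /: ("charset", "utf-8")

-- Render Text verbatim, like the MimeRender TSV T.Text instance above.
instance MimeRender TabSeparated T.Text where
  mimeRender _ = BL.fromStrict . TE.encodeUtf8

main :: IO ()
main = BL.putStr $ mimeRender (Proxy :: Proxy TabSeparated) ("a\tb\n" :: T.Text)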
test/Test/API/UpdateList.hs

@@ -25,7 +25,7 @@ import Data.Text.IO qualified as TIO
 import Fmt
 import Gargantext.API.Admin.Auth.Types (Token)
 import Gargantext.API.Ngrams qualified as APINgrams
-import Gargantext.API.Ngrams.List (ngramsListFromCSVData)
+import Gargantext.API.Ngrams.List (ngramsListFromTSVData)
 import Gargantext.API.Ngrams.Types (MSet(..), NgramsPatch(..), NgramsRepoElement(..), NgramsTablePatch(..), NgramsTerm(..), Versioned(..), mSetToList, toNgramsPatch, ne_children, ne_ngrams, vc_data, _NgramsTable)
 import Gargantext.Core.Text.Ngrams
 import Gargantext.Core.Types (CorpusId, ListId, ListType(..), NodeId, _NodeId)
@@ -190,7 +190,7 @@ tests = sequential $ aroundAll withTestDBAndPort $ do
     it "parses CSV via ngramsListFromCSVData" $ \((_testEnv, _port), _app) -> do
       simpleNgrams <- liftIO (TIO.readFile =<< getDataFileName "test-data/ngrams/simple.csv")
-      ngramsListFromCSVData simpleNgrams `shouldBe`
+      ngramsListFromTSVData simpleNgrams `shouldBe`
        Right (Map.fromList [ (NgramsTerms, Versioned 0 $ Map.fromList
                               [ (NgramsTerm "abelian group", NgramsRepoElement 1 MapTerm Nothing Nothing (MSet mempty))
                               , (NgramsTerm "brazorf", NgramsRepoElement 1 StopTerm Nothing Nothing (MSet mempty))
test/Test/Offline/Phylo.hs

@@ -12,7 +12,7 @@ import Data.GraphViz.Attributes.Complete qualified as Graphviz
 import Data.Text.Lazy as TL
 import Data.TreeDiff
 import Data.Vector qualified as V
-import Gargantext.Core.Text.List.Formats.CSV
+import Gargantext.Core.Text.List.Formats.TSV
 import Gargantext.Core.Types.Phylo
 import Gargantext.Core.Viz.Phylo hiding (EdgeType(..))
 import Gargantext.Core.Viz.Phylo.API.Tools (readPhylo, phylo2dot2json)
@@ -30,7 +30,7 @@ phyloTestConfig = PhyloConfig {
     corpusPath = "corpus.csv"
   , listPath = "list.csv"
   , outputPath = "data/"
-  , corpusParser = Csv {_csv_limit = 150000}
+  , corpusParser = Tsv {_tsv_limit = 150000}
   , listParser = V4
   , phyloName = "Phylo Name"
  , phyloScale = 2
@@ -240,7 +240,7 @@ testToPhyloDeterminism = do
                       , listPath = listPath'
                       , listParser = V3 }
-  mapList <- csvMapTermList (listPath config)
+  mapList <- tsvMapTermList (listPath config)
   corpus <- fileToDocsDefault (corpusParser config)
                               (corpusPath config)
                              [Year 3 1 5, Month 3 1 5, Week 4 2 5]

Przemyslaw Kaminski (@cgenie) mentioned this in commit 5660aec07ec5a0a0a5468f440092c1a8f57a864e · Oct 08, 2024