Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Grégoire Locqueville
haskell-gargantext
Commits
1310b3a3
Commit
1310b3a3
authored
Jul 01, 2024
by
Alfredo Di Napoli
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Port gargantext-import to CLI
parent
d765ec00
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
111 additions
and
21 deletions
+111
-21
Import.hs
bin/gargantext-cli/CLI/Import.hs
+87
-0
Types.hs
bin/gargantext-cli/CLI/Types.hs
+18
-1
Main.hs
bin/gargantext-cli/Main.hs
+5
-1
gargantext.cabal
gargantext.cabal
+1
-19
No files found.
bin/gargantext-
import/Main
.hs
→
bin/gargantext-
cli/CLI/Import
.hs
View file @
1310b3a3
{-|
Module :
Main
.hs
Module :
Import
.hs
Description : Gargantext Import Corpus
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
...
...
@@ -12,10 +12,13 @@ Import a corpus binary.
-}
{-# LANGUAGE Strict #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE LambdaCase #-}
module
Main
where
module
CLI.Import
where
import
Data.Text
qualified
as
Text
import
CLI.Types
import
Gargantext.API.Admin.EnvTypes
(
DevEnv
(
..
),
DevJobHandle
(
..
))
import
Gargantext.API.Dev
(
withDevEnv
,
runCmdGargDev
)
import
Gargantext.API.Errors.Types
(
BackendInternalError
)
...
...
@@ -23,66 +26,62 @@ import Gargantext.API.Node () -- instances
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Text.Corpus.Parsers
(
FileFormat
(
..
),
FileType
(
..
))
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Core.Types.Query
(
Limit
)
import
Gargantext.Database.Action.Flow
(
flowCorpusFile
,
flowAnnuaire
,
TermType
(
..
))
import
Gargantext.Database.Action.Flow.Types
(
FlowCmdM
)
import
Gargantext.Database.Admin.Types.Node
(
CorpusId
)
import
Gargantext.Database.Query.Tree.Root
(
MkCorpusUser
(
MkCorpusUserNormalCorpusName
))
import
Gargantext.Prelude
import
Gargantext.Utils.Jobs.Monad
(
MonadJobStatus
,
JobHandle
)
import
Options.Applicative
import
qualified
Data.Text
as
T
import
Prelude
(
String
)
import
Gargantext.Core.Types.Query
main
::
IO
()
main
=
do
[
fun
,
user
,
name
,
iniPath
,
limit
,
corpusPath
]
<-
getArgs
--{-
importCLI
::
ImportArgs
->
IO
()
importCLI
(
ImportArgs
fun
user
name
iniPath
limit
corpusPath
)
=
do
let
--tt = (Unsupervised EN 6 0 Nothing)
tt
=
(
Multi
EN
)
format
=
TsvGargV3
-- TsvHal --WOS
limit'
=
case
(
readMaybe
limit
::
Maybe
Limit
)
of
Nothing
->
panicTrace
$
"Cannot read limit: "
<>
(
Text
.
pack
limit
)
Just
l
->
l
tt
=
Multi
EN
format
=
TsvGargV3
corpus
::
forall
m
.
(
FlowCmdM
DevEnv
BackendInternalError
m
,
MonadJobStatus
m
,
JobHandle
m
~
DevJobHandle
)
=>
m
CorpusId
mkCorpusUser
=
MkCorpusUserNormalCorpusName
(
UserName
$
cs
user
)
(
cs
name
::
Text
)
corpus
=
flowCorpusFile
mkCorpusUser
limit
'
tt
format
Plain
corpusPath
Nothing
DevJobHandle
corpus
=
flowCorpusFile
mkCorpusUser
limit
tt
format
Plain
corpusPath
Nothing
DevJobHandle
corpusTsvHal
::
forall
m
.
(
FlowCmdM
DevEnv
BackendInternalError
m
,
MonadJobStatus
m
,
JobHandle
m
~
DevJobHandle
)
=>
m
CorpusId
corpusTsvHal
=
flowCorpusFile
mkCorpusUser
limit
'
tt
TsvHal
Plain
corpusPath
Nothing
DevJobHandle
corpusTsvHal
=
flowCorpusFile
mkCorpusUser
limit
tt
TsvHal
Plain
corpusPath
Nothing
DevJobHandle
annuaire
::
forall
m
.
(
FlowCmdM
DevEnv
BackendInternalError
m
,
MonadJobStatus
m
,
JobHandle
m
~
DevJobHandle
)
=>
m
CorpusId
annuaire
=
flowAnnuaire
(
MkCorpusUserNormalCorpusName
(
UserName
$
cs
user
)
"Annuaire"
)
(
Multi
EN
)
corpusPath
DevJobHandle
{-
let debatCorpus :: forall m. FlowCmdM DevEnv BackendInternalError m => m CorpusId
debatCorpus = do
docs <- liftIO ( splitEvery 500
<$> take (read limit :: Int)
<$> readFile corpusPath
:: IO [[GrandDebatReference ]]
)
flowCorpus (Text.pack user) (Text.pack name) (Multi FR) (map (map toHyperdataDocument) docs)
--}
withDevEnv
iniPath
$
\
env
->
do
_
<-
if
fun
==
"corpus"
then
runCmdGargDev
env
corpus
else
pure
0
--(cs "false")
void
$
case
fun
of
IF_corpus
->
runCmdGargDev
env
corpus
IF_corpusTsvHal
->
runCmdGargDev
env
corpusTsvHal
IF_annuaire
->
runCmdGargDev
env
annuaire
importCmd
::
HasCallStack
=>
Mod
CommandFields
CLI
importCmd
=
command
"import"
(
info
(
helper
<*>
fmap
CLISub
import_p
)
(
progDesc
"Import CLI."
))
renderImportFunction
::
ImportFunction
->
T
.
Text
renderImportFunction
=
T
.
drop
3
.
T
.
pack
.
show
_
<-
if
fun
==
"corpusTsvHal"
then
runCmdGargDev
env
corpusTsvHal
else
pure
0
--(cs "false")
import_p
::
Parser
CLICmd
import_p
=
fmap
CCMD_import
$
ImportArgs
<$>
(
option
(
eitherReader
function_p
)
(
long
"function"
<>
help
(
"The function to use, one between: "
<>
(
T
.
unpack
$
T
.
intercalate
","
$
map
renderImportFunction
[
minBound
..
maxBound
]))
)
)
<*>
(
option
str
(
long
"user"
)
)
<*>
(
option
str
(
long
"name"
)
)
<*>
(
option
str
(
long
"ini"
<>
help
"Path to the .ini file."
)
)
<*>
(
fmap
Limit
(
option
auto
(
long
"ini"
<>
metavar
"INT"
<>
help
"The limit for the query"
)
))
<*>
(
option
str
(
long
"corpus-path"
<>
help
"Path to corpus file"
)
)
_
<-
if
fun
==
"annuaire"
then
runCmdGargDev
env
annuaire
else
pure
0
{-
_ <- if corpusType == "csv"
then runCmdDev env csvCorpus
else if corpusType == "debat"
then runCmdDev env debatCorpus
else panic "corpusType unknown: try \"csv\" or \"debat\""
-}
pure
()
function_p
::
String
->
Either
String
ImportFunction
function_p
=
\
case
"corpus"
->
Right
IF_corpus
"corpusTsvHal"
->
Right
IF_corpusTsvHal
"annuaire"
->
Right
IF_annuaire
xs
->
Left
$
"Unrecognised function: "
<>
xs
bin/gargantext-cli/CLI/Types.hs
View file @
1310b3a3
module
CLI.Types
where
import
Prelude
import
Data.String
import
Data.Text
(
Text
)
import
Gargantext.Core.Types.Query
import
Prelude
newtype
CorpusFile
=
CorpusFile
{
_CorpusFile
::
FilePath
}
deriving
(
Show
,
Eq
,
IsString
)
...
...
@@ -27,11 +28,27 @@ data AdminArgs = AdminArgs
,
emails
::
[
String
]
}
deriving
(
Show
,
Eq
)
data
ImportFunction
=
IF_corpus
|
IF_corpusTsvHal
|
IF_annuaire
deriving
(
Show
,
Eq
,
Enum
,
Bounded
)
data
ImportArgs
=
ImportArgs
{
imp_function
::
!
ImportFunction
,
imp_user
::
!
Text
,
imp_name
::
!
Text
,
imp_ini
::
!
FilePath
,
imp_limit
::
!
Limit
,
imp_corpus_path
::
!
FilePath
}
deriving
(
Show
,
Eq
)
data
CLICmd
=
CCMD_clean_csv_corpus
|
CCMD_filter_terms_and_cooc
!
CorpusFile
!
TermListFile
!
OutputFile
|
CCMD_obfuscate_db
!
ObfuscateDBArgs
|
CCMD_admin
!
AdminArgs
|
CCMD_import
!
ImportArgs
deriving
(
Show
,
Eq
)
data
CLI
=
...
...
bin/gargantext-cli/Main.hs
View file @
1310b3a3
...
...
@@ -24,6 +24,7 @@ import CLI.ObfuscateDB (obfuscateDB, obfuscateDBCmd)
import
CLI.Types
import
Options.Applicative
import
CLI.Admin
(
adminCLI
,
adminCmd
)
import
CLI.Import
(
importCLI
,
importCmd
)
runCLI
::
CLI
->
IO
()
runCLI
=
\
case
...
...
@@ -35,6 +36,8 @@ runCLI = \case
->
obfuscateDB
args
CLISub
(
CCMD_admin
args
)
->
adminCLI
args
CLISub
(
CCMD_import
args
)
->
importCLI
args
main
::
IO
()
main
=
runCLI
=<<
execParser
opts
...
...
@@ -48,5 +51,6 @@ allOptions :: Parser CLI
allOptions
=
subparser
(
filterTermsAndCoocCmd
<>
obfuscateDBCmd
<>
adminCmd
adminCmd
<>
importCmd
)
gargantext.cabal
View file @
1310b3a3
...
...
@@ -701,6 +701,7 @@ executable gargantext-cli
CLI.Admin
CLI.CleanCsvCorpus
CLI.FilterTermsAndCooc
CLI.Import
CLI.ObfuscateDB
CLI.Types
CLI.Utils
...
...
@@ -713,7 +714,6 @@ executable gargantext-cli
, bytestring ^>= 0.10.12.0
, cassava ^>= 0.5.2.0
, containers ^>= 0.6.5.1
, extra
, extra ^>= 1.7.9
, full-text-search ^>= 0.2.1.4
, gargantext
...
...
@@ -728,24 +728,6 @@ executable gargantext-cli
, unordered-containers ^>= 0.2.16.0
, vector ^>= 0.12.3.0
executable gargantext-import
import:
defaults
, optimized
main-is: Main.hs
default-extensions:
TypeOperators
other-modules:
Paths_gargantext
hs-source-dirs:
bin/gargantext-import
build-depends:
extra ^>= 1.7.9
, gargantext
, gargantext-prelude
, servant-server ^>= 0.18.3
, text ^>= 1.2.4.1
executable gargantext-init
import:
defaults
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment