Commit 1310b3a3 authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Port gargantext-import to CLI

parent d765ec00
{-|
Module : Main.hs
Module : Import.hs
Description : Gargantext Import Corpus
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -12,10 +12,13 @@ Import a corpus binary.
-}
{-# LANGUAGE Strict #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE LambdaCase #-}
module Main where
module CLI.Import where
import Data.Text qualified as Text
import CLI.Types
import Gargantext.API.Admin.EnvTypes (DevEnv(..), DevJobHandle(..))
import Gargantext.API.Dev (withDevEnv, runCmdGargDev)
import Gargantext.API.Errors.Types ( BackendInternalError )
......@@ -23,66 +26,62 @@ import Gargantext.API.Node () -- instances
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), FileType(..))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Types.Query (Limit)
import Gargantext.Database.Action.Flow (flowCorpusFile, flowAnnuaire, TermType(..))
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Admin.Types.Node (CorpusId)
import Gargantext.Database.Query.Tree.Root (MkCorpusUser(MkCorpusUserNormalCorpusName))
import Gargantext.Prelude
import Gargantext.Utils.Jobs.Monad ( MonadJobStatus, JobHandle )
import Options.Applicative
import qualified Data.Text as T
import Prelude (String)
import Gargantext.Core.Types.Query
main :: IO ()
main = do
[fun, user, name, iniPath, limit, corpusPath] <- getArgs
--{-
importCLI :: ImportArgs -> IO ()
importCLI (ImportArgs fun user name iniPath limit corpusPath) = do
let
--tt = (Unsupervised EN 6 0 Nothing)
tt = (Multi EN)
format = TsvGargV3 -- TsvHal --WOS
limit' = case (readMaybe limit :: Maybe Limit) of
Nothing -> panicTrace $ "Cannot read limit: " <> (Text.pack limit)
Just l -> l
tt = Multi EN
format = TsvGargV3
corpus :: forall m. (FlowCmdM DevEnv BackendInternalError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId
mkCorpusUser = MkCorpusUserNormalCorpusName (UserName $ cs user) (cs name :: Text)
corpus = flowCorpusFile mkCorpusUser limit' tt format Plain corpusPath Nothing DevJobHandle
corpus = flowCorpusFile mkCorpusUser limit tt format Plain corpusPath Nothing DevJobHandle
corpusTsvHal :: forall m. (FlowCmdM DevEnv BackendInternalError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId
corpusTsvHal = flowCorpusFile mkCorpusUser limit' tt TsvHal Plain corpusPath Nothing DevJobHandle
corpusTsvHal = flowCorpusFile mkCorpusUser limit tt TsvHal Plain corpusPath Nothing DevJobHandle
annuaire :: forall m. (FlowCmdM DevEnv BackendInternalError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId
annuaire = flowAnnuaire (MkCorpusUserNormalCorpusName (UserName $ cs user) "Annuaire") (Multi EN) corpusPath DevJobHandle
{-
let debatCorpus :: forall m. FlowCmdM DevEnv BackendInternalError m => m CorpusId
debatCorpus = do
docs <- liftIO ( splitEvery 500
<$> take (read limit :: Int)
<$> readFile corpusPath
:: IO [[GrandDebatReference ]]
)
flowCorpus (Text.pack user) (Text.pack name) (Multi FR) (map (map toHyperdataDocument) docs)
--}
withDevEnv iniPath $ \env -> do
_ <- if fun == "corpus"
then runCmdGargDev env corpus
else pure 0 --(cs "false")
void $ case fun of
IF_corpus
-> runCmdGargDev env corpus
IF_corpusTsvHal
-> runCmdGargDev env corpusTsvHal
IF_annuaire
-> runCmdGargDev env annuaire
_ <- if fun == "corpusTsvHal"
then runCmdGargDev env corpusTsvHal
else pure 0 --(cs "false")
importCmd :: HasCallStack => Mod CommandFields CLI
importCmd = command "import" (info (helper <*> fmap CLISub import_p) (progDesc "Import CLI."))
_ <- if fun == "annuaire"
then runCmdGargDev env annuaire
else pure 0
{-
_ <- if corpusType == "csv"
then runCmdDev env csvCorpus
else if corpusType == "debat"
then runCmdDev env debatCorpus
else panic "corpusType unknown: try \"csv\" or \"debat\""
-}
pure ()
renderImportFunction :: ImportFunction -> T.Text
renderImportFunction = T.drop 3 . T.pack . show
import_p :: Parser CLICmd
import_p = fmap CCMD_import $ ImportArgs
<$> ( option (eitherReader function_p) ( long "function"
<> help ("The function to use, one between: " <> (T.unpack $ T.intercalate "," $ map renderImportFunction [minBound .. maxBound]))
) )
<*> ( option str ( long "user") )
<*> ( option str ( long "name") )
<*> ( option str ( long "ini" <> help "Path to the .ini file.") )
<*> (fmap Limit ( option auto ( long "ini" <> metavar "INT" <> help "The limit for the query") ))
<*> ( option str ( long "corpus-path" <> help "Path to corpus file") )
function_p :: String -> Either String ImportFunction
function_p = \case
"corpus" -> Right IF_corpus
"corpusTsvHal" -> Right IF_corpusTsvHal
"annuaire" -> Right IF_annuaire
xs -> Left $ "Unrecognised function: " <> xs
module CLI.Types where
import Prelude
import Data.String
import Data.Text (Text)
import Gargantext.Core.Types.Query
import Prelude
newtype CorpusFile = CorpusFile { _CorpusFile :: FilePath }
deriving (Show, Eq, IsString)
......@@ -27,11 +28,27 @@ data AdminArgs = AdminArgs
, emails :: [String]
} deriving (Show, Eq)
data ImportFunction
= IF_corpus
| IF_corpusTsvHal
| IF_annuaire
deriving (Show, Eq, Enum, Bounded)
data ImportArgs = ImportArgs
{ imp_function :: !ImportFunction
, imp_user :: !Text
, imp_name :: !Text
, imp_ini :: !FilePath
, imp_limit :: !Limit
, imp_corpus_path :: !FilePath
} deriving (Show, Eq)
data CLICmd
= CCMD_clean_csv_corpus
| CCMD_filter_terms_and_cooc !CorpusFile !TermListFile !OutputFile
| CCMD_obfuscate_db !ObfuscateDBArgs
| CCMD_admin !AdminArgs
| CCMD_import !ImportArgs
deriving (Show, Eq)
data CLI =
......
......@@ -24,6 +24,7 @@ import CLI.ObfuscateDB (obfuscateDB, obfuscateDBCmd)
import CLI.Types
import Options.Applicative
import CLI.Admin (adminCLI, adminCmd)
import CLI.Import (importCLI, importCmd)
runCLI :: CLI -> IO ()
runCLI = \case
......@@ -35,6 +36,8 @@ runCLI = \case
-> obfuscateDB args
CLISub (CCMD_admin args)
-> adminCLI args
CLISub (CCMD_import args)
-> importCLI args
main :: IO ()
main = runCLI =<< execParser opts
......@@ -48,5 +51,6 @@ allOptions :: Parser CLI
allOptions = subparser (
filterTermsAndCoocCmd <>
obfuscateDBCmd <>
adminCmd
adminCmd <>
importCmd
)
......@@ -701,6 +701,7 @@ executable gargantext-cli
CLI.Admin
CLI.CleanCsvCorpus
CLI.FilterTermsAndCooc
CLI.Import
CLI.ObfuscateDB
CLI.Types
CLI.Utils
......@@ -713,7 +714,6 @@ executable gargantext-cli
, bytestring ^>= 0.10.12.0
, cassava ^>= 0.5.2.0
, containers ^>= 0.6.5.1
, extra
, extra ^>= 1.7.9
, full-text-search ^>= 0.2.1.4
, gargantext
......@@ -728,24 +728,6 @@ executable gargantext-cli
, unordered-containers ^>= 0.2.16.0
, vector ^>= 0.12.3.0
executable gargantext-import
import:
defaults
, optimized
main-is: Main.hs
default-extensions:
TypeOperators
other-modules:
Paths_gargantext
hs-source-dirs:
bin/gargantext-import
build-depends:
extra ^>= 1.7.9
, gargantext
, gargantext-prelude
, servant-server ^>= 0.18.3
, text ^>= 1.2.4.1
executable gargantext-init
import:
defaults
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment