Commit 1310b3a3 authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Port gargantext-import to CLI

parent d765ec00
{-| {-|
Module : Main.hs Module : Import.hs
Description : Gargantext Import Corpus Description : Gargantext Import Corpus
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -12,10 +12,13 @@ Import a corpus binary. ...@@ -12,10 +12,13 @@ Import a corpus binary.
-} -}
{-# LANGUAGE Strict #-} {-# LANGUAGE Strict #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE LambdaCase #-}
module Main where module CLI.Import where
import Data.Text qualified as Text import CLI.Types
import Gargantext.API.Admin.EnvTypes (DevEnv(..), DevJobHandle(..)) import Gargantext.API.Admin.EnvTypes (DevEnv(..), DevJobHandle(..))
import Gargantext.API.Dev (withDevEnv, runCmdGargDev) import Gargantext.API.Dev (withDevEnv, runCmdGargDev)
import Gargantext.API.Errors.Types ( BackendInternalError ) import Gargantext.API.Errors.Types ( BackendInternalError )
...@@ -23,66 +26,62 @@ import Gargantext.API.Node () -- instances ...@@ -23,66 +26,62 @@ import Gargantext.API.Node () -- instances
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), FileType(..)) import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), FileType(..))
import Gargantext.Core.Types.Individu (User(..)) import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Types.Query (Limit)
import Gargantext.Database.Action.Flow (flowCorpusFile, flowAnnuaire, TermType(..)) import Gargantext.Database.Action.Flow (flowCorpusFile, flowAnnuaire, TermType(..))
import Gargantext.Database.Action.Flow.Types (FlowCmdM) import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Admin.Types.Node (CorpusId) import Gargantext.Database.Admin.Types.Node (CorpusId)
import Gargantext.Database.Query.Tree.Root (MkCorpusUser(MkCorpusUserNormalCorpusName)) import Gargantext.Database.Query.Tree.Root (MkCorpusUser(MkCorpusUserNormalCorpusName))
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Utils.Jobs.Monad ( MonadJobStatus, JobHandle ) import Gargantext.Utils.Jobs.Monad ( MonadJobStatus, JobHandle )
import Options.Applicative
import qualified Data.Text as T
import Prelude (String)
import Gargantext.Core.Types.Query
main :: IO () importCLI :: ImportArgs -> IO ()
main = do importCLI (ImportArgs fun user name iniPath limit corpusPath) = do
[fun, user, name, iniPath, limit, corpusPath] <- getArgs
--{-
let let
--tt = (Unsupervised EN 6 0 Nothing) tt = Multi EN
tt = (Multi EN) format = TsvGargV3
format = TsvGargV3 -- TsvHal --WOS
limit' = case (readMaybe limit :: Maybe Limit) of
Nothing -> panicTrace $ "Cannot read limit: " <> (Text.pack limit)
Just l -> l
corpus :: forall m. (FlowCmdM DevEnv BackendInternalError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId corpus :: forall m. (FlowCmdM DevEnv BackendInternalError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId
mkCorpusUser = MkCorpusUserNormalCorpusName (UserName $ cs user) (cs name :: Text) mkCorpusUser = MkCorpusUserNormalCorpusName (UserName $ cs user) (cs name :: Text)
corpus = flowCorpusFile mkCorpusUser limit' tt format Plain corpusPath Nothing DevJobHandle corpus = flowCorpusFile mkCorpusUser limit tt format Plain corpusPath Nothing DevJobHandle
corpusTsvHal :: forall m. (FlowCmdM DevEnv BackendInternalError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId corpusTsvHal :: forall m. (FlowCmdM DevEnv BackendInternalError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId
corpusTsvHal = flowCorpusFile mkCorpusUser limit' tt TsvHal Plain corpusPath Nothing DevJobHandle corpusTsvHal = flowCorpusFile mkCorpusUser limit tt TsvHal Plain corpusPath Nothing DevJobHandle
annuaire :: forall m. (FlowCmdM DevEnv BackendInternalError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId annuaire :: forall m. (FlowCmdM DevEnv BackendInternalError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId
annuaire = flowAnnuaire (MkCorpusUserNormalCorpusName (UserName $ cs user) "Annuaire") (Multi EN) corpusPath DevJobHandle annuaire = flowAnnuaire (MkCorpusUserNormalCorpusName (UserName $ cs user) "Annuaire") (Multi EN) corpusPath DevJobHandle
{-
let debatCorpus :: forall m. FlowCmdM DevEnv BackendInternalError m => m CorpusId
debatCorpus = do
docs <- liftIO ( splitEvery 500
<$> take (read limit :: Int)
<$> readFile corpusPath
:: IO [[GrandDebatReference ]]
)
flowCorpus (Text.pack user) (Text.pack name) (Multi FR) (map (map toHyperdataDocument) docs)
--}
withDevEnv iniPath $ \env -> do withDevEnv iniPath $ \env -> do
_ <- if fun == "corpus" void $ case fun of
then runCmdGargDev env corpus IF_corpus
else pure 0 --(cs "false") -> runCmdGargDev env corpus
IF_corpusTsvHal
-> runCmdGargDev env corpusTsvHal
IF_annuaire
-> runCmdGargDev env annuaire
importCmd :: HasCallStack => Mod CommandFields CLI
importCmd = command "import" (info (helper <*> fmap CLISub import_p) (progDesc "Import CLI."))
renderImportFunction :: ImportFunction -> T.Text
renderImportFunction = T.drop 3 . T.pack . show
_ <- if fun == "corpusTsvHal" import_p :: Parser CLICmd
then runCmdGargDev env corpusTsvHal import_p = fmap CCMD_import $ ImportArgs
else pure 0 --(cs "false") <$> ( option (eitherReader function_p) ( long "function"
<> help ("The function to use, one between: " <> (T.unpack $ T.intercalate "," $ map renderImportFunction [minBound .. maxBound]))
) )
<*> ( option str ( long "user") )
<*> ( option str ( long "name") )
<*> ( option str ( long "ini" <> help "Path to the .ini file.") )
<*> (fmap Limit ( option auto ( long "ini" <> metavar "INT" <> help "The limit for the query") ))
<*> ( option str ( long "corpus-path" <> help "Path to corpus file") )
_ <- if fun == "annuaire" function_p :: String -> Either String ImportFunction
then runCmdGargDev env annuaire function_p = \case
else pure 0 "corpus" -> Right IF_corpus
{- "corpusTsvHal" -> Right IF_corpusTsvHal
_ <- if corpusType == "csv" "annuaire" -> Right IF_annuaire
then runCmdDev env csvCorpus xs -> Left $ "Unrecognised function: " <> xs
else if corpusType == "debat"
then runCmdDev env debatCorpus
else panic "corpusType unknown: try \"csv\" or \"debat\""
-}
pure ()
module CLI.Types where module CLI.Types where
import Prelude
import Data.String import Data.String
import Data.Text (Text) import Data.Text (Text)
import Gargantext.Core.Types.Query
import Prelude
newtype CorpusFile = CorpusFile { _CorpusFile :: FilePath } newtype CorpusFile = CorpusFile { _CorpusFile :: FilePath }
deriving (Show, Eq, IsString) deriving (Show, Eq, IsString)
...@@ -27,11 +28,27 @@ data AdminArgs = AdminArgs ...@@ -27,11 +28,27 @@ data AdminArgs = AdminArgs
, emails :: [String] , emails :: [String]
} deriving (Show, Eq) } deriving (Show, Eq)
data ImportFunction
= IF_corpus
| IF_corpusTsvHal
| IF_annuaire
deriving (Show, Eq, Enum, Bounded)
data ImportArgs = ImportArgs
{ imp_function :: !ImportFunction
, imp_user :: !Text
, imp_name :: !Text
, imp_ini :: !FilePath
, imp_limit :: !Limit
, imp_corpus_path :: !FilePath
} deriving (Show, Eq)
data CLICmd data CLICmd
= CCMD_clean_csv_corpus = CCMD_clean_csv_corpus
| CCMD_filter_terms_and_cooc !CorpusFile !TermListFile !OutputFile | CCMD_filter_terms_and_cooc !CorpusFile !TermListFile !OutputFile
| CCMD_obfuscate_db !ObfuscateDBArgs | CCMD_obfuscate_db !ObfuscateDBArgs
| CCMD_admin !AdminArgs | CCMD_admin !AdminArgs
| CCMD_import !ImportArgs
deriving (Show, Eq) deriving (Show, Eq)
data CLI = data CLI =
......
...@@ -24,6 +24,7 @@ import CLI.ObfuscateDB (obfuscateDB, obfuscateDBCmd) ...@@ -24,6 +24,7 @@ import CLI.ObfuscateDB (obfuscateDB, obfuscateDBCmd)
import CLI.Types import CLI.Types
import Options.Applicative import Options.Applicative
import CLI.Admin (adminCLI, adminCmd) import CLI.Admin (adminCLI, adminCmd)
import CLI.Import (importCLI, importCmd)
runCLI :: CLI -> IO () runCLI :: CLI -> IO ()
runCLI = \case runCLI = \case
...@@ -35,6 +36,8 @@ runCLI = \case ...@@ -35,6 +36,8 @@ runCLI = \case
-> obfuscateDB args -> obfuscateDB args
CLISub (CCMD_admin args) CLISub (CCMD_admin args)
-> adminCLI args -> adminCLI args
CLISub (CCMD_import args)
-> importCLI args
main :: IO () main :: IO ()
main = runCLI =<< execParser opts main = runCLI =<< execParser opts
...@@ -48,5 +51,6 @@ allOptions :: Parser CLI ...@@ -48,5 +51,6 @@ allOptions :: Parser CLI
allOptions = subparser ( allOptions = subparser (
filterTermsAndCoocCmd <> filterTermsAndCoocCmd <>
obfuscateDBCmd <> obfuscateDBCmd <>
adminCmd adminCmd <>
importCmd
) )
...@@ -701,6 +701,7 @@ executable gargantext-cli ...@@ -701,6 +701,7 @@ executable gargantext-cli
CLI.Admin CLI.Admin
CLI.CleanCsvCorpus CLI.CleanCsvCorpus
CLI.FilterTermsAndCooc CLI.FilterTermsAndCooc
CLI.Import
CLI.ObfuscateDB CLI.ObfuscateDB
CLI.Types CLI.Types
CLI.Utils CLI.Utils
...@@ -713,7 +714,6 @@ executable gargantext-cli ...@@ -713,7 +714,6 @@ executable gargantext-cli
, bytestring ^>= 0.10.12.0 , bytestring ^>= 0.10.12.0
, cassava ^>= 0.5.2.0 , cassava ^>= 0.5.2.0
, containers ^>= 0.6.5.1 , containers ^>= 0.6.5.1
, extra
, extra ^>= 1.7.9 , extra ^>= 1.7.9
, full-text-search ^>= 0.2.1.4 , full-text-search ^>= 0.2.1.4
, gargantext , gargantext
...@@ -728,24 +728,6 @@ executable gargantext-cli ...@@ -728,24 +728,6 @@ executable gargantext-cli
, unordered-containers ^>= 0.2.16.0 , unordered-containers ^>= 0.2.16.0
, vector ^>= 0.12.3.0 , vector ^>= 0.12.3.0
executable gargantext-import
import:
defaults
, optimized
main-is: Main.hs
default-extensions:
TypeOperators
other-modules:
Paths_gargantext
hs-source-dirs:
bin/gargantext-import
build-depends:
extra ^>= 1.7.9
, gargantext
, gargantext-prelude
, servant-server ^>= 0.18.3
, text ^>= 1.2.4.1
executable gargantext-init executable gargantext-init
import: import:
defaults defaults
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment