Commit 0ad98105 authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge remote-tracking branch 'origin/adinapoli/issue-355' into dev

parents 6a0c62d6 f9332b80
......@@ -37,7 +37,7 @@ cabal:
- .cabal/
policy: pull-push
script:
- nix-shell --run "./bin/update-project-dependencies $CABAL_STORE_DIR && cabal --store-dir=$CABAL_STORE_DIR v2-build --flags 'test-crypto no-phylo-debug-logs' --ghc-options='-O0 -fclear-plugins'"
- nix-shell --run "./bin/update-project-dependencies $CABAL_STORE_DIR && cabal --store-dir=$CABAL_STORE_DIR v2-build all --flags 'test-crypto no-phylo-debug-logs' --ghc-options='-O0 -fclear-plugins'"
allow_failure: false
bench:
......
{-|
Module : Main.hs
Description : Gargantext Admin tools
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE Strict #-}
module Main where
import Gargantext.API.Dev (withDevEnv, runCmdDev)
import Gargantext.API.Errors.Types
import Gargantext.Database.Action.User.New (newUsers)
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude (Cmd'')
import Gargantext.Prelude
import Gargantext.API.Admin.EnvTypes (DevEnv)
import qualified Data.List.NonEmpty as NE
main :: IO ()
main = do
(iniPath:mails) <- getArgs
withDevEnv iniPath $ \env -> do
x <- runCmdDev env ((newUsers $ NE.map cs (NE.fromList mails)) :: Cmd'' DevEnv BackendInternalError (NonEmpty UserId))
putStrLn (show x :: Text)
pure ()
module CLI.Admin (
adminCLI
, adminCmd
) where
import CLI.Types
import Data.List.NonEmpty qualified as NE
import Data.Text qualified as T
import Gargantext.API.Admin.EnvTypes
import Gargantext.API.Dev
import Gargantext.API.Errors
import Gargantext.Core.Types
import Gargantext.Database.Action.User.New
import Gargantext.Database.Prelude
import Gargantext.Prelude
import Options.Applicative
import Prelude (String)
adminCLI :: AdminArgs -> IO ()
adminCLI (AdminArgs iniPath mails) = do
withDevEnv iniPath $ \env -> do
x <- runCmdDev env ((newUsers $ NE.map cs (NE.fromList mails)) :: Cmd'' DevEnv BackendInternalError (NonEmpty UserId))
putStrLn (show x :: Text)
adminCmd :: HasCallStack => Mod CommandFields CLI
adminCmd = command "admin" (info (helper <*> fmap CLISub admin_p) (progDesc "Create users."))
admin_p :: Parser CLICmd
admin_p = fmap CCMD_admin $ AdminArgs
<$> ( strOption ( long "ini-path"
<> metavar "FILEPATH"
<> help "Location of the .ini path"
) )
<*> ( option (maybeReader emails_p) ( long "emails"
<> metavar "email1,email2,..."
<> help "A comma-separated list of emails."
) )
emails_p :: String -> Maybe [String]
emails_p s = case T.splitOn "," (T.pack s) of
[] -> Nothing
xs -> pure $ map T.unpack xs
module Main where
module CLI.FileDiff where
import Prelude
import CLI.Types
import Data.List qualified as L
import Data.Text qualified as T
import Data.Text.IO qualified as TIO
import Data.TreeDiff.Class
import Data.TreeDiff.Pretty
import qualified Data.Text as T
import qualified Data.Text.IO as TIO
import System.Environment (getArgs)
import System.Exit (exitFailure)
import Control.Monad (unless)
import qualified Data.List as L
import Gargantext.Prelude (HasCallStack, unless, exitFailure)
import Options.Applicative
import Prelude
-- | Renders in a pretty way the content of two golden files. The
-- first file should contain the expected output, the second the
-- actual data generated by the test suite.
main :: IO ()
main = do
(refPath:newPath:_) <- getArgs
fileDiffCLI :: GoldenFileDiffArgs -> IO ()
fileDiffCLI (GoldenFileDiffArgs refPath newPath) = do
ref <- T.lines <$> TIO.readFile refPath
new <- T.lines <$> TIO.readFile newPath
......@@ -25,3 +24,17 @@ main = do
unless (L.null differences) $ do
putStrLn $ show $ ansiWlEditExpr $ ediff' (map fst differences) (map snd differences)
exitFailure
fileDiffCmd :: HasCallStack => Mod CommandFields CLI
fileDiffCmd = command "golden-file-diff" (info (helper <*> fmap CLISub filediff_p) (progDesc "Compare the output of two golden files."))
filediff_p :: Parser CLICmd
filediff_p = fmap CCMD_golden_file_diff $ GoldenFileDiffArgs
<$> ( strOption ( long "expected"
<> metavar "FILEPATH"
<> help "Path to the file containing the expected output."
) )
<*> ( strOption ( long "actual"
<> metavar "FILEPATH"
<> help "Path to the file containing the actual output."
) )
{-|
Module : Main.hs
Module : Import.hs
Description : Gargantext Import Corpus
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -12,10 +12,13 @@ Import a corpus binary.
-}
{-# LANGUAGE Strict #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE LambdaCase #-}
module Main where
module CLI.Import where
import Data.Text qualified as Text
import CLI.Types
import Gargantext.API.Admin.EnvTypes (DevEnv(..), DevJobHandle(..))
import Gargantext.API.Dev (withDevEnv, runCmdGargDev)
import Gargantext.API.Errors.Types ( BackendInternalError )
......@@ -23,66 +26,62 @@ import Gargantext.API.Node () -- instances
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), FileType(..))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Types.Query (Limit)
import Gargantext.Database.Action.Flow (flowCorpusFile, flowAnnuaire, TermType(..))
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Admin.Types.Node (CorpusId)
import Gargantext.Database.Query.Tree.Root (MkCorpusUser(MkCorpusUserNormalCorpusName))
import Gargantext.Prelude
import Gargantext.Utils.Jobs.Monad ( MonadJobStatus, JobHandle )
import Options.Applicative
import qualified Data.Text as T
import Prelude (String)
import Gargantext.Core.Types.Query
main :: IO ()
main = do
[fun, user, name, iniPath, limit, corpusPath] <- getArgs
--{-
importCLI :: ImportArgs -> IO ()
importCLI (ImportArgs fun user name iniPath limit corpusPath) = do
let
--tt = (Unsupervised EN 6 0 Nothing)
tt = (Multi EN)
format = TsvGargV3 -- TsvHal --WOS
limit' = case (readMaybe limit :: Maybe Limit) of
Nothing -> panicTrace $ "Cannot read limit: " <> (Text.pack limit)
Just l -> l
tt = Multi EN
format = TsvGargV3
corpus :: forall m. (FlowCmdM DevEnv BackendInternalError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId
mkCorpusUser = MkCorpusUserNormalCorpusName (UserName $ cs user) (cs name :: Text)
corpus = flowCorpusFile mkCorpusUser limit' tt format Plain corpusPath Nothing DevJobHandle
corpus = flowCorpusFile mkCorpusUser limit tt format Plain corpusPath Nothing DevJobHandle
corpusTsvHal :: forall m. (FlowCmdM DevEnv BackendInternalError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId
corpusTsvHal = flowCorpusFile mkCorpusUser limit' tt TsvHal Plain corpusPath Nothing DevJobHandle
corpusTsvHal = flowCorpusFile mkCorpusUser limit tt TsvHal Plain corpusPath Nothing DevJobHandle
annuaire :: forall m. (FlowCmdM DevEnv BackendInternalError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId
annuaire = flowAnnuaire (MkCorpusUserNormalCorpusName (UserName $ cs user) "Annuaire") (Multi EN) corpusPath DevJobHandle
{-
let debatCorpus :: forall m. FlowCmdM DevEnv BackendInternalError m => m CorpusId
debatCorpus = do
docs <- liftIO ( splitEvery 500
<$> take (read limit :: Int)
<$> readFile corpusPath
:: IO [[GrandDebatReference ]]
)
flowCorpus (Text.pack user) (Text.pack name) (Multi FR) (map (map toHyperdataDocument) docs)
--}
withDevEnv iniPath $ \env -> do
_ <- if fun == "corpus"
then runCmdGargDev env corpus
else pure 0 --(cs "false")
void $ case fun of
IF_corpus
-> runCmdGargDev env corpus
IF_corpusTsvHal
-> runCmdGargDev env corpusTsvHal
IF_annuaire
-> runCmdGargDev env annuaire
_ <- if fun == "corpusTsvHal"
then runCmdGargDev env corpusTsvHal
else pure 0 --(cs "false")
importCmd :: HasCallStack => Mod CommandFields CLI
importCmd = command "import" (info (helper <*> fmap CLISub import_p) (progDesc "Import CLI."))
_ <- if fun == "annuaire"
then runCmdGargDev env annuaire
else pure 0
{-
_ <- if corpusType == "csv"
then runCmdDev env csvCorpus
else if corpusType == "debat"
then runCmdDev env debatCorpus
else panic "corpusType unknown: try \"csv\" or \"debat\""
-}
pure ()
renderImportFunction :: ImportFunction -> T.Text
renderImportFunction = T.drop 3 . T.pack . show
import_p :: Parser CLICmd
import_p = fmap CCMD_import $ ImportArgs
<$> ( option (eitherReader function_p) ( long "function"
<> help ("The function to use, one between: " <> (T.unpack $ T.intercalate "," $ map renderImportFunction [minBound .. maxBound]))
) )
<*> ( option str ( long "user") )
<*> ( option str ( long "name") )
<*> ( option str ( long "ini" <> help "Path to the .ini file.") )
<*> (fmap Limit ( option auto ( long "ini" <> metavar "INT" <> help "The limit for the query") ))
<*> ( option str ( long "corpus-path" <> help "Path to corpus file") )
function_p :: String -> Either String ImportFunction
function_p = \case
"corpus" -> Right IF_corpus
"corpusTsvHal" -> Right IF_corpusTsvHal
"annuaire" -> Right IF_annuaire
xs -> Left $ "Unrecognised function: " <> xs
{-|
Module : Main.hs
Description : Gargantext Import Corpus
Module : Init.hs
Description : Gargantext Init Script
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Import a corpus binary.
Initialise the Gargantext dataset.
-}
{-# LANGUAGE Strict #-}
module Main where
module CLI.Init where
import Data.List.NonEmpty qualified as NE
import Gargantext.API.Dev (withDevEnv, runCmdDev)
......@@ -32,16 +32,12 @@ import Gargantext.Prelude
import Gargantext.Prelude.Config (GargConfig(..), readConfig)
import Gargantext.API.Admin.Types
import Gargantext.Database.Prelude (DBCmd')
import CLI.Types
import Options.Applicative
main :: IO ()
main = do
params@[iniPath] <- getArgs
_ <- if length params /= 1
then panicTrace "USAGE: ./gargantext-init gargantext.ini"
else pure ()
initCLI :: InitArgs -> IO ()
initCLI (InitArgs iniPath) = do
putStrLn ("Enter master user (gargantua) _password_ :" :: Text)
password <- getLine
......@@ -77,4 +73,13 @@ main = do
x <- runCmdDev env initMaster
_ <- runCmdDev env mkRoots
putStrLn (show x :: Text)
pure ()
initCmd :: HasCallStack => Mod CommandFields CLI
initCmd = command "init" (info (helper <*> fmap CLISub init_p) (progDesc "Initialise this Gargantext instance."))
init_p :: Parser CLICmd
init_p = fmap CCMD_init $ InitArgs
<$> ( strOption ( long "ini-path"
<> metavar "FILEPATH"
<> help "Location of the .ini path"
) )
{-|
Module : Main.hs
Module : Invitations.hs
Description : GarganText Mailing Invitations
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -12,35 +12,51 @@ Portability : POSIX
{-# LANGUAGE Strict #-}
module Main where
module CLI.Invitations where
import CLI.Types
import Gargantext.API.Admin.Types
import Gargantext.API.Dev (withDevEnv, runCmdDev)
import Gargantext.API.Node () -- instances only
import Gargantext.API.Errors.Types
import Gargantext.API.Admin.Types
import Gargantext.API.Node () -- instances only
import Gargantext.API.Node.Share qualified as Share
import Gargantext.API.Node.Share.Types qualified as Share
import Gargantext.Core.NLP (HasNLPServer)
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude (CmdRandom)
import Gargantext.Prelude
import Gargantext.Prelude.Config (readConfig)
import Prelude (read)
import Gargantext.API.Node.Share.Types qualified as Share
import Gargantext.API.Node.Share qualified as Share
main :: IO ()
main = do
params@[iniPath,user,node_id,email] <- getArgs
_ <- if length params /= 4
then panicTrace "USAGE: ./gargantext-init gargantext.ini username node_id student@university.edu"
else pure ()
import Options.Applicative
import Prelude (String)
import Gargantext.Core.Types
invitationsCLI :: InvitationsArgs -> IO ()
invitationsCLI (InvitationsArgs iniPath user node_id email) = do
_cfg <- readConfig iniPath
let invite :: (HasSettings env, CmdRandom env BackendInternalError m, HasNLPServer env) => m Int
invite = Share.api (UserName $ cs user) (UnsafeMkNodeId $ (read node_id :: Int)) (Share.ShareTeamParams $ cs email)
invite = Share.api (UserName $ cs user) node_id (Share.ShareTeamParams $ cs email)
withDevEnv iniPath $ \env -> do
_ <- runCmdDev env invite
pure ()
void $ runCmdDev env invite
invitationsCmd :: HasCallStack => Mod CommandFields CLI
invitationsCmd = command "invitations" (info (helper <*> fmap CLISub invitations_p) (progDesc "Mailing invitations."))
invitations_p :: Parser CLICmd
invitations_p = fmap CCMD_invitations $ InvitationsArgs
<$> ( strOption ( long "ini-path"
<> metavar "FILEPATH"
<> help "Location of the .ini path"
) )
<*> ( strOption ( long "user" ) )
<*> ( option (eitherReader node_p) ( long "node-id" <> metavar "POSITIVE-INT" <> help "The node ID.") )
<*> ( strOption ( long "email" <> help "The email address.") )
node_p :: String -> Either String NodeId
node_p i = case readMaybe i of
Nothing -> Left $ i <> " is not a valid integer."
Just xs
| xs < 0 -> Left $ "The node id needs to be a positive integer."
| otherwise
-> Right $ UnsafeMkNodeId xs
{-|
Module : Phylo.hs
Description : Gargantext starter binary with Adaptative Phylo
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Adaptative Phylo binaries
-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE Strict #-}
module CLI.Phylo where
import CLI.Phylo.Common
import CLI.Types
import Data.Aeson (eitherDecodeFileStrict')
import Data.List (nub)
import Data.Text qualified as T
import GHC.IO.Encoding
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.API.Tools
import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink)
import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setConfig)
import Gargantext.Prelude hiding (hash, replace)
import Options.Applicative
import System.Directory (doesFileExist)
phyloCLI :: PhyloArgs -> IO ()
phyloCLI (PhyloArgs configPath) = do
setLocaleEncoding utf8
config_e <- eitherDecodeFileStrict' configPath
case config_e of
Left err -> panicTrace $ T.pack err
Right config -> do
currentLocale <- getLocaleEncoding
printIOMsg $ "Machine locale: " <> show currentLocale
printIOMsg "Starting the reconstruction"
printIOMsg "Parse the corpus"
mapList <- fileToList (listParser config) (listPath config)
corpus <- if (defaultMode config)
then fileToDocsDefault (corpusParser config) (corpusPath config) [Year 3 1 5,Month 3 1 5,Week 4 2 5] mapList
else fileToDocsAdvanced (corpusParser config) (corpusPath config) (timeUnit config) mapList
printIOComment (show (length corpus) <> " parsed docs from the corpus")
printIOComment (show (length $ nub $ concat $ map text corpus) <> " Size ngs_coterms")
printIOComment (show (length mapList) <> " Size ngs_terms List Map Ngrams")
printIOMsg "Reconstruct the phylo"
-- check the existing backup files
let backupPhyloWithoutLink = (outputPath config) <> "backupPhyloWithoutLink_" <> (configToSha BackupPhyloWithoutLink config) <> ".json"
let backupPhylo = (outputPath config) <> "backupPhylo_" <> (configToSha BackupPhylo config) <> ".json"
phyloWithoutLinkExists <- doesFileExist backupPhyloWithoutLink
phyloExists <- doesFileExist backupPhylo
-- reconstruct the phylo
phylo <- if phyloExists
then do
printIOMsg "Reconstruct the phylo from an existing file"
readPhylo backupPhylo
else do
if phyloWithoutLinkExists
then do
printIOMsg "Reconstruct the phylo from an existing file without links"
phyloWithoutLink <- readPhylo backupPhyloWithoutLink
writePhylo backupPhyloWithoutLink phyloWithoutLink
pure $ toPhylo (setConfig config phyloWithoutLink)
else do
printIOMsg "Reconstruct the phylo from scratch"
phyloWithoutLink <- pure $ toPhyloWithoutLink corpus config
writePhylo backupPhyloWithoutLink phyloWithoutLink
pure $ toPhylo (setConfig config phyloWithoutLink)
writePhylo backupPhylo phylo
printIOMsg "End of reconstruction, start the export"
let dot = toPhyloExport (setConfig config phylo)
let output = configToLabel config
dotToFile output dot
phyloCmd :: HasCallStack => Mod CommandFields CLI
phyloCmd = command "phylo" (info (helper <*> fmap CLISub phylo_p) (progDesc "Phylo toolkit."))
phylo_p :: Parser CLICmd
phylo_p = fmap CCMD_phylo $ PhyloArgs
<$> ( strOption ( long "config"
<> metavar "FILEPATH"
<> help "Path to a file containing a JSON to be parsed into a PhyloConfig"
) )
{-# LANGUAGE OverloadedStrings #-}
module Common where
module CLI.Phylo.Common where
import Control.Concurrent.Async (mapConcurrently)
import Crypto.Hash.SHA256 (hash)
......@@ -99,6 +99,8 @@ tsvToDocs parser patterns time path =
(map (T.strip . pack) $ splitOn ";" (unpack $ (tsv'_source row)))
time
) <$> snd <$> Tsv.readWeightedTsv path
Csv _ -> panicTrace "CSV is currently not supported."
Csv' _ -> panicTrace "CSV is currently not supported."
-- To parse a file into a list of Document
......@@ -109,6 +111,8 @@ fileToDocsAdvanced parser path time lst = do
Wos limit -> wosToDocs limit patterns time path
Tsv _ -> tsvToDocs parser patterns time path
Tsv' _ -> tsvToDocs parser patterns time path
Csv _ -> panicTrace "CSV is currently not supported."
Csv' _ -> panicTrace "CSV is currently not supported."
fileToDocsDefault :: CorpusParser -> FilePath -> [TimeUnit] -> TermList -> IO [Document]
fileToDocsDefault parser path timeUnits lst =
......
{-# LANGUAGE OverloadedStrings #-}
module Main where
module CLI.Phylo.Profile where
import Common
import CLI.Phylo.Common
import Data.Aeson
import Data.List (nub)
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.API.Tools
import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink)
import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setConfig, toPeriods, getTimePeriod, getTimeStep)
import Gargantext.Core.Viz.Phylo.PhyloTools
import GHC.IO.Encoding
import GHC.Stack
import Paths_gargantext
import Prelude
import qualified Data.Text as T
import Shelly
import Shelly hiding (command)
import System.Directory
import Options.Applicative
import CLI.Types
--------------
-- | Main | --
......@@ -46,8 +47,8 @@ phyloConfig outdir = PhyloConfig {
}
main :: HasCallStack => IO ()
main = do
phyloProfileCLI :: HasCallStack => IO ()
phyloProfileCLI = do
shelly $ escaping False $ withTmpDir $ \tdir -> do
curDir <- pwd
......@@ -110,3 +111,8 @@ main = do
dotToFile output dot
echo "Done."
phyloProfileCmd :: HasCallStack => Mod CommandFields CLI
phyloProfileCmd =
command "phylo-profile" (info (helper <*> fmap CLISub (pure CCMD_phylo_profile))
(progDesc "Helper to profile phylo code."))
module CLI.Types where
import Prelude
import Data.String
import Data.Text (Text)
import Gargantext.Core.Types.Query
import Prelude
import Gargantext.Core.Types (NodeId)
newtype CorpusFile = CorpusFile { _CorpusFile :: FilePath }
deriving (Show, Eq, IsString)
......@@ -22,11 +24,62 @@ data ObfuscateDBArgs = ObfuscateDBArgs {
, dbPassword :: !Text
} deriving (Show, Eq)
data AdminArgs = AdminArgs
{ iniPath :: !FilePath
, emails :: [String]
} deriving (Show, Eq)
data ImportFunction
= IF_corpus
| IF_corpusTsvHal
| IF_annuaire
deriving (Show, Eq, Enum, Bounded)
data ImportArgs = ImportArgs
{ imp_function :: !ImportFunction
, imp_user :: !Text
, imp_name :: !Text
, imp_ini :: !FilePath
, imp_limit :: !Limit
, imp_corpus_path :: !FilePath
} deriving (Show, Eq)
data InitArgs = InitArgs
{ init_ini :: !FilePath
} deriving (Show, Eq)
data InvitationsArgs = InvitationsArgs
{ inv_path :: !FilePath
, inv_user :: !Text
, inv_node_id :: !NodeId
, inv_email :: !Text
} deriving (Show, Eq)
data PhyloArgs = PhyloArgs
{ phylo_config :: !FilePath
} deriving (Show, Eq)
data UpgradeArgs = UpgradeArgs
{ upgrade_ini :: !FilePath
} deriving (Show, Eq)
data GoldenFileDiffArgs = GoldenFileDiffArgs
{ gdf_expected :: !FilePath
, gdf_actual :: !FilePath
} deriving (Show, Eq)
data CLICmd
= CCMD_clean_csv_corpus
| CCMD_filter_terms_and_cooc !CorpusFile !TermListFile !OutputFile
| CCMD_obfuscate_db !ObfuscateDBArgs
| CCMD_admin !AdminArgs
| CCMD_import !ImportArgs
| CCMD_init !InitArgs
| CCMD_invitations !InvitationsArgs
| CCMD_phylo !PhyloArgs
| CCMD_phylo_profile
| CCMD_upgrade !UpgradeArgs
| CCMD_golden_file_diff !GoldenFileDiffArgs
deriving (Show, Eq)
data CLI =
......
{-|
Module : Main.hs
Module : Upgrade.hs
Description : Gargantext Import Corpus
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -7,24 +7,26 @@ Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Import a corpus binary.
Upgrade a gargantext node.
-}
{-# LANGUAGE Strict #-}
{-# LANGUAGE QuasiQuotes #-}
module Main where
module CLI.Upgrade where
import CLI.Types
import Data.List qualified as List (cycle, concat, take, unlines)
import Gargantext.API.Dev (withDevEnv)
import Gargantext.API.Node () -- instances only
import Gargantext.Prelude
import Gargantext.Prelude.Config (GargConfig(..), readConfig)
import Prelude qualified
import Options.Applicative
main :: IO ()
main = do
upgradeCLI :: UpgradeArgs -> IO ()
upgradeCLI (UpgradeArgs iniPath) = do
let ___ = putStrLn ((List.concat
$ List.take 72
......@@ -34,11 +36,6 @@ main = do
putStrLn ("GarganText upgrade to version 0.0.6.9.9.4.4" :: Text)
___
params@[iniPath] <- getArgs
_ <- if length params /= 1
then panicTrace "Usage: ./gargantext-upgrade gargantext.ini"
else pure ()
putStrLn $ List.unlines
[ "Your Database defined in gargantext.ini will be upgraded."
, "We stronlgy recommend you to make a backup using pg_dump."
......@@ -92,3 +89,13 @@ main = do
-- CREATE INDEX IF NOT EXISTS node_stories_ngrams_id_idx
-- ON node_stories(ngrams_id);
-- |]
upgradeCmd :: HasCallStack => Mod CommandFields CLI
upgradeCmd = command "upgrade" (info (helper <*> fmap CLISub upgrade_p) (progDesc "Upgrade a Gargantext node."))
upgrade_p :: Parser CLICmd
upgrade_p = fmap CCMD_upgrade $ UpgradeArgs
<$> ( strOption ( long "ini-path"
<> metavar "FILEPATH"
<> help "Location of the .ini path"
) )
......@@ -23,6 +23,14 @@ import CLI.FilterTermsAndCooc
import CLI.ObfuscateDB (obfuscateDB, obfuscateDBCmd)
import CLI.Types
import Options.Applicative
import CLI.Admin (adminCLI, adminCmd)
import CLI.Import (importCLI, importCmd)
import CLI.Init (initCLI, initCmd)
import CLI.Invitations (invitationsCLI, invitationsCmd)
import CLI.Phylo (phyloCLI, phyloCmd)
import CLI.Phylo.Profile (phyloProfileCLI, phyloProfileCmd)
import CLI.Upgrade (upgradeCLI, upgradeCmd)
import CLI.FileDiff (fileDiffCLI, fileDiffCmd)
runCLI :: CLI -> IO ()
runCLI = \case
......@@ -32,17 +40,41 @@ runCLI = \case
-> filterTermsAndCoocCLI corpusFile termListFile outputFile
CLISub (CCMD_obfuscate_db args)
-> obfuscateDB args
CLISub (CCMD_admin args)
-> adminCLI args
CLISub (CCMD_import args)
-> importCLI args
CLISub (CCMD_init args)
-> initCLI args
CLISub (CCMD_invitations args)
-> invitationsCLI args
CLISub (CCMD_phylo args)
-> phyloCLI args
CLISub CCMD_phylo_profile
-> phyloProfileCLI
CLISub (CCMD_upgrade args)
-> upgradeCLI args
CLISub (CCMD_golden_file_diff args)
-> fileDiffCLI args
main :: IO ()
main = runCLI =<< execParser opts
where
opts = info (helper <*> allOptions)
( fullDesc
<> progDesc "CLI for the gargantext-server"
<> progDesc "CLI for the gargantext-server. Type --help for all the commands."
<> header "gargantext-cli tools" )
allOptions :: Parser CLI
allOptions = subparser (
filterTermsAndCoocCmd <>
obfuscateDBCmd
obfuscateDBCmd <>
adminCmd <>
importCmd <>
initCmd <>
invitationsCmd <>
phyloCmd <>
phyloProfileCmd <>
upgradeCmd <>
fileDiffCmd
)
{-|
Module : Main.hs
Description : Gargantext starter binary with Adaptative Phylo
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Adaptative Phylo binaries
-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE Strict #-}
module Main where
import Control.Concurrent.Async (mapConcurrently)
import Crypto.Hash.SHA256 (hash)
import Data.Aeson
import Data.ByteString.Char8 qualified as C8
import Data.List (nub, isSuffixOf, tail)
import Data.List.Split
import Data.Maybe (fromJust)
import Data.Text (unpack, replace, pack)
import Data.Text qualified as T
import Data.Vector qualified as Vector
import GHC.IO.Encoding
import Gargantext.API.Ngrams.Prelude (toTermList)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), FileType(..), parseFile)
import Gargantext.Core.Text.Corpus.Parsers.TSV (tsv_title, tsv_abstract, tsv_publication_year, tsv_publication_month, tsv_publication_day, tsv'_source, tsv'_title, tsv'_abstract, tsv'_publication_year, tsv'_publication_month, tsv'_publication_day, tsv'_weight)
import Gargantext.Core.Text.Corpus.Parsers.TSV qualified as Tsv
import Gargantext.Core.Text.List.Formats.TSV (tsvMapTermList)
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.API.Tools
import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink)
import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setConfig, toPeriods, getTimePeriod, getTimeStep)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude hiding (hash, replace)
import Prelude qualified
import System.Directory (listDirectory,doesFileExist)
import Common
main :: IO ()
main = do
setLocaleEncoding utf8
currentLocale <- getLocaleEncoding
printIOMsg $ "Machine locale: " <> show currentLocale
printIOMsg "Starting the reconstruction"
printIOMsg "Read the configuration file"
[args] <- getArgs
jsonArgs <- (eitherDecode <$> readJson args) :: IO (Either Prelude.String PhyloConfig)
case jsonArgs of
Left err -> putStrLn err
Right config -> do
printIOMsg "Parse the corpus"
mapList <- fileToList (listParser config) (listPath config)
corpus <- if (defaultMode config)
then fileToDocsDefault (corpusParser config) (corpusPath config) [Year 3 1 5,Month 3 1 5,Week 4 2 5] mapList
else fileToDocsAdvanced (corpusParser config) (corpusPath config) (timeUnit config) mapList
printIOComment (show (length corpus) <> " parsed docs from the corpus")
printIOComment (show (length $ nub $ concat $ map text corpus) <> " Size ngs_coterms")
printIOComment (show (length mapList) <> " Size ngs_terms List Map Ngrams")
printIOMsg "Reconstruct the phylo"
-- check the existing backup files
let backupPhyloWithoutLink = (outputPath config) <> "backupPhyloWithoutLink_" <> (configToSha BackupPhyloWithoutLink config) <> ".json"
let backupPhylo = (outputPath config) <> "backupPhylo_" <> (configToSha BackupPhylo config) <> ".json"
phyloWithoutLinkExists <- doesFileExist backupPhyloWithoutLink
phyloExists <- doesFileExist backupPhylo
-- reconstruct the phylo
phylo <- if phyloExists
then do
printIOMsg "Reconstruct the phylo from an existing file"
readPhylo backupPhylo
else do
if phyloWithoutLinkExists
then do
printIOMsg "Reconstruct the phylo from an existing file without links"
phyloWithoutLink <- readPhylo backupPhyloWithoutLink
writePhylo backupPhyloWithoutLink phyloWithoutLink
pure $ toPhylo (setConfig config phyloWithoutLink)
else do
printIOMsg "Reconstruct the phylo from scratch"
phyloWithoutLink <- pure $ toPhyloWithoutLink corpus config
writePhylo backupPhyloWithoutLink phyloWithoutLink
pure $ toPhylo (setConfig config phyloWithoutLink)
writePhylo backupPhylo phylo
printIOMsg "End of reconstruction, start the export"
let dot = toPhyloExport (setConfig config phylo)
let output = configToLabel config
dotToFile output dot
......@@ -692,31 +692,25 @@ library
, zip-archive ^>= 0.4.3
, zlib ^>= 0.6.2.3
executable gargantext-admin
import:
defaults
, optimized
main-is: Main.hs
other-modules:
Paths_gargantext
hs-source-dirs:
bin/gargantext-admin
build-depends:
extra
, gargantext
, gargantext-prelude
, text
executable gargantext-cli
import:
defaults
, optimized
main-is: Main.hs
other-modules:
CLI.Admin
CLI.CleanCsvCorpus
CLI.FileDiff
CLI.FilterTermsAndCooc
CLI.Import
CLI.Init
CLI.Invitations
CLI.ObfuscateDB
CLI.Phylo
CLI.Phylo.Common
CLI.Phylo.Profile
CLI.Types
CLI.Upgrade
CLI.Utils
Paths_gargantext
hs-source-dirs:
......@@ -727,7 +721,8 @@ executable gargantext-cli
, bytestring ^>= 0.10.12.0
, cassava ^>= 0.5.2.0
, containers ^>= 0.6.5.1
, extra
, cryptohash ^>= 0.11.9
, directory ^>= 1.3.6.0
, extra ^>= 1.7.9
, full-text-search ^>= 0.2.1.4
, gargantext
......@@ -735,105 +730,14 @@ executable gargantext-cli
, ini ^>= 0.4.1
, optparse-applicative
, optparse-generic ^>= 1.4.7
, parallel ^>= 3.2.2.0
, postgresql-simple ^>= 0.6.4
, protolude ^>= 0.3.3
, split ^>= 0.2.3.4
, text ^>= 1.2.4.1
, unordered-containers ^>= 0.2.16.0
, vector ^>= 0.12.3.0
executable gargantext-import
import:
defaults
, optimized
main-is: Main.hs
default-extensions:
TypeOperators
other-modules:
Paths_gargantext
hs-source-dirs:
bin/gargantext-import
build-depends:
extra ^>= 1.7.9
, gargantext
, gargantext-prelude
, servant-server ^>= 0.18.3
, text ^>= 1.2.4.1
executable gargantext-init
import:
defaults
, optimized
main-is: Main.hs
other-modules:
Paths_gargantext
hs-source-dirs:
bin/gargantext-init
build-depends:
cron ^>= 0.7.0
, extra ^>= 1.7.9
, gargantext
, gargantext-prelude
, text ^>= 1.2.4.1
executable gargantext-invitations
import:
defaults
, optimized
main-is: Main.hs
other-modules:
Paths_gargantext
hs-source-dirs:
bin/gargantext-invitations
build-depends:
extra ^>= 1.7.9
, gargantext
, gargantext-prelude
, text ^>= 1.2.4.1
executable gargantext-phylo
import:
defaults
, optimized
main-is: Main.hs
other-modules:
Paths_gargantext
Common
hs-source-dirs:
bin/gargantext-phylo bin/gargantext-phylo/Phylo
default-extensions:
DataKinds
DeriveGeneric
FlexibleContexts
FlexibleInstances
GADTs
GeneralizedNewtypeDeriving
ImportQualifiedPost
MultiParamTypeClasses
NamedFieldPuns
NoImplicitPrelude
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
build-depends:
aeson ^>= 1.5.6.0
, async ^>= 2.2.4
, bytestring ^>= 0.10.12.0
, cassava ^>= 0.5.2.0
, containers ^>= 0.6.5.1
, cryptohash ^>= 0.11.9
, directory ^>= 1.3.6.0
, extra ^>= 1.7.9
, gargantext
, gargantext-prelude
, ini ^>= 0.4.1
, optparse-generic ^>= 1.4.7
, parallel ^>= 3.2.2.0
, shelly
, split ^>= 0.2.3.4
, text ^>= 1.2.4.1
, time ^>= 1.9.3
, tree-diff
, unordered-containers ^>= 0.2.16.0
, vector ^>= 0.7.3
......@@ -860,23 +764,6 @@ executable gargantext-server
, unordered-containers ^>= 0.2.16.0
, vector ^>= 0.7.3
executable gargantext-upgrade
import:
defaults
, optimized
main-is: Main.hs
other-modules:
Paths_gargantext
hs-source-dirs:
bin/gargantext-upgrade
build-depends:
cron ^>= 0.7.0
, extra ^>= 1.7.9
, gargantext
, gargantext-prelude
, postgresql-simple ^>= 0.6.4
, text ^>= 1.2.4.1
test-suite garg-test-tasty
import:
defaults
......@@ -884,7 +771,7 @@ test-suite garg-test-tasty
main-is: drivers/tasty/Main.hs
other-modules:
Test.API.Routes
Common
CLI.Phylo.Common
Test.API.Setup
Test.Core.Similarity
Test.Core.Text
......@@ -920,7 +807,7 @@ test-suite garg-test-tasty
Test.Utils.Jobs
Paths_gargantext
hs-source-dirs:
test bin/gargantext-phylo/Phylo
test bin/gargantext-cli
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N
build-depends:
QuickCheck ^>= 2.14.2
......@@ -1098,39 +985,3 @@ benchmark garg-bench
ghc-options: "-with-rtsopts=-T -A32m"
if impl(ghc >= 8.6)
ghc-options: "-with-rtsopts=--nonmoving-gc"
executable gargantext-phylo-profile
main-is: Main.hs
other-modules:
Paths_gargantext
Common
hs-source-dirs:
bin/gargantext-phylo-profile bin/gargantext-phylo/Phylo
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
build-depends:
base
, bytestring
, gargantext
, gargantext-prelude
, shelly
, text
, async
, cryptohash
, aeson
, split
, vector
, directory
default-language: GHC2021
executable garg-golden-file-diff
import:
defaults
, optimized
main-is: Main.hs
hs-source-dirs:
bin/gargantext-golden-file-diff
build-depends:
base
, text
, tree-diff
default-language: Haskell2010
......@@ -5,7 +5,7 @@
module Test.Offline.Phylo (tests) where
import Common
import CLI.Phylo.Common
import Data.Aeson as JSON
import Data.Aeson.Types qualified as JSON
import Data.GraphViz.Attributes.Complete qualified as Graphviz
......
......@@ -8,15 +8,14 @@ import Data.Text qualified as T
import Gargantext.Core.Text.Terms.Mono.Stem.Internal.Lancaster (stem)
import Gargantext.Prelude (toS)
import Test.Tasty
import Test.Tasty.Golden (goldenVsStringDiff)
import Test.Tasty.Golden (goldenVsString)
import qualified Data.ByteString.Lazy as BL
import qualified Data.Text.Encoding as TE
tests :: TestTree
tests = testGroup "Lancaster" [
goldenVsStringDiff "test vector works" (\ref new -> ["cabal", "v2-run", "-v0", "garg-golden-file-diff", "--", ref, new]) "test-data/stemming/lancaster.txt" mkTestVector
goldenVsString "test vector works" "test-data/stemming/lancaster.txt" mkTestVector
]
-- | List un /unstemmed/ test words
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment