Commit a4c1fbec authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Port gargantext-upgrade to CLI

parent f28aafdb
...@@ -59,6 +59,10 @@ data PhyloArgs = PhyloArgs ...@@ -59,6 +59,10 @@ data PhyloArgs = PhyloArgs
{ phylo_config :: !FilePath { phylo_config :: !FilePath
} deriving (Show, Eq) } deriving (Show, Eq)
data UpgradeArgs = UpgradeArgs
{ upgrade_ini :: !FilePath
} deriving (Show, Eq)
data CLICmd data CLICmd
= CCMD_clean_csv_corpus = CCMD_clean_csv_corpus
| CCMD_filter_terms_and_cooc !CorpusFile !TermListFile !OutputFile | CCMD_filter_terms_and_cooc !CorpusFile !TermListFile !OutputFile
...@@ -69,6 +73,7 @@ data CLICmd ...@@ -69,6 +73,7 @@ data CLICmd
| CCMD_invitations !InvitationsArgs | CCMD_invitations !InvitationsArgs
| CCMD_phylo !PhyloArgs | CCMD_phylo !PhyloArgs
| CCMD_phylo_profile | CCMD_phylo_profile
| CCMD_upgrade !UpgradeArgs
deriving (Show, Eq) deriving (Show, Eq)
data CLI = data CLI =
......
{-| {-|
Module : Main.hs Module : Upgrade.hs
Description : Gargantext Import Corpus Description : Gargantext Import Corpus
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -7,24 +7,26 @@ Maintainer : team@gargantext.org ...@@ -7,24 +7,26 @@ Maintainer : team@gargantext.org
Stability : experimental Stability : experimental
Portability : POSIX Portability : POSIX
Import a corpus binary. Upgrade a gargantext node.
-} -}
{-# LANGUAGE Strict #-} {-# LANGUAGE Strict #-}
{-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE QuasiQuotes #-}
module Main where module CLI.Upgrade where
import CLI.Types
import Data.List qualified as List (cycle, concat, take, unlines) import Data.List qualified as List (cycle, concat, take, unlines)
import Gargantext.API.Dev (withDevEnv) import Gargantext.API.Dev (withDevEnv)
import Gargantext.API.Node () -- instances only import Gargantext.API.Node () -- instances only
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Prelude.Config (GargConfig(..), readConfig) import Gargantext.Prelude.Config (GargConfig(..), readConfig)
import Prelude qualified import Prelude qualified
import Options.Applicative
main :: IO () upgradeCLI :: UpgradeArgs -> IO ()
main = do upgradeCLI (UpgradeArgs iniPath) = do
let ___ = putStrLn ((List.concat let ___ = putStrLn ((List.concat
$ List.take 72 $ List.take 72
...@@ -34,11 +36,6 @@ main = do ...@@ -34,11 +36,6 @@ main = do
putStrLn ("GarganText upgrade to version 0.0.6.9.9.4.4" :: Text) putStrLn ("GarganText upgrade to version 0.0.6.9.9.4.4" :: Text)
___ ___
params@[iniPath] <- getArgs
_ <- if length params /= 1
then panicTrace "Usage: ./gargantext-upgrade gargantext.ini"
else pure ()
putStrLn $ List.unlines putStrLn $ List.unlines
[ "Your Database defined in gargantext.ini will be upgraded." [ "Your Database defined in gargantext.ini will be upgraded."
, "We stronlgy recommend you to make a backup using pg_dump." , "We stronlgy recommend you to make a backup using pg_dump."
...@@ -92,3 +89,13 @@ main = do ...@@ -92,3 +89,13 @@ main = do
-- CREATE INDEX IF NOT EXISTS node_stories_ngrams_id_idx -- CREATE INDEX IF NOT EXISTS node_stories_ngrams_id_idx
-- ON node_stories(ngrams_id); -- ON node_stories(ngrams_id);
-- |] -- |]
upgradeCmd :: HasCallStack => Mod CommandFields CLI
upgradeCmd = command "upgrade" (info (helper <*> fmap CLISub upgrade_p) (progDesc "Upgrade a Gargantext node."))
upgrade_p :: Parser CLICmd
upgrade_p = fmap CCMD_upgrade $ UpgradeArgs
<$> ( strOption ( long "ini-path"
<> metavar "FILEPATH"
<> help "Location of the .ini path"
) )
...@@ -29,6 +29,7 @@ import CLI.Init (initCLI, initCmd) ...@@ -29,6 +29,7 @@ import CLI.Init (initCLI, initCmd)
import CLI.Invitations (invitationsCLI, invitationsCmd) import CLI.Invitations (invitationsCLI, invitationsCmd)
import CLI.Phylo (phyloCLI, phyloCmd) import CLI.Phylo (phyloCLI, phyloCmd)
import CLI.Phylo.Profile (phyloProfileCLI, phyloProfileCmd) import CLI.Phylo.Profile (phyloProfileCLI, phyloProfileCmd)
import CLI.Upgrade (upgradeCLI, upgradeCmd)
runCLI :: CLI -> IO () runCLI :: CLI -> IO ()
runCLI = \case runCLI = \case
...@@ -50,6 +51,8 @@ runCLI = \case ...@@ -50,6 +51,8 @@ runCLI = \case
-> phyloCLI args -> phyloCLI args
CLISub CCMD_phylo_profile CLISub CCMD_phylo_profile
-> phyloProfileCLI -> phyloProfileCLI
CLISub (CCMD_upgrade args)
-> upgradeCLI args
main :: IO () main :: IO ()
main = runCLI =<< execParser opts main = runCLI =<< execParser opts
...@@ -68,5 +71,6 @@ allOptions = subparser ( ...@@ -68,5 +71,6 @@ allOptions = subparser (
initCmd <> initCmd <>
invitationsCmd <> invitationsCmd <>
phyloCmd <> phyloCmd <>
phyloProfileCmd phyloProfileCmd <>
upgradeCmd
) )
{-# LANGUAGE OverloadedStrings #-}
module Main where
import CLI.Phylo.Common
import Data.Aeson
import Data.List (nub)
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.API.Tools
import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink)
import Gargantext.Core.Viz.Phylo.PhyloTools
import GHC.IO.Encoding
import GHC.Stack
import Paths_gargantext
import Prelude
import Shelly
import System.Directory
--------------
-- | Main | --
--------------
phyloConfig :: FilePath -> PhyloConfig
phyloConfig outdir = PhyloConfig {
corpusPath = "corpus.csv"
, listPath = "list.csv"
, outputPath = outdir
, corpusParser = Tsv {_tsv_limit = 150000}
, listParser = V4
, phyloName = "phylo_profile_test"
, phyloScale = 2
, similarity = WeightedLogJaccard {_wlj_sensibility = 0.5, _wlj_minSharedNgrams = 2}
, seaElevation = Constante {_cons_start = 0.1, _cons_gap = 0.1}
, defaultMode = True
, findAncestors = False
, phyloSynchrony = ByProximityThreshold {_bpt_threshold = 0.5, _bpt_sensibility = 0.0, _bpt_scope = AllBranches, _bpt_strategy = MergeAllGroups}
, phyloQuality = Quality {_qua_granularity = 0.8, _qua_minBranch = 3}
, timeUnit = Year {_year_period = 3, _year_step = 1, _year_matchingFrame = 5}
, clique = MaxClique {_mcl_size = 5, _mcl_threshold = 1.0e-4, _mcl_filter = ByThreshold}
, exportLabel = [ BranchLabel {_branch_labelTagger = MostEmergentTfIdf, _branch_labelSize = 2}
, GroupLabel {_group_labelTagger = MostEmergentInclusive, _group_labelSize = 2}
]
, exportSort = ByHierarchy {_sort_order = Desc}
, exportFilter = [ByBranchSize {_branch_size = 3.0}]
}
main :: HasCallStack => IO ()
main = do
shelly $ escaping False $ withTmpDir $ \tdir -> do
curDir <- pwd
let output = curDir <> "/" <> "gargantext_profile_out.dot"
chdir tdir $ do
liftIO $ setLocaleEncoding utf8
bpaConfig <- liftIO $ getDataFileName "bench-data/phylo/bpa-config.json"
corpusPath' <- liftIO $ getDataFileName "bench-data/phylo/GarganText_DocsList-nodeId-185487.csv"
listPath' <- liftIO $ getDataFileName "bench-data/phylo/GarganText_NgramsList-185488.csv"
(Right config) <- fmap (\pcfg -> pcfg { outputPath = tdir
, corpusPath = corpusPath'
, listPath = listPath'
}) <$> liftIO (eitherDecodeFileStrict' bpaConfig)
mapList <- liftIO $ fileToList (listParser config) (listPath config)
corpus <- liftIO $ if (defaultMode config)
then fileToDocsDefault (corpusParser config) (corpusPath config) [Year 3 1 5,Month 3 1 5,Week 4 2 5] mapList
else fileToDocsAdvanced (corpusParser config) (corpusPath config) (timeUnit config) mapList
liftIO $ do
printIOComment (show (length corpus) <> " parsed docs from the corpus")
printIOComment (show (length $ nub $ concat $ map text corpus) <> " Size ngs_coterms")
printIOComment (show (length mapList) <> " Size ngs_terms List Map Ngrams")
printIOMsg "Reconstruct the phylo"
-- check the existing backup files
let backupPhyloWithoutLink = (outputPath config) <> "backupPhyloWithoutLink_" <> (configToSha BackupPhyloWithoutLink config) <> ".json"
let backupPhylo = (outputPath config) <> "backupPhylo_" <> (configToSha BackupPhylo config) <> ".json"
phyloWithoutLinkExists <- doesFileExist backupPhyloWithoutLink
phyloExists <- doesFileExist backupPhylo
-- reconstruct the phylo
phylo <- if phyloExists
then do
printIOMsg "Reconstruct the phylo from an existing file"
readPhylo backupPhylo
else do
if phyloWithoutLinkExists
then do
printIOMsg "Reconstruct the phylo from an existing file without links"
phyloWithoutLink <- readPhylo backupPhyloWithoutLink
writePhylo backupPhyloWithoutLink phyloWithoutLink
pure $ toPhylo (setConfig config phyloWithoutLink)
else do
printIOMsg "Reconstruct the phylo from scratch"
phyloWithoutLink <- pure $ toPhyloWithoutLink corpus config
writePhylo backupPhyloWithoutLink phyloWithoutLink
pure $ toPhylo (setConfig config phyloWithoutLink)
writePhylo backupPhylo phylo
printIOMsg "End of reconstruction, start the export"
let dot = toPhyloExport (setConfig config phylo)
dotToFile output dot
echo "Done."
...@@ -709,6 +709,7 @@ executable gargantext-cli ...@@ -709,6 +709,7 @@ executable gargantext-cli
CLI.Phylo.Common CLI.Phylo.Common
CLI.Phylo.Profile CLI.Phylo.Profile
CLI.Types CLI.Types
CLI.Upgrade
CLI.Utils CLI.Utils
Paths_gargantext Paths_gargantext
hs-source-dirs: hs-source-dirs:
...@@ -761,23 +762,6 @@ executable gargantext-server ...@@ -761,23 +762,6 @@ executable gargantext-server
, unordered-containers ^>= 0.2.16.0 , unordered-containers ^>= 0.2.16.0
, vector ^>= 0.7.3 , vector ^>= 0.7.3
executable gargantext-upgrade
import:
defaults
, optimized
main-is: Main.hs
other-modules:
Paths_gargantext
hs-source-dirs:
bin/gargantext-upgrade
build-depends:
cron ^>= 0.7.0
, extra ^>= 1.7.9
, gargantext
, gargantext-prelude
, postgresql-simple ^>= 0.6.4
, text ^>= 1.2.4.1
test-suite garg-test-tasty test-suite garg-test-tasty
import: import:
defaults defaults
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment