Commit f28aafdb authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Port phylo-profile to CLI

parent 1905b024
{-# LANGUAGE OverloadedStrings #-}
module CLI.Phylo.Profile where
import CLI.Phylo.Common
import Data.Aeson
import Data.List (nub)
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.API.Tools
import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink)
import Gargantext.Core.Viz.Phylo.PhyloTools
import GHC.IO.Encoding
import GHC.Stack
import Paths_gargantext
import Prelude
import Shelly hiding (command)
import System.Directory
import Options.Applicative
import CLI.Types
--------------
-- | Main | --
--------------
phyloConfig :: FilePath -> PhyloConfig
phyloConfig outdir = PhyloConfig {
corpusPath = "corpus.csv"
, listPath = "list.csv"
, outputPath = outdir
, corpusParser = Tsv {_tsv_limit = 150000}
, listParser = V4
, phyloName = "phylo_profile_test"
, phyloScale = 2
, similarity = WeightedLogJaccard {_wlj_sensibility = 0.5, _wlj_minSharedNgrams = 2}
, seaElevation = Constante {_cons_start = 0.1, _cons_gap = 0.1}
, defaultMode = True
, findAncestors = False
, phyloSynchrony = ByProximityThreshold {_bpt_threshold = 0.5, _bpt_sensibility = 0.0, _bpt_scope = AllBranches, _bpt_strategy = MergeAllGroups}
, phyloQuality = Quality {_qua_granularity = 0.8, _qua_minBranch = 3}
, timeUnit = Year {_year_period = 3, _year_step = 1, _year_matchingFrame = 5}
, clique = MaxClique {_mcl_size = 5, _mcl_threshold = 1.0e-4, _mcl_filter = ByThreshold}
, exportLabel = [ BranchLabel {_branch_labelTagger = MostEmergentTfIdf, _branch_labelSize = 2}
, GroupLabel {_group_labelTagger = MostEmergentInclusive, _group_labelSize = 2}
]
, exportSort = ByHierarchy {_sort_order = Desc}
, exportFilter = [ByBranchSize {_branch_size = 3.0}]
}
phyloProfileCLI :: HasCallStack => IO ()
phyloProfileCLI = do
shelly $ escaping False $ withTmpDir $ \tdir -> do
curDir <- pwd
let output = curDir <> "/" <> "gargantext_profile_out.dot"
chdir tdir $ do
liftIO $ setLocaleEncoding utf8
bpaConfig <- liftIO $ getDataFileName "bench-data/phylo/bpa-config.json"
corpusPath' <- liftIO $ getDataFileName "bench-data/phylo/GarganText_DocsList-nodeId-185487.csv"
listPath' <- liftIO $ getDataFileName "bench-data/phylo/GarganText_NgramsList-185488.csv"
(Right config) <- fmap (\pcfg -> pcfg { outputPath = tdir
, corpusPath = corpusPath'
, listPath = listPath'
}) <$> liftIO (eitherDecodeFileStrict' bpaConfig)
mapList <- liftIO $ fileToList (listParser config) (listPath config)
corpus <- liftIO $ if (defaultMode config)
then fileToDocsDefault (corpusParser config) (corpusPath config) [Year 3 1 5,Month 3 1 5,Week 4 2 5] mapList
else fileToDocsAdvanced (corpusParser config) (corpusPath config) (timeUnit config) mapList
liftIO $ do
printIOComment (show (length corpus) <> " parsed docs from the corpus")
printIOComment (show (length $ nub $ concat $ map text corpus) <> " Size ngs_coterms")
printIOComment (show (length mapList) <> " Size ngs_terms List Map Ngrams")
printIOMsg "Reconstruct the phylo"
-- check the existing backup files
let backupPhyloWithoutLink = (outputPath config) <> "backupPhyloWithoutLink_" <> (configToSha BackupPhyloWithoutLink config) <> ".json"
let backupPhylo = (outputPath config) <> "backupPhylo_" <> (configToSha BackupPhylo config) <> ".json"
phyloWithoutLinkExists <- doesFileExist backupPhyloWithoutLink
phyloExists <- doesFileExist backupPhylo
-- reconstruct the phylo
phylo <- if phyloExists
then do
printIOMsg "Reconstruct the phylo from an existing file"
readPhylo backupPhylo
else do
if phyloWithoutLinkExists
then do
printIOMsg "Reconstruct the phylo from an existing file without links"
phyloWithoutLink <- readPhylo backupPhyloWithoutLink
writePhylo backupPhyloWithoutLink phyloWithoutLink
pure $ toPhylo (setConfig config phyloWithoutLink)
else do
printIOMsg "Reconstruct the phylo from scratch"
phyloWithoutLink <- pure $ toPhyloWithoutLink corpus config
writePhylo backupPhyloWithoutLink phyloWithoutLink
pure $ toPhylo (setConfig config phyloWithoutLink)
writePhylo backupPhylo phylo
printIOMsg "End of reconstruction, start the export"
let dot = toPhyloExport (setConfig config phylo)
dotToFile output dot
echo "Done."
phyloProfileCmd :: HasCallStack => Mod CommandFields CLI
phyloProfileCmd =
command "phylo-profile" (info (helper <*> fmap CLISub (pure CCMD_phylo_profile))
(progDesc "Helper to profile phylo code."))
......@@ -68,6 +68,7 @@ data CLICmd
| CCMD_init !InitArgs
| CCMD_invitations !InvitationsArgs
| CCMD_phylo !PhyloArgs
| CCMD_phylo_profile
deriving (Show, Eq)
data CLI =
......
......@@ -28,6 +28,7 @@ import CLI.Import (importCLI, importCmd)
import CLI.Init (initCLI, initCmd)
import CLI.Invitations (invitationsCLI, invitationsCmd)
import CLI.Phylo (phyloCLI, phyloCmd)
import CLI.Phylo.Profile (phyloProfileCLI, phyloProfileCmd)
runCLI :: CLI -> IO ()
runCLI = \case
......@@ -47,6 +48,8 @@ runCLI = \case
-> invitationsCLI args
CLISub (CCMD_phylo args)
-> phyloCLI args
CLISub CCMD_phylo_profile
-> phyloProfileCLI
main :: IO ()
main = runCLI =<< execParser opts
......@@ -64,5 +67,6 @@ allOptions = subparser (
importCmd <>
initCmd <>
invitationsCmd <>
phyloCmd
phyloCmd <>
phyloProfileCmd
)
......@@ -707,6 +707,7 @@ executable gargantext-cli
CLI.ObfuscateDB
CLI.Phylo
CLI.Phylo.Common
CLI.Phylo.Profile
CLI.Types
CLI.Utils
Paths_gargantext
......@@ -730,6 +731,7 @@ executable gargantext-cli
, parallel ^>= 3.2.2.0
, postgresql-simple ^>= 0.6.4
, protolude ^>= 0.3.3
, shelly
, split ^>= 0.2.3.4
, text ^>= 1.2.4.1
, time ^>= 1.9.3
......@@ -998,29 +1000,6 @@ benchmark garg-bench
if impl(ghc >= 8.6)
ghc-options: "-with-rtsopts=--nonmoving-gc"
executable gargantext-phylo-profile
main-is: Main.hs
other-modules:
Paths_gargantext
CLI.Phylo.Common
hs-source-dirs:
bin/gargantext-phylo-profile bin/gargantext-cli
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
build-depends:
base
, bytestring
, gargantext
, gargantext-prelude
, shelly
, text
, async
, cryptohash
, aeson
, split
, vector
, directory
default-language: GHC2021
executable garg-golden-file-diff
import:
defaults
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment