Commit f28aafdb authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Port phylo-profile to CLI

parent 1905b024
{-# LANGUAGE OverloadedStrings #-}
module CLI.Phylo.Profile where
import CLI.Phylo.Common
import Data.Aeson
import Data.List (nub)
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.API.Tools
import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink)
import Gargantext.Core.Viz.Phylo.PhyloTools
import GHC.IO.Encoding
import GHC.Stack
import Paths_gargantext
import Prelude
import Shelly hiding (command)
import System.Directory
import Options.Applicative
import CLI.Types
--------------
-- | Main | --
--------------
phyloConfig :: FilePath -> PhyloConfig
phyloConfig outdir = PhyloConfig {
corpusPath = "corpus.csv"
, listPath = "list.csv"
, outputPath = outdir
, corpusParser = Tsv {_tsv_limit = 150000}
, listParser = V4
, phyloName = "phylo_profile_test"
, phyloScale = 2
, similarity = WeightedLogJaccard {_wlj_sensibility = 0.5, _wlj_minSharedNgrams = 2}
, seaElevation = Constante {_cons_start = 0.1, _cons_gap = 0.1}
, defaultMode = True
, findAncestors = False
, phyloSynchrony = ByProximityThreshold {_bpt_threshold = 0.5, _bpt_sensibility = 0.0, _bpt_scope = AllBranches, _bpt_strategy = MergeAllGroups}
, phyloQuality = Quality {_qua_granularity = 0.8, _qua_minBranch = 3}
, timeUnit = Year {_year_period = 3, _year_step = 1, _year_matchingFrame = 5}
, clique = MaxClique {_mcl_size = 5, _mcl_threshold = 1.0e-4, _mcl_filter = ByThreshold}
, exportLabel = [ BranchLabel {_branch_labelTagger = MostEmergentTfIdf, _branch_labelSize = 2}
, GroupLabel {_group_labelTagger = MostEmergentInclusive, _group_labelSize = 2}
]
, exportSort = ByHierarchy {_sort_order = Desc}
, exportFilter = [ByBranchSize {_branch_size = 3.0}]
}
phyloProfileCLI :: HasCallStack => IO ()
phyloProfileCLI = do
shelly $ escaping False $ withTmpDir $ \tdir -> do
curDir <- pwd
let output = curDir <> "/" <> "gargantext_profile_out.dot"
chdir tdir $ do
liftIO $ setLocaleEncoding utf8
bpaConfig <- liftIO $ getDataFileName "bench-data/phylo/bpa-config.json"
corpusPath' <- liftIO $ getDataFileName "bench-data/phylo/GarganText_DocsList-nodeId-185487.csv"
listPath' <- liftIO $ getDataFileName "bench-data/phylo/GarganText_NgramsList-185488.csv"
(Right config) <- fmap (\pcfg -> pcfg { outputPath = tdir
, corpusPath = corpusPath'
, listPath = listPath'
}) <$> liftIO (eitherDecodeFileStrict' bpaConfig)
mapList <- liftIO $ fileToList (listParser config) (listPath config)
corpus <- liftIO $ if (defaultMode config)
then fileToDocsDefault (corpusParser config) (corpusPath config) [Year 3 1 5,Month 3 1 5,Week 4 2 5] mapList
else fileToDocsAdvanced (corpusParser config) (corpusPath config) (timeUnit config) mapList
liftIO $ do
printIOComment (show (length corpus) <> " parsed docs from the corpus")
printIOComment (show (length $ nub $ concat $ map text corpus) <> " Size ngs_coterms")
printIOComment (show (length mapList) <> " Size ngs_terms List Map Ngrams")
printIOMsg "Reconstruct the phylo"
-- check the existing backup files
let backupPhyloWithoutLink = (outputPath config) <> "backupPhyloWithoutLink_" <> (configToSha BackupPhyloWithoutLink config) <> ".json"
let backupPhylo = (outputPath config) <> "backupPhylo_" <> (configToSha BackupPhylo config) <> ".json"
phyloWithoutLinkExists <- doesFileExist backupPhyloWithoutLink
phyloExists <- doesFileExist backupPhylo
-- reconstruct the phylo
phylo <- if phyloExists
then do
printIOMsg "Reconstruct the phylo from an existing file"
readPhylo backupPhylo
else do
if phyloWithoutLinkExists
then do
printIOMsg "Reconstruct the phylo from an existing file without links"
phyloWithoutLink <- readPhylo backupPhyloWithoutLink
writePhylo backupPhyloWithoutLink phyloWithoutLink
pure $ toPhylo (setConfig config phyloWithoutLink)
else do
printIOMsg "Reconstruct the phylo from scratch"
phyloWithoutLink <- pure $ toPhyloWithoutLink corpus config
writePhylo backupPhyloWithoutLink phyloWithoutLink
pure $ toPhylo (setConfig config phyloWithoutLink)
writePhylo backupPhylo phylo
printIOMsg "End of reconstruction, start the export"
let dot = toPhyloExport (setConfig config phylo)
dotToFile output dot
echo "Done."
phyloProfileCmd :: HasCallStack => Mod CommandFields CLI
phyloProfileCmd =
command "phylo-profile" (info (helper <*> fmap CLISub (pure CCMD_phylo_profile))
(progDesc "Helper to profile phylo code."))
...@@ -68,6 +68,7 @@ data CLICmd ...@@ -68,6 +68,7 @@ data CLICmd
| CCMD_init !InitArgs | CCMD_init !InitArgs
| CCMD_invitations !InvitationsArgs | CCMD_invitations !InvitationsArgs
| CCMD_phylo !PhyloArgs | CCMD_phylo !PhyloArgs
| CCMD_phylo_profile
deriving (Show, Eq) deriving (Show, Eq)
data CLI = data CLI =
......
...@@ -28,6 +28,7 @@ import CLI.Import (importCLI, importCmd) ...@@ -28,6 +28,7 @@ import CLI.Import (importCLI, importCmd)
import CLI.Init (initCLI, initCmd) import CLI.Init (initCLI, initCmd)
import CLI.Invitations (invitationsCLI, invitationsCmd) import CLI.Invitations (invitationsCLI, invitationsCmd)
import CLI.Phylo (phyloCLI, phyloCmd) import CLI.Phylo (phyloCLI, phyloCmd)
import CLI.Phylo.Profile (phyloProfileCLI, phyloProfileCmd)
runCLI :: CLI -> IO () runCLI :: CLI -> IO ()
runCLI = \case runCLI = \case
...@@ -47,6 +48,8 @@ runCLI = \case ...@@ -47,6 +48,8 @@ runCLI = \case
-> invitationsCLI args -> invitationsCLI args
CLISub (CCMD_phylo args) CLISub (CCMD_phylo args)
-> phyloCLI args -> phyloCLI args
CLISub CCMD_phylo_profile
-> phyloProfileCLI
main :: IO () main :: IO ()
main = runCLI =<< execParser opts main = runCLI =<< execParser opts
...@@ -64,5 +67,6 @@ allOptions = subparser ( ...@@ -64,5 +67,6 @@ allOptions = subparser (
importCmd <> importCmd <>
initCmd <> initCmd <>
invitationsCmd <> invitationsCmd <>
phyloCmd phyloCmd <>
phyloProfileCmd
) )
...@@ -707,6 +707,7 @@ executable gargantext-cli ...@@ -707,6 +707,7 @@ executable gargantext-cli
CLI.ObfuscateDB CLI.ObfuscateDB
CLI.Phylo CLI.Phylo
CLI.Phylo.Common CLI.Phylo.Common
CLI.Phylo.Profile
CLI.Types CLI.Types
CLI.Utils CLI.Utils
Paths_gargantext Paths_gargantext
...@@ -730,6 +731,7 @@ executable gargantext-cli ...@@ -730,6 +731,7 @@ executable gargantext-cli
, parallel ^>= 3.2.2.0 , parallel ^>= 3.2.2.0
, postgresql-simple ^>= 0.6.4 , postgresql-simple ^>= 0.6.4
, protolude ^>= 0.3.3 , protolude ^>= 0.3.3
, shelly
, split ^>= 0.2.3.4 , split ^>= 0.2.3.4
, text ^>= 1.2.4.1 , text ^>= 1.2.4.1
, time ^>= 1.9.3 , time ^>= 1.9.3
...@@ -998,29 +1000,6 @@ benchmark garg-bench ...@@ -998,29 +1000,6 @@ benchmark garg-bench
if impl(ghc >= 8.6) if impl(ghc >= 8.6)
ghc-options: "-with-rtsopts=--nonmoving-gc" ghc-options: "-with-rtsopts=--nonmoving-gc"
executable gargantext-phylo-profile
main-is: Main.hs
other-modules:
Paths_gargantext
CLI.Phylo.Common
hs-source-dirs:
bin/gargantext-phylo-profile bin/gargantext-cli
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
build-depends:
base
, bytestring
, gargantext
, gargantext-prelude
, shelly
, text
, async
, cryptohash
, aeson
, split
, vector
, directory
default-language: GHC2021
executable garg-golden-file-diff executable garg-golden-file-diff
import: import:
defaults defaults
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment