Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Grégoire Locqueville
haskell-gargantext
Commits
f28aafdb
Commit
f28aafdb
authored
Jul 01, 2024
by
Alfredo Di Napoli
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Port phylo-profile to CLI
parent
1905b024
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
126 additions
and
24 deletions
+126
-24
Profile.hs
bin/gargantext-cli/CLI/Phylo/Profile.hs
+118
-0
Types.hs
bin/gargantext-cli/CLI/Types.hs
+1
-0
Main.hs
bin/gargantext-cli/Main.hs
+5
-1
gargantext.cabal
gargantext.cabal
+2
-23
No files found.
bin/gargantext-cli/CLI/Phylo/Profile.hs
0 → 100644
View file @
f28aafdb
{-# LANGUAGE OverloadedStrings #-}
module
CLI.Phylo.Profile
where
import
CLI.Phylo.Common
import
Data.Aeson
import
Data.List
(
nub
)
import
Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.API.Tools
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
import
Gargantext.Core.Viz.Phylo.PhyloMaker
(
toPhylo
,
toPhyloWithoutLink
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
import
GHC.IO.Encoding
import
GHC.Stack
import
Paths_gargantext
import
Prelude
import
Shelly
hiding
(
command
)
import
System.Directory
import
Options.Applicative
import
CLI.Types
--------------
-- | Main | --
--------------
-- | Hard-coded 'PhyloConfig' for profiling runs.
--
-- Every setting is fixed; only the output directory is supplied by the
-- caller and stored in 'outputPath'.
phyloConfig :: FilePath -> PhyloConfig
phyloConfig outdir =
  PhyloConfig
    { corpusPath     = "corpus.csv"
    , listPath       = "list.csv"
    , outputPath     = outdir
    , corpusParser   = parserCfg
    , listParser     = V4
    , phyloName      = "phylo_profile_test"
    , phyloScale     = 2
    , similarity     = similarityCfg
    , seaElevation   = elevationCfg
    , defaultMode    = True
    , findAncestors  = False
    , phyloSynchrony = synchronyCfg
    , phyloQuality   = qualityCfg
    , timeUnit       = timeUnitCfg
    , clique         = cliqueCfg
    , exportLabel    = labelCfg
    , exportSort     = ByHierarchy { _sort_order = Desc }
    , exportFilter   = [ByBranchSize { _branch_size = 3.0 }]
    }
  where
    -- Nested settings are named here so the top-level record stays flat.
    parserCfg = Tsv { _tsv_limit = 150000 }

    similarityCfg =
      WeightedLogJaccard
        { _wlj_sensibility     = 0.5
        , _wlj_minSharedNgrams = 2
        }

    elevationCfg =
      Constante
        { _cons_start = 0.1
        , _cons_gap   = 0.1
        }

    synchronyCfg =
      ByProximityThreshold
        { _bpt_threshold   = 0.5
        , _bpt_sensibility = 0.0
        , _bpt_scope       = AllBranches
        , _bpt_strategy    = MergeAllGroups
        }

    qualityCfg =
      Quality
        { _qua_granularity = 0.8
        , _qua_minBranch   = 3
        }

    timeUnitCfg =
      Year
        { _year_period        = 3
        , _year_step          = 1
        , _year_matchingFrame = 5
        }

    cliqueCfg =
      MaxClique
        { _mcl_size      = 5
        , _mcl_threshold = 1.0e-4
        , _mcl_filter    = ByThreshold
        }

    labelCfg =
      [ BranchLabel
          { _branch_labelTagger = MostEmergentTfIdf
          , _branch_labelSize   = 2
          }
      , GroupLabel
          { _group_labelTagger = MostEmergentInclusive
          , _group_labelSize   = 2
          }
      ]
-- | Entry point of the @phylo-profile@ sub-command.
--
-- Works inside a Shelly temporary directory: it locates the bundled
-- benchmark configuration, corpus and ngrams list via 'getDataFileName',
-- parses them, reconstructs the phylo (re-using backup files when they
-- exist) and finally writes the export with 'dotToFile' to
-- @gargantext_profile_out.dot@ in the directory the command was started from.
--
-- Aborts with a user error carrying the decoder message when the bundled
-- JSON configuration cannot be decoded.
phyloProfileCLI :: HasCallStack => IO ()
phyloProfileCLI = do
  shelly $ escaping False $ withTmpDir $ \tdir -> do
    -- Remember where we started: the final .dot file goes there, not into
    -- the temporary directory.
    curDir <- pwd
    let output = curDir <> "/" <> "gargantext_profile_out.dot"
    chdir tdir $ do
      liftIO $ setLocaleEncoding utf8
      bpaConfig   <- liftIO $ getDataFileName "bench-data/phylo/bpa-config.json"
      corpusPath' <- liftIO $ getDataFileName "bench-data/phylo/GarganText_DocsList-nodeId-185487.csv"
      listPath'   <- liftIO $ getDataFileName "bench-data/phylo/GarganText_NgramsList-185488.csv"

      -- Decode the configuration explicitly so that a malformed file is
      -- reported with the decoder's message instead of an opaque
      -- pattern-match failure.
      decoded <- liftIO (eitherDecodeFileStrict' bpaConfig)
      config <- case decoded of
        Left err ->
          liftIO . ioError . userError $
            "phylo-profile: cannot decode " <> bpaConfig <> ": " <> err
        Right pcfg ->
          pure (pcfg { outputPath = tdir
                     , corpusPath = corpusPath'
                     , listPath   = listPath'
                     })

      mapList <- liftIO $ fileToList (listParser config) (listPath config)

      corpus <- liftIO $
        if defaultMode config
          then fileToDocsDefault (corpusParser config)
                                 (corpusPath config)
                                 [Year 3 1 5, Month 3 1 5, Week 4 2 5]
                                 mapList
          else fileToDocsAdvanced (corpusParser config)
                                  (corpusPath config)
                                  (timeUnit config)
                                  mapList

      liftIO $ do
        printIOComment (show (length corpus) <> " parsed docs from the corpus")
        printIOComment (show (length $ nub $ concatMap text corpus) <> " Size ngs_coterms")
        printIOComment (show (length mapList) <> " Size ngs_terms List Map Ngrams")

        printIOMsg "Reconstruct the phylo"

        -- check the existing backup files
        --
        -- The explicit "/" keeps the backups inside the output directory;
        -- without it the file names would be glued onto the directory name
        -- and end up next to it.
        let backupDir = outputPath config <> "/"
            backupPhyloWithoutLink =
              backupDir <> "backupPhyloWithoutLink_"
                        <> configToSha BackupPhyloWithoutLink config
                        <> ".json"
            backupPhylo =
              backupDir <> "backupPhylo_"
                        <> configToSha BackupPhylo config
                        <> ".json"

        phyloWithoutLinkExists <- doesFileExist backupPhyloWithoutLink
        phyloExists            <- doesFileExist backupPhylo

        -- reconstruct the phylo
        phylo <-
          if phyloExists
            then do
              printIOMsg "Reconstruct the phylo from an existing file"
              readPhylo backupPhylo
            else
              if phyloWithoutLinkExists
                then do
                  printIOMsg "Reconstruct the phylo from an existing file without links"
                  phyloWithoutLink <- readPhylo backupPhyloWithoutLink
                  -- NOTE(review): this writes back what was just read from the
                  -- same path; kept as in the original.
                  writePhylo backupPhyloWithoutLink phyloWithoutLink
                  pure $ toPhylo (setConfig config phyloWithoutLink)
                else do
                  printIOMsg "Reconstruct the phylo from scratch"
                  let phyloWithoutLink = toPhyloWithoutLink corpus config
                  writePhylo backupPhyloWithoutLink phyloWithoutLink
                  pure $ toPhylo (setConfig config phyloWithoutLink)

        writePhylo backupPhylo phylo

        printIOMsg "End of reconstruction, start the export"

        let dot = toPhyloExport (setConfig config phylo)

        dotToFile output dot

      echo "Done."
-- | Command-line fragment that registers the @phylo-profile@ sub-command.
--
-- The sub-command takes no arguments of its own: parsing it always yields
-- 'CCMD_phylo_profile' wrapped in 'CLISub'.
phyloProfileCmd :: HasCallStack => Mod CommandFields CLI
phyloProfileCmd = command "phylo-profile" cmdInfo
  where
    cmdInfo = info (helper <*> pure (CLISub CCMD_phylo_profile)) cmdDesc
    cmdDesc = progDesc "Helper to profile phylo code."
bin/gargantext-cli/CLI/Types.hs
View file @
f28aafdb
...
...
@@ -68,6 +68,7 @@ data CLICmd
|
CCMD_init
!
InitArgs
|
CCMD_invitations
!
InvitationsArgs
|
CCMD_phylo
!
PhyloArgs
|
CCMD_phylo_profile
deriving
(
Show
,
Eq
)
data
CLI
=
...
...
bin/gargantext-cli/Main.hs
View file @
f28aafdb
...
...
@@ -28,6 +28,7 @@ import CLI.Import (importCLI, importCmd)
import
CLI.Init
(
initCLI
,
initCmd
)
import
CLI.Invitations
(
invitationsCLI
,
invitationsCmd
)
import
CLI.Phylo
(
phyloCLI
,
phyloCmd
)
import
CLI.Phylo.Profile
(
phyloProfileCLI
,
phyloProfileCmd
)
runCLI
::
CLI
->
IO
()
runCLI
=
\
case
...
...
@@ -47,6 +48,8 @@ runCLI = \case
->
invitationsCLI
args
CLISub
(
CCMD_phylo
args
)
->
phyloCLI
args
CLISub
CCMD_phylo_profile
->
phyloProfileCLI
-- | Program entry point: parse the command line with 'opts', then dispatch
-- the resulting 'CLI' value through 'runCLI'.
main :: IO ()
main = execParser opts >>= runCLI
...
...
@@ -64,5 +67,6 @@ allOptions = subparser (
importCmd
<>
initCmd
<>
invitationsCmd
<>
phyloCmd
phyloCmd
<>
phyloProfileCmd
)
gargantext.cabal
View file @
f28aafdb
...
...
@@ -707,6 +707,7 @@ executable gargantext-cli
CLI.ObfuscateDB
CLI.Phylo
CLI.Phylo.Common
CLI.Phylo.Profile
CLI.Types
CLI.Utils
Paths_gargantext
...
...
@@ -730,6 +731,7 @@ executable gargantext-cli
, parallel ^>= 3.2.2.0
, postgresql-simple ^>= 0.6.4
, protolude ^>= 0.3.3
, shelly
, split ^>= 0.2.3.4
, text ^>= 1.2.4.1
, time ^>= 1.9.3
...
...
@@ -998,29 +1000,6 @@ benchmark garg-bench
if impl(ghc >= 8.6)
ghc-options: "-with-rtsopts=--nonmoving-gc"
executable gargantext-phylo-profile
main-is: Main.hs
other-modules:
Paths_gargantext
CLI.Phylo.Common
hs-source-dirs:
bin/gargantext-phylo-profile bin/gargantext-cli
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
build-depends:
base
, bytestring
, gargantext
, gargantext-prelude
, shelly
, text
, async
, cryptohash
, aeson
, split
, vector
, directory
default-language: GHC2021
executable garg-golden-file-diff
import:
defaults
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment