Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
160
Issues
160
List
Board
Labels
Milestones
Merge Requests
14
Merge Requests
14
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
f28aafdb
Commit
f28aafdb
authored
Jul 01, 2024
by
Alfredo Di Napoli
1
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Port phylo-profile to CLI
parent
1905b024
Pipeline
#6297
passed with stages
in 29 minutes and 11 seconds
Changes
4
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
126 additions
and
24 deletions
+126
-24
Profile.hs
bin/gargantext-cli/CLI/Phylo/Profile.hs
+118
-0
Types.hs
bin/gargantext-cli/CLI/Types.hs
+1
-0
Main.hs
bin/gargantext-cli/Main.hs
+5
-1
gargantext.cabal
gargantext.cabal
+2
-23
No files found.
bin/gargantext-cli/CLI/Phylo/Profile.hs
0 → 100644
View file @
f28aafdb
{-# LANGUAGE OverloadedStrings #-}
module
CLI.Phylo.Profile
where
import
CLI.Phylo.Common
import
Data.Aeson
import
Data.List
(
nub
)
import
Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.API.Tools
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
import
Gargantext.Core.Viz.Phylo.PhyloMaker
(
toPhylo
,
toPhyloWithoutLink
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
import
GHC.IO.Encoding
import
GHC.Stack
import
Paths_gargantext
import
Prelude
import
Shelly
hiding
(
command
)
import
System.Directory
import
Options.Applicative
import
CLI.Types
--------------
-- | Main | --
--------------
-- | Hard-coded 'PhyloConfig' defined alongside the profiling command.
--
-- Every setting is a fixed literal except 'outputPath', which is taken
-- from the argument.
phyloConfig :: FilePath -> PhyloConfig
phyloConfig outputDir =
  PhyloConfig
    { corpusPath     = "corpus.csv"
    , listPath       = "list.csv"
    , outputPath     = outputDir
    , corpusParser   = Tsv { _tsv_limit = 150000 }
    , listParser     = V4
    , phyloName      = "phylo_profile_test"
    , phyloScale     = 2
    , similarity     = similarityMeasure
    , seaElevation   = elevation
    , defaultMode    = True
    , findAncestors  = False
    , phyloSynchrony = synchrony
    , phyloQuality   = quality
    , timeUnit       = period
    , clique         = cliqueDetection
    , exportLabel    = [branchLabel, groupLabel]
    , exportSort     = ByHierarchy { _sort_order = Desc }
    , exportFilter   = [ByBranchSize { _branch_size = 3.0 }]
    }
  where
    -- Nested settings are named here so the top-level record stays flat.
    similarityMeasure =
      WeightedLogJaccard
        { _wlj_sensibility     = 0.5
        , _wlj_minSharedNgrams = 2
        }

    elevation =
      Constante
        { _cons_start = 0.1
        , _cons_gap   = 0.1
        }

    synchrony =
      ByProximityThreshold
        { _bpt_threshold   = 0.5
        , _bpt_sensibility = 0.0
        , _bpt_scope       = AllBranches
        , _bpt_strategy    = MergeAllGroups
        }

    quality =
      Quality
        { _qua_granularity = 0.8
        , _qua_minBranch   = 3
        }

    period =
      Year
        { _year_period        = 3
        , _year_step          = 1
        , _year_matchingFrame = 5
        }

    cliqueDetection =
      MaxClique
        { _mcl_size      = 5
        , _mcl_threshold = 1.0e-4
        , _mcl_filter    = ByThreshold
        }

    branchLabel =
      BranchLabel
        { _branch_labelTagger = MostEmergentTfIdf
        , _branch_labelSize   = 2
        }

    groupLabel =
      GroupLabel
        { _group_labelTagger = MostEmergentInclusive
        , _group_labelSize   = 2
        }
-- | Entry point of the @phylo-profile@ sub-command.
--
-- Steps performed:
--
-- 1. Create a temporary directory and make it the working directory.
-- 2. Load @bench-data/phylo/bpa-config.json@ (located via 'getDataFileName')
--    and override its 'outputPath', 'corpusPath' and 'listPath' with the
--    temporary directory and the bundled bench-data CSV files.
-- 3. Parse the ngrams list and the corpus, then build the phylo, reusing
--    backup files when they exist.
-- 4. Export the phylo as @gargantext_profile_out.dot@ in the directory the
--    command was started from.
--
-- Fails with a descriptive 'IOError' if the JSON configuration cannot be
-- decoded.
phyloProfileCLI :: HasCallStack => IO ()
phyloProfileCLI =
  shelly $ escaping False $ withTmpDir $ \tdir -> do
    -- Remember where we started: the final .dot file is written there, not
    -- in the temporary directory.
    curDir <- pwd
    let output = curDir <> "/" <> "gargantext_profile_out.dot"
    chdir tdir $ do
      liftIO $ setLocaleEncoding utf8

      bpaConfig   <- liftIO $ getDataFileName "bench-data/phylo/bpa-config.json"
      corpusPath' <- liftIO $ getDataFileName "bench-data/phylo/GarganText_DocsList-nodeId-185487.csv"
      listPath'   <- liftIO $ getDataFileName "bench-data/phylo/GarganText_NgramsList-185488.csv"

      -- Decode the configuration explicitly instead of using a partial
      -- @Right config <- ...@ pattern, so a broken file reports the actual
      -- decoding error.
      decoded <- liftIO (eitherDecodeFileStrict' bpaConfig)
      config <- case decoded of
        Left err ->
          liftIO . fail $
            "phylo-profile: cannot decode " <> bpaConfig <> ": " <> err
        Right pcfg ->
          pure (pcfg { outputPath = tdir
                     , corpusPath = corpusPath'
                     , listPath   = listPath'
                     })

      mapList <- liftIO $ fileToList (listParser config) (listPath config)

      corpus <- liftIO $
        if defaultMode config
          then fileToDocsDefault
                 (corpusParser config)
                 (corpusPath config)
                 [Year 3 1 5, Month 3 1 5, Week 4 2 5]
                 mapList
          else fileToDocsAdvanced
                 (corpusParser config)
                 (corpusPath config)
                 (timeUnit config)
                 mapList

      liftIO $ do
        printIOComment (show (length corpus) <> " parsed docs from the corpus")
        printIOComment (show (length $ nub $ concatMap text corpus) <> " Size ngs_coterms")
        printIOComment (show (length mapList) <> " Size ngs_terms List Map Ngrams")

        printIOMsg "Reconstruct the phylo"

        -- Backup files live inside the output directory. 'outputPath' is the
        -- temporary directory, which carries no trailing separator, so one
        -- is inserted explicitly; otherwise the files would be created next
        -- to the temporary directory and never cleaned up.
        let backupFile prefix stage =
              outputPath config <> "/" <> prefix <> configToSha stage config <> ".json"
            backupPhyloWithoutLink = backupFile "backupPhyloWithoutLink_" BackupPhyloWithoutLink
            backupPhylo            = backupFile "backupPhylo_" BackupPhylo

        -- check the existing backup files
        phyloWithoutLinkExists <- doesFileExist backupPhyloWithoutLink
        phyloExists            <- doesFileExist backupPhylo

        -- reconstruct the phylo
        phylo <-
          if phyloExists
            then do
              printIOMsg "Reconstruct the phylo from an existing file"
              readPhylo backupPhylo
            else do
              phyloWithoutLink <-
                if phyloWithoutLinkExists
                  then do
                    printIOMsg "Reconstruct the phylo from an existing file without links"
                    readPhylo backupPhyloWithoutLink
                  else do
                    printIOMsg "Reconstruct the phylo from scratch"
                    pure (toPhyloWithoutLink corpus config)
              -- Both branches persist the link-less phylo before linking.
              writePhylo backupPhyloWithoutLink phyloWithoutLink
              pure $ toPhylo (setConfig config phyloWithoutLink)

        writePhylo backupPhylo phylo

        printIOMsg "End of reconstruction, start the export"

        let dot = toPhyloExport (setConfig config phylo)
        dotToFile output dot

      echo "Done."
-- | optparse-applicative registration of the @phylo-profile@ sub-command.
--
-- The command takes no arguments of its own (only the standard @--help@
-- from 'helper') and always yields @CLISub CCMD_phylo_profile@.
phyloProfileCmd :: HasCallStack => Mod CommandFields CLI
phyloProfileCmd = command "phylo-profile" profileInfo
  where
    profileInfo   = info (helper <*> profileParser) description
    profileParser = pure (CLISub CCMD_phylo_profile)
    description   = progDesc "Helper to profile phylo code."
bin/gargantext-cli/CLI/Types.hs
View file @
f28aafdb
...
...
@@ -68,6 +68,7 @@ data CLICmd
|
CCMD_init
!
InitArgs
|
CCMD_invitations
!
InvitationsArgs
|
CCMD_phylo
!
PhyloArgs
|
CCMD_phylo_profile
deriving
(
Show
,
Eq
)
data
CLI
=
...
...
bin/gargantext-cli/Main.hs
View file @
f28aafdb
...
...
@@ -28,6 +28,7 @@ import CLI.Import (importCLI, importCmd)
import
CLI.Init
(
initCLI
,
initCmd
)
import
CLI.Invitations
(
invitationsCLI
,
invitationsCmd
)
import
CLI.Phylo
(
phyloCLI
,
phyloCmd
)
import
CLI.Phylo.Profile
(
phyloProfileCLI
,
phyloProfileCmd
)
runCLI
::
CLI
->
IO
()
runCLI
=
\
case
...
...
@@ -47,6 +48,8 @@ runCLI = \case
->
invitationsCLI
args
CLISub
(
CCMD_phylo
args
)
->
phyloCLI
args
CLISub
CCMD_phylo_profile
->
phyloProfileCLI
main
::
IO
()
main
=
runCLI
=<<
execParser
opts
...
...
@@ -64,5 +67,6 @@ allOptions = subparser (
importCmd
<>
initCmd
<>
invitationsCmd
<>
phyloCmd
phyloCmd
<>
phyloProfileCmd
)
gargantext.cabal
View file @
f28aafdb
...
...
@@ -707,6 +707,7 @@ executable gargantext-cli
CLI.ObfuscateDB
CLI.Phylo
CLI.Phylo.Common
CLI.Phylo.Profile
CLI.Types
CLI.Utils
Paths_gargantext
...
...
@@ -730,6 +731,7 @@ executable gargantext-cli
, parallel ^>= 3.2.2.0
, postgresql-simple ^>= 0.6.4
, protolude ^>= 0.3.3
, shelly
, split ^>= 0.2.3.4
, text ^>= 1.2.4.1
, time ^>= 1.9.3
...
...
@@ -998,29 +1000,6 @@ benchmark garg-bench
if impl(ghc >= 8.6)
ghc-options: "-with-rtsopts=--nonmoving-gc"
executable gargantext-phylo-profile
main-is: Main.hs
other-modules:
Paths_gargantext
CLI.Phylo.Common
hs-source-dirs:
bin/gargantext-phylo-profile bin/gargantext-cli
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
build-depends:
base
, bytestring
, gargantext
, gargantext-prelude
, shelly
, text
, async
, cryptohash
, aeson
, split
, vector
, directory
default-language: GHC2021
executable garg-golden-file-diff
import:
defaults
...
...
Przemyslaw Kaminski
@cgenie
mentioned in commit
5660aec0
·
Oct 08, 2024
mentioned in commit
5660aec0
mentioned in commit 5660aec07ec5a0a0a5468f440092c1a8f57a864e
Toggle commit list
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment