Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
160
Issues
160
List
Board
Labels
Milestones
Merge Requests
14
Merge Requests
14
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
1905b024
Commit
1905b024
authored
Jul 01, 2024
by
Alfredo Di Napoli
1
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Port phylo to CLI executable
parent
409c8423
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
131 additions
and
172 deletions
+131
-172
Phylo.hs
bin/gargantext-cli/CLI/Phylo.hs
+104
-0
Common.hs
bin/gargantext-cli/CLI/Phylo/Common.hs
+5
-1
Types.hs
bin/gargantext-cli/CLI/Types.hs
+5
-0
Main.hs
bin/gargantext-cli/Main.hs
+5
-1
Main.hs
bin/gargantext-phylo-profile/Main.hs
+2
-3
Main.hs
bin/gargantext-phylo/Main.hs
+0
-117
gargantext.cabal
gargantext.cabal
+9
-49
Phylo.hs
test/Test/Offline/Phylo.hs
+1
-1
No files found.
bin/gargantext-cli/CLI/Phylo.hs
0 → 100644
View file @
1905b024
{-|
Module : Phylo.hs
Description : Gargantext starter binary with Adaptative Phylo
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Adaptative Phylo binaries
-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE Strict #-}
module
CLI.Phylo
where
import
CLI.Phylo.Common
import
CLI.Types
import
Data.Aeson
(
eitherDecodeFileStrict'
)
import
Data.List
(
nub
)
import
Data.Text
qualified
as
T
import
GHC.IO.Encoding
import
Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.API.Tools
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
import
Gargantext.Core.Viz.Phylo.PhyloMaker
(
toPhylo
,
toPhyloWithoutLink
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
,
setConfig
)
import
Gargantext.Prelude
hiding
(
hash
,
replace
)
import
Options.Applicative
import
System.Directory
(
doesFileExist
)
phyloCLI
::
PhyloArgs
->
IO
()
phyloCLI
(
PhyloArgs
configPath
)
=
do
setLocaleEncoding
utf8
config_e
<-
eitherDecodeFileStrict'
configPath
case
config_e
of
Left
err
->
panicTrace
$
T
.
pack
err
Right
config
->
do
currentLocale
<-
getLocaleEncoding
printIOMsg
$
"Machine locale: "
<>
show
currentLocale
printIOMsg
"Starting the reconstruction"
printIOMsg
"Parse the corpus"
mapList
<-
fileToList
(
listParser
config
)
(
listPath
config
)
corpus
<-
if
(
defaultMode
config
)
then
fileToDocsDefault
(
corpusParser
config
)
(
corpusPath
config
)
[
Year
3
1
5
,
Month
3
1
5
,
Week
4
2
5
]
mapList
else
fileToDocsAdvanced
(
corpusParser
config
)
(
corpusPath
config
)
(
timeUnit
config
)
mapList
printIOComment
(
show
(
length
corpus
)
<>
" parsed docs from the corpus"
)
printIOComment
(
show
(
length
$
nub
$
concat
$
map
text
corpus
)
<>
" Size ngs_coterms"
)
printIOComment
(
show
(
length
mapList
)
<>
" Size ngs_terms List Map Ngrams"
)
printIOMsg
"Reconstruct the phylo"
-- check the existing backup files
let
backupPhyloWithoutLink
=
(
outputPath
config
)
<>
"backupPhyloWithoutLink_"
<>
(
configToSha
BackupPhyloWithoutLink
config
)
<>
".json"
let
backupPhylo
=
(
outputPath
config
)
<>
"backupPhylo_"
<>
(
configToSha
BackupPhylo
config
)
<>
".json"
phyloWithoutLinkExists
<-
doesFileExist
backupPhyloWithoutLink
phyloExists
<-
doesFileExist
backupPhylo
-- reconstruct the phylo
phylo
<-
if
phyloExists
then
do
printIOMsg
"Reconstruct the phylo from an existing file"
readPhylo
backupPhylo
else
do
if
phyloWithoutLinkExists
then
do
printIOMsg
"Reconstruct the phylo from an existing file without links"
phyloWithoutLink
<-
readPhylo
backupPhyloWithoutLink
writePhylo
backupPhyloWithoutLink
phyloWithoutLink
pure
$
toPhylo
(
setConfig
config
phyloWithoutLink
)
else
do
printIOMsg
"Reconstruct the phylo from scratch"
phyloWithoutLink
<-
pure
$
toPhyloWithoutLink
corpus
config
writePhylo
backupPhyloWithoutLink
phyloWithoutLink
pure
$
toPhylo
(
setConfig
config
phyloWithoutLink
)
writePhylo
backupPhylo
phylo
printIOMsg
"End of reconstruction, start the export"
let
dot
=
toPhyloExport
(
setConfig
config
phylo
)
let
output
=
configToLabel
config
dotToFile
output
dot
phyloCmd
::
HasCallStack
=>
Mod
CommandFields
CLI
phyloCmd
=
command
"phylo"
(
info
(
helper
<*>
fmap
CLISub
phylo_p
)
(
progDesc
"Phylo toolkit."
))
phylo_p
::
Parser
CLICmd
phylo_p
=
fmap
CCMD_phylo
$
PhyloArgs
<$>
(
strOption
(
long
"config"
<>
metavar
"FILEPATH"
<>
help
"Path to a file containing a JSON to be parsed into a PhyloConfig"
)
)
bin/gargantext-
phylo
/Phylo/Common.hs
→
bin/gargantext-
cli/CLI
/Phylo/Common.hs
View file @
1905b024
{-# LANGUAGE OverloadedStrings #-}
module
Common
where
module
C
LI.Phylo.C
ommon
where
import
Control.Concurrent.Async
(
mapConcurrently
)
import
Crypto.Hash.SHA256
(
hash
)
...
...
@@ -99,6 +99,8 @@ tsvToDocs parser patterns time path =
(
map
(
T
.
strip
.
pack
)
$
splitOn
";"
(
unpack
$
(
tsv'_source
row
)))
time
)
<$>
snd
<$>
Tsv
.
readWeightedTsv
path
Csv
_
->
panicTrace
"CSV is currently not supported."
Csv'
_
->
panicTrace
"CSV is currently not supported."
-- To parse a file into a list of Document
...
...
@@ -109,6 +111,8 @@ fileToDocsAdvanced parser path time lst = do
Wos
limit
->
wosToDocs
limit
patterns
time
path
Tsv
_
->
tsvToDocs
parser
patterns
time
path
Tsv'
_
->
tsvToDocs
parser
patterns
time
path
Csv
_
->
panicTrace
"CSV is currently not supported."
Csv'
_
->
panicTrace
"CSV is currently not supported."
fileToDocsDefault
::
CorpusParser
->
FilePath
->
[
TimeUnit
]
->
TermList
->
IO
[
Document
]
fileToDocsDefault
parser
path
timeUnits
lst
=
...
...
bin/gargantext-cli/CLI/Types.hs
View file @
1905b024
...
...
@@ -55,6 +55,10 @@ data InvitationsArgs = InvitationsArgs
,
inv_email
::
!
Text
}
deriving
(
Show
,
Eq
)
data
PhyloArgs
=
PhyloArgs
{
phylo_config
::
!
FilePath
}
deriving
(
Show
,
Eq
)
data
CLICmd
=
CCMD_clean_csv_corpus
|
CCMD_filter_terms_and_cooc
!
CorpusFile
!
TermListFile
!
OutputFile
...
...
@@ -63,6 +67,7 @@ data CLICmd
|
CCMD_import
!
ImportArgs
|
CCMD_init
!
InitArgs
|
CCMD_invitations
!
InvitationsArgs
|
CCMD_phylo
!
PhyloArgs
deriving
(
Show
,
Eq
)
data
CLI
=
...
...
bin/gargantext-cli/Main.hs
View file @
1905b024
...
...
@@ -27,6 +27,7 @@ import CLI.Admin (adminCLI, adminCmd)
import
CLI.Import
(
importCLI
,
importCmd
)
import
CLI.Init
(
initCLI
,
initCmd
)
import
CLI.Invitations
(
invitationsCLI
,
invitationsCmd
)
import
CLI.Phylo
(
phyloCLI
,
phyloCmd
)
runCLI
::
CLI
->
IO
()
runCLI
=
\
case
...
...
@@ -44,6 +45,8 @@ runCLI = \case
->
initCLI
args
CLISub
(
CCMD_invitations
args
)
->
invitationsCLI
args
CLISub
(
CCMD_phylo
args
)
->
phyloCLI
args
main
::
IO
()
main
=
runCLI
=<<
execParser
opts
...
...
@@ -60,5 +63,6 @@ allOptions = subparser (
adminCmd
<>
importCmd
<>
initCmd
<>
invitationsCmd
invitationsCmd
<>
phyloCmd
)
bin/gargantext-phylo-profile/Main.hs
View file @
1905b024
{-# LANGUAGE OverloadedStrings #-}
module
Main
where
import
Common
import
C
LI.Phylo.C
ommon
import
Data.Aeson
import
Data.List
(
nub
)
import
Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.API.Tools
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
import
Gargantext.Core.Viz.Phylo.PhyloMaker
(
toPhylo
,
toPhyloWithoutLink
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
,
setConfig
,
toPeriods
,
getTimePeriod
,
getTimeStep
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
import
GHC.IO.Encoding
import
GHC.Stack
import
Paths_gargantext
import
Prelude
import
qualified
Data.Text
as
T
import
Shelly
import
System.Directory
...
...
bin/gargantext-phylo/Main.hs
deleted
100644 → 0
View file @
409c8423
{-|
Module : Main.hs
Description : Gargantext starter binary with Adaptative Phylo
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Adaptative Phylo binaries
-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE Strict #-}
module
Main
where
import
Control.Concurrent.Async
(
mapConcurrently
)
import
Crypto.Hash.SHA256
(
hash
)
import
Data.Aeson
import
Data.ByteString.Char8
qualified
as
C8
import
Data.List
(
nub
,
isSuffixOf
,
tail
)
import
Data.List.Split
import
Data.Maybe
(
fromJust
)
import
Data.Text
(
unpack
,
replace
,
pack
)
import
Data.Text
qualified
as
T
import
Data.Vector
qualified
as
Vector
import
GHC.IO.Encoding
import
Gargantext.API.Ngrams.Prelude
(
toTermList
)
import
Gargantext.API.Ngrams.Types
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.Corpus.Parsers
(
FileFormat
(
..
),
FileType
(
..
),
parseFile
)
import
Gargantext.Core.Text.Corpus.Parsers.TSV
(
tsv_title
,
tsv_abstract
,
tsv_publication_year
,
tsv_publication_month
,
tsv_publication_day
,
tsv'_source
,
tsv'_title
,
tsv'_abstract
,
tsv'_publication_year
,
tsv'_publication_month
,
tsv'_publication_day
,
tsv'_weight
)
import
Gargantext.Core.Text.Corpus.Parsers.TSV
qualified
as
Tsv
import
Gargantext.Core.Text.List.Formats.TSV
(
tsvMapTermList
)
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Text.Terms.WithList
(
Patterns
,
buildPatterns
,
extractTermsWithList
)
import
Gargantext.Core.Types.Main
(
ListType
(
..
))
import
Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.API.Tools
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
import
Gargantext.Core.Viz.Phylo.PhyloMaker
(
toPhylo
,
toPhyloWithoutLink
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
,
setConfig
,
toPeriods
,
getTimePeriod
,
getTimeStep
)
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Prelude
hiding
(
hash
,
replace
)
import
Prelude
qualified
import
System.Directory
(
listDirectory
,
doesFileExist
)
import
Common
main
::
IO
()
main
=
do
setLocaleEncoding
utf8
currentLocale
<-
getLocaleEncoding
printIOMsg
$
"Machine locale: "
<>
show
currentLocale
printIOMsg
"Starting the reconstruction"
printIOMsg
"Read the configuration file"
[
args
]
<-
getArgs
jsonArgs
<-
(
eitherDecode
<$>
readJson
args
)
::
IO
(
Either
Prelude
.
String
PhyloConfig
)
case
jsonArgs
of
Left
err
->
putStrLn
err
Right
config
->
do
printIOMsg
"Parse the corpus"
mapList
<-
fileToList
(
listParser
config
)
(
listPath
config
)
corpus
<-
if
(
defaultMode
config
)
then
fileToDocsDefault
(
corpusParser
config
)
(
corpusPath
config
)
[
Year
3
1
5
,
Month
3
1
5
,
Week
4
2
5
]
mapList
else
fileToDocsAdvanced
(
corpusParser
config
)
(
corpusPath
config
)
(
timeUnit
config
)
mapList
printIOComment
(
show
(
length
corpus
)
<>
" parsed docs from the corpus"
)
printIOComment
(
show
(
length
$
nub
$
concat
$
map
text
corpus
)
<>
" Size ngs_coterms"
)
printIOComment
(
show
(
length
mapList
)
<>
" Size ngs_terms List Map Ngrams"
)
printIOMsg
"Reconstruct the phylo"
-- check the existing backup files
let
backupPhyloWithoutLink
=
(
outputPath
config
)
<>
"backupPhyloWithoutLink_"
<>
(
configToSha
BackupPhyloWithoutLink
config
)
<>
".json"
let
backupPhylo
=
(
outputPath
config
)
<>
"backupPhylo_"
<>
(
configToSha
BackupPhylo
config
)
<>
".json"
phyloWithoutLinkExists
<-
doesFileExist
backupPhyloWithoutLink
phyloExists
<-
doesFileExist
backupPhylo
-- reconstruct the phylo
phylo
<-
if
phyloExists
then
do
printIOMsg
"Reconstruct the phylo from an existing file"
readPhylo
backupPhylo
else
do
if
phyloWithoutLinkExists
then
do
printIOMsg
"Reconstruct the phylo from an existing file without links"
phyloWithoutLink
<-
readPhylo
backupPhyloWithoutLink
writePhylo
backupPhyloWithoutLink
phyloWithoutLink
pure
$
toPhylo
(
setConfig
config
phyloWithoutLink
)
else
do
printIOMsg
"Reconstruct the phylo from scratch"
phyloWithoutLink
<-
pure
$
toPhyloWithoutLink
corpus
config
writePhylo
backupPhyloWithoutLink
phyloWithoutLink
pure
$
toPhylo
(
setConfig
config
phyloWithoutLink
)
writePhylo
backupPhylo
phylo
printIOMsg
"End of reconstruction, start the export"
let
dot
=
toPhyloExport
(
setConfig
config
phylo
)
let
output
=
configToLabel
config
dotToFile
output
dot
gargantext.cabal
View file @
1905b024
...
...
@@ -705,6 +705,8 @@ executable gargantext-cli
CLI.Init
CLI.Invitations
CLI.ObfuscateDB
CLI.Phylo
CLI.Phylo.Common
CLI.Types
CLI.Utils
Paths_gargantext
...
...
@@ -716,6 +718,8 @@ executable gargantext-cli
, bytestring ^>= 0.10.12.0
, cassava ^>= 0.5.2.0
, containers ^>= 0.6.5.1
, cryptohash ^>= 0.11.9
, directory ^>= 1.3.6.0
, extra ^>= 1.7.9
, full-text-search ^>= 0.2.1.4
, gargantext
...
...
@@ -723,55 +727,11 @@ executable gargantext-cli
, ini ^>= 0.4.1
, optparse-applicative
, optparse-generic ^>= 1.4.7
, parallel ^>= 3.2.2.0
, postgresql-simple ^>= 0.6.4
, protolude ^>= 0.3.3
, split ^>= 0.2.3.4
, text ^>= 1.2.4.1
, unordered-containers ^>= 0.2.16.0
, vector ^>= 0.12.3.0
executable gargantext-phylo
import:
defaults
, optimized
main-is: Main.hs
other-modules:
Paths_gargantext
Common
hs-source-dirs:
bin/gargantext-phylo bin/gargantext-phylo/Phylo
default-extensions:
DataKinds
DeriveGeneric
FlexibleContexts
FlexibleInstances
GADTs
GeneralizedNewtypeDeriving
ImportQualifiedPost
MultiParamTypeClasses
NamedFieldPuns
NoImplicitPrelude
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
build-depends:
aeson ^>= 1.5.6.0
, async ^>= 2.2.4
, bytestring ^>= 0.10.12.0
, cassava ^>= 0.5.2.0
, containers ^>= 0.6.5.1
, cryptohash ^>= 0.11.9
, directory ^>= 1.3.6.0
, extra ^>= 1.7.9
, gargantext
, gargantext-prelude
, ini ^>= 0.4.1
, optparse-generic ^>= 1.4.7
, parallel ^>= 3.2.2.0
, split ^>= 0.2.3.4
, text ^>= 1.2.4.1
, time ^>= 1.9.3
, unordered-containers ^>= 0.2.16.0
, vector ^>= 0.7.3
...
...
@@ -823,7 +783,7 @@ test-suite garg-test-tasty
main-is: drivers/tasty/Main.hs
other-modules:
Test.API.Routes
Common
C
LI.Phylo.C
ommon
Test.API.Setup
Test.Core.Similarity
Test.Core.Text
...
...
@@ -859,7 +819,7 @@ test-suite garg-test-tasty
Test.Utils.Jobs
Paths_gargantext
hs-source-dirs:
test bin/gargantext-
phylo/Phylo
test bin/gargantext-
cli
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N
build-depends:
QuickCheck ^>= 2.14.2
...
...
@@ -1042,9 +1002,9 @@ executable gargantext-phylo-profile
main-is: Main.hs
other-modules:
Paths_gargantext
Common
C
LI.Phylo.C
ommon
hs-source-dirs:
bin/gargantext-phylo-profile bin/gargantext-
phylo/Phylo
bin/gargantext-phylo-profile bin/gargantext-
cli
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
build-depends:
base
...
...
test/Test/Offline/Phylo.hs
View file @
1905b024
...
...
@@ -5,7 +5,7 @@
module
Test.Offline.Phylo
(
tests
)
where
import
Common
import
C
LI.Phylo.C
ommon
import
Data.Aeson
as
JSON
import
Data.Aeson.Types
qualified
as
JSON
import
Data.GraphViz.Attributes.Complete
qualified
as
Graphviz
...
...
Przemyslaw Kaminski
@cgenie
mentioned in commit
5660aec0
·
Oct 08, 2024
mentioned in commit
5660aec0
mentioned in commit 5660aec07ec5a0a0a5468f440092c1a8f57a864e
Toggle commit list
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment