Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
159
Issues
159
List
Board
Labels
Milestones
Merge Requests
7
Merge Requests
7
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
1905b024
Commit
1905b024
authored
Jul 01, 2024
by
Alfredo Di Napoli
1
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Port phylo to CLI executable
parent
409c8423
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
131 additions
and
172 deletions
+131
-172
Phylo.hs
bin/gargantext-cli/CLI/Phylo.hs
+104
-0
Common.hs
bin/gargantext-cli/CLI/Phylo/Common.hs
+5
-1
Types.hs
bin/gargantext-cli/CLI/Types.hs
+5
-0
Main.hs
bin/gargantext-cli/Main.hs
+5
-1
Main.hs
bin/gargantext-phylo-profile/Main.hs
+2
-3
Main.hs
bin/gargantext-phylo/Main.hs
+0
-117
gargantext.cabal
gargantext.cabal
+9
-49
Phylo.hs
test/Test/Offline/Phylo.hs
+1
-1
No files found.
bin/gargantext-cli/CLI/Phylo.hs
0 → 100644
View file @
1905b024
{-|
Module : Phylo.hs
Description : Gargantext starter binary with Adaptative Phylo
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Adaptative Phylo binaries
-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE Strict #-}
module
CLI.Phylo
where
import
CLI.Phylo.Common
import
CLI.Types
import
Data.Aeson
(
eitherDecodeFileStrict'
)
import
Data.List
(
nub
)
import
Data.Text
qualified
as
T
import
GHC.IO.Encoding
import
Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.API.Tools
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
import
Gargantext.Core.Viz.Phylo.PhyloMaker
(
toPhylo
,
toPhyloWithoutLink
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
,
setConfig
)
import
Gargantext.Prelude
hiding
(
hash
,
replace
)
import
Options.Applicative
import
System.Directory
(
doesFileExist
)
phyloCLI
::
PhyloArgs
->
IO
()
phyloCLI
(
PhyloArgs
configPath
)
=
do
setLocaleEncoding
utf8
config_e
<-
eitherDecodeFileStrict'
configPath
case
config_e
of
Left
err
->
panicTrace
$
T
.
pack
err
Right
config
->
do
currentLocale
<-
getLocaleEncoding
printIOMsg
$
"Machine locale: "
<>
show
currentLocale
printIOMsg
"Starting the reconstruction"
printIOMsg
"Parse the corpus"
mapList
<-
fileToList
(
listParser
config
)
(
listPath
config
)
corpus
<-
if
(
defaultMode
config
)
then
fileToDocsDefault
(
corpusParser
config
)
(
corpusPath
config
)
[
Year
3
1
5
,
Month
3
1
5
,
Week
4
2
5
]
mapList
else
fileToDocsAdvanced
(
corpusParser
config
)
(
corpusPath
config
)
(
timeUnit
config
)
mapList
printIOComment
(
show
(
length
corpus
)
<>
" parsed docs from the corpus"
)
printIOComment
(
show
(
length
$
nub
$
concat
$
map
text
corpus
)
<>
" Size ngs_coterms"
)
printIOComment
(
show
(
length
mapList
)
<>
" Size ngs_terms List Map Ngrams"
)
printIOMsg
"Reconstruct the phylo"
-- check the existing backup files
let
backupPhyloWithoutLink
=
(
outputPath
config
)
<>
"backupPhyloWithoutLink_"
<>
(
configToSha
BackupPhyloWithoutLink
config
)
<>
".json"
let
backupPhylo
=
(
outputPath
config
)
<>
"backupPhylo_"
<>
(
configToSha
BackupPhylo
config
)
<>
".json"
phyloWithoutLinkExists
<-
doesFileExist
backupPhyloWithoutLink
phyloExists
<-
doesFileExist
backupPhylo
-- reconstruct the phylo
phylo
<-
if
phyloExists
then
do
printIOMsg
"Reconstruct the phylo from an existing file"
readPhylo
backupPhylo
else
do
if
phyloWithoutLinkExists
then
do
printIOMsg
"Reconstruct the phylo from an existing file without links"
phyloWithoutLink
<-
readPhylo
backupPhyloWithoutLink
writePhylo
backupPhyloWithoutLink
phyloWithoutLink
pure
$
toPhylo
(
setConfig
config
phyloWithoutLink
)
else
do
printIOMsg
"Reconstruct the phylo from scratch"
phyloWithoutLink
<-
pure
$
toPhyloWithoutLink
corpus
config
writePhylo
backupPhyloWithoutLink
phyloWithoutLink
pure
$
toPhylo
(
setConfig
config
phyloWithoutLink
)
writePhylo
backupPhylo
phylo
printIOMsg
"End of reconstruction, start the export"
let
dot
=
toPhyloExport
(
setConfig
config
phylo
)
let
output
=
configToLabel
config
dotToFile
output
dot
phyloCmd
::
HasCallStack
=>
Mod
CommandFields
CLI
phyloCmd
=
command
"phylo"
(
info
(
helper
<*>
fmap
CLISub
phylo_p
)
(
progDesc
"Phylo toolkit."
))
phylo_p
::
Parser
CLICmd
phylo_p
=
fmap
CCMD_phylo
$
PhyloArgs
<$>
(
strOption
(
long
"config"
<>
metavar
"FILEPATH"
<>
help
"Path to a file containing a JSON to be parsed into a PhyloConfig"
)
)
bin/gargantext-
phylo
/Phylo/Common.hs
→
bin/gargantext-
cli/CLI
/Phylo/Common.hs
View file @
1905b024
{-# LANGUAGE OverloadedStrings #-}
module
Common
where
module
C
LI.Phylo.C
ommon
where
import
Control.Concurrent.Async
(
mapConcurrently
)
import
Crypto.Hash.SHA256
(
hash
)
...
...
@@ -99,6 +99,8 @@ tsvToDocs parser patterns time path =
(
map
(
T
.
strip
.
pack
)
$
splitOn
";"
(
unpack
$
(
tsv'_source
row
)))
time
)
<$>
snd
<$>
Tsv
.
readWeightedTsv
path
Csv
_
->
panicTrace
"CSV is currently not supported."
Csv'
_
->
panicTrace
"CSV is currently not supported."
-- To parse a file into a list of Document
...
...
@@ -109,6 +111,8 @@ fileToDocsAdvanced parser path time lst = do
Wos
limit
->
wosToDocs
limit
patterns
time
path
Tsv
_
->
tsvToDocs
parser
patterns
time
path
Tsv'
_
->
tsvToDocs
parser
patterns
time
path
Csv
_
->
panicTrace
"CSV is currently not supported."
Csv'
_
->
panicTrace
"CSV is currently not supported."
fileToDocsDefault
::
CorpusParser
->
FilePath
->
[
TimeUnit
]
->
TermList
->
IO
[
Document
]
fileToDocsDefault
parser
path
timeUnits
lst
=
...
...
bin/gargantext-cli/CLI/Types.hs
View file @
1905b024
...
...
@@ -55,6 +55,10 @@ data InvitationsArgs = InvitationsArgs
,
inv_email
::
!
Text
}
deriving
(
Show
,
Eq
)
data
PhyloArgs
=
PhyloArgs
{
phylo_config
::
!
FilePath
}
deriving
(
Show
,
Eq
)
data
CLICmd
=
CCMD_clean_csv_corpus
|
CCMD_filter_terms_and_cooc
!
CorpusFile
!
TermListFile
!
OutputFile
...
...
@@ -63,6 +67,7 @@ data CLICmd
|
CCMD_import
!
ImportArgs
|
CCMD_init
!
InitArgs
|
CCMD_invitations
!
InvitationsArgs
|
CCMD_phylo
!
PhyloArgs
deriving
(
Show
,
Eq
)
data
CLI
=
...
...
bin/gargantext-cli/Main.hs
View file @
1905b024
...
...
@@ -27,6 +27,7 @@ import CLI.Admin (adminCLI, adminCmd)
import
CLI.Import
(
importCLI
,
importCmd
)
import
CLI.Init
(
initCLI
,
initCmd
)
import
CLI.Invitations
(
invitationsCLI
,
invitationsCmd
)
import
CLI.Phylo
(
phyloCLI
,
phyloCmd
)
runCLI
::
CLI
->
IO
()
runCLI
=
\
case
...
...
@@ -44,6 +45,8 @@ runCLI = \case
->
initCLI
args
CLISub
(
CCMD_invitations
args
)
->
invitationsCLI
args
CLISub
(
CCMD_phylo
args
)
->
phyloCLI
args
main
::
IO
()
main
=
runCLI
=<<
execParser
opts
...
...
@@ -60,5 +63,6 @@ allOptions = subparser (
adminCmd
<>
importCmd
<>
initCmd
<>
invitationsCmd
invitationsCmd
<>
phyloCmd
)
bin/gargantext-phylo-profile/Main.hs
View file @
1905b024
{-# LANGUAGE OverloadedStrings #-}
module
Main
where
import
Common
import
C
LI.Phylo.C
ommon
import
Data.Aeson
import
Data.List
(
nub
)
import
Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.API.Tools
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
import
Gargantext.Core.Viz.Phylo.PhyloMaker
(
toPhylo
,
toPhyloWithoutLink
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
,
setConfig
,
toPeriods
,
getTimePeriod
,
getTimeStep
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
import
GHC.IO.Encoding
import
GHC.Stack
import
Paths_gargantext
import
Prelude
import
qualified
Data.Text
as
T
import
Shelly
import
System.Directory
...
...
bin/gargantext-phylo/Main.hs
deleted
100644 → 0
View file @
409c8423
{-|
Module : Main.hs
Description : Gargantext starter binary with Adaptative Phylo
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Adaptative Phylo binaries
-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE Strict #-}
module
Main
where
import
Control.Concurrent.Async
(
mapConcurrently
)
import
Crypto.Hash.SHA256
(
hash
)
import
Data.Aeson
import
Data.ByteString.Char8
qualified
as
C8
import
Data.List
(
nub
,
isSuffixOf
,
tail
)
import
Data.List.Split
import
Data.Maybe
(
fromJust
)
import
Data.Text
(
unpack
,
replace
,
pack
)
import
Data.Text
qualified
as
T
import
Data.Vector
qualified
as
Vector
import
GHC.IO.Encoding
import
Gargantext.API.Ngrams.Prelude
(
toTermList
)
import
Gargantext.API.Ngrams.Types
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.Corpus.Parsers
(
FileFormat
(
..
),
FileType
(
..
),
parseFile
)
import
Gargantext.Core.Text.Corpus.Parsers.TSV
(
tsv_title
,
tsv_abstract
,
tsv_publication_year
,
tsv_publication_month
,
tsv_publication_day
,
tsv'_source
,
tsv'_title
,
tsv'_abstract
,
tsv'_publication_year
,
tsv'_publication_month
,
tsv'_publication_day
,
tsv'_weight
)
import
Gargantext.Core.Text.Corpus.Parsers.TSV
qualified
as
Tsv
import
Gargantext.Core.Text.List.Formats.TSV
(
tsvMapTermList
)
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Text.Terms.WithList
(
Patterns
,
buildPatterns
,
extractTermsWithList
)
import
Gargantext.Core.Types.Main
(
ListType
(
..
))
import
Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.API.Tools
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
import
Gargantext.Core.Viz.Phylo.PhyloMaker
(
toPhylo
,
toPhyloWithoutLink
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
,
setConfig
,
toPeriods
,
getTimePeriod
,
getTimeStep
)
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Prelude
hiding
(
hash
,
replace
)
import
Prelude
qualified
import
System.Directory
(
listDirectory
,
doesFileExist
)
import
Common
main
::
IO
()
main
=
do
setLocaleEncoding
utf8
currentLocale
<-
getLocaleEncoding
printIOMsg
$
"Machine locale: "
<>
show
currentLocale
printIOMsg
"Starting the reconstruction"
printIOMsg
"Read the configuration file"
[
args
]
<-
getArgs
jsonArgs
<-
(
eitherDecode
<$>
readJson
args
)
::
IO
(
Either
Prelude
.
String
PhyloConfig
)
case
jsonArgs
of
Left
err
->
putStrLn
err
Right
config
->
do
printIOMsg
"Parse the corpus"
mapList
<-
fileToList
(
listParser
config
)
(
listPath
config
)
corpus
<-
if
(
defaultMode
config
)
then
fileToDocsDefault
(
corpusParser
config
)
(
corpusPath
config
)
[
Year
3
1
5
,
Month
3
1
5
,
Week
4
2
5
]
mapList
else
fileToDocsAdvanced
(
corpusParser
config
)
(
corpusPath
config
)
(
timeUnit
config
)
mapList
printIOComment
(
show
(
length
corpus
)
<>
" parsed docs from the corpus"
)
printIOComment
(
show
(
length
$
nub
$
concat
$
map
text
corpus
)
<>
" Size ngs_coterms"
)
printIOComment
(
show
(
length
mapList
)
<>
" Size ngs_terms List Map Ngrams"
)
printIOMsg
"Reconstruct the phylo"
-- check the existing backup files
let
backupPhyloWithoutLink
=
(
outputPath
config
)
<>
"backupPhyloWithoutLink_"
<>
(
configToSha
BackupPhyloWithoutLink
config
)
<>
".json"
let
backupPhylo
=
(
outputPath
config
)
<>
"backupPhylo_"
<>
(
configToSha
BackupPhylo
config
)
<>
".json"
phyloWithoutLinkExists
<-
doesFileExist
backupPhyloWithoutLink
phyloExists
<-
doesFileExist
backupPhylo
-- reconstruct the phylo
phylo
<-
if
phyloExists
then
do
printIOMsg
"Reconstruct the phylo from an existing file"
readPhylo
backupPhylo
else
do
if
phyloWithoutLinkExists
then
do
printIOMsg
"Reconstruct the phylo from an existing file without links"
phyloWithoutLink
<-
readPhylo
backupPhyloWithoutLink
writePhylo
backupPhyloWithoutLink
phyloWithoutLink
pure
$
toPhylo
(
setConfig
config
phyloWithoutLink
)
else
do
printIOMsg
"Reconstruct the phylo from scratch"
phyloWithoutLink
<-
pure
$
toPhyloWithoutLink
corpus
config
writePhylo
backupPhyloWithoutLink
phyloWithoutLink
pure
$
toPhylo
(
setConfig
config
phyloWithoutLink
)
writePhylo
backupPhylo
phylo
printIOMsg
"End of reconstruction, start the export"
let
dot
=
toPhyloExport
(
setConfig
config
phylo
)
let
output
=
configToLabel
config
dotToFile
output
dot
gargantext.cabal
View file @
1905b024
...
...
@@ -705,6 +705,8 @@ executable gargantext-cli
CLI.Init
CLI.Invitations
CLI.ObfuscateDB
CLI.Phylo
CLI.Phylo.Common
CLI.Types
CLI.Utils
Paths_gargantext
...
...
@@ -716,6 +718,8 @@ executable gargantext-cli
, bytestring ^>= 0.10.12.0
, cassava ^>= 0.5.2.0
, containers ^>= 0.6.5.1
, cryptohash ^>= 0.11.9
, directory ^>= 1.3.6.0
, extra ^>= 1.7.9
, full-text-search ^>= 0.2.1.4
, gargantext
...
...
@@ -723,55 +727,11 @@ executable gargantext-cli
, ini ^>= 0.4.1
, optparse-applicative
, optparse-generic ^>= 1.4.7
, parallel ^>= 3.2.2.0
, postgresql-simple ^>= 0.6.4
, protolude ^>= 0.3.3
, split ^>= 0.2.3.4
, text ^>= 1.2.4.1
, unordered-containers ^>= 0.2.16.0
, vector ^>= 0.12.3.0
executable gargantext-phylo
import:
defaults
, optimized
main-is: Main.hs
other-modules:
Paths_gargantext
Common
hs-source-dirs:
bin/gargantext-phylo bin/gargantext-phylo/Phylo
default-extensions:
DataKinds
DeriveGeneric
FlexibleContexts
FlexibleInstances
GADTs
GeneralizedNewtypeDeriving
ImportQualifiedPost
MultiParamTypeClasses
NamedFieldPuns
NoImplicitPrelude
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
build-depends:
aeson ^>= 1.5.6.0
, async ^>= 2.2.4
, bytestring ^>= 0.10.12.0
, cassava ^>= 0.5.2.0
, containers ^>= 0.6.5.1
, cryptohash ^>= 0.11.9
, directory ^>= 1.3.6.0
, extra ^>= 1.7.9
, gargantext
, gargantext-prelude
, ini ^>= 0.4.1
, optparse-generic ^>= 1.4.7
, parallel ^>= 3.2.2.0
, split ^>= 0.2.3.4
, text ^>= 1.2.4.1
, time ^>= 1.9.3
, unordered-containers ^>= 0.2.16.0
, vector ^>= 0.7.3
...
...
@@ -823,7 +783,7 @@ test-suite garg-test-tasty
main-is: drivers/tasty/Main.hs
other-modules:
Test.API.Routes
Common
C
LI.Phylo.C
ommon
Test.API.Setup
Test.Core.Similarity
Test.Core.Text
...
...
@@ -859,7 +819,7 @@ test-suite garg-test-tasty
Test.Utils.Jobs
Paths_gargantext
hs-source-dirs:
test bin/gargantext-
phylo/Phylo
test bin/gargantext-
cli
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N
build-depends:
QuickCheck ^>= 2.14.2
...
...
@@ -1042,9 +1002,9 @@ executable gargantext-phylo-profile
main-is: Main.hs
other-modules:
Paths_gargantext
Common
C
LI.Phylo.C
ommon
hs-source-dirs:
bin/gargantext-phylo-profile bin/gargantext-
phylo/Phylo
bin/gargantext-phylo-profile bin/gargantext-
cli
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
build-depends:
base
...
...
test/Test/Offline/Phylo.hs
View file @
1905b024
...
...
@@ -5,7 +5,7 @@
module
Test.Offline.Phylo
(
tests
)
where
import
Common
import
C
LI.Phylo.C
ommon
import
Data.Aeson
as
JSON
import
Data.Aeson.Types
qualified
as
JSON
import
Data.GraphViz.Attributes.Complete
qualified
as
Graphviz
...
...
Przemyslaw Kaminski
@cgenie
mentioned in commit
5660aec0
·
Oct 08, 2024
mentioned in commit
5660aec0
mentioned in commit 5660aec07ec5a0a0a5468f440092c1a8f57a864e
Toggle commit list
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment