Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
85fcd70b
Commit
85fcd70b
authored
Feb 07, 2022
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FEAT][PHYLO] preparing integration to backend
parent
92316028
Changes
11
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
444 additions
and
665 deletions
+444
-665
Main.hs
bin/gargantext-adaptative-phylo/Main.hs
+0
-359
Main.hs
bin/gargantext-phylo/Main.hs
+301
-161
package.yaml
package.yaml
+3
-3
MaxClique.hs
src/Gargantext/Core/Methods/Graph/MaxClique.hs
+1
-1
Phylo.hs
src/Gargantext/Core/Viz/Phylo.hs
+81
-64
PhyloExample.hs
src/Gargantext/Core/Viz/Phylo/PhyloExample.hs
+8
-11
PhyloExport.hs
src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
+12
-15
PhyloMaker.hs
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
+16
-15
PhyloTools.hs
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
+8
-13
SynchronicClustering.hs
src/Gargantext/Core/Viz/Phylo/SynchronicClustering.hs
+9
-13
TemporalMatching.hs
src/Gargantext/Core/Viz/Phylo/TemporalMatching.hs
+5
-10
No files found.
bin/gargantext-adaptative-phylo/Main.hs
deleted
100644 → 0
View file @
92316028
{-|
Module : Main.hs
Description : Gargantext starter binary with Adaptative Phylo
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Adaptative Phylo binaries
-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE Strict #-}
module
Main
where
import
Control.Concurrent.Async
(
mapConcurrently
)
import
Crypto.Hash.SHA256
(
hash
)
import
Data.Aeson
import
Data.Either
(
Either
(
..
))
import
Data.List
(
concat
,
nub
,
isSuffixOf
)
import
Data.Maybe
(
fromMaybe
)
import
Data.String
(
String
)
import
GHC.IO
(
FilePath
)
import
qualified
Prelude
as
Prelude
import
System.Environment
import
System.Directory
(
listDirectory
,
doesFileExist
)
import
Data.Text
(
Text
,
unwords
,
unpack
,
replace
,
pack
)
import
Data.Time.Calendar
(
fromGregorian
,
diffGregorianDurationClip
,
cdMonths
,
diffDays
,
showGregorian
)
import
qualified
Data.ByteString.Char8
as
C8
import
qualified
Data.ByteString.Lazy
as
Lazy
import
qualified
Data.Vector
as
Vector
import
qualified
Data.Text
as
T
import
Gargantext.Prelude
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
csv_title
,
csv_abstract
,
csv_publication_year
,
csv_publication_month
,
csv_publication_day
,
csv'_source
,
csv'_title
,
csv'_abstract
,
csv'_publication_year
,
csv'_publication_month
,
csv'_publication_day
,
csv'_weight
)
import
qualified
Gargantext.Core.Text.Corpus.Parsers.CSV
as
Csv
import
Gargantext.Core.Text.Corpus.Parsers
(
FileFormat
(
..
),
parseFile
)
import
Gargantext.Core.Text.List.Formats.CSV
(
csvMapTermList
)
import
Gargantext.Core.Text.Terms.WithList
(
Patterns
,
buildPatterns
,
extractTermsWithList
)
import
Gargantext.Core.Viz.AdaptativePhylo
import
Gargantext.Core.Viz.Phylo.PhyloMaker
(
toPhylo
,
toPhyloStep
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
,
setConfig
)
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
-- import Gargantext.API.Ngrams.Prelude (toTermList)
-- import Debug.Trace (trace)
data
PhyloStage
=
PhyloWithCliques
|
PhyloWithLinks
deriving
(
Show
)
---------------
-- | Tools | --
---------------
-- | To get all the files in a directory or just a file
getFilesFromPath
::
FilePath
->
IO
([
FilePath
])
getFilesFromPath
path
=
do
if
(
isSuffixOf
"/"
path
)
then
(
listDirectory
path
)
else
return
[
path
]
---------------
-- | Dates | --
---------------
toMonths
::
Integer
->
Int
->
Int
->
Date
toMonths
y
m
d
=
fromIntegral
$
cdMonths
$
diffGregorianDurationClip
(
fromGregorian
y
m
d
)
(
fromGregorian
0000
0
0
)
toDays
::
Integer
->
Int
->
Int
->
Date
toDays
y
m
d
=
fromIntegral
$
diffDays
(
fromGregorian
y
m
d
)
(
fromGregorian
0000
0
0
)
toPhyloDate
::
Int
->
Int
->
Int
->
TimeUnit
->
Date
toPhyloDate
y
m
d
tu
=
case
tu
of
Year
_
_
_
->
y
Month
_
_
_
->
toMonths
(
Prelude
.
toInteger
y
)
m
d
Week
_
_
_
->
div
(
toDays
(
Prelude
.
toInteger
y
)
m
d
)
7
Day
_
_
_
->
toDays
(
Prelude
.
toInteger
y
)
m
d
toPhyloDate'
::
Int
->
Int
->
Int
->
Text
toPhyloDate'
y
m
d
=
pack
$
showGregorian
$
fromGregorian
(
Prelude
.
toInteger
y
)
m
d
--------------
-- | Json | --
--------------
-- | To read and decode a Json file
readJson
::
FilePath
->
IO
Lazy
.
ByteString
readJson
path
=
Lazy
.
readFile
path
----------------
-- | Parser | --
----------------
-- | To filter the Ngrams of a document based on the termList
termsInText
::
Patterns
->
Text
->
[
Text
]
termsInText
pats
txt
=
nub
$
concat
$
map
(
map
unwords
)
$
extractTermsWithList
pats
txt
-- | To transform a Wos file (or [file]) into a list of Docs
wosToDocs
::
Int
->
Patterns
->
TimeUnit
->
FilePath
->
IO
[
Document
]
wosToDocs
limit
patterns
time
path
=
do
files
<-
getFilesFromPath
path
let
parseFile'
file
=
do
eParsed
<-
parseFile
WOS
(
path
<>
file
)
case
eParsed
of
Right
ps
->
pure
ps
Left
e
->
panic
$
"Error: "
<>
(
pack
e
)
take
limit
<$>
map
(
\
d
->
let
title
=
fromJust
$
_hd_title
d
abstr
=
if
(
isJust
$
_hd_abstract
d
)
then
fromJust
$
_hd_abstract
d
else
""
in
Document
(
toPhyloDate
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
(
fromJust
$
_hd_publication_month
d
)
(
fromJust
$
_hd_publication_day
d
)
time
)
(
toPhyloDate'
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
(
fromJust
$
_hd_publication_month
d
)
(
fromJust
$
_hd_publication_day
d
))
(
termsInText
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
)
<$>
concat
<$>
mapConcurrently
(
\
file
->
filter
(
\
d
->
(
isJust
$
_hd_publication_year
d
)
&&
(
isJust
$
_hd_title
d
))
<$>
parseFile'
file
)
files
-- To transform a Csv file into a list of Document
csvToDocs
::
CorpusParser
->
Patterns
->
TimeUnit
->
FilePath
->
IO
[
Document
]
csvToDocs
parser
patterns
time
path
=
case
parser
of
Wos
_
->
undefined
Csv
limit
->
do
eR
<-
Csv
.
readFile
path
case
eR
of
Right
r
->
pure
$
Vector
.
toList
$
Vector
.
take
limit
$
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
Csv
.
fromMIntOrDec
Csv
.
defaultYear
$
csv_publication_year
row
)
(
fromMaybe
Csv
.
defaultMonth
$
csv_publication_month
row
)
(
fromMaybe
Csv
.
defaultDay
$
csv_publication_day
row
)
time
)
(
toPhyloDate'
(
Csv
.
fromMIntOrDec
Csv
.
defaultYear
$
csv_publication_year
row
)
(
fromMaybe
Csv
.
defaultMonth
$
csv_publication_month
row
)
(
fromMaybe
Csv
.
defaultDay
$
csv_publication_day
row
))
(
termsInText
patterns
$
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
))
Nothing
[]
)
$
snd
r
Left
e
->
panic
$
"Error: "
<>
(
pack
e
)
Csv'
limit
->
Vector
.
toList
<$>
Vector
.
take
limit
<$>
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
csv'_publication_year
row
)
(
csv'_publication_month
row
)
(
csv'_publication_day
row
)
time
)
(
toPhyloDate'
(
csv'_publication_year
row
)
(
csv'_publication_month
row
)
(
csv'_publication_day
row
))
(
termsInText
patterns
$
(
csv'_title
row
)
<>
" "
<>
(
csv'_abstract
row
))
(
Just
$
csv'_weight
row
)
[
csv'_source
row
]
)
<$>
snd
<$>
Csv
.
readWeightedCsv
path
-- To parse a file into a list of Document
fileToDocs'
::
CorpusParser
->
FilePath
->
TimeUnit
->
TermList
->
IO
[
Document
]
fileToDocs'
parser
path
time
lst
=
do
let
patterns
=
buildPatterns
lst
case
parser
of
Wos
limit
->
wosToDocs
limit
patterns
time
path
Csv
_
->
csvToDocs
parser
patterns
time
path
Csv'
_
->
csvToDocs
parser
patterns
time
path
---------------
-- | Label | --
---------------
-- Config time parameters to label
timeToLabel
::
Config
->
[
Char
]
timeToLabel
config
=
case
(
timeUnit
config
)
of
Year
p
s
f
->
(
"time_years"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Month
p
s
f
->
(
"time_months"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Week
p
s
f
->
(
"time_weeks"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Day
p
s
f
->
(
"time_days"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
seaToLabel
::
Config
->
[
Char
]
seaToLabel
config
=
case
(
seaElevation
config
)
of
Constante
start
step
->
(
"sea_cst_"
<>
(
show
start
)
<>
"_"
<>
(
show
step
))
Adaptative
granularity
->
(
"sea_adapt"
<>
(
show
granularity
))
sensToLabel
::
Config
->
[
Char
]
sensToLabel
config
=
case
(
phyloProximity
config
)
of
Hamming
->
undefined
WeightedLogJaccard
s
->
(
"WeightedLogJaccard_"
<>
show
s
)
WeightedLogSim
s
->
(
"WeightedLogSim-sens_"
<>
show
s
)
cliqueToLabel
::
Config
->
[
Char
]
cliqueToLabel
config
=
case
(
clique
config
)
of
Fis
s
s'
->
"fis_"
<>
(
show
s
)
<>
"_"
<>
(
show
s'
)
MaxClique
s
t
f
->
"clique_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
)
<>
"_"
<>
(
show
t
)
syncToLabel
::
Config
->
[
Char
]
syncToLabel
config
=
case
(
phyloSynchrony
config
)
of
ByProximityThreshold
scl
sync_sens
scope
_
->
(
"scale_"
<>
(
show
scope
)
<>
"_"
<>
(
show
sync_sens
)
<>
"_"
<>
(
show
scl
))
ByProximityDistribution
_
_
->
undefined
qualToConfig
::
Config
->
[
Char
]
qualToConfig
config
=
case
(
phyloQuality
config
)
of
Quality
g
m
->
"quality_"
<>
(
show
g
)
<>
"_"
<>
(
show
m
)
-- To set up the export file's label from the configuration
configToLabel
::
Config
->
[
Char
]
configToLabel
config
=
outputPath
config
<>
(
unpack
$
phyloName
config
)
<>
"-"
<>
(
timeToLabel
config
)
<>
"-scale_"
<>
(
show
(
phyloLevel
config
))
<>
"-"
<>
(
seaToLabel
config
)
<>
"-"
<>
(
sensToLabel
config
)
<>
"-"
<>
(
cliqueToLabel
config
)
<>
"-level_"
<>
(
show
(
_qua_granularity
$
phyloQuality
config
))
<>
"-"
<>
(
syncToLabel
config
)
<>
".dot"
-- To write a sha256 from a set of config's parameters
configToSha
::
PhyloStage
->
Config
->
[
Char
]
configToSha
stage
config
=
unpack
$
replace
"/"
"-"
$
T
.
pack
(
show
(
hash
$
C8
.
pack
label
))
where
label
::
[
Char
]
label
=
case
stage
of
PhyloWithCliques
->
(
corpusPath
config
)
<>
(
listPath
config
)
<>
(
timeToLabel
config
)
<>
(
cliqueToLabel
config
)
PhyloWithLinks
->
(
corpusPath
config
)
<>
(
listPath
config
)
<>
(
timeToLabel
config
)
<>
(
cliqueToLabel
config
)
<>
(
sensToLabel
config
)
<>
(
seaToLabel
config
)
<>
(
syncToLabel
config
)
<>
(
qualToConfig
config
)
<>
(
show
(
phyloLevel
config
))
writePhylo
::
[
Char
]
->
Phylo
->
IO
()
writePhylo
path
phylo
=
Lazy
.
writeFile
path
$
encode
phylo
readPhylo
::
[
Char
]
->
IO
Phylo
readPhylo
path
=
do
phyloJson
<-
(
eitherDecode
<$>
readJson
path
)
::
IO
(
Either
String
Phylo
)
case
phyloJson
of
Left
err
->
do
putStrLn
err
undefined
Right
phylo
->
pure
phylo
--------------
-- | Main | --
--------------
main
::
IO
()
main
=
do
printIOMsg
"Starting the reconstruction"
printIOMsg
"Read the configuration file"
[
args
]
<-
getArgs
jsonArgs
<-
(
eitherDecode
<$>
readJson
args
)
::
IO
(
Either
String
Config
)
case
jsonArgs
of
Left
err
->
putStrLn
err
Right
config
->
do
printIOMsg
"Parse the corpus"
mapList
<-
csvMapTermList
(
listPath
config
)
corpus
<-
fileToDocs'
(
corpusParser
config
)
(
corpusPath
config
)
(
timeUnit
config
)
mapList
printIOComment
(
show
(
length
corpus
)
<>
" parsed docs from the corpus"
)
printIOMsg
"Reconstruct the phylo"
let
phyloWithCliquesFile
=
(
outputPath
config
)
<>
"phyloWithCliques_"
<>
(
configToSha
PhyloWithCliques
config
)
<>
".json"
let
phyloWithLinksFile
=
(
outputPath
config
)
<>
"phyloWithLinks_"
<>
(
configToSha
PhyloWithLinks
config
)
<>
".json"
phyloWithCliquesExists
<-
doesFileExist
phyloWithCliquesFile
phyloWithLinksExists
<-
doesFileExist
phyloWithLinksFile
-- phyloStep <- if phyloWithCliquesExists
-- then do
-- printIOMsg "Reconstruct the phylo step from an existing file"
-- readPhylo phyloWithCliquesFile
-- else do
-- printIOMsg "Reconstruct the phylo step from scratch"
-- pure $ toPhyloStep corpus mapList config
-- writePhylo phyloWithCliquesFile phyloStep
-- let phylo = toPhylo (setConfig config phyloStep)
phyloWithLinks
<-
if
phyloWithLinksExists
then
do
printIOMsg
"Reconstruct the phylo from an existing file with intertemporal links"
readPhylo
phyloWithLinksFile
else
do
if
phyloWithCliquesExists
then
do
printIOMsg
"Reconstruct the phylo from an existing file with cliques"
phyloWithCliques
<-
readPhylo
phyloWithCliquesFile
writePhylo
phyloWithCliquesFile
phyloWithCliques
pure
$
toPhylo
(
setConfig
config
phyloWithCliques
)
else
do
printIOMsg
"Reconstruct the phylo from scratch"
phyloWithCliques
<-
pure
$
toPhyloStep
corpus
mapList
config
writePhylo
phyloWithCliquesFile
phyloWithCliques
pure
$
toPhylo
(
setConfig
config
phyloWithCliques
)
writePhylo
phyloWithLinksFile
phyloWithLinks
-- probes
-- writeFile ((outputPath config) <> (unpack $ phyloName config) <> "_synchronic_distance_cumu_jaccard.txt")
-- $ synchronicDistance' phylo 1
-- writeFile ((outputPath config) <> (unpack $ phyloName config) <> "_inflexion_points.txt")
-- $ inflexionPoints phylo 1
printIOMsg
"End of reconstruction, start the export"
let
dot
=
toPhyloExport
(
setConfig
config
phyloWithLinks
)
let
output
=
configToLabel
config
dotToFile
output
dot
bin/gargantext-phylo/Main.hs
View file @
85fcd70b
{-|
Module : Main.hs
Description : Gargantext starter binary with Phylo
Description : Gargantext starter binary with
Adaptative
Phylo
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Phylo binaries
Adaptative Phylo binaries
-}
{-# LANGUAGE StandaloneDeriving #-}
...
...
@@ -17,153 +16,267 @@ Phylo binaries
module
Main
where
import
Control.Concurrent.Async
as
CCA
(
mapConcurrently
)
import
Control.Concurrent.Async
(
mapConcurrently
)
import
Crypto.Hash.SHA256
(
hash
)
import
Data.Aeson
import
Data.
List
((
++
),
concat
)
import
Data.
Maybe
import
Data.
Text
(
Text
,
unwords
)
import
GHC.Generics
import
Data.
Either
(
Either
(
..
)
)
import
Data.
List
(
concat
,
nub
,
isSuffixOf
)
import
Data.
Maybe
(
fromMaybe
)
import
Data.String
(
String
)
import
GHC.IO
(
FilePath
)
import
System.Directory
(
doesFileExist
)
import
qualified
Prelude
as
Prelude
import
System.Environment
import
qualified
Data.ByteString.Lazy
as
L
import
qualified
Data.List
as
DL
import
qualified
Data.Map
as
DM
import
qualified
Data.Text
as
DT
import
qualified
Data.
Vector
as
DV
import
qualified
Gargantext.Core.Text.Corpus.Parsers.CSV
as
CSV
import
qualified
Prelude
as
P
import
Gargantext.Database.Admin.Types.Hyperdata
import
System.Directory
(
listDirectory
,
doesFileExist
)
import
Data.Text
(
Text
,
unwords
,
unpack
,
replace
,
pack
)
import
Data.Time.Calendar
(
fromGregorian
,
diffGregorianDurationClip
,
cdMonths
,
diffDays
,
showGregorian
)
import
qualified
Data.
ByteString.Char8
as
C8
import
qualified
Data.ByteString.Lazy
as
Lazy
import
qualified
Data.Vector
as
Vector
import
qualified
Data.Text
as
T
import
Gargantext.Prelude
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
csv_title
,
csv_abstract
,
csv_publication_year
,
csv_publication_month
,
csv_publication_day
,
csv'_source
,
csv'_title
,
csv'_abstract
,
csv'_publication_year
,
csv'_publication_month
,
csv'_publication_day
,
csv'_weight
)
import
qualified
Gargantext.Core.Text.Corpus.Parsers.CSV
as
Csv
import
Gargantext.Core.Text.Corpus.Parsers
(
FileFormat
(
..
),
parseFile
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
csv_title
,
csv_abstract
,
csv_publication_year
)
import
Gargantext.Core.Text.List.Formats.CSV
(
csvMapTermList
)
import
Gargantext.Core.Text.Terms.WithList
import
Gargantext.Core.Text.Terms.WithList
(
Patterns
,
buildPatterns
,
extractTermsWithList
)
import
Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.LevelMaker
import
Gargantext.Core.Viz.Phylo.Tools
import
Gargantext.Core.Viz.Phylo.View.Export
import
Gargantext.Core.Viz.Phylo.View.ViewMaker
import
Gargantext.Core.Viz.Phylo.PhyloMaker
(
toPhylo
,
toPhyloStep
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
,
setConfig
)
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
-- import Gargantext.API.Ngrams.Prelude (toTermList)
-- import Debug.Trace (trace)
data
PhyloStage
=
PhyloWithCliques
|
PhyloWithLinks
deriving
(
Show
)
---------------
-- | Tools | --
---------------
-- | To get all the files in a directory or just a file
getFilesFromPath
::
FilePath
->
IO
([
FilePath
])
getFilesFromPath
path
=
do
if
(
isSuffixOf
"/"
path
)
then
(
listDirectory
path
)
else
return
[
path
]
---------------
-- | Dates | --
---------------
toMonths
::
Integer
->
Int
->
Int
->
Date
toMonths
y
m
d
=
fromIntegral
$
cdMonths
$
diffGregorianDurationClip
(
fromGregorian
y
m
d
)
(
fromGregorian
0000
0
0
)
toDays
::
Integer
->
Int
->
Int
->
Date
toDays
y
m
d
=
fromIntegral
$
diffDays
(
fromGregorian
y
m
d
)
(
fromGregorian
0000
0
0
)
toPhyloDate
::
Int
->
Int
->
Int
->
TimeUnit
->
Date
toPhyloDate
y
m
d
tu
=
case
tu
of
Year
_
_
_
->
y
Month
_
_
_
->
toMonths
(
Prelude
.
toInteger
y
)
m
d
Week
_
_
_
->
div
(
toDays
(
Prelude
.
toInteger
y
)
m
d
)
7
Day
_
_
_
->
toDays
(
Prelude
.
toInteger
y
)
m
d
-- Function to use in Database export
toPhyloDate'
::
Int
->
Int
->
Int
->
Text
toPhyloDate'
y
m
d
=
pack
$
showGregorian
$
fromGregorian
(
Prelude
.
toInteger
y
)
m
d
--------------
-- |
Conf
| --
-- |
Json
| --
--------------
type
ListPath
=
FilePath
type
FisPath
=
FilePath
type
CorpusPath
=
FilePath
data
CorpusType
=
Wos
|
Csv
deriving
(
Show
,
Generic
)
type
Limit
=
Int
data
Conf
=
Conf
{
corpusPath
::
CorpusPath
,
corpusType
::
CorpusType
,
listPath
::
ListPath
,
fisPath
::
FilePath
,
outputPath
::
FilePath
,
phyloName
::
Text
,
limit
::
Limit
,
timeGrain
::
Int
,
timeStep
::
Int
,
timeFrame
::
Int
,
timeFrameTh
::
Double
,
timeTh
::
Double
,
timeSens
::
Double
,
reBranchThr
::
Double
,
reBranchNth
::
Int
,
clusterTh
::
Double
,
clusterSens
::
Double
,
phyloLevel
::
Int
,
viewLevel
::
Int
,
fisSupport
::
Int
,
fisClique
::
Int
,
minSizeBranch
::
Int
}
deriving
(
Show
,
Generic
)
instance
FromJSON
Conf
instance
ToJSON
Conf
instance
FromJSON
CorpusType
instance
ToJSON
CorpusType
decoder
::
P
.
Either
a
b
->
b
decoder
(
P
.
Left
_
)
=
P
.
error
"Error"
decoder
(
P
.
Right
x
)
=
x
-- | Get the conf from a Json file
getJson
::
FilePath
->
IO
L
.
ByteString
getJson
path
=
L
.
readFile
path
-- | To read and decode a Json file
readJson
::
FilePath
->
IO
Lazy
.
ByteString
readJson
path
=
Lazy
.
readFile
path
----------------
-- | Parser | --
----------------
-- | To filter the Ngrams of a document based on the termList
termsInText
::
Patterns
->
Text
->
[
Text
]
termsInText
pats
txt
=
nub
$
concat
$
map
(
map
unwords
)
$
extractTermsWithList
pats
txt
-- | To transform a Wos file (or [file]) into a list of Docs
wosToDocs
::
Int
->
Patterns
->
TimeUnit
->
FilePath
->
IO
[
Document
]
wosToDocs
limit
patterns
time
path
=
do
files
<-
getFilesFromPath
path
let
parseFile'
file
=
do
eParsed
<-
parseFile
WOS
(
path
<>
file
)
case
eParsed
of
Right
ps
->
pure
ps
Left
e
->
panic
$
"Error: "
<>
(
pack
e
)
take
limit
<$>
map
(
\
d
->
let
title
=
fromJust
$
_hd_title
d
abstr
=
if
(
isJust
$
_hd_abstract
d
)
then
fromJust
$
_hd_abstract
d
else
""
in
Document
(
toPhyloDate
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
(
fromJust
$
_hd_publication_month
d
)
(
fromJust
$
_hd_publication_day
d
)
time
)
(
toPhyloDate'
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
(
fromJust
$
_hd_publication_month
d
)
(
fromJust
$
_hd_publication_day
d
))
(
termsInText
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
)
<$>
concat
<$>
mapConcurrently
(
\
file
->
filter
(
\
d
->
(
isJust
$
_hd_publication_year
d
)
&&
(
isJust
$
_hd_title
d
))
<$>
parseFile'
file
)
files
-- To transform a Csv file into a list of Document
csvToDocs
::
CorpusParser
->
Patterns
->
TimeUnit
->
FilePath
->
IO
[
Document
]
csvToDocs
parser
patterns
time
path
=
case
parser
of
Wos
_
->
undefined
Csv
limit
->
do
eR
<-
Csv
.
readFile
path
case
eR
of
Right
r
->
pure
$
Vector
.
toList
$
Vector
.
take
limit
$
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
Csv
.
fromMIntOrDec
Csv
.
defaultYear
$
csv_publication_year
row
)
(
fromMaybe
Csv
.
defaultMonth
$
csv_publication_month
row
)
(
fromMaybe
Csv
.
defaultDay
$
csv_publication_day
row
)
time
)
(
toPhyloDate'
(
Csv
.
fromMIntOrDec
Csv
.
defaultYear
$
csv_publication_year
row
)
(
fromMaybe
Csv
.
defaultMonth
$
csv_publication_month
row
)
(
fromMaybe
Csv
.
defaultDay
$
csv_publication_day
row
))
(
termsInText
patterns
$
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
))
Nothing
[]
)
$
snd
r
Left
e
->
panic
$
"Error: "
<>
(
pack
e
)
Csv'
limit
->
Vector
.
toList
<$>
Vector
.
take
limit
<$>
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
csv'_publication_year
row
)
(
csv'_publication_month
row
)
(
csv'_publication_day
row
)
time
)
(
toPhyloDate'
(
csv'_publication_year
row
)
(
csv'_publication_month
row
)
(
csv'_publication_day
row
))
(
termsInText
patterns
$
(
csv'_title
row
)
<>
" "
<>
(
csv'_abstract
row
))
(
Just
$
csv'_weight
row
)
[
csv'_source
row
]
)
<$>
snd
<$>
Csv
.
readWeightedCsv
path
-- To parse a file into a list of Document
fileToDocs'
::
CorpusParser
->
FilePath
->
TimeUnit
->
TermList
->
IO
[
Document
]
fileToDocs'
parser
path
time
lst
=
do
let
patterns
=
buildPatterns
lst
case
parser
of
Wos
limit
->
wosToDocs
limit
patterns
time
path
Csv
_
->
csvToDocs
parser
patterns
time
path
Csv'
_
->
csvToDocs
parser
patterns
time
path
---------------
-- |
Parse
| --
-- |
Label
| --
---------------
-- | To filter the Ngrams of a document based on the termList
filterTerms
::
Patterns
->
(
a
,
Text
)
->
(
a
,
[
Text
])
filterTerms
patterns
(
y
,
d
)
=
(
y
,
termsInText
patterns
d
)
-- | To transform a Csv nfile into a readable corpus
csvToCorpus
::
Limit
->
CorpusPath
->
IO
([(
Int
,
Text
)])
csvToCorpus
limit
csv
=
DV
.
toList
-- . DV.reverse
.
DV
.
take
limit
-- . DV.reverse
.
DV
.
map
(
\
n
->
(
csv_publication_year
n
,
(
csv_title
n
)
<>
" "
<>
(
csv_abstract
n
)))
.
snd
<$>
CSV
.
readFile
csv
-- | To transform a Wos nfile into a readable corpus
wosToCorpus
::
Limit
->
CorpusPath
->
IO
([(
Int
,
Text
)])
wosToCorpus
limit
path
=
DL
.
take
limit
.
map
(
\
d
->
((
fromJust
$
_hd_publication_year
d
)
,(
fromJust
$
_hd_title
d
)
<>
" "
<>
(
fromJust
$
_hd_abstract
d
)))
.
filter
(
\
d
->
(
isJust
$
_hd_publication_year
d
)
&&
(
isJust
$
_hd_title
d
)
&&
(
isJust
$
_hd_abstract
d
))
.
concat
<$>
mapConcurrently
(
\
idx
->
parseFile
WOS
(
path
<>
show
(
idx
)
<>
".txt"
))
[
1
..
20
]
-- | To use the correct parser given a CorpusType
fileToCorpus
::
CorpusType
->
Limit
->
CorpusPath
->
IO
([(
Int
,
Text
)])
fileToCorpus
format
limit
path
=
case
format
of
Wos
->
wosToCorpus
limit
path
Csv
->
csvToCorpus
limit
path
-- | To parse a file into a list of Document
parse
::
CorpusType
->
Limit
->
CorpusPath
->
TermList
->
IO
[
Document
]
parse
format
limit
path
l
=
do
corpus
<-
fileToCorpus
format
limit
path
let
patterns
=
buildPatterns
l
pure
$
map
(
(
\
(
y
,
t
)
->
Document
y
t
)
.
filterTerms
patterns
)
corpus
-- | To parse an existing Fis file
parseFis
::
FisPath
->
Text
->
Int
->
Int
->
Int
->
Int
->
IO
[
PhyloFis
]
parseFis
path
name
grain
step
support
clique
=
do
fisExists
<-
doesFileExist
(
path
<>
(
DT
.
unpack
name
)
<>
"_"
<>
show
(
grain
)
<>
"_"
<>
show
(
step
)
<>
"_"
<>
show
(
support
)
<>
"_"
<>
show
(
clique
)
<>
".json"
)
if
fisExists
then
do
fisJson
<-
(
eitherDecode
<$>
getJson
(
path
<>
(
DT
.
unpack
name
)
<>
"_"
<>
show
(
grain
)
<>
"_"
<>
show
(
step
)
<>
"_"
<>
show
(
support
)
<>
"_"
<>
show
(
clique
)
<>
".json"
))
::
IO
(
P
.
Either
P
.
String
[
PhyloFis
])
case
fisJson
of
P
.
Left
err
->
do
-- Config time parameters to label
timeToLabel
::
Config
->
[
Char
]
timeToLabel
config
=
case
(
timeUnit
config
)
of
Year
p
s
f
->
(
"time_years"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Month
p
s
f
->
(
"time_months"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Week
p
s
f
->
(
"time_weeks"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Day
p
s
f
->
(
"time_days"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
seaToLabel
::
Config
->
[
Char
]
seaToLabel
config
=
case
(
seaElevation
config
)
of
Constante
start
step
->
(
"sea_cst_"
<>
(
show
start
)
<>
"_"
<>
(
show
step
))
Adaptative
granularity
->
(
"sea_adapt"
<>
(
show
granularity
))
sensToLabel
::
Config
->
[
Char
]
sensToLabel
config
=
case
(
phyloProximity
config
)
of
Hamming
->
undefined
WeightedLogJaccard
s
->
(
"WeightedLogJaccard_"
<>
show
s
)
WeightedLogSim
s
->
(
"WeightedLogSim-sens_"
<>
show
s
)
cliqueToLabel
::
Config
->
[
Char
]
cliqueToLabel
config
=
case
(
clique
config
)
of
Fis
s
s'
->
"fis_"
<>
(
show
s
)
<>
"_"
<>
(
show
s'
)
MaxClique
s
t
f
->
"clique_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
)
<>
"_"
<>
(
show
t
)
syncToLabel
::
Config
->
[
Char
]
syncToLabel
config
=
case
(
phyloSynchrony
config
)
of
ByProximityThreshold
scl
sync_sens
scope
_
->
(
"scale_"
<>
(
show
scope
)
<>
"_"
<>
(
show
sync_sens
)
<>
"_"
<>
(
show
scl
))
ByProximityDistribution
_
_
->
undefined
qualToConfig
::
Config
->
[
Char
]
qualToConfig
config
=
case
(
phyloQuality
config
)
of
Quality
g
m
->
"quality_"
<>
(
show
g
)
<>
"_"
<>
(
show
m
)
-- To set up the export file's label from the configuration
configToLabel
::
Config
->
[
Char
]
configToLabel
config
=
outputPath
config
<>
(
unpack
$
phyloName
config
)
<>
"-"
<>
(
timeToLabel
config
)
<>
"-scale_"
<>
(
show
(
phyloLevel
config
))
<>
"-"
<>
(
seaToLabel
config
)
<>
"-"
<>
(
sensToLabel
config
)
<>
"-"
<>
(
cliqueToLabel
config
)
<>
"-level_"
<>
(
show
(
_qua_granularity
$
phyloQuality
config
))
<>
"-"
<>
(
syncToLabel
config
)
<>
".dot"
-- To write a sha256 from a set of config's parameters
configToSha
::
PhyloStage
->
Config
->
[
Char
]
configToSha
stage
config
=
unpack
$
replace
"/"
"-"
$
T
.
pack
(
show
(
hash
$
C8
.
pack
label
))
where
label
::
[
Char
]
label
=
case
stage
of
PhyloWithCliques
->
(
corpusPath
config
)
<>
(
listPath
config
)
<>
(
timeToLabel
config
)
<>
(
cliqueToLabel
config
)
PhyloWithLinks
->
(
corpusPath
config
)
<>
(
listPath
config
)
<>
(
timeToLabel
config
)
<>
(
cliqueToLabel
config
)
<>
(
sensToLabel
config
)
<>
(
seaToLabel
config
)
<>
(
syncToLabel
config
)
<>
(
qualToConfig
config
)
<>
(
show
(
phyloLevel
config
))
writePhylo
::
[
Char
]
->
Phylo
->
IO
()
writePhylo
path
phylo
=
Lazy
.
writeFile
path
$
encode
phylo
readPhylo
::
[
Char
]
->
IO
Phylo
readPhylo
path
=
do
phyloJson
<-
(
eitherDecode
<$>
readJson
path
)
::
IO
(
Either
String
Phylo
)
case
phyloJson
of
Left
err
->
do
putStrLn
err
pure
[]
P
.
Right
fis
->
pure
fis
else
pure
[]
undefined
Right
phylo
->
pure
phylo
writeFis
::
FisPath
->
Text
->
Int
->
Int
->
Int
->
Int
->
DM
.
Map
(
Date
,
Date
)
[
PhyloFis
]
->
IO
()
writeFis
path
name
grain
step
support
clique
fis
=
do
let
fisPath
=
path
<>
(
DT
.
unpack
name
)
<>
"_"
<>
show
(
grain
)
<>
"_"
<>
show
(
step
)
<>
"_"
<>
show
(
support
)
<>
"_"
<>
show
(
clique
)
<>
".json"
L
.
writeFile
fisPath
$
encode
(
DL
.
concat
$
DM
.
elems
fis
)
--------------
-- | Main | --
...
...
@@ -173,47 +286,74 @@ writeFis path name grain step support clique fis = do
main
::
IO
()
main
=
do
[
jsonPath
]
<-
getArgs
printIOMsg
"Starting the reconstruction"
confJson
<-
(
eitherDecode
<$>
getJson
jsonPath
)
::
IO
(
P
.
Either
P
.
String
Conf
)
printIOMsg
"Read the configuration file"
[
args
]
<-
getArgs
jsonArgs
<-
(
eitherDecode
<$>
readJson
args
)
::
IO
(
Either
String
Config
)
case
confJson
of
P
.
Left
err
->
putStrLn
err
P
.
Right
conf
->
do
case
jsonArgs
of
Left
err
->
putStrLn
err
Right
config
->
do
termList
<-
csvMapTermList
(
listPath
conf
)
printIOMsg
"Parse the corpus"
mapList
<-
csvMapTermList
(
listPath
config
)
corpus
<-
fileToDocs'
(
corpusParser
config
)
(
corpusPath
config
)
(
timeUnit
config
)
mapList
printIOComment
(
show
(
length
corpus
)
<>
" parsed docs from the corpus"
)
corpus
<-
parse
(
corpusType
conf
)
(
limit
conf
)
(
corpusPath
conf
)
termList
printIOMsg
"Reconstruct the phylo"
putStrLn
$
(
"
\n
"
<>
show
(
length
corpus
)
<>
" parsed docs"
)
let
phyloWithCliquesFile
=
(
outputPath
config
)
<>
"phyloWithCliques_"
<>
(
configToSha
PhyloWithCliques
config
)
<>
".json"
let
phyloWithLinksFile
=
(
outputPath
config
)
<>
"phyloWithLinks_"
<>
(
configToSha
PhyloWithLinks
config
)
<>
".json"
fis
<-
parseFis
(
fisPath
conf
)
(
phyloName
conf
)
(
timeGrain
conf
)
(
timeStep
conf
)
(
fisSupport
conf
)
(
fisClique
conf
)
phyloWithCliquesExists
<-
doesFileExist
phyloWithCliquesFile
phyloWithLinksExists
<-
doesFileExist
phyloWithLinksFile
putStrLn
$
(
"
\n
"
<>
show
(
length
fis
)
<>
" parsed fis"
)
-- phyloStep <- if phyloWithCliquesExists
-- then do
-- printIOMsg "Reconstruct the phylo step from an existing file"
-- readPhylo phyloWithCliquesFile
-- else do
-- printIOMsg "Reconstruct the phylo step from scratch"
-- pure $ toPhyloStep corpus mapList config
let
fis'
=
DM
.
fromListWith
(
++
)
$
DL
.
sortOn
(
fst
.
fst
)
$
map
(
\
f
->
(
getFisPeriod
f
,[
f
]))
fis
-- writePhylo phyloWithCliquesFile phyloStep
let
query
=
PhyloQueryBuild
(
phyloName
conf
)
""
(
timeGrain
conf
)
(
timeStep
conf
)
(
Fis
$
FisParams
True
(
fisSupport
conf
)
(
fisClique
conf
))
[]
[]
(
WeightedLogJaccard
$
WLJParams
(
timeTh
conf
)
(
timeSens
conf
))
(
timeFrame
conf
)
(
timeFrameTh
conf
)
(
reBranchThr
conf
)
(
reBranchNth
conf
)
(
phyloLevel
conf
)
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
(
clusterTh
conf
)
(
clusterSens
conf
))
-- let phylo = toPhylo (setConfig config phyloStep)
let
queryView
=
PhyloQueryView
(
viewLevel
conf
)
Merge
False
1
[
BranchAge
,
BranchBirth
,
BranchGroups
]
[
SizeBranch
$
SBParams
(
minSizeBranch
conf
)]
[
GroupLabelIncDyn
,
BranchPeakInc
]
(
Just
(
ByBranchBirth
,
Asc
))
Json
Flat
True
-- QL: 2 files read from disk
phyloWithLinks
<-
if
phyloWithLinksExists
then
do
printIOMsg
"Reconstruct the phylo from an existing file with intertemporal links"
readPhylo
phyloWithLinksFile
else
do
if
phyloWithCliquesExists
then
do
printIOMsg
"Reconstruct the phylo from an existing file with cliques"
phyloWithCliques
<-
readPhylo
phyloWithCliquesFile
writePhylo
phyloWithCliquesFile
phyloWithCliques
pure
$
toPhylo
(
setConfig
config
phyloWithCliques
)
else
do
printIOMsg
"Reconstruct the phylo from scratch"
phyloWithCliques
<-
pure
$
toPhyloStep
corpus
mapList
config
writePhylo
phyloWithCliquesFile
phyloWithCliques
pure
$
toPhylo
(
setConfig
config
phyloWithCliques
)
let
phylo
=
toPhylo
query
corpus
termList
fis'
writePhylo
phyloWithLinksFile
phyloWithLinks
writeFis
(
fisPath
conf
)
(
phyloName
conf
)
(
timeGrain
conf
)
(
timeStep
conf
)
(
fisSupport
conf
)
(
fisClique
conf
)
(
getPhyloFis
phylo
)
let
view
=
toPhyloView
queryView
phylo
-- probes
putStrLn
$
(
"phylo completed until level "
<>
show
(
phyloLevel
conf
)
<>
", export at level "
<>
show
(
viewLevel
conf
))
-- writeFile ((outputPath config) <> (unpack $ phyloName config) <> "_synchronic_distance_cumu_jaccard.txt")
-- $ synchronicDistance' phylo 1
let
outputFile
=
(
outputPath
conf
)
<>
(
DT
.
unpack
$
phyloName
conf
)
<>
"_"
<>
show
(
limit
conf
)
<>
"_"
<>
"_"
<>
show
(
timeTh
conf
)
<>
"_"
<>
"_"
<>
show
(
timeSens
conf
)
<>
"_"
<>
"_"
<>
show
(
clusterTh
conf
)
<>
"_"
<>
"_"
<>
show
(
clusterSens
conf
)
<>
".dot"
-- writeFile ((outputPath config) <> (unpack $ phyloName config) <> "_inflexion_points.txt")
-- $ inflexionPoints phylo 1
printIOMsg
"End of reconstruction, start the export"
let
dot
=
toPhyloExport
(
setConfig
config
phyloWithLinks
)
let
output
=
configToLabel
config
P
.
writeFile
outputFile
$
dotToString
$
viewToDot
view
dotToFile
output
dot
package.yaml
View file @
85fcd70b
...
...
@@ -100,7 +100,7 @@ library:
-
Gargantext.Core.Viz.Graph.Tools
-
Gargantext.Core.Viz.Graph.Tools.IGraph
-
Gargantext.Core.Viz.Graph.Index
-
Gargantext.Core.Viz.
Adaptative
Phylo
-
Gargantext.Core.Viz.Phylo
-
Gargantext.Core.Viz.Phylo.PhyloMaker
-
Gargantext.Core.Viz.Phylo.PhyloTools
-
Gargantext.Core.Viz.Phylo.PhyloExport
...
...
@@ -322,9 +322,9 @@ executables:
-
unordered-containers
-
full-text-search
gargantext-
adaptative-
phylo
:
gargantext-phylo
:
main
:
Main.hs
source-dirs
:
bin/gargantext-
adaptative-
phylo
source-dirs
:
bin/gargantext-phylo
ghc-options
:
-
-threaded
-
-rtsopts
...
...
src/Gargantext/Core/Methods/Graph/MaxClique.hs
View file @
85fcd70b
...
...
@@ -64,7 +64,7 @@ import Gargantext.Core.Viz.Graph.FGL (Graph_Undirected, degree, neighbors, mkGra
import
Gargantext.Core.Viz.Graph.Tools
(
cooc2graph'
,
cooc2graph''
,
Threshold
)
import
Gargantext.Core.Methods.Distances
(
Distance
)
import
Gargantext.Core.Viz.Graph.Index
(
createIndices
,
toIndex
)
import
Gargantext.Core.Viz.
Adaptative
Phylo
import
Gargantext.Core.Viz.Phylo
-- import Debug.Trace (trace)
type
Graph
=
Graph_Undirected
type
Neighbor
=
Node
...
...
src/Gargantext/Core/Viz/
Adaptative
Phylo.hs
→
src/Gargantext/Core/Viz/Phylo.hs
View file @
85fcd70b
...
...
@@ -24,31 +24,26 @@ one 8, e54847.
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Core.Viz.
Adaptative
Phylo
where
module
Gargantext.Core.Viz.Phylo
where
import
Control.DeepSeq
(
NFData
)
import
Control.Lens
(
makeLenses
)
import
Data.Aeson
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Map
(
Map
)
import
Data.Text
(
Text
,
pack
)
import
Data.Vector
(
Vector
)
import
Data.Map
(
Map
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
import
Gargantext.Prelude
import
Gargantext.Core.Text.Context
(
TermList
)
import
GHC.Generics
import
GHC.IO
(
FilePath
)
import
Control.DeepSeq
(
NFData
)
import
Control.Lens
(
makeLenses
)
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
import
Gargantext.Prelude
import
qualified
Data.Text.Lazy
as
TextLazy
----------------
-- | Config | --
----------------
data
CorpusParser
=
Wos
{
_wos_limit
::
Int
}
|
Csv
{
_csv_limit
::
Int
}
...
...
@@ -182,34 +177,49 @@ defaultConfig =
instance
FromJSON
Config
instance
ToJSON
Config
instance
FromJSON
CorpusParser
instance
ToJSON
CorpusParser
instance
FromJSON
Proximity
instance
ToJSON
Proximity
instance
FromJSON
SeaElevation
instance
ToJSON
SeaElevation
instance
FromJSON
TimeUnit
instance
ToJSON
TimeUnit
instance
FromJSON
CliqueFilter
instance
ToJSON
CliqueFilter
instance
FromJSON
Clique
instance
ToJSON
Clique
instance
FromJSON
PhyloLabel
instance
ToJSON
PhyloLabel
instance
FromJSON
Tagger
instance
ToJSON
Tagger
instance
FromJSON
Sort
instance
ToJSON
Sort
instance
FromJSON
Order
instance
ToJSON
Order
instance
FromJSON
Filter
instance
ToJSON
Filter
instance
FromJSON
SynchronyScope
instance
ToJSON
SynchronyScope
instance
FromJSON
SynchronyStrategy
instance
ToJSON
SynchronyStrategy
instance
FromJSON
Synchrony
instance
ToJSON
Synchrony
instance
FromJSON
Quality
instance
ToJSON
Quality
...
...
@@ -252,9 +262,10 @@ type Ngrams = Text
-- Document : a piece of Text linked to a Date
-- date = computational date; date' = original string date yyyy-mm-dd
-- Export Database to Document
data
Document
=
Document
{
date
::
Date
,
date'
::
Text
{
date
::
Date
-- datatype Date {unDate :: Int}
,
date'
::
Text
-- show date
,
text
::
[
Ngrams
]
,
weight
::
Maybe
Double
,
sources
::
[
Text
]
...
...
@@ -469,16 +480,22 @@ makeLenses ''PhyloBranch
instance
FromJSON
Phylo
instance
ToJSON
Phylo
instance
FromJSON
PhyloSources
instance
ToJSON
PhyloSources
instance
FromJSON
PhyloParam
instance
ToJSON
PhyloParam
instance
FromJSON
PhyloPeriod
instance
ToJSON
PhyloPeriod
instance
FromJSON
PhyloLevel
instance
ToJSON
PhyloLevel
instance
FromJSON
Software
instance
ToJSON
Software
instance
FromJSON
PhyloGroup
instance
ToJSON
PhyloGroup
...
...
src/Gargantext/Core/Viz/Phylo/PhyloExample.hs
View file @
85fcd70b
...
...
@@ -15,23 +15,20 @@ Portability : POSIX
module
Gargantext.Core.Viz.Phylo.PhyloExample
where
import
Control.Lens
import
Data.GraphViz.Types.Generalised
(
DotGraph
)
import
Data.List
(
sortOn
,
nub
,
sort
)
import
Data.Map
(
Map
)
import
Data.Text
(
Text
,
toLower
)
import
Gargantext.Prelude
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.Terms.Mono
(
monoTexts
)
import
Gargantext.Core.Viz.AdaptativePhylo
import
Gargantext.Core.Viz.Phylo.PhyloTools
import
Gargantext.Core.Viz.Phylo.PhyloMaker
import
Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.PhyloExport
import
Gargantext.Core.Viz.Phylo.TemporalMatching
(
adaptativeTemporalMatching
,
constanteTemporalMatching
)
import
Gargantext.Core.Viz.Phylo.PhyloMaker
import
Gargantext.Core.Viz.Phylo.PhyloTools
import
Gargantext.Core.Viz.Phylo.SynchronicClustering
(
synchronicClustering
)
import
Control.Lens
import
Data.GraphViz.Types.Generalised
(
DotGraph
)
import
Gargantext.Core.Viz.Phylo.TemporalMatching
(
adaptativeTemporalMatching
,
constanteTemporalMatching
)
import
Gargantext.Prelude
import
qualified
Data.Vector
as
Vector
---------------------------------
...
...
src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
View file @
85fcd70b
...
...
@@ -12,30 +12,27 @@ Portability : POSIX
module
Gargantext.Core.Viz.Phylo.PhyloExport
where
import
Data.Map
(
Map
,
fromList
,
empty
,
fromListWith
,
insert
,
(
!
),
elems
,
unionWith
,
findWithDefault
,
toList
,
member
)
import
Data.List
((
++
),
sort
,
nub
,
null
,
concat
,
sortOn
,
groupBy
,
union
,
(
\\
),
(
!!
),
init
,
partition
,
notElem
,
unwords
,
nubBy
,
inits
,
elemIndex
)
import
Data.Vector
(
Vector
)
import
Prelude
(
writeFile
)
import
Gargantext.Prelude
import
Gargantext.Core.Viz.AdaptativePhylo
import
Gargantext.Core.Viz.Phylo.PhyloTools
import
Gargantext.Core.Viz.Phylo.TemporalMatching
(
filterDocs
,
filterDiago
,
reduceDiagos
,
toProximity
,
getNextPeriods
)
import
Control.Lens
hiding
(
Level
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
import
Data.GraphViz
hiding
(
DotGraph
,
Order
)
import
Data.GraphViz.Types.Generalised
(
DotGraph
)
import
Data.GraphViz.Attributes.Complete
hiding
(
EdgeType
,
Order
)
import
Data.GraphViz.Types.Generalised
(
DotGraph
)
import
Data.GraphViz.Types.Monadic
import
Data.List
((
++
),
sort
,
nub
,
null
,
concat
,
sortOn
,
groupBy
,
union
,
(
\\
),
(
!!
),
init
,
partition
,
notElem
,
unwords
,
nubBy
,
inits
,
elemIndex
)
import
Data.Map
(
Map
,
fromList
,
empty
,
fromListWith
,
insert
,
(
!
),
elems
,
unionWith
,
findWithDefault
,
toList
,
member
)
import
Data.Text.Lazy
(
fromStrict
,
pack
,
unpack
)
import
System.FilePath
import
Data.Vector
(
Vector
)
import
Debug.Trace
(
trace
)
import
Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.PhyloTools
import
Gargantext.Core.Viz.Phylo.TemporalMatching
(
filterDocs
,
filterDiago
,
reduceDiagos
,
toProximity
,
getNextPeriods
)
import
Gargantext.Prelude
import
Prelude
(
writeFile
)
import
System.FilePath
import
qualified
Data.GraphViz.Attributes.HTML
as
H
import
qualified
Data.Text
as
Text
import
qualified
Data.Vector
as
Vector
import
qualified
Data.Text.Lazy
as
Lazy
import
qualified
Data.
GraphViz.Attributes.HTML
as
H
import
qualified
Data.
Vector
as
Vector
--------------------
-- | Dot export | --
...
...
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
View file @
85fcd70b
...
...
@@ -11,30 +11,29 @@ Portability : POSIX
module
Gargantext.Core.Viz.Phylo.PhyloMaker
where
import
Control.DeepSeq
(
NFData
)
import
Control.Lens
hiding
(
Level
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
import
Data.List
(
concat
,
nub
,
partition
,
sort
,
(
++
),
group
,
intersect
,
null
,
sortOn
,
groupBy
,
tail
)
import
Data.Map
(
Map
,
fromListWith
,
keys
,
unionWith
,
fromList
,
empty
,
toList
,
elems
,
(
!
),
restrictKeys
,
foldlWithKey
,
insert
)
import
Data.Vector
(
Vector
)
import
Data.Text
(
Text
)
import
Data.Vector
(
Vector
)
import
Debug.Trace
(
trace
)
import
Gargantext.Prelude
import
Gargantext.Core.Viz.AdaptativePhylo
import
Gargantext.Core.Viz.Phylo.PhyloTools
import
Gargantext.Core.Viz.Phylo.TemporalMatching
(
adaptativeTemporalMatching
,
constanteTemporalMatching
,
getNextPeriods
,
filterDocs
,
filterDiago
,
reduceDiagos
,
toProximity
)
import
Gargantext.Core.Viz.Phylo.SynchronicClustering
(
synchronicClustering
)
import
Gargantext.Core.Methods.Distances
(
Distance
(
Conditional
))
import
Gargantext.Core.Methods.Graph.MaxClique
(
getMaxCliques
)
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.Metrics.FrequentItemSet
(
fisWithSizePolyMap
,
fisWithSizePolyMap'
,
Size
(
..
))
import
Gargantext.Core.Methods.Graph.MaxClique
(
getMaxCliques
)
import
Gargantext.Core.Methods.Distances
(
Distance
(
Conditional
))
import
Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toHorizon
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
import
Gargantext.Core.Viz.Phylo.SynchronicClustering
(
synchronicClustering
)
import
Gargantext.Core.Viz.Phylo.TemporalMatching
(
adaptativeTemporalMatching
,
constanteTemporalMatching
,
getNextPeriods
,
filterDocs
,
filterDiago
,
reduceDiagos
,
toProximity
)
import
Gargantext.Prelude
import
Control.DeepSeq
(
NFData
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
import
Debug.Trace
(
trace
)
import
Control.Lens
hiding
(
Level
)
import
qualified
Data.Vector
as
Vector
import
qualified
Data.Set
as
Set
import
qualified
Data.Vector
as
Vector
------------------
-- | To Phylo | --
...
...
@@ -162,6 +161,7 @@ indexDates' m = map (\docs ->
-- To build the first phylo step from docs and terms
-- QL: backend entre phyloBase et phyloClique
toPhyloStep
::
[
Document
]
->
TermList
->
Config
->
Phylo
toPhyloStep
docs
lst
conf
=
case
(
getSeaElevation
phyloBase
)
of
Constante
_
_
->
appendGroups
cliqueToGroup
1
phyloClique
(
updatePeriods
(
indexDates'
docs'
)
phyloBase
)
...
...
@@ -173,6 +173,7 @@ toPhyloStep docs lst conf = case (getSeaElevation phyloBase) of
phyloClique
=
toPhyloClique
phyloBase
docs'
--------------------------------------
docs'
::
Map
(
Date
,
Date
)
[
Document
]
-- QL: Time Consuming here
docs'
=
groupDocsByPeriodRec
date
(
getPeriodIds
phyloBase
)
(
sortOn
date
docs
)
empty
--------------------------------------
phyloBase
::
Phylo
...
...
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
View file @
85fcd70b
...
...
@@ -12,28 +12,23 @@ Portability : POSIX
module
Gargantext.Core.Viz.Phylo.PhyloTools
where
import
Data.Vector
(
Vector
,
elemIndex
)
import
Control.Lens
hiding
(
Level
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
,
init
,
tail
,
partition
,
tails
,
nubBy
,
group
)
import
Data.Set
(
Set
,
disjoint
)
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
,
member
,
(
!
),
filterWithKey
,
fromListWith
,
empty
,
restrictKeys
)
import
Data.Set
(
Set
,
disjoint
)
import
Data.String
(
String
)
import
Data.Text
(
Text
,
unpack
)
import
Prelude
(
floor
,
read
)
import
Data.Vector
(
Vector
,
elemIndex
)
import
Debug.Trace
(
trace
)
import
Gargantext.Core.Viz.Phylo
import
Gargantext.Prelude
import
Gargantext.Core.Viz.AdaptativePhylo
import
Prelude
(
floor
,
read
)
import
Text.Printf
import
Debug.Trace
(
trace
)
import
Control.Lens
hiding
(
Level
)
import
qualified
Data.Vector
as
Vector
import
qualified
Data.List
as
List
import
qualified
Data.Set
as
Set
import
qualified
Data.Map
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Data.Text
as
Text
import
qualified
Data.Vector
as
Vector
------------
-- | Io | --
...
...
src/Gargantext/Core/Viz/Phylo/SynchronicClustering.hs
View file @
85fcd70b
...
...
@@ -11,20 +11,17 @@ Portability : POSIX
module
Gargantext.Core.Viz.Phylo.SynchronicClustering
where
import
Gargantext.Prelude
import
Gargantext.Core.Viz.AdaptativePhylo
import
Gargantext.Core.Viz.Phylo.PhyloTools
import
Gargantext.Core.Viz.Phylo.TemporalMatching
(
weightedLogJaccard'
,
filterDiago
,
reduceDiagos
)
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
processDynamics
)
import
Data.List
((
++
),
null
,
intersect
,
nub
,
concat
,
sort
,
sortOn
,
groupBy
)
import
Data.Map
(
Map
,
fromList
,
fromListWith
,
foldlWithKey
,
(
!
),
insert
,
empty
,
restrictKeys
,
elems
,
mapWithKey
,
member
)
-- import Debug.Trace (trace)
import
Control.Lens
hiding
(
Level
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
import
Control.Monad
(
sequence
)
-- import Debug.Trace (trace)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
import
Data.List
((
++
),
null
,
intersect
,
nub
,
concat
,
sort
,
sortOn
,
groupBy
)
import
Data.Map
(
Map
,
fromList
,
fromListWith
,
foldlWithKey
,
(
!
),
insert
,
empty
,
restrictKeys
,
elems
,
mapWithKey
,
member
)
import
Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
processDynamics
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
import
Gargantext.Core.Viz.Phylo.TemporalMatching
(
weightedLogJaccard'
,
filterDiago
,
reduceDiagos
)
import
Gargantext.Prelude
import
qualified
Data.Map
as
Map
...
...
@@ -32,7 +29,6 @@ import qualified Data.Map as Map
-- | New Level Maker | --
-------------------------
mergeGroups
::
[
Cooc
]
->
PhyloGroupId
->
Map
PhyloGroupId
PhyloGroupId
->
[
PhyloGroup
]
->
PhyloGroup
mergeGroups
coocs
id
mapIds
childs
=
let
ngrams
=
(
sort
.
nub
.
concat
)
$
map
_phylo_groupNgrams
childs
...
...
src/Gargantext/Core/Viz/Phylo/TemporalMatching.hs
View file @
85fcd70b
...
...
@@ -11,26 +11,21 @@ Portability : POSIX
module
Gargantext.Core.Viz.Phylo.TemporalMatching
where
import
Control.Lens
hiding
(
Level
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
import
Data.List
(
concat
,
splitAt
,
tail
,
sortOn
,
(
++
),
intersect
,
null
,
inits
,
groupBy
,
scanl
,
nub
,
nubBy
,
union
,
dropWhile
,
partition
,
or
,
sort
,
(
!!
))
import
Data.Map
(
Map
,
fromList
,
elems
,
restrictKeys
,
unionWith
,
findWithDefault
,
keys
,
(
!
),
(
!?
),
filterWithKey
,
singleton
,
empty
,
mapKeys
,
adjust
)
import
Gargantext.Prelude
import
Gargantext.Core.Viz.AdaptativePhylo
import
Debug.Trace
(
trace
)
import
Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.PhyloTools
import
Gargantext.Prelude
import
Prelude
(
floor
,
tan
,
pi
)
import
Control.Lens
hiding
(
Level
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
import
Debug.Trace
(
trace
)
import
Text.Printf
import
qualified
Data.Map
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Data.Vector
as
Vector
-------------------
-- | Proximity | --
-------------------
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment