Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
153
Issues
153
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
8790c9de
Commit
8790c9de
authored
Sep 21, 2022
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
refactoring after code review
#1
parent
5cc28172
Pipeline
#3188
failed with stage
in 71 minutes and 47 seconds
Changes
11
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
326 additions
and
313 deletions
+326
-313
Main.hs
bin/gargantext-phylo/Main.hs
+31
-48
MaxClique.hs
src/Gargantext/Core/Methods/Graph/MaxClique.hs
+1
-1
Phylo.hs
src/Gargantext/Core/Viz/Phylo.hs
+71
-61
API.hs
src/Gargantext/Core/Viz/Phylo/API.hs
+2
-2
Tools.hs
src/Gargantext/Core/Viz/Phylo/API/Tools.hs
+2
-2
Example.hs
src/Gargantext/Core/Viz/Phylo/Example.hs
+15
-15
PhyloExport.hs
src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
+15
-14
PhyloMaker.hs
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
+68
-66
PhyloTools.hs
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
+45
-42
SynchronicClustering.hs
src/Gargantext/Core/Viz/Phylo/SynchronicClustering.hs
+18
-18
TemporalMatching.hs
src/Gargantext/Core/Viz/Phylo/TemporalMatching.hs
+58
-44
No files found.
bin/gargantext-phylo/Main.hs
View file @
8790c9de
...
...
@@ -38,7 +38,7 @@ import Gargantext.Core.Types.Main (ListType(..))
import
Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.API.Tools
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
import
Gargantext.Core.Viz.Phylo.PhyloMaker
(
toPhylo
,
toPhylo
Step
)
import
Gargantext.Core.Viz.Phylo.PhyloMaker
(
toPhylo
,
toPhylo
WithoutLink
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
,
setConfig
)
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
...
...
@@ -50,7 +50,7 @@ import qualified Data.Text as T
import
qualified
Data.Vector
as
Vector
import
qualified
Gargantext.Core.Text.Corpus.Parsers.CSV
as
Csv
data
PhyloStage
=
PhyloWithCliques
|
PhyloWithLinks
deriving
(
Show
)
data
Backup
=
BackupPhyloWithoutLink
|
BackupPhylo
deriving
(
Show
)
---------------
-- | Tools | --
...
...
@@ -179,7 +179,7 @@ configToLabel :: PhyloConfig -> [Char]
configToLabel
config
=
outputPath
config
<>
(
unpack
$
phyloName
config
)
<>
"-"
<>
(
timeToLabel
config
)
<>
"-scale_"
<>
(
show
(
phylo
Level
config
))
<>
"-scale_"
<>
(
show
(
phylo
Scale
config
))
<>
"-"
<>
(
seaToLabel
config
)
<>
"-"
<>
(
sensToLabel
config
)
<>
"-"
<>
(
cliqueToLabel
config
)
...
...
@@ -189,18 +189,18 @@ configToLabel config = outputPath config
-- To write a sha256 from a set of config's parameters
configToSha
::
PhyloStage
->
PhyloConfig
->
[
Char
]
configToSha
::
Backup
->
PhyloConfig
->
[
Char
]
configToSha
stage
config
=
unpack
$
replace
"/"
"-"
$
T
.
pack
(
show
(
hash
$
C8
.
pack
label
))
where
label
::
[
Char
]
label
=
case
stage
of
PhyloWithCliques
->
(
corpusPath
config
)
phyloWithoutLink
->
(
corpusPath
config
)
<>
(
listPath
config
)
<>
(
timeToLabel
config
)
<>
(
cliqueToLabel
config
)
PhyloWithLinks
->
(
corpusPath
config
)
phylo
->
(
corpusPath
config
)
<>
(
listPath
config
)
<>
(
timeToLabel
config
)
<>
(
cliqueToLabel
config
)
...
...
@@ -208,7 +208,7 @@ configToSha stage config = unpack
<>
(
seaToLabel
config
)
<>
(
syncToLabel
config
)
<>
(
qualToConfig
config
)
<>
(
show
(
phylo
Level
config
))
<>
(
show
(
phylo
Scale
config
))
readListV4
::
[
Char
]
->
IO
NgramsList
...
...
@@ -255,55 +255,38 @@ main = do
printIOMsg
"Reconstruct the phylo"
let
phyloWithCliquesFile
=
(
outputPath
config
)
<>
"phyloWithCliques_"
<>
(
configToSha
PhyloWithCliques
config
)
<>
".json"
let
phyloWithLinksFile
=
(
outputPath
config
)
<>
"phyloWithLinks_"
<>
(
configToSha
PhyloWithLinks
config
)
<>
".json"
-- check the existing backup files
phyloWithCliquesExists
<-
doesFileExist
phyloWithCliquesFile
phyloWithLinksExists
<-
doesFileExist
phyloWithLinksFile
let
backupPhyloWithoutLink
=
(
outputPath
config
)
<>
"backupPhyloWithoutLink_"
<>
(
configToSha
BackupPhyloWithoutLink
config
)
<>
".json"
let
backupPhylo
=
(
outputPath
config
)
<>
"backupPhylo_"
<>
(
configToSha
BackupPhylo
config
)
<>
".json"
-- phyloStep <- if phyloWithCliquesExists
-- then do
-- printIOMsg "Reconstruct the phylo step from an existing file"
-- readPhylo phyloWithCliquesFile
-- else do
-- printIOMsg "Reconstruct the phylo step from scratch"
-- pure $ toPhyloStep corpus mapList config
phyloWithoutLinkExists
<-
doesFileExist
backupPhyloWithoutLink
phyloExists
<-
doesFileExist
backupPhylo
--
writePhylo phyloWithCliquesFile phyloStep
--
reconstruct the phylo
-- let phylo = toPhylo (setConfig config phyloStep)
phylo
<-
if
phyloExists
then
do
printIOMsg
"Reconstruct the phylo from an existing file"
readPhylo
backupPhylo
else
do
if
phyloWithoutLinkExists
then
do
printIOMsg
"Reconstruct the phylo from an existing file without links"
phyloWithoutLink
<-
readPhylo
backupPhyloWithoutLink
writePhylo
backupPhyloWithoutLink
phyloWithoutLink
pure
$
toPhylo
(
setConfig
config
phyloWithoutLink
)
else
do
printIOMsg
"Reconstruct the phylo from scratch"
phyloWithoutLink
<-
pure
$
toPhyloWithoutLink
corpus
mapList
config
writePhylo
backupPhyloWithoutLink
phyloWithoutLink
pure
$
toPhylo
(
setConfig
config
phyloWithoutLink
)
phyloWithLinks
<-
if
phyloWithLinksExists
then
do
printIOMsg
"Reconstruct the phylo from an existing file with intertemporal links"
readPhylo
phyloWithLinksFile
else
do
if
phyloWithCliquesExists
then
do
printIOMsg
"Reconstruct the phylo from an existing file with cliques"
phyloWithCliques
<-
readPhylo
phyloWithCliquesFile
writePhylo
phyloWithCliquesFile
phyloWithCliques
pure
$
toPhylo
(
setConfig
config
phyloWithCliques
)
else
do
printIOMsg
"Reconstruct the phylo from scratch"
phyloWithCliques
<-
pure
$
toPhyloStep
corpus
mapList
config
writePhylo
phyloWithCliquesFile
phyloWithCliques
pure
$
toPhylo
(
setConfig
config
phyloWithCliques
)
writePhylo
phyloWithLinksFile
phyloWithLinks
-- probes
-- writeFile ((outputPath config) <> (unpack $ phyloName config) <> "_synchronic_distance_cumu_jaccard.txt")
-- $ synchronicDistance' phylo 1
-- writeFile ((outputPath config) <> (unpack $ phyloName config) <> "_inflexion_points.txt")
-- $ inflexionPoints phylo 1
writePhylo
backupPhylo
phylo
printIOMsg
"End of reconstruction, start the export"
let
dot
=
toPhyloExport
(
setConfig
config
phylo
WithLinks
)
let
dot
=
toPhyloExport
(
setConfig
config
phylo
)
let
output
=
configToLabel
config
...
...
src/Gargantext/Core/Methods/Graph/MaxClique.hs
View file @
8790c9de
...
...
@@ -72,7 +72,7 @@ type Neighbor = Node
-- | getMaxCliques
-- TODO chose distance order
getMaxCliques
::
Ord
a
=>
CliqueFilter
->
Distance
->
Threshold
->
Map
(
a
,
a
)
Int
->
[[
a
]]
getMaxCliques
::
Ord
a
=>
Max
CliqueFilter
->
Distance
->
Threshold
->
Map
(
a
,
a
)
Int
->
[[
a
]]
getMaxCliques
f
d
t
m
=
map
fromIndices
$
getMaxCliques'
t
m'
where
m'
=
toIndex
to
m
...
...
src/Gargantext/Core/Viz/Phylo.hs
View file @
8790c9de
...
...
@@ -150,24 +150,24 @@ instance ToSchema TimeUnit where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
""
)
data
CliqueFilter
=
ByThreshold
|
ByNeighbours
deriving
(
Show
,
Generic
,
Eq
)
data
Max
CliqueFilter
=
ByThreshold
|
ByNeighbours
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
CliqueFilter
where
instance
ToSchema
Max
CliqueFilter
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
""
)
data
Cl
ique
=
data
Cl
uster
=
Fis
{
_fis_support
::
Int
,
_fis_size
::
Int
}
|
MaxClique
{
_mcl_size
::
Int
,
_mcl_threshold
::
Double
,
_mcl_filter
::
CliqueFilter
}
,
_mcl_filter
::
Max
CliqueFilter
}
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
Cl
ique
where
instance
ToSchema
Cl
uster
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
""
)
...
...
@@ -187,14 +187,14 @@ data PhyloConfig =
,
corpusParser
::
CorpusParser
,
listParser
::
ListParser
,
phyloName
::
Text
,
phylo
Level
::
Int
,
phylo
Scale
::
Int
,
phyloProximity
::
Proximity
,
seaElevation
::
SeaElevation
,
findAncestors
::
Bool
,
phyloSynchrony
::
Synchrony
,
phyloQuality
::
Quality
,
timeUnit
::
TimeUnit
,
clique
::
Cl
ique
,
clique
::
Cl
uster
,
exportLabel
::
[
PhyloLabel
]
,
exportSort
::
Sort
,
exportFilter
::
[
Filter
]
...
...
@@ -207,7 +207,7 @@ data PhyloSubConfig =
,
_sc_phyloSynchrony
::
Double
,
_sc_phyloQuality
::
Double
,
_sc_timeUnit
::
TimeUnit
,
_sc_clique
::
Cl
ique
,
_sc_clique
::
Cl
uster
,
_sc_exportFilter
::
Double
}
deriving
(
Show
,
Generic
,
Eq
)
...
...
@@ -231,7 +231,7 @@ defaultConfig =
,
corpusParser
=
Csv
100000
,
listParser
=
V4
,
phyloName
=
pack
"Phylo Name"
,
phylo
Level
=
2
,
phylo
Scale
=
2
,
phyloProximity
=
WeightedLogJaccard
0.5
,
seaElevation
=
Constante
0.1
0.1
,
findAncestors
=
False
...
...
@@ -269,11 +269,11 @@ instance ToJSON SeaElevation
instance
FromJSON
TimeUnit
instance
ToJSON
TimeUnit
instance
FromJSON
CliqueFilter
instance
ToJSON
CliqueFilter
instance
FromJSON
Max
CliqueFilter
instance
ToJSON
Max
CliqueFilter
instance
FromJSON
Cl
ique
instance
ToJSON
Cl
ique
instance
FromJSON
Cl
uster
instance
ToJSON
Cl
uster
instance
FromJSON
PhyloLabel
instance
ToJSON
PhyloLabel
...
...
@@ -346,6 +346,9 @@ defaultPhyloParam =
-- | Date : a simple Integer
type
Date
=
Int
-- | DateStr : the string version of a Date
type
DateStr
=
Text
-- | Ngrams : a contiguous sequence of n terms
type
Ngrams
=
Text
...
...
@@ -354,7 +357,7 @@ type Ngrams = Text
-- Export Database to Document
data
Document
=
Document
{
date
::
Date
-- datatype Date {unDate :: Int}
,
date'
::
Text
-- show date
,
date'
::
DateStr
-- show date
,
text
::
[
Ngrams
]
,
weight
::
Maybe
Double
,
sources
::
[
Text
]
...
...
@@ -396,6 +399,12 @@ type Cooc = Map (Int,Int) Double
-- | Phylomemy | --
-------------------
-- | Period : a tuple of Dates
type
Period
=
(
Date
,
Date
)
-- | PeriodStr : a tuple of DateStr
type
PeriodStr
=
(
DateStr
,
DateStr
)
-- | Phylo datatype of a phylomemy
-- foundations : the foundations of the phylo
...
...
@@ -413,7 +422,8 @@ data Phylo =
,
_phylo_horizon
::
!
(
Map
(
PhyloGroupId
,
PhyloGroupId
)
Double
)
,
_phylo_groupsProxi
::
!
(
Map
(
PhyloGroupId
,
PhyloGroupId
)
Double
)
,
_phylo_param
::
PhyloParam
,
_phylo_periods
::
Map
PhyloPeriodId
PhyloPeriod
,
_phylo_periods
::
Map
Period
PhyloPeriod
,
_phylo_quality
::
Double
}
deriving
(
Generic
,
Show
,
Eq
)
...
...
@@ -421,57 +431,56 @@ instance ToSchema Phylo where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_phylo_"
)
-- | PhyloPeriodId : the id of a given period
type
PhyloPeriodId
=
(
Date
,
Date
)
----------------
-- | Period | --
----------------
-- | PhyloPeriod : steps of a phylomemy on a temporal axis
-- id: tuple (start date, end date) of the temporal step of the phylomemy
--
levels: levels of granularity
--
scales: scales of synchronic description
data
PhyloPeriod
=
PhyloPeriod
{
_phylo_periodPeriod
::
(
Date
,
Date
)
,
_phylo_periodPeriod
'
::
(
Text
,
Text
)
,
_phylo_period
Levels
::
Map
PhyloLevelId
PhyloLevel
PhyloPeriod
{
_phylo_periodPeriod
::
Period
,
_phylo_periodPeriod
Str
::
PeriodStr
,
_phylo_period
Scales
::
Map
PhyloScaleId
PhyloScale
}
deriving
(
Generic
,
Show
,
Eq
)
instance
ToSchema
PhyloPeriod
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_phylo_"
)
---------------
-- | Scale | --
---------------
-- | Scale : a scale of synchronic description
type
Scale
=
Int
-- | Level : a level of clustering
type
Level
=
Int
-- | PhyloLevelId : the id of a level of clustering in a given period
type
PhyloLevelId
=
(
PhyloPeriodId
,
Level
)
-- | PhyloScaleId : the id of a scale of synchronic description
type
PhyloScaleId
=
(
Period
,
Scale
)
-- | PhyloLevel : levels of phylomemy on a synchronic axis
-- Levels description:
-- Level 0: The foundations and the base of the phylo
-- Level 1: First level of clustering (the Fis)
-- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
data
PhyloLevel
=
PhyloLevel
{
_phylo_levelPeriod
::
(
Date
,
Date
)
,
_phylo_levelPeriod'
::
(
Text
,
Text
)
,
_phylo_levelLevel
::
Level
,
_phylo_levelGroups
::
Map
PhyloGroupId
PhyloGroup
-- | PhyloScale : sub-structure of the phylomemy in scale of synchronic description
data
PhyloScale
=
PhyloScale
{
_phylo_scalePeriod
::
Period
,
_phylo_scalePeriodStr
::
PeriodStr
,
_phylo_scaleScale
::
Scale
,
_phylo_scaleGroups
::
Map
PhyloGroupId
PhyloGroup
}
deriving
(
Generic
,
Show
,
Eq
)
instance
ToSchema
Phylo
Level
where
instance
ToSchema
Phylo
Scale
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_phylo_"
)
type
PhyloGroupId
=
(
Phylo
Level
Id
,
Int
)
type
PhyloGroupId
=
(
Phylo
Scale
Id
,
Int
)
-- | BranchId : (a
level
, a sequence of branch index)
-- | BranchId : (a
scale
, a sequence of branch index)
-- the sequence is a path of heritage from the most to the less specific branch
type
PhyloBranchId
=
(
Level
,
[
Int
])
type
PhyloBranchId
=
(
Scale
,
[
Int
])
-- | PhyloGroup : group of ngrams at each
level
and period
-- | PhyloGroup : group of ngrams at each
scale
and period
data
PhyloGroup
=
PhyloGroup
{
_phylo_groupPeriod
::
(
Date
,
Date
)
PhyloGroup
{
_phylo_groupPeriod
::
Period
,
_phylo_groupPeriod'
::
(
Text
,
Text
)
,
_phylo_group
Level
::
Level
,
_phylo_group
Scale
::
Scale
,
_phylo_groupIndex
::
Int
,
_phylo_groupLabel
::
Text
,
_phylo_groupSupport
::
Support
...
...
@@ -481,8 +490,8 @@ data PhyloGroup =
,
_phylo_groupCooc
::
!
(
Cooc
)
,
_phylo_groupBranchId
::
PhyloBranchId
,
_phylo_groupMeta
::
Map
Text
[
Double
]
,
_phylo_group
Level
Parents
::
[
Pointer
]
,
_phylo_group
Level
Childs
::
[
Pointer
]
,
_phylo_group
Scale
Parents
::
[
Pointer
]
,
_phylo_group
Scale
Childs
::
[
Pointer
]
,
_phylo_groupPeriodParents
::
[
Pointer
]
,
_phylo_groupPeriodChilds
::
[
Pointer
]
,
_phylo_groupAncestors
::
[
Pointer
]
...
...
@@ -505,22 +514,23 @@ type Pointer = (PhyloGroupId, Weight)
type
Pointer'
=
(
PhyloGroupId
,
(
Thr
,
Weight
))
data
Filiation
=
ToParents
|
ToChilds
|
ToParentsMemory
|
ToChildsMemory
deriving
(
Generic
,
Show
)
data
PointerType
=
TemporalPointer
|
Level
Pointer
deriving
(
Generic
,
Show
)
data
PointerType
=
TemporalPointer
|
Scale
Pointer
deriving
(
Generic
,
Show
)
----------------------
-- | Phylo Cl
ique
| --
----------------------
----------------------
----
-- | Phylo Cl
ustering
| --
----------------------
----
-- | Support : Number of Documents where a Cl
ique
occurs
-- | Support : Number of Documents where a Cl
uster
occurs
type
Support
=
Int
data
PhyloClique
=
PhyloClique
{
_phyloClique_nodes
::
[
Int
]
,
_phyloClique_support
::
Support
,
_phyloClique_period
::
(
Date
,
Date
)
,
_phyloClique_weight
::
Maybe
Double
,
_phyloClique_sources
::
[
Int
]
data
Clustering
=
Clustering
{
_clustering_roots
::
[
Int
]
,
_clustering_support
::
Support
,
_clustering_period
::
Period
-- additional materials for visualization
,
_clustering_visWeighting
::
Maybe
Double
,
_clustering_visFiltering
::
[
Int
]
}
deriving
(
Generic
,
NFData
,
Show
,
Eq
)
----------------
...
...
@@ -595,14 +605,14 @@ makeLenses ''PhyloSubConfig
makeLenses
''
P
roximity
makeLenses
''
S
eaElevation
makeLenses
''
Q
uality
makeLenses
''
C
l
ique
makeLenses
''
C
l
uster
makeLenses
''
P
hyloLabel
makeLenses
''
T
imeUnit
makeLenses
''
P
hyloFoundations
makeLenses
''
P
hyloClique
makeLenses
''
C
lustering
makeLenses
''
P
hylo
makeLenses
''
P
hyloPeriod
makeLenses
''
P
hylo
Level
makeLenses
''
P
hylo
Scale
makeLenses
''
P
hyloGroup
makeLenses
''
P
hyloParam
makeLenses
''
P
hyloExport
...
...
@@ -624,8 +634,8 @@ instance ToJSON PhyloParam
instance
FromJSON
PhyloPeriod
instance
ToJSON
PhyloPeriod
instance
FromJSON
Phylo
Level
instance
ToJSON
Phylo
Level
instance
FromJSON
Phylo
Scale
instance
ToJSON
Phylo
Scale
instance
FromJSON
Software
instance
ToJSON
Software
...
...
src/Gargantext/Core/Viz/Phylo/API.hs
View file @
8790c9de
...
...
@@ -26,7 +26,7 @@ import Gargantext.Core.Types (TODO(..))
import
Gargantext.Core.Viz.LegacyPhylo
import
Gargantext.Core.Viz.Phylo
(
defaultConfig
)
import
Gargantext.Core.Viz.Phylo.API.Tools
import
Gargantext.Core.Viz.Phylo.Example
(
phylo
Exampl
e
)
import
Gargantext.Core.Viz.Phylo.Example
(
phylo
Cleopatr
e
)
import
Gargantext.Core.Viz.Phylo.Legacy.LegacyMain
import
Gargantext.Database.Admin.Types.Hyperdata
import
Gargantext.Database.Admin.Types.Node
-- (PhyloId, ListId, CorpusId, UserId, NodeId(..))
...
...
@@ -99,7 +99,7 @@ getPhylo phyloId _lId _level _minSizeBranch = do
getPhyloDataJson
::
PhyloId
->
GargNoServer
Value
getPhyloDataJson
phyloId
=
do
maybePhyloData
<-
getPhyloData
phyloId
let
phyloData
=
fromMaybe
phylo
Exampl
e
maybePhyloData
let
phyloData
=
fromMaybe
phylo
Cleopatr
e
maybePhyloData
phyloJson
<-
liftBase
$
phylo2dot2json
phyloData
pure
phyloJson
...
...
src/Gargantext/Core/Viz/Phylo/API/Tools.hs
View file @
8790c9de
...
...
@@ -30,7 +30,7 @@ import Gargantext.Core.Types (Context)
import
Gargantext.Core.Types.Main
(
ListType
(
MapTerm
))
import
Gargantext.Core.Viz.Phylo
(
TimeUnit
(
..
),
Date
,
Document
(
..
),
PhyloConfig
(
..
),
Phylo
)
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
import
Gargantext.Core.Viz.Phylo.PhyloMaker
(
toPhylo
,
toPhylo
Step
)
import
Gargantext.Core.Viz.Phylo.PhyloMaker
(
toPhylo
,
toPhylo
WithoutLink
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
(
{-printIOMsg, printIOComment,-}
setConfig
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataPhylo
(
..
))
...
...
@@ -87,7 +87,7 @@ phylo2dot2json phylo = do
flowPhyloAPI
::
PhyloConfig
->
CorpusId
->
GargNoServer
Phylo
flowPhyloAPI
config
cId
=
do
(
mapList
,
corpus
)
<-
corpusIdtoDocuments
(
timeUnit
config
)
cId
phyloWithCliques
<-
pure
$
toPhylo
Step
corpus
mapList
config
phyloWithCliques
<-
pure
$
toPhylo
WithoutLink
corpus
mapList
config
-- writePhylo phyloWithCliquesFile phyloWithCliques
pure
$
toPhylo
(
setConfig
config
phyloWithCliques
)
...
...
src/Gargantext/Core/Viz/Phylo/Example.hs
View file @
8790c9de
...
...
@@ -39,27 +39,27 @@ phyloExport :: IO ()
phyloExport
=
dotToFile
"/home/qlobbe/data/phylo/output/cesar_cleopatre_V2.dot"
phyloDot
phyloDot
::
DotGraph
DotId
phyloDot
=
toPhyloExport
phylo
Exampl
e
phyloDot
=
toPhyloExport
phylo
Cleopatr
e
--------------------------------------------------
-- | STEP 4 | -- Process the synchronic clustering
--------------------------------------------------
phylo
Exampl
e
::
Phylo
phylo
Example
=
synchronicClustering
$
toHorizon
phylo1
phylo
Cleopatr
e
::
Phylo
phylo
Cleopatre
=
synchronicClustering
$
toHorizon
flatPhylo
-----------------------------------------------
-- | STEP 3 | -- Build the Level 1 of the Phylo
-----------------------------------------------
phylo1
::
Phylo
phylo1
=
case
(
getSeaElevation
phyloBase
)
of
flatPhylo
::
Phylo
flatPhylo
=
case
(
getSeaElevation
emptyPhylo
)
of
Constante
s
g
->
constanteTemporalMatching
s
g
$
toGroupsProxi
1
$
appendGroups
cl
iqueToGroup
1
phyloClique
phyloBase
$
appendGroups
cl
usterToGroup
1
seriesOfClustering
emptyPhylo
Adaptative
s
->
adaptativeTemporalMatching
s
$
toGroupsProxi
1
$
appendGroups
cl
iqueToGroup
1
phyloClique
phyloBase
$
appendGroups
cl
usterToGroup
1
seriesOfClustering
emptyPhylo
---------------------------------------------
...
...
@@ -67,21 +67,21 @@ phylo1 = case (getSeaElevation phyloBase) of
---------------------------------------------
phyloClique
::
Map
(
Date
,
Date
)
[
PhyloClique
]
phyloClique
=
toPhyloClique
phyloBase
docsByPeriods
seriesOfClustering
::
Map
(
Date
,
Date
)
[
Clustering
]
seriesOfClustering
=
toSeriesOfClustering
emptyPhylo
docsByPeriods
docsByPeriods
::
Map
(
Date
,
Date
)
[
Document
]
docsByPeriods
=
groupDocsByPeriod
date
periods
docs
---------------------------------
-----------
-- | STEP 1 | -- Init the
Base of the
Phylo
---------------------------------
-----------
---------------------------------
-- | STEP 1 | -- Init the Phylo
---------------------------------
phyloBase
::
Phylo
phyloBase
=
toPhyloBase
docs
mapList
config
emptyPhylo
::
Phylo
emptyPhylo
=
initPhylo
docs
mapList
config
phyloCooc
::
Map
Date
Cooc
...
...
@@ -101,7 +101,7 @@ nbDocsByYear = docsToTimeScaleNb docs
config
::
PhyloConfig
config
=
defaultConfig
{
phyloName
=
"Cesar et Cleopatre"
,
phylo
Level
=
2
,
phylo
Scale
=
2
,
exportFilter
=
[
ByBranchSize
0
]
,
clique
=
MaxClique
0
15
ByNeighbours
}
...
...
src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
View file @
8790c9de
...
...
@@ -26,7 +26,7 @@ import Debug.Trace (trace)
import
Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.PhyloTools
import
Gargantext.Core.Viz.Phylo.TemporalMatching
(
filterDocs
,
filterDiago
,
reduceDiagos
,
toProximity
,
getNextPeriods
)
import
Gargantext.Prelude
import
Gargantext.Prelude
hiding
(
scale
)
import
Prelude
(
writeFile
)
import
System.FilePath
import
qualified
Data.GraphViz.Attributes.HTML
as
H
...
...
@@ -73,7 +73,7 @@ groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show
branchIdToDotId
::
PhyloBranchId
->
DotId
branchIdToDotId
bId
=
(
fromStrict
.
Text
.
pack
)
$
(
"branch"
<>
show
(
snd
bId
))
periodIdToDotId
::
P
hyloPeriodI
d
->
DotId
periodIdToDotId
::
P
erio
d
->
DotId
periodIdToDotId
prd
=
(
fromStrict
.
Text
.
pack
)
$
(
"period"
<>
show
(
fst
prd
)
<>
show
(
snd
prd
))
groupToTable
::
Vector
Ngrams
->
PhyloGroup
->
H
.
Label
...
...
@@ -220,7 +220,8 @@ exportToDot phylo export =
,(
toAttr
(
fromStrict
"phyloGroups"
)
$
pack
$
show
(
length
$
export
^.
export_groups
))
,(
toAttr
(
fromStrict
"phyloSources"
)
$
pack
$
show
(
Vector
.
toList
$
getSources
phylo
))
,(
toAttr
(
fromStrict
"phyloTimeScale"
)
$
pack
$
getTimeScale
phylo
)
,(
toAttr
(
fromStrict
"phyloLevel"
)
$
pack
$
show
(
_qua_granularity
$
phyloQuality
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"PhyloScale"
)
$
pack
$
show
(
_qua_granularity
$
phyloQuality
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"phyloQuality"
)
$
pack
$
show
(
phylo
^.
phylo_quality
))
,(
toAttr
(
fromStrict
"phyloSeaRiseStart"
)
$
pack
$
show
(
_cons_start
$
getSeaElevation
phylo
))
,(
toAttr
(
fromStrict
"phyloSeaRiseSteps"
)
$
pack
$
show
(
_cons_step
$
getSeaElevation
phylo
))
-- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
...
...
@@ -249,7 +250,7 @@ exportToDot phylo export =
_
<-
mapM
(
\
period
->
subgraph
((
Str
.
fromStrict
.
Text
.
pack
)
$
(
"Period"
<>
show
(
fst
$
_phylo_periodPeriod
period
)
<>
show
(
snd
$
_phylo_periodPeriod
period
)))
$
do
graphAttrs
[
Rank
SameRank
]
periodToDotNode
(
period
^.
phylo_periodPeriod
)
(
period
^.
phylo_periodPeriod
'
)
periodToDotNode
(
period
^.
phylo_periodPeriod
)
(
period
^.
phylo_periodPeriod
Str
)
{-- 6) create a node for each group -}
mapM
(
\
g
->
groupToDotNode
(
getRoots
phylo
)
g
(
toBid
g
(
export
^.
export_branches
)))
(
filter
(
\
g
->
g
^.
phylo_groupPeriod
==
(
period
^.
phylo_periodPeriod
))
$
export
^.
export_groups
)
...
...
@@ -615,28 +616,28 @@ headsToAncestors nbDocs diago proximity step heads acc =
toHorizon
::
Phylo
->
Phylo
toHorizon
phylo
=
let
phyloAncestor
=
updatePhyloGroups
level
scale
(
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
concat
$
tracePhyloAncestors
newGroups
)
phylo
reBranched
=
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
concat
$
groupsToBranches
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
getGroupsFromLevel
level
phyloAncestor
in
updatePhyloGroups
level
reBranched
phylo
$
groupsToBranches
'
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
getGroupsFromLevel
scale
phyloAncestor
in
updatePhyloGroups
scale
reBranched
phylo
where
-- | 1) for each periods
periods
::
[
P
hyloPeriodI
d
]
periods
::
[
P
erio
d
]
periods
=
getPeriodIds
phylo
-- --
level
::
Level
level
=
getLastLevel
phylo
scale
::
Scale
scale
=
getLastLevel
phylo
-- --
frame
::
Int
frame
=
getTimeFrame
$
timeUnit
$
getConfig
phylo
-- | 2) find ancestors between groups without parents
mapGroups
::
[[
PhyloGroup
]]
mapGroups
=
map
(
\
prd
->
let
groups
=
getGroupsFromLevelPeriods
level
[
prd
]
phylo
childs
=
getPreviousChildIds
level
frame
prd
periods
phylo
let
groups
=
getGroupsFromLevelPeriods
scale
[
prd
]
phylo
childs
=
getPreviousChildIds
scale
frame
prd
periods
phylo
-- maybe add a better filter for non isolated ancestors
heads
=
filter
(
\
g
->
(
not
.
null
)
$
(
g
^.
phylo_groupPeriodChilds
))
$
filter
(
\
g
->
null
(
g
^.
phylo_groupPeriodParents
)
&&
(
notElem
(
getGroupId
g
)
childs
))
groups
...
...
@@ -656,7 +657,7 @@ toHorizon phylo =
newGroups
=
mapGroups
`
using
`
parList
rdeepseq
--------------------------------------
getPreviousChildIds
::
Level
->
Int
->
PhyloPeriodId
->
[
PhyloPeriodI
d
]
->
Phylo
->
[
PhyloGroupId
]
getPreviousChildIds
::
Scale
->
Int
->
Period
->
[
Perio
d
]
->
Phylo
->
[
PhyloGroupId
]
getPreviousChildIds
lvl
frame
curr
prds
phylo
=
concat
$
map
((
map
fst
)
.
_phylo_groupPeriodChilds
)
$
getGroupsFromLevelPeriods
lvl
(
getNextPeriods
ToParents
frame
curr
prds
)
phylo
...
...
@@ -695,7 +696,7 @@ toPhyloExport phylo = exportToDot phylo
groups
::
[
PhyloGroup
]
groups
=
traceExportGroups
$
processDynamics
$
getGroupsFromLevel
(
phylo
Level
$
getConfig
phylo
)
$
getGroupsFromLevel
(
phylo
Scale
$
getConfig
phylo
)
$
tracePhyloInfo
phylo
-- \$ toHorizon phylo
...
...
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
View file @
8790c9de
...
...
@@ -42,7 +42,7 @@ import qualified Data.Vector as Vector
{-
-- TODO AD
data Phylo' = PhyloBase { _phylo'_phyloBase :: Phylo}
| PhyloN { _phylo'_
phylo1
:: Phylo}
| PhyloN { _phylo'_
flatPhylo
:: Phylo}
toPhylo' :: Phylo' -> [Document] -> TermList -> PhyloConfig -> Phylo
...
...
@@ -50,23 +50,22 @@ toPhylo' (PhyloN phylo) = toPhylo'
toPhylo' (PhyloBase phylo) = toPhylo
-}
toPhylo
::
Phylo
->
Phylo
toPhylo
phylo
Step
=
trace
(
"# phylo1 groups "
<>
show
(
length
$
getGroupsFromLevel
1
phylo1
))
$
traceToPhylo
(
phylo
Level
$
getConfig
phyloStep
)
$
if
(
phylo
Level
$
getConfig
phyloStep
)
>
1
then
foldl'
(
\
phylo'
_
->
synchronicClustering
phylo'
)
phyloAncestors
[
2
..
(
phylo
Level
$
getConfig
phyloStep
)]
else
phylo1
toPhylo
phylo
withoutLink
=
trace
(
"# flatPhylo groups "
<>
show
(
length
$
getGroupsFromLevel
1
flatPhylo
))
$
traceToPhylo
(
phylo
Scale
$
getConfig
phylowithoutLink
)
$
if
(
phylo
Scale
$
getConfig
phylowithoutLink
)
>
1
then
foldl'
(
\
phylo'
_
->
synchronicClustering
phylo'
)
phyloAncestors
[
2
..
(
phylo
Scale
$
getConfig
phylowithoutLink
)]
else
flatPhylo
where
--------------------------------------
phyloAncestors
::
Phylo
phyloAncestors
=
if
(
findAncestors
$
getConfig
phylo
Step
)
then
toHorizon
phylo1
else
phylo1
if
(
findAncestors
$
getConfig
phylo
withoutLink
)
then
toHorizon
flatPhylo
else
flatPhylo
--------------------------------------
phylo1
::
Phylo
phylo1
=
toPhylo1
phyloStep
flatPhylo
::
Phylo
flatPhylo
=
addTemporalLinksToPhylo
phylowithoutLink
--------------------------------------
...
...
@@ -74,16 +73,16 @@ toPhylo phyloStep = trace ("# phylo1 groups " <> show(length $ getGroupsFromLeve
-- | To Phylo 1 | --
--------------------
toGroupsProxi
::
Level
->
Phylo
->
Phylo
toGroupsProxi
::
Scale
->
Phylo
->
Phylo
toGroupsProxi
lvl
phylo
=
let
proximity
=
phyloProximity
$
getConfig
phylo
groupsProxi
=
foldlWithKey
(
\
acc
pId
pds
->
-- 1) process period by period
let
egos
=
map
(
\
g
->
(
getGroupId
g
,
g
^.
phylo_groupNgrams
))
$
elems
$
view
(
phylo_period
Level
s
.
traverse
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_
levelLevel
==
lvl
)
.
phylo_
level
Groups
)
pds
$
view
(
phylo_period
Scale
s
.
traverse
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_
scaleScale
==
lvl
)
.
phylo_
scale
Groups
)
pds
next
=
getNextPeriods
ToParents
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
pId
(
keys
$
phylo
^.
phylo_periods
)
targets
=
map
(
\
g
->
(
getGroupId
g
,
g
^.
phylo_groupNgrams
))
$
getGroupsFromLevelPeriods
lvl
next
phylo
docs
=
filterDocs
(
phylo
^.
phylo_timeDocs
)
([
pId
]
++
next
)
...
...
@@ -102,19 +101,19 @@ toGroupsProxi lvl phylo =
in
phylo
&
phylo_groupsProxi
.~
((
traceGroupsProxi
.
fromList
)
groupsProxi
)
appendGroups
::
(
a
->
P
hyloPeriodId
->
(
Text
,
Text
)
->
Level
->
Int
->
[
Cooc
]
->
PhyloGroup
)
->
Level
->
Map
(
Date
,
Date
)
[
a
]
->
Phylo
->
Phylo
appendGroups
::
(
a
->
P
eriod
->
(
Text
,
Text
)
->
Scale
->
Int
->
[
Cooc
]
->
PhyloGroup
)
->
Scale
->
Map
(
Date
,
Date
)
[
a
]
->
Phylo
->
Phylo
appendGroups
f
lvl
m
phylo
=
trace
(
"
\n
"
<>
"-- | Append "
<>
show
(
length
$
concat
$
elems
m
)
<>
" groups to Level "
<>
show
(
lvl
)
<>
"
\n
"
)
$
over
(
phylo_periods
.
traverse
.
phylo_period
Level
s
.
phylo_period
Scale
s
.
traverse
)
(
\
phyloLvl
->
if
lvl
==
(
phyloLvl
^.
phylo_
levelLevel
)
(
\
phyloLvl
->
if
lvl
==
(
phyloLvl
^.
phylo_
scaleScale
)
then
let
pId
=
phyloLvl
^.
phylo_
level
Period
pId'
=
phyloLvl
^.
phylo_
levelPeriod'
let
pId
=
phyloLvl
^.
phylo_
scale
Period
pId'
=
phyloLvl
^.
phylo_
scalePeriodStr
phyloCUnit
=
m
!
pId
in
phyloLvl
&
phylo_
level
Groups
.~
(
fromList
$
foldl
(
\
groups
obj
->
&
phylo_
scale
Groups
.~
(
fromList
$
foldl
(
\
groups
obj
->
groups
++
[
(((
pId
,
lvl
),
length
groups
)
,
f
obj
pId
pId'
lvl
(
length
groups
)
(
elems
$
restrictKeys
(
phylo
^.
phylo_timeCooc
)
$
periodsToYears
[
pId
]))
...
...
@@ -124,22 +123,22 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
phylo
cl
iqueToGroup
::
PhyloClique
->
PhyloPeriodId
->
(
Text
,
Text
)
->
Level
->
Int
->
[
Cooc
]
->
PhyloGroup
cl
ique
ToGroup
fis
pId
pId'
lvl
idx
coocs
=
PhyloGroup
pId
pId'
lvl
idx
""
(
fis
^.
phyloClique_support
)
(
fis
^.
phyloClique_weight
)
(
fis
^.
phyloClique_sources
)
(
fis
^.
phyloClique_node
s
)
(
ngramsToCooc
(
fis
^.
phyloClique_node
s
)
coocs
)
cl
usterToGroup
::
Clustering
->
Period
->
(
Text
,
Text
)
->
Scale
->
Int
->
[
Cooc
]
->
PhyloGroup
cl
uster
ToGroup
fis
pId
pId'
lvl
idx
coocs
=
PhyloGroup
pId
pId'
lvl
idx
""
(
fis
^.
clustering_support
)
(
fis
^.
clustering_visWeighting
)
(
fis
^.
clustering_visFiltering
)
(
fis
^.
clustering_root
s
)
(
ngramsToCooc
(
fis
^.
clustering_root
s
)
coocs
)
(
1
,[
0
])
-- branchid (lvl,[path in the branching tree])
(
fromList
[(
"breaks"
,[
0
]),(
"seaLevels"
,[
0
])])
[]
[]
[]
[]
[]
[]
[]
toPhylo1
::
Phylo
->
Phylo
toPhylo1
phyloStep
=
case
(
getSeaElevation
phyloStep
)
of
Constante
start
gap
->
constanteTemporalMatching
start
gap
phylo
Step
Adaptative
steps
->
adaptativeTemporalMatching
steps
phylo
Step
addTemporalLinksToPhylo
::
Phylo
->
Phylo
addTemporalLinksToPhylo
phylowithoutLink
=
case
(
getSeaElevation
phylowithoutLink
)
of
Constante
start
gap
->
constanteTemporalMatching
start
gap
phylo
withoutLink
Adaptative
steps
->
adaptativeTemporalMatching
steps
phylo
withoutLink
-----------------------
-- | To Phylo Step | --
...
...
@@ -159,23 +158,24 @@ indexDates' m = map (\docs ->
-- To build the first phylo step from docs and terms
-- QL: backend entre phyloBase et phyloClique
toPhyloStep
::
[
Document
]
->
TermList
->
PhyloConfig
->
Phylo
toPhyloStep
docs
lst
conf
=
case
(
getSeaElevation
phyloBase
)
of
Constante
_
_
->
appendGroups
cliqueToGroup
1
phyloClique
(
updatePeriods
(
indexDates'
docs'
)
phyloBase
)
-- QL: backend entre phyloBase et Clustering
-- tophylowithoutLink
toPhyloWithoutLink
::
[
Document
]
->
TermList
->
PhyloConfig
->
Phylo
toPhyloWithoutLink
docs
lst
conf
=
case
(
getSeaElevation
phyloBase
)
of
Constante
_
_
->
appendGroups
clusterToGroup
1
seriesOfClustering
(
updatePeriods
(
indexDates'
docs'
)
phyloBase
)
Adaptative
_
->
toGroupsProxi
1
$
appendGroups
cl
iqueToGroup
1
phyloClique
(
updatePeriods
(
indexDates'
docs'
)
phyloBase
)
$
appendGroups
cl
usterToGroup
1
seriesOfClustering
(
updatePeriods
(
indexDates'
docs'
)
phyloBase
)
where
--------------------------------------
phyloClique
::
Map
(
Date
,
Date
)
[
PhyloClique
]
phyloClique
=
toPhyloClique
phyloBase
docs'
seriesOfClustering
::
Map
(
Date
,
Date
)
[
Clustering
]
seriesOfClustering
=
toSeriesOfClustering
phyloBase
docs'
--------------------------------------
docs'
::
Map
(
Date
,
Date
)
[
Document
]
-- QL: Time Consuming here
docs'
=
groupDocsByPeriodRec
date
(
getPeriodIds
phyloBase
)
(
sortOn
date
docs
)
empty
--------------------------------------
phyloBase
::
Phylo
phyloBase
=
toPhyloBase
docs
lst
conf
phyloBase
=
initPhylo
docs
lst
conf
--------------------------------------
---------------------------
...
...
@@ -184,30 +184,30 @@ toPhyloStep docs lst conf = case (getSeaElevation phyloBase) of
-- To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
filterClique
::
Bool
->
Int
->
(
Int
->
[
PhyloClique
]
->
[
PhyloClique
])
->
Map
(
Date
,
Date
)
[
PhyloClique
]
->
Map
(
Date
,
Date
)
[
PhyloClique
]
filterClique
::
Bool
->
Int
->
(
Int
->
[
Clustering
]
->
[
Clustering
])
->
Map
(
Date
,
Date
)
[
Clustering
]
->
Map
(
Date
,
Date
)
[
Clustering
]
filterClique
keep
thr
f
m
=
case
keep
of
False
->
map
(
\
l
->
f
thr
l
)
m
True
->
map
(
\
l
->
keepFilled
(
f
)
thr
l
)
m
-- To filter Fis with small Support
filterCliqueBySupport
::
Int
->
[
PhyloClique
]
->
[
PhyloClique
]
filterCliqueBySupport
thr
l
=
filter
(
\
clq
->
(
clq
^.
phyloClique_support
)
>=
thr
)
l
filterCliqueBySupport
::
Int
->
[
Clustering
]
->
[
Clustering
]
filterCliqueBySupport
thr
l
=
filter
(
\
clq
->
(
clq
^.
clustering_support
)
>=
thr
)
l
-- To filter Fis with small Clique size
filterCliqueBySize
::
Int
->
[
PhyloClique
]
->
[
PhyloClique
]
filterCliqueBySize
thr
l
=
filter
(
\
clq
->
(
length
$
clq
^.
phyloClique_node
s
)
>=
thr
)
l
filterCliqueBySize
::
Int
->
[
Clustering
]
->
[
Clustering
]
filterCliqueBySize
thr
l
=
filter
(
\
clq
->
(
length
$
clq
^.
clustering_root
s
)
>=
thr
)
l
-- To filter nested Fis
filterCliqueByNested
::
Map
(
Date
,
Date
)
[
PhyloClique
]
->
Map
(
Date
,
Date
)
[
PhyloClique
]
filterCliqueByNested
::
Map
(
Date
,
Date
)
[
Clustering
]
->
Map
(
Date
,
Date
)
[
Clustering
]
filterCliqueByNested
m
=
let
clq
=
map
(
\
l
->
foldl
(
\
mem
f
->
if
(
any
(
\
f'
->
isNested
(
f'
^.
phyloClique_nodes
)
(
f
^.
phyloClique_node
s
))
mem
)
foldl
(
\
mem
f
->
if
(
any
(
\
f'
->
isNested
(
f'
^.
clustering_roots
)
(
f
^.
clustering_root
s
))
mem
)
then
mem
else
let
fMax
=
filter
(
\
f'
->
not
$
isNested
(
f
^.
phyloClique_nodes
)
(
f'
^.
phyloClique_node
s
))
mem
let
fMax
=
filter
(
\
f'
->
not
$
isNested
(
f
^.
clustering_roots
)
(
f'
^.
clustering_root
s
))
mem
in
fMax
++
[
f
]
)
[]
l
)
$
elems
m
clq'
=
clq
`
using
`
parList
rdeepseq
...
...
@@ -215,8 +215,8 @@ filterCliqueByNested m =
-- | To transform a time map of docs into a time map of Fis with some filters
to
PhyloClique
::
Phylo
->
Map
(
Date
,
Date
)
[
Document
]
->
Map
(
Date
,
Date
)
[
PhyloClique
]
to
PhyloClique
phylo
phyloDocs
=
case
(
clique
$
getConfig
phylo
)
of
to
SeriesOfClustering
::
Phylo
->
Map
(
Date
,
Date
)
[
Document
]
->
Map
(
Date
,
Date
)
[
Clustering
]
to
SeriesOfClustering
phylo
phyloDocs
=
case
(
clique
$
getConfig
phylo
)
of
Fis
s
s'
->
-- traceFis "Filtered Fis"
filterCliqueByNested
{- \$ traceFis "Filtered by clique size" -}
...
...
@@ -224,22 +224,22 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
{- \$ traceFis "Filtered by support" -}
$
filterClique
True
s
(
filterCliqueBySupport
)
{- \$ traceFis "Unfiltered Fis" -}
phyloClique
seriesOfClustering
MaxClique
s
_
_
->
filterClique
True
s
(
filterCliqueBySize
)
phyloClique
seriesOfClustering
where
--------------------------------------
phyloClique
::
Map
(
Date
,
Date
)
[
PhyloClique
]
phyloClique
=
case
(
clique
$
getConfig
phylo
)
of
seriesOfClustering
::
Map
(
Date
,
Date
)
[
Clustering
]
seriesOfClustering
=
case
(
clique
$
getConfig
phylo
)
of
Fis
_
_
->
let
fis
=
map
(
\
(
prd
,
docs
)
->
case
(
corpusParser
$
getConfig
phylo
)
of
Csv'
_
->
let
lst
=
toList
$
fisWithSizePolyMap'
(
Segment
1
20
)
1
(
map
(
\
d
->
(
ngramsToIdx
(
text
d
)
(
getRoots
phylo
),
(
weight
d
,
(
sourcesToIdx
(
sources
d
)
(
getSources
phylo
)))))
docs
)
in
(
prd
,
map
(
\
f
->
PhyloClique
(
Set
.
toList
$
fst
f
)
((
fst
.
snd
)
f
)
prd
((
fst
.
snd
.
snd
)
f
)
(((
snd
.
snd
.
snd
)
f
)))
lst
)
in
(
prd
,
map
(
\
f
->
Clustering
(
Set
.
toList
$
fst
f
)
((
fst
.
snd
)
f
)
prd
((
fst
.
snd
.
snd
)
f
)
(((
snd
.
snd
.
snd
)
f
)))
lst
)
_
->
let
lst
=
toList
$
fisWithSizePolyMap
(
Segment
1
20
)
1
(
map
(
\
d
->
ngramsToIdx
(
text
d
)
(
getRoots
phylo
))
docs
)
in
(
prd
,
map
(
\
f
->
PhyloClique
(
Set
.
toList
$
fst
f
)
(
snd
f
)
prd
(
Just
$
fromIntegral
$
snd
f
)
[]
)
lst
)
in
(
prd
,
map
(
\
f
->
Clustering
(
Set
.
toList
$
fst
f
)
(
snd
f
)
prd
(
Just
$
fromIntegral
$
snd
f
)
[]
)
lst
)
)
$
toList
phyloDocs
fis'
=
fis
`
using
`
parList
rdeepseq
...
...
@@ -250,7 +250,7 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
$
foldl
sumCooc
empty
$
map
listToMatrix
$
map
(
\
d
->
ngramsToIdx
(
text
d
)
(
getRoots
phylo
))
docs
in
(
prd
,
map
(
\
cl
->
PhyloClique
cl
0
prd
Nothing
[]
)
$
getMaxCliques
filterType
Conditional
thr
cooc
))
in
(
prd
,
map
(
\
cl
->
Clustering
cl
0
prd
Nothing
[]
)
$
getMaxCliques
filterType
Conditional
thr
cooc
))
$
toList
phyloDocs
mcl'
=
mcl
`
using
`
parList
rdeepseq
in
fromList
mcl'
...
...
@@ -355,20 +355,21 @@ docsToTimeScaleNb docs =
$
unionWith
(
+
)
time
docs'
initPhylo
Levels
::
Int
->
PhyloPeriodId
->
Map
PhyloLevelId
PhyloLevel
initPhylo
Level
s
lvlMax
pId
=
fromList
$
map
(
\
lvl
->
((
pId
,
lvl
),
Phylo
Level
pId
(
""
,
""
)
lvl
empty
))
[
1
..
lvlMax
]
initPhylo
Scales
::
Int
->
Period
->
Map
PhyloScaleId
PhyloScale
initPhylo
Scale
s
lvlMax
pId
=
fromList
$
map
(
\
lvl
->
((
pId
,
lvl
),
Phylo
Scale
pId
(
""
,
""
)
lvl
empty
))
[
1
..
lvlMax
]
-- To init the basic elements of a Phylo
toPhyloBase
::
[
Document
]
->
TermList
->
PhyloConfig
->
Phylo
toPhyloBase
docs
lst
conf
=
-- Init the basic elements of a Phylo
--
initPhylo
::
[
Document
]
->
TermList
->
PhyloConfig
->
Phylo
initPhylo
docs
lst
conf
=
let
foundations
=
PhyloFoundations
(
Vector
.
fromList
$
nub
$
concat
$
map
text
docs
)
lst
docsSources
=
PhyloSources
(
Vector
.
fromList
$
nub
$
concat
$
map
sources
docs
)
params
=
defaultPhyloParam
{
_phyloParam_config
=
conf
}
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
$
timeUnit
conf
)
(
getTimeStep
$
timeUnit
conf
)
in
trace
(
"
\n
"
<>
"-- |
Create PhyloBase
out of "
<>
show
(
length
docs
)
<>
" docs
\n
"
)
in
trace
(
"
\n
"
<>
"-- |
Init a phylo
out of "
<>
show
(
length
docs
)
<>
" docs
\n
"
)
$
Phylo
foundations
docsSources
(
docsToTimeScaleCooc
docs
(
foundations
^.
foundations_roots
))
...
...
@@ -378,4 +379,5 @@ toPhyloBase docs lst conf =
empty
empty
params
(
fromList
$
map
(
\
prd
->
(
prd
,
PhyloPeriod
prd
(
""
,
""
)
(
initPhyloLevels
1
prd
)))
periods
)
(
fromList
$
map
(
\
prd
->
(
prd
,
PhyloPeriod
prd
(
""
,
""
)
(
initPhyloScales
1
prd
)))
periods
)
0
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
View file @
8790c9de
...
...
@@ -231,41 +231,41 @@ keepFilled f thr l = if (null $ f thr l) && (not $ null l)
else
f
thr
l
traceClique
::
Map
(
Date
,
Date
)
[
PhyloClique
]
->
String
traceClique
::
Map
(
Date
,
Date
)
[
Clustering
]
->
String
traceClique
mFis
=
foldl
(
\
msg
cpt
->
msg
<>
show
(
countSup
cpt
cliques
)
<>
" (>"
<>
show
(
cpt
)
<>
") "
)
""
[
1
..
6
]
where
--------------------------------------
cliques
::
[
Double
]
cliques
=
sort
$
map
(
fromIntegral
.
length
.
_
phyloClique_node
s
)
$
concat
$
elems
mFis
cliques
=
sort
$
map
(
fromIntegral
.
length
.
_
clustering_root
s
)
$
concat
$
elems
mFis
--------------------------------------
traceSupport
::
Map
(
Date
,
Date
)
[
PhyloClique
]
->
String
traceSupport
::
Map
(
Date
,
Date
)
[
Clustering
]
->
String
traceSupport
mFis
=
foldl
(
\
msg
cpt
->
msg
<>
show
(
countSup
cpt
supports
)
<>
" (>"
<>
show
(
cpt
)
<>
") "
)
""
[
1
..
6
]
where
--------------------------------------
supports
::
[
Double
]
supports
=
sort
$
map
(
fromIntegral
.
_
phyloClique
_support
)
$
concat
$
elems
mFis
supports
=
sort
$
map
(
fromIntegral
.
_
clustering
_support
)
$
concat
$
elems
mFis
--------------------------------------
traceFis
::
[
Char
]
->
Map
(
Date
,
Date
)
[
PhyloClique
]
->
Map
(
Date
,
Date
)
[
PhyloClique
]
traceFis
::
[
Char
]
->
Map
(
Date
,
Date
)
[
Clustering
]
->
Map
(
Date
,
Date
)
[
Clustering
]
traceFis
msg
mFis
=
trace
(
"
\n
"
<>
"-- | "
<>
msg
<>
" : "
<>
show
(
sum
$
map
length
$
elems
mFis
)
<>
"
\n
"
<>
"Support : "
<>
(
traceSupport
mFis
)
<>
"
\n
"
<>
"Nb Ngrams : "
<>
(
traceClique
mFis
)
<>
"
\n
"
)
mFis
---------------
-- | Cl
ique
| --
---------------
---------------
-
-- | Cl
uster
| --
---------------
-
getCliqueSupport
::
Cl
ique
->
Int
getCliqueSupport
::
Cl
uster
->
Int
getCliqueSupport
unit
=
case
unit
of
Fis
s
_
->
s
MaxClique
_
_
_
->
0
getCliqueSize
::
Cl
ique
->
Int
getCliqueSize
::
Cl
uster
->
Int
getCliqueSize
unit
=
case
unit
of
Fis
_
s
->
s
MaxClique
s
_
_
->
s
...
...
@@ -315,9 +315,9 @@ ngramsToCooc ngrams coocs =
--------------------
getGroupId
::
PhyloGroup
->
PhyloGroupId
getGroupId
g
=
((
g
^.
phylo_groupPeriod
,
g
^.
phylo_group
Level
),
g
^.
phylo_groupIndex
)
getGroupId
g
=
((
g
^.
phylo_groupPeriod
,
g
^.
phylo_group
Scale
),
g
^.
phylo_groupIndex
)
idToPrd
::
PhyloGroupId
->
P
hyloPeriodI
d
idToPrd
::
PhyloGroupId
->
P
erio
d
idToPrd
id
=
(
fst
.
fst
)
id
groupByField
::
Ord
a
=>
(
PhyloGroup
->
a
)
->
[
PhyloGroup
]
->
Map
a
[
PhyloGroup
]
...
...
@@ -357,9 +357,9 @@ addPointers fil pty pointers g =
ToParents
->
g
&
phylo_groupPeriodParents
.~
pointers
ToChildsMemory
->
undefined
ToParentsMemory
->
undefined
Level
Pointer
->
case
fil
of
ToChilds
->
g
&
phylo_group
Level
Childs
.~
pointers
ToParents
->
g
&
phylo_group
Level
Parents
.~
pointers
Scale
Pointer
->
case
fil
of
ToChilds
->
g
&
phylo_group
Scale
Childs
.~
pointers
ToParents
->
g
&
phylo_group
Scale
Parents
.~
pointers
ToChildsMemory
->
undefined
ToParentsMemory
->
undefined
...
...
@@ -375,7 +375,7 @@ addMemoryPointers fil pty thr pointers g =
ToParents
->
undefined
ToChildsMemory
->
g
&
phylo_groupPeriodMemoryChilds
.~
(
concat
[(
g
^.
phylo_groupPeriodMemoryChilds
),(
map
(
\
pt
->
toPointer'
thr
pt
)
pointers
)])
ToParentsMemory
->
g
&
phylo_groupPeriodMemoryParents
.~
(
concat
[(
g
^.
phylo_groupPeriodMemoryParents
),(
map
(
\
pt
->
toPointer'
thr
pt
)
pointers
)])
Level
Pointer
->
undefined
Scale
Pointer
->
undefined
getPeriodIds
::
Phylo
->
[(
Date
,
Date
)]
...
...
@@ -384,17 +384,17 @@ getPeriodIds phylo = sortOn fst
$
phylo
^.
phylo_periods
getLevelParentId
::
PhyloGroup
->
PhyloGroupId
getLevelParentId
g
=
fst
$
head'
"getLevelParentId"
$
g
^.
phylo_group
Level
Parents
getLevelParentId
g
=
fst
$
head'
"getLevelParentId"
$
g
^.
phylo_group
Scale
Parents
getLastLevel
::
Phylo
->
Level
getLastLevel
::
Phylo
->
Scale
getLastLevel
phylo
=
last'
"lastLevel"
$
getLevels
phylo
getLevels
::
Phylo
->
[
Level
]
getLevels
::
Phylo
->
[
Scale
]
getLevels
phylo
=
nub
$
map
snd
$
keys
$
view
(
phylo_periods
.
traverse
.
phylo_period
Level
s
)
phylo
.
phylo_period
Scale
s
)
phylo
getSeaElevation
::
Phylo
->
SeaElevation
getSeaElevation
phylo
=
seaElevation
(
getConfig
phylo
)
...
...
@@ -426,44 +426,44 @@ phyloToLastBranches phylo = elems
$
map
(
\
g
->
(
g
^.
phylo_groupBranchId
,
[
g
]))
$
getGroupsFromLevel
(
last'
"byBranches"
$
getLevels
phylo
)
phylo
getGroupsFromLevel
::
Level
->
Phylo
->
[
PhyloGroup
]
getGroupsFromLevel
::
Scale
->
Phylo
->
[
PhyloGroup
]
getGroupsFromLevel
lvl
phylo
=
elems
$
view
(
phylo_periods
.
traverse
.
phylo_period
Level
s
.
phylo_period
Scale
s
.
traverse
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_
levelLevel
==
lvl
)
.
phylo_
level
Groups
)
phylo
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_
scaleScale
==
lvl
)
.
phylo_
scale
Groups
)
phylo
getGroupsFromLevelPeriods
::
Level
->
[
PhyloPeriodI
d
]
->
Phylo
->
[
PhyloGroup
]
getGroupsFromLevelPeriods
::
Scale
->
[
Perio
d
]
->
Phylo
->
[
PhyloGroup
]
getGroupsFromLevelPeriods
lvl
periods
phylo
=
elems
$
view
(
phylo_periods
.
traverse
.
filtered
(
\
phyloPrd
->
elem
(
phyloPrd
^.
phylo_periodPeriod
)
periods
)
.
phylo_period
Level
s
.
phylo_period
Scale
s
.
traverse
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_
levelLevel
==
lvl
)
.
phylo_
level
Groups
)
phylo
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_
scaleScale
==
lvl
)
.
phylo_
scale
Groups
)
phylo
getGroupsFromPeriods
::
Level
->
Map
PhyloPeriodI
d
PhyloPeriod
->
[
PhyloGroup
]
getGroupsFromPeriods
::
Scale
->
Map
Perio
d
PhyloPeriod
->
[
PhyloGroup
]
getGroupsFromPeriods
lvl
periods
=
elems
$
view
(
traverse
.
phylo_period
Level
s
.
phylo_period
Scale
s
.
traverse
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_
levelLevel
==
lvl
)
.
phylo_
level
Groups
)
periods
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_
scaleScale
==
lvl
)
.
phylo_
scale
Groups
)
periods
updatePhyloGroups
::
Level
->
Map
PhyloGroupId
PhyloGroup
->
Phylo
->
Phylo
updatePhyloGroups
::
Scale
->
Map
PhyloGroupId
PhyloGroup
->
Phylo
->
Phylo
updatePhyloGroups
lvl
m
phylo
=
over
(
phylo_periods
.
traverse
.
phylo_period
Level
s
.
phylo_period
Scale
s
.
traverse
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_
levelLevel
==
lvl
)
.
phylo_
level
Groups
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_
scaleScale
==
lvl
)
.
phylo_
scale
Groups
.
traverse
)
(
\
g
->
let
id
=
getGroupId
g
...
...
@@ -477,13 +477,16 @@ updatePeriods periods' phylo =
over
(
phylo_periods
.
traverse
)
(
\
prd
->
let
prd'
=
periods'
!
(
prd
^.
phylo_periodPeriod
)
lvls
=
map
(
\
lvl
->
lvl
&
phylo_
levelPeriod'
.~
prd'
)
$
prd
^.
phylo_periodLevel
s
in
prd
&
phylo_periodPeriod
'
.~
prd'
&
phylo_period
Levels
.~
lvls
lvls
=
map
(
\
lvl
->
lvl
&
phylo_
scalePeriodStr
.~
prd'
)
$
prd
^.
phylo_periodScale
s
in
prd
&
phylo_periodPeriod
Str
.~
prd'
&
phylo_period
Scales
.~
lvls
)
phylo
updateQuality
::
Double
->
Phylo
->
Phylo
updateQuality
quality
phylo
=
phylo
{
_phylo_quality
=
quality
}
traceToPhylo
::
Level
->
Phylo
->
Phylo
traceToPhylo
::
Scale
->
Phylo
->
Phylo
traceToPhylo
lvl
phylo
=
trace
(
"
\n
"
<>
"-- | End of phylo making at level "
<>
show
(
lvl
)
<>
" with "
<>
show
(
length
$
getGroupsFromLevel
lvl
phylo
)
<>
" groups and "
...
...
@@ -516,8 +519,8 @@ mergeMeta bId groups =
in
fromList
[(
"breaks"
,(
ego
^.
phylo_groupMeta
)
!
"breaks"
),(
"seaLevels"
,(
ego
^.
phylo_groupMeta
)
!
"seaLevels"
)]
groupsToBranches
::
Map
PhyloGroupId
PhyloGroup
->
[[
PhyloGroup
]]
groupsToBranches
groups
=
groupsToBranches
'
::
Map
PhyloGroupId
PhyloGroup
->
[[
PhyloGroup
]]
groupsToBranches
'
groups
=
{- run the related component algorithm -}
let
egos
=
map
(
\
g
->
[
getGroupId
g
]
++
(
map
fst
$
g
^.
phylo_groupPeriodParents
)
...
...
src/Gargantext/Core/Viz/Phylo/SynchronicClustering.hs
View file @
8790c9de
...
...
@@ -60,19 +60,19 @@ mergeGroups coocs id mapIds childs =
mergeAncestors
::
[
Pointer
]
->
[
Pointer
]
mergeAncestors
pointers
=
Map
.
toList
$
fromListWith
max
pointers
addPhylo
Level
::
Level
->
Phylo
->
Phylo
addPhylo
Level
lvl
phylo
=
addPhylo
Scale
::
Scale
->
Phylo
->
Phylo
addPhylo
Scale
lvl
phylo
=
over
(
phylo_periods
.
traverse
)
(
\
phyloPrd
->
phyloPrd
&
phylo_period
Level
s
(
\
phyloPrd
->
phyloPrd
&
phylo_period
Scale
s
%~
(
insert
(
phyloPrd
^.
phylo_periodPeriod
,
lvl
)
(
Phylo
Level
(
phyloPrd
^.
phylo_periodPeriod
)
(
phyloPrd
^.
phylo_periodPeriod'
)
lvl
empty
)))
phylo
(
Phylo
Scale
(
phyloPrd
^.
phylo_periodPeriod
)
(
phyloPrd
^.
phylo_periodPeriodStr
)
lvl
empty
)))
phylo
toNext
Level'
::
Phylo
->
[
PhyloGroup
]
->
Phylo
toNext
Level'
phylo
groups
=
toNext
Scale
::
Phylo
->
[
PhyloGroup
]
->
Phylo
toNext
Scale
phylo
groups
=
let
curLvl
=
getLastLevel
phylo
oldGroups
=
fromList
$
map
(
\
g
->
(
getGroupId
g
,
getLevelParentId
g
))
groups
newGroups
=
concat
$
groupsToBranches
newGroups
=
concat
$
groupsToBranches
'
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
foldlWithKey
(
\
acc
id
groups'
->
-- 4) create the parent group
...
...
@@ -83,17 +83,17 @@ toNextLevel' phylo groups =
newPeriods
=
fromListWith
(
++
)
$
map
(
\
g
->
(
g
^.
phylo_groupPeriod
,
[
g
]))
newGroups
in
traceSynchronyEnd
$
over
(
phylo_periods
.
traverse
.
phylo_period
Level
s
.
traverse
$
over
(
phylo_periods
.
traverse
.
phylo_period
Scale
s
.
traverse
-- 6) update each period at curLvl + 1
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_
levelLevel
==
(
curLvl
+
1
)))
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_
scaleScale
==
(
curLvl
+
1
)))
-- 7) by adding the parents
(
\
phyloLvl
->
if
member
(
phyloLvl
^.
phylo_
level
Period
)
newPeriods
then
phyloLvl
&
phylo_
level
Groups
.~
fromList
(
map
(
\
g
->
(
getGroupId
g
,
g
))
$
newPeriods
!
(
phyloLvl
^.
phylo_
level
Period
))
if
member
(
phyloLvl
^.
phylo_
scale
Period
)
newPeriods
then
phyloLvl
&
phylo_
scale
Groups
.~
fromList
(
map
(
\
g
->
(
getGroupId
g
,
g
))
$
newPeriods
!
(
phyloLvl
^.
phylo_
scale
Period
))
else
phyloLvl
)
-- 2) add the curLvl + 1
phyloLevel
to the phylo
$
addPhylo
Level
(
curLvl
+
1
)
-- 2) add the curLvl + 1
PhyloScale
to the phylo
$
addPhylo
Scale
(
curLvl
+
1
)
-- 1) update the current groups (with level parent pointers) in the phylo
$
updatePhyloGroups
curLvl
(
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
groups
)
phylo
...
...
@@ -150,7 +150,7 @@ groupsToEdges prox sync nbDocs diago groups =
_
->
undefined
toParentId
::
PhyloGroup
->
PhyloGroupId
toParentId
child
=
((
child
^.
phylo_groupPeriod
,
child
^.
phylo_group
Level
+
1
),
child
^.
phylo_groupIndex
)
toParentId
child
=
((
child
^.
phylo_groupPeriod
,
child
^.
phylo_group
Scale
+
1
),
child
^.
phylo_groupIndex
)
reduceGroups
::
Proximity
->
Synchrony
->
Map
Date
Double
->
Map
Date
Cooc
->
[
PhyloGroup
]
->
[
PhyloGroup
]
...
...
@@ -166,7 +166,7 @@ reduceGroups prox sync docs diagos branch =
in
map
(
\
comp
->
-- 4) add to each groups their futur level parent group
let
parentId
=
toParentId
(
head'
"parentId"
comp
)
in
map
(
\
g
->
g
&
phylo_group
Level
Parents
%~
(
++
[(
parentId
,
1
)])
)
comp
)
in
map
(
\
g
->
g
&
phylo_group
Scale
Parents
%~
(
++
[(
parentId
,
1
)])
)
comp
)
-- 3) reduce the graph a a set of related components
$
toRelatedComponents
groups
edges
)
periods
...
...
@@ -185,7 +185,7 @@ adjustClustering sync branches = case sync of
levelUpAncestors
::
[
PhyloGroup
]
->
[
PhyloGroup
]
levelUpAncestors
groups
=
-- 1) create an associative map of (old,new) ids
let
ids'
=
fromList
$
map
(
\
g
->
(
getGroupId
g
,
fst
$
head'
"levelUpAncestors"
(
g
^.
phylo_group
Level
Parents
)))
groups
let
ids'
=
fromList
$
map
(
\
g
->
(
getGroupId
g
,
fst
$
head'
"levelUpAncestors"
(
g
^.
phylo_group
Scale
Parents
)))
groups
in
map
(
\
g
->
let
id'
=
ids'
!
(
getGroupId
g
)
ancestors
=
g
^.
phylo_groupAncestors
...
...
@@ -206,7 +206,7 @@ synchronicClustering phylo =
$
phyloToLastBranches
$
traceSynchronyStart
phylo
newBranches'
=
newBranches
`
using
`
parList
rdeepseq
in
toNext
Level'
phylo
$
levelUpAncestors
$
concat
newBranches'
in
toNext
Scale
phylo
$
levelUpAncestors
$
concat
newBranches'
-- synchronicDistance :: Phylo -> Level -> String
...
...
src/Gargantext/Core/Viz/Phylo/TemporalMatching.hs
View file @
8790c9de
...
...
@@ -13,7 +13,8 @@ module Gargantext.Core.Viz.Phylo.TemporalMatching where
import
Control.Lens
hiding
(
Level
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
import
Data.List
(
concat
,
splitAt
,
tail
,
sortOn
,
(
++
),
intersect
,
null
,
inits
,
groupBy
,
scanl
,
nub
,
nubBy
,
union
,
dropWhile
,
partition
,
or
,
sort
,
(
!!
))
import
Data.Ord
import
Data.List
(
concat
,
splitAt
,
tail
,
sortOn
,
sortBy
,
(
++
),
intersect
,
null
,
inits
,
groupBy
,
scanl
,
nub
,
nubBy
,
union
,
dropWhile
,
partition
,
or
,
sort
,
(
!!
))
import
Data.Map
(
Map
,
fromList
,
elems
,
restrictKeys
,
unionWith
,
findWithDefault
,
keys
,
(
!
),
(
!?
),
filterWithKey
,
singleton
,
empty
,
mapKeys
,
adjust
)
import
Debug.Trace
(
trace
)
import
Gargantext.Core.Viz.Phylo
...
...
@@ -115,7 +116,7 @@ toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' =
-- | Local Matching | --
------------------------
findLastPeriod
::
Filiation
->
[
P
hyloPeriodId
]
->
PhyloPeriodI
d
findLastPeriod
::
Filiation
->
[
P
eriod
]
->
Perio
d
findLastPeriod
fil
periods
=
case
fil
of
ToParents
->
head'
"findLastPeriod"
(
sortOn
fst
periods
)
ToChilds
->
last'
"findLastPeriod"
(
sortOn
fst
periods
)
...
...
@@ -124,7 +125,7 @@ findLastPeriod fil periods = case fil of
-- | To filter pairs of candidates related to old pointers periods
removeOldPointers
::
[
Pointer
]
->
Filiation
->
Double
->
Proximity
->
P
hyloPeriodI
d
removeOldPointers
::
[
Pointer
]
->
Filiation
->
Double
->
Proximity
->
P
erio
d
->
[((
PhyloGroupId
,[
Int
]),(
PhyloGroupId
,[
Int
]))]
->
[((
PhyloGroupId
,[
Int
]),(
PhyloGroupId
,[
Int
]))]
removeOldPointers
oldPointers
fil
thr
prox
prd
pairs
...
...
@@ -144,7 +145,33 @@ removeOldPointers oldPointers fil thr prox prd pairs
|
otherwise
=
[]
makePairs'
::
(
PhyloGroupId
,[
Int
])
->
[(
PhyloGroupId
,[
Int
])]
->
[
PhyloPeriodId
]
->
[
Pointer
]
->
Filiation
->
Double
->
Proximity
makePairs
::
(
PhyloGroupId
,[
Int
])
->
[(
PhyloGroupId
,[
Int
])]
->
[
Period
]
->
[
Pointer
]
->
Filiation
->
Double
->
Proximity
->
Map
Date
Double
->
Map
Date
Cooc
->
[((
PhyloGroupId
,[
Int
]),(
PhyloGroupId
,[
Int
]))]
makePairs
(
egoId
,
egoNgrams
)
candidates
periods
oldPointers
fil
thr
prox
docs
diagos
=
if
(
null
periods
)
then
[]
else
removeOldPointers
oldPointers
fil
thr
prox
lastPrd
{- at least on of the pair candidates should be from the last added period -}
$
filter
(
\
((
id
,
_
),(
id'
,
_
))
->
((
fst
.
fst
)
id
==
lastPrd
)
||
((
fst
.
fst
)
id'
==
lastPrd
))
$
filter
(
\
((
id
,
_
),(
id'
,
_
))
->
(
elem
id
inPairs
)
||
(
elem
id'
inPairs
))
$
listToKeys
candidates
where
--------------------------------------
inPairs
::
[
PhyloGroupId
]
inPairs
=
map
fst
$
filter
(
\
(
id
,
ngrams
)
->
let
nbDocs
=
(
sum
.
elems
)
$
filterDocs
docs
([(
fst
.
fst
)
egoId
,
(
fst
.
fst
)
id
])
diago
=
reduceDiagos
$
filterDiago
diagos
([(
fst
.
fst
)
egoId
,
(
fst
.
fst
)
id
])
in
(
toProximity
nbDocs
diago
prox
egoNgrams
egoNgrams
ngrams
)
>=
thr
)
candidates
--------------------------------------
lastPrd
::
Period
lastPrd
=
findLastPeriod
fil
periods
--------------------------------------
makePairs'
::
(
PhyloGroupId
,[
Int
])
->
[(
PhyloGroupId
,[
Int
])]
->
[
Period
]
->
[
Pointer
]
->
Filiation
->
Double
->
Proximity
->
Map
Date
Double
->
Map
Date
Cooc
->
[((
PhyloGroupId
,[
Int
]),(
PhyloGroupId
,[
Int
]))]
makePairs'
(
egoId
,
egoNgrams
)
candidates
periods
oldPointers
fil
thr
prox
docs
diagos
=
if
(
null
periods
)
...
...
@@ -159,7 +186,7 @@ makePairs' (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs d
in
(
toProximity
nbDocs
diago
prox
egoNgrams
egoNgrams
ngrams
)
>=
thr
)
candidates
where
lastPrd
::
P
hyloPeriodI
d
lastPrd
::
P
erio
d
lastPrd
=
findLastPeriod
fil
periods
...
...
@@ -190,16 +217,17 @@ filterPointersByPeriod fil pts =
phyloGroupMatching
::
[[(
PhyloGroupId
,[
Int
])]]
->
Filiation
->
Proximity
->
Map
Date
Double
->
Map
Date
Cooc
->
Double
->
[
Pointer
]
->
(
PhyloGroupId
,[
Int
])
->
[
Pointer
]
phyloGroupMatching
candidates
fil
proxi
docs
diagos
thr
oldPointers
(
id
,
ngrams
)
=
phyloGroupMatching
candidates
fil
iation
proxi
docs
diagos
thr
oldPointers
(
id
,
ngrams
)
=
if
(
null
$
filterPointers
proxi
thr
oldPointers
)
{- let's find new pointers -}
then
if
null
nextPointers
then
[]
else
filterPointersByPeriod
fil
else
filterPointersByPeriod
fil
iation
$
head'
"phyloGroupMatching"
-- Keep only the best set of pointers grouped by proximity
$
groupBy
(
\
pt
pt'
->
(
snd
.
fst
)
pt
==
(
snd
.
fst
)
pt'
)
$
reverse
$
sortOn
(
snd
.
fst
)
$
head'
"pointers"
nextPointers
-- verifier que l on garde bien les plus importants
$
sortBy
(
comparing
(
Down
.
snd
.
fst
))
$
head'
"pointers"
nextPointers
-- Find the first time frame where at leats one pointer satisfies the proximity threshold
else
oldPointers
where
...
...
@@ -212,8 +240,7 @@ phyloGroupMatching candidates fil proxi docs diagos thr oldPointers (id,ngrams)
nbdocs
=
sum
$
elems
$
(
filterDocs
docs
([(
fst
.
fst
)
id
]
++
periods
))
diago
=
reduceDiagos
$
filterDiago
diagos
([(
fst
.
fst
)
id
]
++
periods
)
{- important resize nbdocs et diago dans le make pairs -}
pairs
=
makePairs'
(
id
,
ngrams
)
(
concat
groups
)
periods
oldPointers
fil
thr
proxi
docs
diagos
pairs
=
makePairs
(
id
,
ngrams
)
(
concat
groups
)
periods
oldPointers
filiation
thr
proxi
docs
diagos
in
acc
++
(
filterPointers'
proxi
thr
$
concat
$
map
(
\
(
c
,
c'
)
->
...
...
@@ -225,10 +252,10 @@ phyloGroupMatching candidates fil proxi docs diagos thr oldPointers (id,ngrams)
$
inits
candidates
-- groups from [[1900],[1900,1901],[1900,1901,1902],...]
filterDocs
::
Map
Date
Double
->
[
P
hyloPeriodI
d
]
->
Map
Date
Double
filterDocs
::
Map
Date
Double
->
[
P
erio
d
]
->
Map
Date
Double
filterDocs
d
pds
=
restrictKeys
d
$
periodsToYears
pds
filterDiago
::
Map
Date
Cooc
->
[
P
hyloPeriodI
d
]
->
Map
Date
Cooc
filterDiago
::
Map
Date
Cooc
->
[
P
erio
d
]
->
Map
Date
Cooc
filterDiago
diago
pds
=
restrictKeys
diago
$
periodsToYears
pds
...
...
@@ -237,7 +264,7 @@ filterDiago diago pds = restrictKeys diago $ periodsToYears pds
-----------------------------
getNextPeriods
::
Filiation
->
Int
->
P
hyloPeriodId
->
[
PhyloPeriodId
]
->
[
PhyloPeriodI
d
]
getNextPeriods
::
Filiation
->
Int
->
P
eriod
->
[
Period
]
->
[
Perio
d
]
getNextPeriods
fil
max'
pId
pIds
=
case
fil
of
ToChilds
->
take
max'
$
(
tail
.
snd
)
$
splitAt
(
elemIndex'
pId
pIds
)
pIds
...
...
@@ -255,7 +282,7 @@ getCandidates ego targets =
map
(
\
groups'
->
filter
(
\
g'
->
(
not
.
null
)
$
intersect
(
ego
^.
phylo_groupNgrams
)
(
snd
g'
))
groups'
)
targets
matchGroupsToGroups
::
Int
->
[
P
hyloPeriodI
d
]
->
Proximity
->
Double
->
Map
Date
Double
->
Map
Date
Cooc
->
[
PhyloGroup
]
->
[
PhyloGroup
]
matchGroupsToGroups
::
Int
->
[
P
erio
d
]
->
Proximity
->
Double
->
Map
Date
Double
->
Map
Date
Cooc
->
[
PhyloGroup
]
->
[
PhyloGroup
]
matchGroupsToGroups
frame
periods
proximity
thr
docs
coocs
groups
=
let
groups'
=
groupByField
_phylo_groupPeriod
groups
in
foldl'
(
\
acc
prd
->
...
...
@@ -319,18 +346,6 @@ fScore lambda x periods bk bx =
wk
::
[
PhyloGroup
]
->
Double
wk
bk
=
fromIntegral
$
length
bk
toPhyloQuality'
::
Double
->
Map
Int
Double
->
[[
PhyloGroup
]]
->
Double
toPhyloQuality'
lambda
freq
branches
=
if
(
null
branches
)
then
0
else
sum
$
map
(
\
i
->
let
bks
=
relevantBranches
i
branches
periods
=
nub
$
map
_phylo_groupPeriod
$
filter
(
\
g
->
elem
i
$
g
^.
phylo_groupNgrams
)
$
concat
bks
in
(
freq
!
i
)
*
(
sum
$
map
(
\
bk
->
((
wk
bk
)
/
(
sum
$
map
wk
bks
))
*
(
fScore
lambda
i
periods
bk
bks
))
bks
))
$
keys
freq
toRecall
::
Map
Int
Double
->
[[
PhyloGroup
]]
->
Double
toRecall
freq
branches
=
if
(
null
branches
)
...
...
@@ -391,9 +406,8 @@ toPhyloQuality fdt lambda freq branches =
-- | Constant Temporal Matching | --
------------------------------------
groupsToBranches'
::
Map
PhyloGroupId
PhyloGroup
->
[[
PhyloGroup
]]
groupsToBranches'
groups
=
groupsToBranches
::
Map
PhyloGroupId
PhyloGroup
->
[[
PhyloGroup
]]
groupsToBranches
groups
=
{- run the related component algorithm -}
let
egos
=
groupBy
(
\
gs
gs'
->
(
fst
$
fst
$
head'
"egos"
gs
)
==
(
fst
$
fst
$
head'
"egos"
gs'
))
$
sortOn
(
\
gs
->
fst
$
fst
$
head'
"egos"
gs
)
...
...
@@ -427,12 +441,12 @@ updateThr thr branches = map (\b -> map (\g ->
-- ego = the current branch we want to break
-- rest = the branches we still have to break
breakBranches
::
Double
->
Proximity
->
Double
->
Map
Int
Double
->
Int
->
Double
->
Double
->
Double
->
Int
->
Map
Date
Double
->
Map
Date
Cooc
->
[
P
hyloPeriodI
d
]
->
[([
PhyloGroup
],
Bool
)]
->
([
PhyloGroup
],
Bool
)
->
[([
PhyloGroup
],
Bool
)]
->
[([
PhyloGroup
],
Bool
)]
->
Int
->
Map
Date
Double
->
Map
Date
Cooc
->
[
P
erio
d
]
->
[([
PhyloGroup
],
Bool
)]
->
([
PhyloGroup
],
Bool
)
->
[([
PhyloGroup
],
Bool
)]
->
[([
PhyloGroup
],
Bool
)]
breakBranches
fdt
proximity
lambda
frequency
minBranch
thr
depth
elevation
frame
docs
coocs
periods
done
ego
rest
=
-- 1) keep or not the new division of ego
let
done'
=
done
++
(
if
snd
ego
then
(
if
((
null
(
fst
ego'
))
||
(
quality
>
quality'
))
(
if
((
null
(
fst
ego'
))
||
(
quality
>
quality'
))
then
-- trace (" ✗ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
-- <> " | " <> show(length $ fst ego) <> " groups : "
...
...
@@ -460,7 +474,7 @@ breakBranches fdt proximity lambda frequency minBranch thr depth elevation frame
--------------------------------------
ego'
::
([[
PhyloGroup
]],[[
PhyloGroup
]])
ego'
=
let
branches
=
groupsToBranches
'
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
let
branches
=
groupsToBranches
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
matchGroupsToGroups
frame
periods
proximity
thr
docs
coocs
(
fst
ego
)
branches'
=
branches
`
using
`
parList
rdeepseq
in
partition
(
\
b
->
(
length
$
nub
$
map
_phylo_groupPeriod
b
)
>=
minBranch
)
...
...
@@ -473,11 +487,11 @@ breakBranches fdt proximity lambda frequency minBranch thr depth elevation frame
seaLevelMatching
::
Double
->
Proximity
->
Double
->
Int
->
Map
Int
Double
->
Double
->
Double
->
Double
->
Double
->
Int
->
[
P
hyloPeriodId
]
->
Map
Date
Double
->
Map
Date
Cooc
->
[([
PhyloGroup
],
Bool
)]
->
[([
PhyloGroup
],
Bool
)]
->
Int
->
[
P
eriod
]
->
Map
Date
Double
->
Map
Date
Cooc
->
[([
PhyloGroup
],
Bool
)]
->
([([
PhyloGroup
],
Bool
)],
Double
)
seaLevelMatching
fdt
proximity
lambda
minBranch
frequency
thr
step
depth
elevation
frame
periods
docs
coocs
branches
=
-- if there is no branch to break or if seaLvl level > 1 then end
if
(
thr
>=
1
)
||
((
not
.
or
)
$
map
snd
branches
)
then
branches
then
(
branches
,
toPhyloQuality
fdt
lambda
frequency
(
map
fst
branches
))
else
-- break all the possible branches at the current seaLvl level
let
quality
=
toPhyloQuality
fdt
lambda
frequency
(
map
fst
branches
)
...
...
@@ -495,13 +509,13 @@ seaLevelMatching fdt proximity lambda minBranch frequency thr step depth elevati
constanteTemporalMatching
::
Double
->
Double
->
Phylo
->
Phylo
constanteTemporalMatching
start
step
phylo
=
updatePhyloGroups
1
(
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
traceMatchEnd
$
concat
branches
)
(
toPhyloHorizon
phylo
)
(
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
traceMatchEnd
$
concat
(
map
fst
$
(
fst
branches
))
)
(
toPhyloHorizon
(
updateQuality
(
snd
branches
)
phylo
)
)
where
-- 2) process the temporal matching by elevating seaLvl level
branches
::
[[
PhyloGroup
]]
branches
=
map
fst
$
seaLevelMatching
(
fromIntegral
$
Vector
.
length
$
getRoots
phylo
)
-- branches :: ([([groups in the same branch],should we still break the branch?)],final quality)
branches
::
([([
PhyloGroup
],
Bool
)],
Double
)
branches
=
seaLevelMatching
(
fromIntegral
$
Vector
.
length
$
getRoots
phylo
)
(
phyloProximity
$
getConfig
phylo
)
(
_qua_granularity
$
phyloQuality
$
getConfig
phylo
)
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
)
...
...
@@ -518,7 +532,7 @@ constanteTemporalMatching start step phylo = updatePhyloGroups 1
-- here we suppose that all the groups of level 1 are part of the same big branch
groups
::
[([
PhyloGroup
],
Bool
)]
groups
=
map
(
\
b
->
(
b
,(
length
$
nub
$
map
_phylo_groupPeriod
b
)
>=
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
)))
$
groupsToBranches
'
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
groupsToBranches
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
matchGroupsToGroups
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
(
phyloProximity
$
getConfig
phylo
)
start
...
...
@@ -583,7 +597,7 @@ toThreshold lvl proxiGroups =
-- rest = the branches we still have to break
adaptativeBreakBranches
::
Double
->
Proximity
->
Double
->
Double
->
Map
(
PhyloGroupId
,
PhyloGroupId
)
Double
->
Double
->
Map
Int
Double
->
Int
->
Int
->
Map
Date
Double
->
Map
Date
Cooc
->
[
P
hyloPeriodI
d
]
->
[([
PhyloGroup
],(
Bool
,[
Double
]))]
->
([
PhyloGroup
],(
Bool
,[
Double
]))
->
[([
PhyloGroup
],(
Bool
,[
Double
]))]
->
[
P
erio
d
]
->
[([
PhyloGroup
],(
Bool
,[
Double
]))]
->
([
PhyloGroup
],(
Bool
,[
Double
]))
->
[([
PhyloGroup
],(
Bool
,[
Double
]))]
->
[([
PhyloGroup
],(
Bool
,[
Double
]))]
adaptativeBreakBranches
fdt
proxiConf
depth
elevation
groupsProxi
lambda
frequency
minBranch
frame
docs
coocs
periods
done
ego
rest
=
-- 1) keep or not the new division of ego
...
...
@@ -617,7 +631,7 @@ adaptativeBreakBranches fdt proxiConf depth elevation groupsProxi lambda freque
--------------------------------------
ego'
::
([[
PhyloGroup
]],[[
PhyloGroup
]])
ego'
=
let
branches
=
groupsToBranches
'
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
let
branches
=
groupsToBranches
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
matchGroupsToGroups
frame
periods
proxiConf
thr
docs
coocs
(
fst
ego
)
branches'
=
branches
`
using
`
parList
rdeepseq
in
partition
(
\
b
->
(
length
$
nub
$
map
_phylo_groupPeriod
b
)
>
minBranch
)
...
...
@@ -631,7 +645,7 @@ adaptativeBreakBranches fdt proxiConf depth elevation groupsProxi lambda freque
adaptativeSeaLevelMatching
::
Double
->
Proximity
->
Double
->
Double
->
Map
(
PhyloGroupId
,
PhyloGroupId
)
Double
->
Double
->
Int
->
Map
Int
Double
->
Int
->
[
P
hyloPeriodI
d
]
->
Map
Date
Double
->
Map
Date
Cooc
->
Int
->
[
P
erio
d
]
->
Map
Date
Double
->
Map
Date
Cooc
->
[([
PhyloGroup
],(
Bool
,[
Double
]))]
->
[([
PhyloGroup
],(
Bool
,[
Double
]))]
adaptativeSeaLevelMatching
fdt
proxiConf
depth
elevation
groupsProxi
lambda
minBranch
frequency
frame
periods
docs
coocs
branches
=
-- if there is no branch to break or if seaLvl level >= depth then end
...
...
@@ -676,7 +690,7 @@ adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1
-- here we suppose that all the groups of level 1 are part of the same big branch
groups
::
[([
PhyloGroup
],(
Bool
,[
Double
]))]
groups
=
map
(
\
b
->
(
b
,((
length
$
nub
$
map
_phylo_groupPeriod
b
)
>=
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
),[
thr
])))
$
groupsToBranches
'
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
groupsToBranches
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
matchGroupsToGroups
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
(
phyloProximity
$
getConfig
phylo
)
thr
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment