Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
152
Issues
152
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
386ad673
Commit
386ad673
authored
Mar 03, 2023
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
1 click phylo v1 is ok
parent
968d52b3
Pipeline
#3719
failed with stage
in 72 minutes
Changes
8
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
295 additions
and
114 deletions
+295
-114
Main.hs
bin/gargantext-phylo/Main.hs
+1
-0
Phylo.hs
src/Gargantext/Core/Viz/Phylo.hs
+24
-11
Example.hs
src/Gargantext/Core/Viz/Phylo/Example.hs
+4
-11
PhyloExport.hs
src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
+10
-8
PhyloMaker.hs
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
+143
-71
PhyloTools.hs
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
+103
-3
SynchronicClustering.hs
src/Gargantext/Core/Viz/Phylo/SynchronicClustering.hs
+3
-3
TemporalMatching.hs
src/Gargantext/Core/Viz/Phylo/TemporalMatching.hs
+7
-7
No files found.
bin/gargantext-phylo/Main.hs
View file @
386ad673
...
@@ -149,6 +149,7 @@ seaToLabel :: PhyloConfig -> [Char]
...
@@ -149,6 +149,7 @@ seaToLabel :: PhyloConfig -> [Char]
seaToLabel
config
=
case
(
seaElevation
config
)
of
seaToLabel
config
=
case
(
seaElevation
config
)
of
Constante
start
step
->
(
"sea_cst_"
<>
(
show
start
)
<>
"_"
<>
(
show
step
))
Constante
start
step
->
(
"sea_cst_"
<>
(
show
start
)
<>
"_"
<>
(
show
step
))
Adaptative
granularity
->
(
"sea_adapt"
<>
(
show
granularity
))
Adaptative
granularity
->
(
"sea_adapt"
<>
(
show
granularity
))
Evolving
_
->
(
"sea_evolv"
)
sensToLabel
::
PhyloConfig
->
[
Char
]
sensToLabel
::
PhyloConfig
->
[
Char
]
...
...
src/Gargantext/Core/Viz/Phylo.hs
View file @
386ad673
...
@@ -31,7 +31,6 @@ import Control.Lens (makeLenses)
...
@@ -31,7 +31,6 @@ import Control.Lens (makeLenses)
import
Data.Aeson
import
Data.Aeson
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Map
(
Map
)
import
Data.Map
(
Map
)
import
Data.Set
(
Set
)
import
Data.Swagger
import
Data.Swagger
import
Data.Text
(
Text
,
pack
)
import
Data.Text
(
Text
,
pack
)
import
Data.Vector
(
Vector
)
import
Data.Vector
(
Vector
)
...
@@ -66,6 +65,8 @@ data SeaElevation =
...
@@ -66,6 +65,8 @@ data SeaElevation =
,
_cons_gap
::
Double
}
,
_cons_gap
::
Double
}
|
Adaptative
|
Adaptative
{
_adap_steps
::
Double
}
{
_adap_steps
::
Double
}
|
Evolving
{
_evol_neighborhood
::
Bool
}
deriving
(
Show
,
Generic
,
Eq
)
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
SeaElevation
instance
ToSchema
SeaElevation
...
@@ -180,6 +181,7 @@ data PhyloConfig =
...
@@ -180,6 +181,7 @@ data PhyloConfig =
,
phyloScale
::
Int
,
phyloScale
::
Int
,
similarity
::
Similarity
,
similarity
::
Similarity
,
seaElevation
::
SeaElevation
,
seaElevation
::
SeaElevation
,
defaultMode
::
Bool
,
findAncestors
::
Bool
,
findAncestors
::
Bool
,
phyloSynchrony
::
Synchrony
,
phyloSynchrony
::
Synchrony
,
phyloQuality
::
Quality
,
phyloQuality
::
Quality
...
@@ -215,7 +217,7 @@ subConfig2config subConfig = defaultConfig { similarity = WeightedLogJaccard
...
@@ -215,7 +217,7 @@ subConfig2config subConfig = defaultConfig { similarity = WeightedLogJaccard
------------------------------------------------------------------------
------------------------------------------------------------------------
defaultConfig
::
PhyloConfig
defaultConfig
::
PhyloConfig
defaultConfig
=
defaultConfig
=
PhyloConfig
{
corpusPath
=
"corpus.csv"
-- useful for commandline only
PhyloConfig
{
corpusPath
=
"corpus.csv"
-- useful for commandline only
,
listPath
=
"list.csv"
-- useful for commandline only
,
listPath
=
"list.csv"
-- useful for commandline only
,
outputPath
=
"data/"
,
outputPath
=
"data/"
,
corpusParser
=
Csv
100000
,
corpusParser
=
Csv
100000
...
@@ -224,6 +226,7 @@ defaultConfig =
...
@@ -224,6 +226,7 @@ defaultConfig =
,
phyloScale
=
2
,
phyloScale
=
2
,
similarity
=
WeightedLogJaccard
0.5
1
,
similarity
=
WeightedLogJaccard
0.5
1
,
seaElevation
=
Constante
0.1
0.1
,
seaElevation
=
Constante
0.1
0.1
,
defaultMode
=
True
,
findAncestors
=
False
,
findAncestors
=
False
,
phyloSynchrony
=
ByProximityThreshold
0.5
0
AllBranches
MergeAllGroups
,
phyloSynchrony
=
ByProximityThreshold
0.5
0
AllBranches
MergeAllGroups
,
phyloQuality
=
Quality
0.5
1
,
phyloQuality
=
Quality
0.5
1
...
@@ -365,14 +368,21 @@ data PhyloFoundations = PhyloFoundations
...
@@ -365,14 +368,21 @@ data PhyloFoundations = PhyloFoundations
,
_foundations_rootsInGroups
::
Map
Int
[
PhyloGroupId
]
-- map of roots associated to groups
,
_foundations_rootsInGroups
::
Map
Int
[
PhyloGroupId
]
-- map of roots associated to groups
}
deriving
(
Generic
,
Show
,
Eq
)
}
deriving
(
Generic
,
Show
,
Eq
)
instance
ToSchema
PhyloFoundations
where
data
PhyloCounts
=
PhyloCounts
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_foundations_"
)
{
coocByDate
::
!
(
Map
Date
Cooc
)
,
docsByDate
::
!
(
Map
Date
Double
)
,
rootsCount
::
!
(
Map
Int
Double
)
,
rootsFreq
::
!
(
Map
Int
Double
)
,
lastRootsFreq
::
!
(
Map
Int
Double
)
}
deriving
(
Generic
,
Show
,
Eq
)
data
PhyloSources
=
PhyloSources
data
PhyloSources
=
PhyloSources
{
_sources
::
!
(
Vector
Text
)
}
deriving
(
Generic
,
Show
,
Eq
)
{
_sources
::
!
(
Vector
Text
)
}
deriving
(
Generic
,
Show
,
Eq
)
instance
ToSchema
PhyloFoundations
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_foundations_"
)
instance
ToSchema
PhyloCounts
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_"
)
instance
ToSchema
PhyloSources
where
instance
ToSchema
PhyloSources
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_"
)
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_"
)
...
@@ -396,6 +406,8 @@ type Period = (Date,Date)
...
@@ -396,6 +406,8 @@ type Period = (Date,Date)
type
PeriodStr
=
(
DateStr
,
DateStr
)
type
PeriodStr
=
(
DateStr
,
DateStr
)
-- | Phylo datatype of a phylomemy
-- | Phylo datatype of a phylomemy
-- foundations : the foundations of the phylo
-- foundations : the foundations of the phylo
-- timeCooc : a Map of coocurency by minimal unit of time (ex: by year)
-- timeCooc : a Map of coocurency by minimal unit of time (ex: by year)
...
@@ -405,14 +417,12 @@ type PeriodStr = (DateStr,DateStr)
...
@@ -405,14 +417,12 @@ type PeriodStr = (DateStr,DateStr)
data
Phylo
=
data
Phylo
=
Phylo
{
_phylo_foundations
::
PhyloFoundations
Phylo
{
_phylo_foundations
::
PhyloFoundations
,
_phylo_sources
::
PhyloSources
,
_phylo_sources
::
PhyloSources
,
_phylo_timeCooc
::
!
(
Map
Date
Cooc
)
,
_phylo_counts
::
PhyloCounts
,
_phylo_timeDocs
::
!
(
Map
Date
Double
)
,
_phylo_seaLadder
::
[
Double
]
,
_phylo_termFreq
::
!
(
Map
Int
Double
)
,
_phylo_lastTermFreq
::
!
(
Map
Int
Double
)
,
_phylo_diaSimScan
::
Set
Double
,
_phylo_param
::
PhyloParam
,
_phylo_param
::
PhyloParam
,
_phylo_periods
::
Map
Period
PhyloPeriod
,
_phylo_periods
::
Map
Period
PhyloPeriod
,
_phylo_quality
::
Double
,
_phylo_quality
::
Double
,
_phylo_level
::
Double
}
}
deriving
(
Generic
,
Show
,
Eq
)
deriving
(
Generic
,
Show
,
Eq
)
...
@@ -620,6 +630,9 @@ instance ToJSON PhyloSources
...
@@ -620,6 +630,9 @@ instance ToJSON PhyloSources
instance
FromJSON
PhyloParam
instance
FromJSON
PhyloParam
instance
ToJSON
PhyloParam
instance
ToJSON
PhyloParam
instance
FromJSON
PhyloCounts
instance
ToJSON
PhyloCounts
instance
FromJSON
PhyloPeriod
instance
FromJSON
PhyloPeriod
instance
ToJSON
PhyloPeriod
instance
ToJSON
PhyloPeriod
...
...
src/Gargantext/Core/Viz/Phylo/Example.hs
View file @
386ad673
...
@@ -30,7 +30,6 @@ import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
...
@@ -30,7 +30,6 @@ import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import
Gargantext.Core.Viz.Phylo.TemporalMatching
(
temporalMatching
)
import
Gargantext.Core.Viz.Phylo.TemporalMatching
(
temporalMatching
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
qualified
Data.Vector
as
Vector
import
qualified
Data.Vector
as
Vector
import
qualified
Data.Set
as
Set
import
qualified
Data.Map
as
Map
import
qualified
Data.Map
as
Map
---------------------------------
---------------------------------
...
@@ -55,15 +54,11 @@ phyloCleopatre = synchronicClustering $ toHorizon flatPhylo
...
@@ -55,15 +54,11 @@ phyloCleopatre = synchronicClustering $ toHorizon flatPhylo
-----------------------------------------------
-----------------------------------------------
flatPhylo
::
Phylo
flatPhylo
::
Phylo
flatPhylo
=
case
(
getSeaElevation
emptyPhylo
)
of
flatPhylo
=
temporalMatching
(
getLadder
emptyPhylo'
)
emptyPhylo'
Constante
s
g
->
temporalMatching
(
constDiachronicLadder
s
g
Set
.
empty
)
$
scanSimilarity
1
$
appendGroups
clusterToGroup
1
seriesOfClustering
emptyPhylo
Adaptative
s
->
temporalMatching
(
adaptDiachronicLadder
s
(
emptyPhylo'
^.
phylo_diaSimScan
)
Set
.
empty
)
emptyPhylo'
emptyPhylo'
::
Phylo
emptyPhylo'
::
Phylo
emptyPhylo'
=
scanSimilarity
1
emptyPhylo'
=
joinRoots
$
joinRootsToGroups
$
findSeaLadder
$
appendGroups
clusterToGroup
1
seriesOfClustering
emptyPhylo
$
appendGroups
clusterToGroup
1
seriesOfClustering
emptyPhylo
---------------------------------------------
---------------------------------------------
...
@@ -83,11 +78,9 @@ docsByPeriods = groupDocsByPeriod date periods docs
...
@@ -83,11 +78,9 @@ docsByPeriods = groupDocsByPeriod date periods docs
-- | STEP 1 | -- Init the Phylo
-- | STEP 1 | -- Init the Phylo
---------------------------------
---------------------------------
emptyPhylo
::
Phylo
emptyPhylo
::
Phylo
emptyPhylo
=
initPhylo
docs
config
emptyPhylo
=
initPhylo
docs
config
phyloCooc
::
Map
Date
Cooc
phyloCooc
::
Map
Date
Cooc
phyloCooc
=
docsToTimeScaleCooc
docs
(
foundations
^.
foundations_roots
)
phyloCooc
=
docsToTimeScaleCooc
docs
(
foundations
^.
foundations_roots
)
...
@@ -106,7 +99,7 @@ config :: PhyloConfig
...
@@ -106,7 +99,7 @@ config :: PhyloConfig
config
=
config
=
defaultConfig
{
phyloName
=
"Cesar et Cleopatre"
defaultConfig
{
phyloName
=
"Cesar et Cleopatre"
,
phyloScale
=
2
,
phyloScale
=
2
,
seaElevation
=
Adaptative
4
,
seaElevation
=
Evolving
True
,
exportFilter
=
[
ByBranchSize
0
]
,
exportFilter
=
[
ByBranchSize
0
]
,
clique
=
MaxClique
0
15
ByNeighbours
}
,
clique
=
MaxClique
0
15
ByNeighbours
}
...
...
src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
View file @
386ad673
...
@@ -214,13 +214,13 @@ exportToDot phylo export =
...
@@ -214,13 +214,13 @@ exportToDot phylo export =
{-- home made attributes -}
{-- home made attributes -}
<>
[(
toAttr
(
fromStrict
"phyloFoundations"
)
$
pack
$
show
(
length
$
Vector
.
toList
$
getRoots
phylo
))
<>
[(
toAttr
(
fromStrict
"phyloFoundations"
)
$
pack
$
show
(
length
$
Vector
.
toList
$
getRoots
phylo
))
,(
toAttr
(
fromStrict
"phyloTerms"
)
$
pack
$
show
(
length
$
nub
$
concat
$
map
(
\
g
->
g
^.
phylo_groupNgrams
)
$
export
^.
export_groups
))
,(
toAttr
(
fromStrict
"phyloTerms"
)
$
pack
$
show
(
length
$
nub
$
concat
$
map
(
\
g
->
g
^.
phylo_groupNgrams
)
$
export
^.
export_groups
))
,(
toAttr
(
fromStrict
"phyloDocs"
)
$
pack
$
show
(
sum
$
elems
$
phylo
^.
phylo_timeDocs
))
,(
toAttr
(
fromStrict
"phyloDocs"
)
$
pack
$
show
(
sum
$
elems
$
getDocsByDate
phylo
))
,(
toAttr
(
fromStrict
"phyloPeriods"
)
$
pack
$
show
(
length
$
elems
$
phylo
^.
phylo_periods
))
,(
toAttr
(
fromStrict
"phyloPeriods"
)
$
pack
$
show
(
length
$
elems
$
phylo
^.
phylo_periods
))
,(
toAttr
(
fromStrict
"phyloBranches"
)
$
pack
$
show
(
length
$
export
^.
export_branches
))
,(
toAttr
(
fromStrict
"phyloBranches"
)
$
pack
$
show
(
length
$
export
^.
export_branches
))
,(
toAttr
(
fromStrict
"phyloGroups"
)
$
pack
$
show
(
length
$
export
^.
export_groups
))
,(
toAttr
(
fromStrict
"phyloGroups"
)
$
pack
$
show
(
length
$
export
^.
export_groups
))
,(
toAttr
(
fromStrict
"phyloSources"
)
$
pack
$
show
(
Vector
.
toList
$
getSources
phylo
))
,(
toAttr
(
fromStrict
"phyloSources"
)
$
pack
$
show
(
Vector
.
toList
$
getSources
phylo
))
,(
toAttr
(
fromStrict
"phyloTimeScale"
)
$
pack
$
getTimeScale
phylo
)
,(
toAttr
(
fromStrict
"phyloTimeScale"
)
$
pack
$
getTimeScale
phylo
)
,(
toAttr
(
fromStrict
"PhyloScale"
)
$
pack
$
show
(
_qua_granularity
$
phyloQuality
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"PhyloScale"
)
$
pack
$
show
(
getLevel
phylo
))
,(
toAttr
(
fromStrict
"phyloQuality"
)
$
pack
$
show
(
phylo
^.
phylo_quality
))
,(
toAttr
(
fromStrict
"phyloQuality"
)
$
pack
$
show
(
phylo
^.
phylo_quality
))
,(
toAttr
(
fromStrict
"phyloSeaRiseStart"
)
$
pack
$
show
(
getPhyloSeaRiseStart
phylo
))
,(
toAttr
(
fromStrict
"phyloSeaRiseStart"
)
$
pack
$
show
(
getPhyloSeaRiseStart
phylo
))
,(
toAttr
(
fromStrict
"phyloSeaRiseSteps"
)
$
pack
$
show
(
getPhyloSeaRiseSteps
phylo
))
,(
toAttr
(
fromStrict
"phyloSeaRiseSteps"
)
$
pack
$
show
(
getPhyloSeaRiseSteps
phylo
))
...
@@ -375,7 +375,8 @@ processSort sort' elev export = case sort' of
...
@@ -375,7 +375,8 @@ processSort sort' elev export = case sort' of
ByBirthDate
o
->
sortByBirthDate
o
export
ByBirthDate
o
->
sortByBirthDate
o
export
ByHierarchy
_
->
case
elev
of
ByHierarchy
_
->
case
elev
of
Constante
s
s'
->
export
&
export_branches
.~
(
branchToIso'
s
s'
$
sortByHierarchy
0
(
export
^.
export_branches
))
Constante
s
s'
->
export
&
export_branches
.~
(
branchToIso'
s
s'
$
sortByHierarchy
0
(
export
^.
export_branches
))
Adaptative
_
->
export
&
export_branches
.~
(
branchToIso
$
sortByHierarchy
0
(
export
^.
export_branches
))
Adaptative
_
->
export
&
export_branches
.~
(
branchToIso
$
sortByHierarchy
0
(
export
^.
export_branches
))
Evolving
_
->
export
&
export_branches
.~
(
branchToIso
$
sortByHierarchy
0
(
export
^.
export_branches
))
-----------------
-----------------
-- | Metrics | --
-- | Metrics | --
...
@@ -416,7 +417,7 @@ ngramsMetrics phylo export =
...
@@ -416,7 +417,7 @@ ngramsMetrics phylo export =
&
phylo_groupMeta
%~
insert
"inclusion"
&
phylo_groupMeta
%~
insert
"inclusion"
(
map
(
\
n
->
inclusion
(
g
^.
phylo_groupCooc
)
((
g
^.
phylo_groupNgrams
)
\\
[
n
])
n
)
$
g
^.
phylo_groupNgrams
)
(
map
(
\
n
->
inclusion
(
g
^.
phylo_groupCooc
)
((
g
^.
phylo_groupNgrams
)
\\
[
n
])
n
)
$
g
^.
phylo_groupNgrams
)
&
phylo_groupMeta
%~
insert
"frequence"
&
phylo_groupMeta
%~
insert
"frequence"
(
map
(
\
n
->
getInMap
n
(
phylo
^.
phylo_lastTermFreq
))
$
g
^.
phylo_groupNgrams
)
(
map
(
\
n
->
getInMap
n
(
getLastRootsFreq
phylo
))
$
g
^.
phylo_groupNgrams
)
)
export
)
export
...
@@ -643,12 +644,13 @@ toHorizon phylo =
...
@@ -643,12 +644,13 @@ toHorizon phylo =
heads
=
filter
(
\
g
->
(
not
.
null
)
$
(
g
^.
phylo_groupPeriodChilds
))
heads
=
filter
(
\
g
->
(
not
.
null
)
$
(
g
^.
phylo_groupPeriodChilds
))
$
filter
(
\
g
->
null
(
g
^.
phylo_groupPeriodParents
)
&&
(
notElem
(
getGroupId
g
)
childs
))
groups
$
filter
(
\
g
->
null
(
g
^.
phylo_groupPeriodParents
)
&&
(
notElem
(
getGroupId
g
)
childs
))
groups
noHeads
=
groups
\\
heads
noHeads
=
groups
\\
heads
nbDocs
=
sum
$
elems
$
filterDocs
(
phylo
^.
phylo_timeDocs
)
[
prd
]
nbDocs
=
sum
$
elems
$
filterDocs
(
getDocsByDate
phylo
)
[
prd
]
diago
=
reduceDiagos
$
filterDiago
(
phylo
^.
phylo_timeCooc
)
[
prd
]
diago
=
reduceDiagos
$
filterDiago
(
getCoocByDate
phylo
)
[
prd
]
sim
=
(
similarity
$
getConfig
phylo
)
sim
=
(
similarity
$
getConfig
phylo
)
step
=
case
getSeaElevation
phylo
of
step
=
case
getSeaElevation
phylo
of
Constante
_
s
->
s
Constante
_
s
->
s
Adaptative
_
->
0
Adaptative
_
->
0
Evolving
_
->
0
-- in headsToAncestors nbDocs diago Similarity heads groups []
-- in headsToAncestors nbDocs diago Similarity heads groups []
in
map
(
\
ego
->
toAncestor
nbDocs
diago
sim
step
noHeads
ego
)
in
map
(
\
ego
->
toAncestor
nbDocs
diago
sim
step
noHeads
ego
)
$
headsToAncestors
nbDocs
diago
sim
step
heads
[]
$
headsToAncestors
nbDocs
diago
sim
step
heads
[]
...
@@ -671,7 +673,7 @@ toPhyloExport :: Phylo -> DotGraph DotId
...
@@ -671,7 +673,7 @@ toPhyloExport :: Phylo -> DotGraph DotId
toPhyloExport
phylo
=
exportToDot
phylo
toPhyloExport
phylo
=
exportToDot
phylo
$
processFilters
(
exportFilter
$
getConfig
phylo
)
(
phyloQuality
$
getConfig
phylo
)
$
processFilters
(
exportFilter
$
getConfig
phylo
)
(
phyloQuality
$
getConfig
phylo
)
$
processSort
(
exportSort
$
getConfig
phylo
)
(
getSeaElevation
phylo
)
$
processSort
(
exportSort
$
getConfig
phylo
)
(
getSeaElevation
phylo
)
$
processLabels
(
exportLabel
$
getConfig
phylo
)
(
getRoots
phylo
)
(
_phylo_lastTerm
Freq
phylo
)
$
processLabels
(
exportLabel
$
getConfig
phylo
)
(
getRoots
phylo
)
(
getLastRoots
Freq
phylo
)
$
processMetrics
phylo
export
$
processMetrics
phylo
export
where
where
export
::
PhyloExport
export
::
PhyloExport
...
@@ -711,7 +713,7 @@ tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map
...
@@ -711,7 +713,7 @@ tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map
tracePhyloInfo
::
Phylo
->
Phylo
tracePhyloInfo
::
Phylo
->
Phylo
tracePhyloInfo
phylo
=
trace
(
"
\n
"
<>
"##########################"
<>
"
\n\n
"
<>
"-- | Phylo with λ = "
tracePhyloInfo
phylo
=
trace
(
"
\n
"
<>
"##########################"
<>
"
\n\n
"
<>
"-- | Phylo with λ = "
<>
show
(
_qua_granularity
$
phyloQuality
$
getConfig
phylo
)
<>
" applied to "
<>
show
(
getLevel
phylo
)
<>
" applied to "
<>
show
(
length
$
Vector
.
toList
$
getRoots
phylo
)
<>
" foundations"
<>
show
(
length
$
Vector
.
toList
$
getRoots
phylo
)
<>
" foundations"
)
phylo
)
phylo
...
...
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
View file @
386ad673
...
@@ -16,7 +16,7 @@ import Control.DeepSeq (NFData)
...
@@ -16,7 +16,7 @@ import Control.DeepSeq (NFData)
import
Control.Lens
hiding
(
Level
)
import
Control.Lens
hiding
(
Level
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
import
Data.List
(
concat
,
nub
,
partition
,
sort
,
(
++
),
group
,
intersect
,
null
,
sortOn
,
groupBy
,
tail
)
import
Data.List
(
concat
,
nub
,
partition
,
sort
,
(
++
),
group
,
intersect
,
null
,
sortOn
,
groupBy
,
tail
)
import
Data.Map
(
Map
,
fromListWith
,
keys
,
unionWith
,
fromList
,
empty
,
toList
,
elems
,
(
!
),
restrictKeys
,
foldlWithKey
,
insert
)
import
Data.Map
(
Map
,
fromListWith
,
keys
,
unionWith
,
fromList
,
empty
,
toList
,
elems
,
(
!
),
restrictKeys
,
insert
)
import
Data.Set
(
Set
)
import
Data.Set
(
Set
)
import
Data.Text
(
Text
)
import
Data.Text
(
Text
)
import
Data.Vector
(
Vector
)
import
Data.Vector
(
Vector
)
...
@@ -30,7 +30,7 @@ import Gargantext.Core.Viz.Phylo
...
@@ -30,7 +30,7 @@ import Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toHorizon
)
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toHorizon
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
import
Gargantext.Core.Viz.Phylo.PhyloTools
import
Gargantext.Core.Viz.Phylo.SynchronicClustering
(
synchronicClustering
)
import
Gargantext.Core.Viz.Phylo.SynchronicClustering
(
synchronicClustering
)
import
Gargantext.Core.Viz.Phylo.TemporalMatching
(
temporalMatching
,
getNextPeriods
,
filterDocs
,
filterDiago
,
reduceDiagos
,
toSimilarity
)
import
Gargantext.Core.Viz.Phylo.TemporalMatching
(
t
oPhyloQuality
,
t
emporalMatching
,
getNextPeriods
,
filterDocs
,
filterDiago
,
reduceDiagos
,
toSimilarity
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
qualified
Data.Set
as
Set
import
qualified
Data.Set
as
Set
...
@@ -55,8 +55,7 @@ toPhylo' (PhyloBase phylo) = toPhylo
...
@@ -55,8 +55,7 @@ toPhylo' (PhyloBase phylo) = toPhylo
-- TODO an adaptative synchronic clustering with a slider
-- TODO an adaptative synchronic clustering with a slider
toPhylo
::
Phylo
->
Phylo
toPhylo
::
Phylo
->
Phylo
toPhylo
phylowithoutLink
=
trace
(
"# flatPhylo groups "
<>
show
(
length
$
getGroupsFromScale
1
flatPhylo
))
toPhylo
phylowithoutLink
=
traceToPhylo
(
phyloScale
$
getConfig
phylowithoutLink
)
$
$
traceToPhylo
(
phyloScale
$
getConfig
phylowithoutLink
)
$
if
(
phyloScale
$
getConfig
phylowithoutLink
)
>
1
if
(
phyloScale
$
getConfig
phylowithoutLink
)
>
1
then
foldl'
(
\
phylo'
_
->
synchronicClustering
phylo'
)
phyloAncestors
[
2
..
(
phyloScale
$
getConfig
phylowithoutLink
)]
then
foldl'
(
\
phylo'
_
->
synchronicClustering
phylo'
)
phyloAncestors
[
2
..
(
phyloScale
$
getConfig
phylowithoutLink
)]
else
phyloAncestors
else
phyloAncestors
...
@@ -65,11 +64,11 @@ toPhylo phylowithoutLink = trace ("# flatPhylo groups " <> show(length $ getGrou
...
@@ -65,11 +64,11 @@ toPhylo phylowithoutLink = trace ("# flatPhylo groups " <> show(length $ getGrou
phyloAncestors
::
Phylo
phyloAncestors
::
Phylo
phyloAncestors
=
phyloAncestors
=
if
(
findAncestors
$
getConfig
phylowithoutLink
)
if
(
findAncestors
$
getConfig
phylowithoutLink
)
then
toHorizon
flatPhylo
then
toHorizon
phyloWithLinks
else
flatPhylo
else
phyloWithLinks
--------------------------------------
--------------------------------------
flatPhylo
::
Phylo
phyloWithLinks
::
Phylo
flatPhylo
=
addTemporalLinksToPhylo
phylowithoutLink
phyloWithLinks
=
temporalMatching
(
getLadder
phylowithoutLink
)
phylowithoutLink
--------------------------------------
--------------------------------------
...
@@ -86,60 +85,111 @@ squareLadder ladder = List.map (\x -> x * x) ladder
...
@@ -86,60 +85,111 @@ squareLadder ladder = List.map (\x -> x * x) ladder
{-
{-
-- create an adaptative
diachronic
'sea elevation' ladder
-- create an adaptative 'sea elevation' ladder
-}
-}
adapt
Diachronic
Ladder
::
Double
->
Set
Double
->
Set
Double
->
[
Double
]
adapt
Sea
Ladder
::
Double
->
Set
Double
->
Set
Double
->
[
Double
]
adapt
Diachronic
Ladder
curr
similarities
ladder
=
adapt
Sea
Ladder
curr
similarities
ladder
=
if
curr
<=
0
||
Set
.
null
similarities
if
curr
<=
0
||
Set
.
null
similarities
then
Set
.
toList
ladder
then
Set
.
toList
ladder
else
else
let
idx
=
((
Set
.
size
similarities
)
`
div
`
(
floor
curr
))
-
1
let
idx
=
((
Set
.
size
similarities
)
`
div
`
(
floor
curr
))
-
1
thr
=
Set
.
elemAt
idx
similarities
thr
=
Set
.
elemAt
idx
similarities
-- we use a sliding methods 1/10, then 1/9, then ... 1/2
-- we use a sliding methods 1/10, then 1/9, then ... 1/2
in
adapt
Diachronic
Ladder
(
curr
-
1
)
(
Set
.
filter
(
>
thr
)
similarities
)
(
Set
.
insert
thr
ladder
)
in
adapt
Sea
Ladder
(
curr
-
1
)
(
Set
.
filter
(
>
thr
)
similarities
)
(
Set
.
insert
thr
ladder
)
{-
{-
-- create a constante
diachronic
'sea elevation' ladder
-- create a constante 'sea elevation' ladder
-}
-}
const
Diachronic
Ladder
::
Double
->
Double
->
Set
Double
->
[
Double
]
const
Sea
Ladder
::
Double
->
Double
->
Set
Double
->
[
Double
]
const
Diachronic
Ladder
curr
step
ladder
=
const
Sea
Ladder
curr
step
ladder
=
if
curr
>
1
if
curr
>
1
then
Set
.
toList
ladder
then
Set
.
toList
ladder
else
constDiachronicLadder
(
curr
+
step
)
step
(
Set
.
insert
curr
ladder
)
else
constSeaLadder
(
curr
+
step
)
step
(
Set
.
insert
curr
ladder
)
{-
{-
-- process an initial scanning of the kinship links
-- create an evolving 'sea elevation' ladder based on estimated & local quality maxima
-}
-}
scanSimilarity
::
Scale
->
Phylo
->
Phylo
evolvSeaLadder
::
Double
->
Double
->
Map
Int
Double
->
Set
Double
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
->
[
Double
]
scanSimilarity
lvl
phylo
=
evolvSeaLadder
nbFdt
lambda
freq
similarities
graph
=
map
snd
let
proximity
=
similarity
$
getConfig
phylo
$
filter
fst
scanning
=
foldlWithKey
(
\
acc
pId
pds
->
$
zip
maxima
(
map
fst
qua'
)
-- 1) process period by period
-- 3) find the corresponding measures of similarity and create the ladder
let
egos
=
map
(
\
g
->
(
getGroupId
g
,
g
^.
phylo_groupNgrams
))
where
$
elems
--------
$
view
(
phylo_periodScales
-- 2) find the local maxima in the quality distribution
.
traverse
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_scaleScale
==
lvl
)
maxima
::
[
Bool
]
.
phylo_scaleGroups
)
pds
maxima
=
[
snd
(
List
.
head
qua'
)
>
snd
(
List
.
head
$
List
.
tail
qua'
)]
++
(
findMaxima
qua'
)
++
[
snd
(
List
.
head
$
reverse
qua'
)
>
snd
(
List
.
head
$
List
.
tail
$
reverse
qua'
)]
next
=
getNextPeriods
ToParents
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
pId
(
keys
$
phylo
^.
phylo_periods
)
--------
targets
=
map
(
\
g
->
(
getGroupId
g
,
g
^.
phylo_groupNgrams
))
$
getGroupsFromScalePeriods
lvl
next
phylo
-- 1.2)
docs
=
filterDocs
(
phylo
^.
phylo_timeDocs
)
([
pId
]
++
next
)
qua'
::
[(
Double
,
Double
)]
diagos
=
filterDiago
(
phylo
^.
phylo_timeCooc
)
([
pId
]
++
next
)
qua'
=
foldl
(
\
acc
(
s
,
q
)
->
-- 2) compute the pairs in parallel
if
length
acc
==
0
pairs
=
map
(
\
(
id
,
ngrams
)
->
then
[(
s
,
q
)]
map
(
\
(
id'
,
ngrams'
)
->
else
if
(
snd
(
List
.
last
acc
))
==
q
let
nbDocs
=
(
sum
.
elems
)
$
filterDocs
docs
([
idToPrd
id
,
idToPrd
id'
])
then
acc
diago
=
reduceDiagos
$
filterDiago
diagos
([
idToPrd
id
,
idToPrd
id'
])
else
acc
++
[(
s
,
q
)]
in
((
id
,
id'
),
toSimilarity
nbDocs
diago
proximity
ngrams
ngrams'
ngrams'
)
)
[]
$
zip
(
Set
.
toList
similarities
)
qua
)
$
filter
(
\
(
_
,
ngrams'
)
->
(
not
.
null
)
$
intersect
ngrams
ngrams'
)
targets
--------
)
egos
-- 1.1) for each measure of similarity, prune the flat phylo, compute the branches and estimate the quality
pairs'
=
pairs
`
using
`
parList
rdeepseq
qua
::
[
Double
]
in
acc
++
(
concat
pairs'
)
qua
=
map
(
\
thr
->
)
[]
$
phylo
^.
phylo_periods
let
edges
=
filter
(
\
edge
->
snd
edge
>=
thr
)
graph
in
phylo
&
phylo_diaSimScan
.~
Set
.
fromList
(
traceGroupsProxi
$
map
snd
scanning
)
nodes
=
nub
$
concat
$
map
(
\
((
n
,
n'
),
_
)
->
[
n
,
n'
])
edges
branches
=
toRelatedComponents
nodes
edges
in
toPhyloQuality
nbFdt
lambda
freq
branches
)
$
(
Set
.
toList
similarities
)
{-
-- find a similarity ladder regarding the "sea elevation" strategy
-}
findSeaLadder
::
Phylo
->
Phylo
findSeaLadder
phylo
=
case
getSeaElevation
phylo
of
Constante
start
gap
->
phylo
&
phylo_seaLadder
.~
(
constSeaLadder
start
gap
Set
.
empty
)
Adaptative
steps
->
phylo
&
phylo_seaLadder
.~
(
squareLadder
$
adaptSeaLadder
steps
similarities
Set
.
empty
)
Evolving
_
->
let
ladder
=
evolvSeaLadder
(
fromIntegral
$
Vector
.
length
$
getRoots
phylo
)
(
getLevel
phylo
)
(
getRootsFreq
phylo
)
similarities
simGraph
in
phylo
&
phylo_seaLadder
.~
(
if
length
ladder
>
0
then
ladder
-- if we don't find any local maxima with the evolving strategy
else
constSeaLadder
0.1
0.1
Set
.
empty
)
where
--------
-- 2) extract the values of the kinship links
similarities
::
Set
Double
similarities
=
Set
.
fromList
$
sort
$
map
snd
simGraph
--------
-- 1) we process an initial calculation of the kinship links
-- this initial calculation is used to estimate the real sea ladder
simGraph
::
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
simGraph
=
foldl'
(
\
acc
period
->
-- 1.1) process period by period
let
sources
=
getGroupsFromScalePeriods
1
[
period
]
phylo
next
=
getNextPeriods
ToParents
3
period
(
keys
$
phylo
^.
phylo_periods
)
targets
=
getGroupsFromScalePeriods
1
next
phylo
docs
=
filterDocs
(
getDocsByDate
phylo
)
([
period
]
++
next
)
diagos
=
filterDiago
(
getCoocByDate
phylo
)
([
period
]
++
next
)
-- 1.2) compute the kinship similarities between pairs of source & target in parallel
pairs
=
map
(
\
source
->
let
candidates
=
filter
(
\
target
->
(
>
2
)
$
length
$
intersect
(
getGroupNgrams
source
)
(
getGroupNgrams
target
))
targets
in
map
(
\
target
->
let
nbDocs
=
(
sum
.
elems
)
$
filterDocs
docs
([
idToPrd
(
getGroupId
source
),
idToPrd
(
getGroupId
target
)])
diago
=
reduceDiagos
$
filterDiago
diagos
([
idToPrd
(
getGroupId
source
),
idToPrd
(
getGroupId
target
)])
in
((
source
,
target
),
toSimilarity
nbDocs
diago
(
getSimilarity
phylo
)
(
getGroupNgrams
source
)
(
getGroupNgrams
target
)
(
getGroupNgrams
target
))
)
candidates
)
sources
pairs'
=
pairs
`
using
`
parList
rdeepseq
in
acc
++
(
concat
pairs'
)
)
[]
$
keys
$
phylo
^.
phylo_periods
appendGroups
::
(
a
->
Period
->
(
Text
,
Text
)
->
Scale
->
Int
->
[
Cooc
]
->
PhyloGroup
)
->
Scale
->
Map
(
Date
,
Date
)
[
a
]
->
Phylo
->
Phylo
appendGroups
::
(
a
->
Period
->
(
Text
,
Text
)
->
Scale
->
Int
->
[
Cooc
]
->
PhyloGroup
)
->
Scale
->
Map
(
Date
,
Date
)
[
a
]
->
Phylo
->
Phylo
appendGroups
f
lvl
m
phylo
=
trace
(
"
\n
"
<>
"-- | Append "
<>
show
(
length
$
concat
$
elems
m
)
<>
" groups to Level "
<>
show
(
lvl
)
<>
"
\n
"
)
appendGroups
f
lvl
m
phylo
=
trace
(
"
\n
"
<>
"-- | Append "
<>
show
(
length
$
concat
$
elems
m
)
<>
" groups to Level "
<>
show
(
lvl
)
<>
"
\n
"
)
...
@@ -156,7 +206,7 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
...
@@ -156,7 +206,7 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
&
phylo_scaleGroups
.~
(
fromList
$
foldl
(
\
groups
obj
->
&
phylo_scaleGroups
.~
(
fromList
$
foldl
(
\
groups
obj
->
groups
++
[
(((
pId
,
lvl
),
length
groups
)
groups
++
[
(((
pId
,
lvl
),
length
groups
)
,
f
obj
pId
pId'
lvl
(
length
groups
)
,
f
obj
pId
pId'
lvl
(
length
groups
)
(
elems
$
restrictKeys
(
phylo
^.
phylo_timeCooc
)
$
periodsToYears
[
pId
]))
(
elems
$
restrictKeys
(
getCoocByDate
phylo
)
$
periodsToYears
[
pId
]))
]
)
[]
phyloCUnit
)
]
)
[]
phyloCUnit
)
else
else
phyloLvl
)
phyloLvl
)
...
@@ -174,16 +224,6 @@ clusterToGroup fis pId pId' lvl idx coocs = PhyloGroup pId pId' lvl idx ""
...
@@ -174,16 +224,6 @@ clusterToGroup fis pId pId' lvl idx coocs = PhyloGroup pId pId' lvl idx ""
(
fromList
[(
"breaks"
,[
0
]),(
"seaLevels"
,[
0
])])
(
fromList
[(
"breaks"
,[
0
]),(
"seaLevels"
,[
0
])])
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
{-
-- enhance the phylo with temporal links
-}
addTemporalLinksToPhylo
::
Phylo
->
Phylo
addTemporalLinksToPhylo
phylowithoutLink
=
case
strategy
of
Constante
start
gap
->
temporalMatching
(
constDiachronicLadder
start
gap
Set
.
empty
)
phylowithoutLink
Adaptative
steps
->
temporalMatching
(
squareLadder
$
adaptDiachronicLadder
steps
(
phylowithoutLink
^.
phylo_diaSimScan
)
Set
.
empty
)
phylowithoutLink
where
strategy
::
SeaElevation
strategy
=
getSeaElevation
phylowithoutLink
-----------------------
-----------------------
-- | To Phylo Step | --
-- | To Phylo Step | --
...
@@ -203,8 +243,8 @@ indexDates' m = map (\docs ->
...
@@ -203,8 +243,8 @@ indexDates' m = map (\docs ->
-- create a map of roots and group ids
-- create a map of roots and group ids
joinRoots
ToGroups
::
Phylo
->
Phylo
joinRoots
::
Phylo
->
Phylo
joinRoots
ToGroups
phylo
=
set
(
phylo_foundations
.
foundations_rootsInGroups
)
rootsMap
phylo
joinRoots
phylo
=
set
(
phylo_foundations
.
foundations_rootsInGroups
)
rootsMap
phylo
where
where
--------------------------------------
--------------------------------------
rootsMap
::
Map
Int
[
PhyloGroupId
]
rootsMap
::
Map
Int
[
PhyloGroupId
]
...
@@ -215,16 +255,20 @@ joinRootsToGroups phylo = set (phylo_foundations . foundations_rootsInGroups) ro
...
@@ -215,16 +255,20 @@ joinRootsToGroups phylo = set (phylo_foundations . foundations_rootsInGroups) ro
$
getGroupsFromScale
1
phylo
$
getGroupsFromScale
1
phylo
maybeDefaultParams
::
Phylo
->
Phylo
maybeDefaultParams
phylo
=
if
(
defaultMode
(
getConfig
phylo
))
then
findDefaultLevel
phylo
else
phylo
-- To build the first phylo step from docs and terms
-- To build the first phylo step from docs and terms
-- QL: backend entre phyloBase et Clustering
-- QL: backend entre phyloBase et Clustering
-- tophylowithoutLink
-- tophylowithoutLink
toPhyloWithoutLink
::
[
Document
]
->
PhyloConfig
->
Phylo
toPhyloWithoutLink
::
[
Document
]
->
PhyloConfig
->
Phylo
toPhyloWithoutLink
docs
conf
=
case
(
getSeaElevation
phyloBase
)
of
toPhyloWithoutLink
docs
conf
=
joinRoots
Constante
_
_
->
joinRootsToGroups
$
findSeaLadder
$
appendGroups
clusterToGroup
1
seriesOfClustering
(
updatePeriods
(
indexDates'
docs'
)
phyloBase
)
$
maybeDefaultParams
Adaptative
_
->
joinRootsToGroups
$
appendGroups
clusterToGroup
1
seriesOfClustering
(
updatePeriods
(
indexDates'
docs'
)
phyloBase
)
$
scanSimilarity
1
$
appendGroups
clusterToGroup
1
seriesOfClustering
(
updatePeriods
(
indexDates'
docs'
)
phyloBase
)
where
where
--------------------------------------
--------------------------------------
seriesOfClustering
::
Map
(
Date
,
Date
)
[
Clustering
]
seriesOfClustering
::
Map
(
Date
,
Date
)
[
Clustering
]
...
@@ -340,6 +384,7 @@ docsToTimeScaleCooc docs fdt =
...
@@ -340,6 +384,7 @@ docsToTimeScaleCooc docs fdt =
-----------------------
-----------------------
-- | to Phylo Base | --
-- | to Phylo Base | --
-----------------------
-----------------------
-- TODO anoe
-- TODO anoe
groupDocsByPeriodRec
::
(
NFData
doc
,
Ord
date
,
Enum
date
)
=>
(
doc
->
date
)
->
[(
date
,
date
)]
->
[
doc
]
->
Map
(
date
,
date
)
[
doc
]
->
Map
(
date
,
date
)
[
doc
]
groupDocsByPeriodRec
::
(
NFData
doc
,
Ord
date
,
Enum
date
)
=>
(
doc
->
date
)
->
[(
date
,
date
)]
->
[
doc
]
->
Map
(
date
,
date
)
[
doc
]
->
Map
(
date
,
date
)
[
doc
]
groupDocsByPeriodRec
f
prds
docs
acc
=
groupDocsByPeriodRec
f
prds
docs
acc
=
...
@@ -394,6 +439,14 @@ docsToTermFreq docs fdt =
...
@@ -394,6 +439,14 @@ docsToTermFreq docs fdt =
sumFreqs
=
sum
$
elems
freqs
sumFreqs
=
sum
$
elems
freqs
in
map
(
/
sumFreqs
)
freqs
in
map
(
/
sumFreqs
)
freqs
docsToTermCount
::
[
Document
]
->
Vector
Ngrams
->
Map
Int
Double
docsToTermCount
docs
roots
=
fromList
$
map
(
\
lst
->
(
head'
"docsToTermCount"
lst
,
fromIntegral
$
length
lst
))
$
group
$
sort
$
concat
$
map
(
\
d
->
nub
$
ngramsToIdx
(
text
d
)
roots
)
docs
docsToLastTermFreq
::
Int
->
[
Document
]
->
Vector
Ngrams
->
Map
Int
Double
docsToLastTermFreq
::
Int
->
[
Document
]
->
Vector
Ngrams
->
Map
Int
Double
docsToLastTermFreq
n
docs
fdt
=
docsToLastTermFreq
n
docs
fdt
=
let
last
=
take
n
$
reverse
$
sort
$
map
date
docs
let
last
=
take
n
$
reverse
$
sort
$
map
date
docs
...
@@ -420,24 +473,43 @@ initPhyloScales lvlMax pId =
...
@@ -420,24 +473,43 @@ initPhyloScales lvlMax pId =
fromList
$
map
(
\
lvl
->
((
pId
,
lvl
),
PhyloScale
pId
(
""
,
""
)
lvl
empty
))
[
1
..
lvlMax
]
fromList
$
map
(
\
lvl
->
((
pId
,
lvl
),
PhyloScale
pId
(
""
,
""
)
lvl
empty
))
[
1
..
lvlMax
]
setDefault
::
PhyloConfig
->
PhyloConfig
setDefault
conf
=
conf
{
phyloScale
=
2
,
similarity
=
WeightedLogJaccard
0.5
2
,
findAncestors
=
True
,
phyloSynchrony
=
ByProximityThreshold
0.6
0
SiblingBranches
MergeAllGroups
,
phyloQuality
=
Quality
0.5
3
,
timeUnit
=
Year
3
1
3
,
clique
=
MaxClique
5
30
ByNeighbours
,
exportLabel
=
[
BranchLabel
MostEmergentTfIdf
2
,
GroupLabel
MostEmergentInclusive
2
],
exportSort
=
ByHierarchy
Desc
,
exportFilter
=
[
ByBranchSize
3
]
}
-- Init the basic elements of a Phylo
-- Init the basic elements of a Phylo
--
--
initPhylo
::
[
Document
]
->
PhyloConfig
->
Phylo
initPhylo
::
[
Document
]
->
PhyloConfig
->
Phylo
initPhylo
docs
conf
=
initPhylo
docs
conf
=
let
roots
=
Vector
.
fromList
$
nub
$
concat
$
map
text
docs
let
roots
=
Vector
.
fromList
$
nub
$
concat
$
map
text
docs
foundations
=
PhyloFoundations
roots
empty
foundations
=
PhyloFoundations
roots
empty
docsSources
=
PhyloSources
(
Vector
.
fromList
$
nub
$
concat
$
map
sources
docs
)
docsSources
=
PhyloSources
(
Vector
.
fromList
$
nub
$
concat
$
map
sources
docs
)
params
=
defaultPhyloParam
{
_phyloParam_config
=
conf
}
docsCounts
=
PhyloCounts
(
docsToTimeScaleCooc
docs
(
foundations
^.
foundations_roots
))
(
docsToTimeScaleNb
docs
)
(
docsToTermCount
docs
(
foundations
^.
foundations_roots
))
(
docsToTermFreq
docs
(
foundations
^.
foundations_roots
))
(
docsToLastTermFreq
(
getTimePeriod
$
timeUnit
conf
)
docs
(
foundations
^.
foundations_roots
))
params
=
if
(
defaultMode
conf
)
then
defaultPhyloParam
{
_phyloParam_config
=
setDefault
conf
}
else
defaultPhyloParam
{
_phyloParam_config
=
conf
}
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
$
timeUnit
conf
)
(
getTimeStep
$
timeUnit
conf
)
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
$
timeUnit
conf
)
(
getTimeStep
$
timeUnit
conf
)
in
trace
(
"
\n
"
<>
"-- | Init a phylo out of "
<>
show
(
length
docs
)
<>
" docs
\n
"
)
in
trace
(
"
\n
"
<>
"-- | Init a phylo out of "
<>
show
(
length
docs
)
<>
" docs
\n
"
)
$
Phylo
foundations
$
Phylo
foundations
docsSources
docsSources
(
docsToTimeScaleCooc
docs
(
foundations
^.
foundations_roots
))
docsCounts
(
docsToTimeScaleNb
docs
)
[]
(
docsToTermFreq
docs
(
foundations
^.
foundations_roots
))
(
docsToLastTermFreq
(
getTimePeriod
$
timeUnit
conf
)
docs
(
foundations
^.
foundations_roots
))
Set
.
empty
params
params
(
fromList
$
map
(
\
prd
->
(
prd
,
PhyloPeriod
prd
(
""
,
""
)
(
initPhyloScales
1
prd
)))
periods
)
(
fromList
$
map
(
\
prd
->
(
prd
,
PhyloPeriod
prd
(
""
,
""
)
(
initPhyloScales
1
prd
)))
periods
)
0
0
(
_qua_granularity
$
phyloQuality
$
conf
)
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
View file @
386ad673
...
@@ -13,7 +13,7 @@ Portability : POSIX
...
@@ -13,7 +13,7 @@ Portability : POSIX
module
Gargantext.Core.Viz.Phylo.PhyloTools
where
module
Gargantext.Core.Viz.Phylo.PhyloTools
where
import
Control.Lens
hiding
(
Level
)
import
Control.Lens
hiding
(
Level
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
,
init
,
tail
,
partition
,
tails
,
nubBy
,
group
,
notElem
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
,
init
,
tail
,
iterate
,
transpose
,
partition
,
tails
,
nubBy
,
group
,
notElem
,
(
!!
)
)
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
,
member
,
(
!
),
filterWithKey
,
fromListWith
,
empty
,
restrictKeys
)
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
,
member
,
(
!
),
filterWithKey
,
fromListWith
,
empty
,
restrictKeys
)
import
Data.Set
(
Set
,
disjoint
)
import
Data.Set
(
Set
,
disjoint
)
import
Data.String
(
String
)
import
Data.String
(
String
)
...
@@ -28,6 +28,7 @@ import qualified Data.List as List
...
@@ -28,6 +28,7 @@ import qualified Data.List as List
import
qualified
Data.Set
as
Set
import
qualified
Data.Set
as
Set
import
qualified
Data.Text
as
Text
import
qualified
Data.Text
as
Text
import
qualified
Data.Vector
as
Vector
import
qualified
Data.Vector
as
Vector
import
qualified
Data.Map
as
Map
------------
------------
-- | Io | --
-- | Io | --
...
@@ -301,6 +302,9 @@ getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc
...
@@ -301,6 +302,9 @@ getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc
coocToDiago
::
Cooc
->
Cooc
coocToDiago
::
Cooc
->
Cooc
coocToDiago
cooc
=
filterWithKey
(
\
(
k
,
k'
)
_
->
k
==
k'
)
cooc
coocToDiago
cooc
=
filterWithKey
(
\
(
k
,
k'
)
_
->
k
==
k'
)
cooc
coocToAdjacency
::
Cooc
->
Cooc
coocToAdjacency
cooc
=
Map
.
map
(
\
_
->
1
)
cooc
-- | To build the local cooc matrix of each phylogroup
-- | To build the local cooc matrix of each phylogroup
ngramsToCooc
::
[
Int
]
->
[
Cooc
]
->
Cooc
ngramsToCooc
::
[
Int
]
->
[
Cooc
]
->
Cooc
ngramsToCooc
ngrams
coocs
=
ngramsToCooc
ngrams
coocs
=
...
@@ -309,6 +313,75 @@ ngramsToCooc ngrams coocs =
...
@@ -309,6 +313,75 @@ ngramsToCooc ngrams coocs =
in
filterWithKey
(
\
k
_
->
elem
k
pairs
)
cooc
in
filterWithKey
(
\
k
_
->
elem
k
pairs
)
cooc
------------------
-- | Defaults | --
------------------
-- | find the local maxima in a list of values
findMaxima
::
[(
Double
,
Double
)]
->
[
Bool
]
findMaxima
lst
=
map
(
hasMax
)
$
toChunk
3
lst
where
------
hasMax
::
[(
Double
,
Double
)]
->
Bool
hasMax
chunk
=
if
(
length
chunk
)
/=
3
then
False
else
(
snd
(
chunk
!!
0
)
<
snd
(
chunk
!!
1
))
&&
(
snd
(
chunk
!!
2
)
<
snd
(
chunk
!!
1
))
-- | split a list into chunks of size n
toChunk
::
Int
->
[
a
]
->
[[
a
]]
toChunk
n
=
takeWhile
((
==
n
)
.
length
)
.
transpose
.
take
n
.
iterate
tail
-- | To compute the average degree from a cooc matrix
-- http://networksciencebook.com/chapter/2#degree
toAverageDegree
::
Cooc
->
Vector
Ngrams
->
Double
toAverageDegree
cooc
roots
=
2
*
(
fromIntegral
$
Map
.
size
cooc
)
/
(
fromIntegral
$
Vector
.
length
roots
)
-- | Use the giant component regime to estimate the default level
-- http://networksciencebook.com/chapter/3#networks-supercritical
regimeToDefaultLevel
::
Cooc
->
Vector
Ngrams
->
Double
regimeToDefaultLevel
cooc
roots
|
avg
==
0
=
1
|
avg
<
1
=
avg
*
0.6
|
avg
==
1
=
0.6
|
avg
<
lnN
=
(
avg
*
0.2
)
/
lnN
|
otherwise
=
0.2
where
avg
::
Double
avg
=
toAverageDegree
cooc
roots
lnN
::
Double
lnN
=
log
(
fromIntegral
$
Vector
.
length
roots
)
coocToConfidence
::
Phylo
->
Cooc
coocToConfidence
phylo
=
let
count
=
getRootsCount
phylo
cooc
=
foldl
(
\
acc
cooc'
->
sumCooc
acc
cooc'
)
empty
$
elems
$
getCoocByDate
phylo
in
Map
.
mapWithKey
(
\
(
a
,
b
)
w
->
confidence
a
b
w
count
)
cooc
where
----
-- confidence
confidence
::
Int
->
Int
->
Double
->
Map
Int
Double
->
Double
confidence
a
b
inter
card
=
maximum
[(
inter
/
card
!
a
),(
inter
/
card
!
b
)]
sumtest
::
[
Int
]
->
[
Int
]
->
Int
sumtest
l1
l2
=
(
head'
"test"
l1
)
+
(
head'
"test"
$
reverse
l2
)
findDefaultLevel
::
Phylo
->
Phylo
findDefaultLevel
phylo
=
let
confidence
=
Map
.
filterWithKey
(
\
(
a
,
b
)
_
->
a
/=
b
)
$
Map
.
filter
(
>
0.01
)
$
coocToConfidence
phylo
roots
=
getRoots
phylo
level
=
regimeToDefaultLevel
confidence
roots
in
updateLevel
level
phylo
--------------------
--------------------
-- | PhyloGroup | --
-- | PhyloGroup | --
--------------------
--------------------
...
@@ -401,21 +474,46 @@ getScales phylo = nub
...
@@ -401,21 +474,46 @@ getScales phylo = nub
getSeaElevation
::
Phylo
->
SeaElevation
getSeaElevation
::
Phylo
->
SeaElevation
getSeaElevation
phylo
=
seaElevation
(
getConfig
phylo
)
getSeaElevation
phylo
=
seaElevation
(
getConfig
phylo
)
getSimilarity
::
Phylo
->
Similarity
getSimilarity
phylo
=
similarity
(
getConfig
phylo
)
getPhyloSeaRiseStart
::
Phylo
->
Double
getPhyloSeaRiseStart
::
Phylo
->
Double
getPhyloSeaRiseStart
phylo
=
case
(
getSeaElevation
phylo
)
of
getPhyloSeaRiseStart
phylo
=
case
(
getSeaElevation
phylo
)
of
Constante
s
_
->
s
Constante
s
_
->
s
Adaptative
_
->
0
Adaptative
_
->
0
Evolving
_
->
0
getPhyloSeaRiseSteps
::
Phylo
->
Double
getPhyloSeaRiseSteps
::
Phylo
->
Double
getPhyloSeaRiseSteps
phylo
=
case
(
getSeaElevation
phylo
)
of
getPhyloSeaRiseSteps
phylo
=
case
(
getSeaElevation
phylo
)
of
Constante
_
s
->
s
Constante
_
s
->
s
Adaptative
s
->
s
Adaptative
s
->
s
Evolving
_
->
0.1
getConfig
::
Phylo
->
PhyloConfig
getConfig
::
Phylo
->
PhyloConfig
getConfig
phylo
=
(
phylo
^.
phylo_param
)
^.
phyloParam_config
getConfig
phylo
=
(
phylo
^.
phylo_param
)
^.
phyloParam_config
getLevel
::
Phylo
->
Double
getLevel
phylo
=
_phylo_level
phylo
getLadder
::
Phylo
->
[
Double
]
getLadder
phylo
=
phylo
^.
phylo_seaLadder
getCoocByDate
::
Phylo
->
Map
Date
Cooc
getCoocByDate
phylo
=
coocByDate
(
phylo
^.
phylo_counts
)
getDocsByDate
::
Phylo
->
Map
Date
Double
getDocsByDate
phylo
=
docsByDate
(
phylo
^.
phylo_counts
)
getRootsCount
::
Phylo
->
Map
Int
Double
getRootsCount
phylo
=
rootsCount
(
phylo
^.
phylo_counts
)
getRootsFreq
::
Phylo
->
Map
Int
Double
getRootsFreq
phylo
=
rootsFreq
(
phylo
^.
phylo_counts
)
getLastRootsFreq
::
Phylo
->
Map
Int
Double
getLastRootsFreq
phylo
=
lastRootsFreq
(
phylo
^.
phylo_counts
)
setConfig
::
PhyloConfig
->
Phylo
->
Phylo
setConfig
::
PhyloConfig
->
Phylo
->
Phylo
setConfig
config
phylo
=
phylo
setConfig
config
phylo
=
phylo
...
@@ -501,8 +599,10 @@ updatePeriods periods' phylo =
...
@@ -501,8 +599,10 @@ updatePeriods periods' phylo =
)
phylo
)
phylo
updateQuality
::
Double
->
Phylo
->
Phylo
updateQuality
::
Double
->
Phylo
->
Phylo
updateQuality
quality
phylo
=
phylo
{
_phylo_quality
=
quality
}
updateQuality
quality
phylo
=
phylo
{
_phylo_quality
=
quality
}
updateLevel
::
Double
->
Phylo
->
Phylo
updateLevel
level
phylo
=
phylo
{
_phylo_level
=
level
}
traceToPhylo
::
Scale
->
Phylo
->
Phylo
traceToPhylo
::
Scale
->
Phylo
->
Phylo
traceToPhylo
lvl
phylo
=
traceToPhylo
lvl
phylo
=
...
...
src/Gargantext/Core/Viz/Phylo/SynchronicClustering.hs
View file @
386ad673
...
@@ -76,7 +76,7 @@ toNextScale phylo groups =
...
@@ -76,7 +76,7 @@ toNextScale phylo groups =
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
foldlWithKey
(
\
acc
id
groups'
->
$
foldlWithKey
(
\
acc
id
groups'
->
-- 4) create the parent group
-- 4) create the parent group
let
parent
=
mergeGroups
(
elems
$
restrictKeys
(
phylo
^.
phylo_timeCooc
)
$
periodsToYears
[(
fst
.
fst
)
id
])
id
oldGroups
groups'
let
parent
=
mergeGroups
(
elems
$
restrictKeys
(
getCoocByDate
phylo
)
$
periodsToYears
[(
fst
.
fst
)
id
])
id
oldGroups
groups'
in
acc
++
[
parent
])
[]
in
acc
++
[
parent
])
[]
-- 3) group the current groups by parentId
-- 3) group the current groups by parentId
$
fromListWith
(
++
)
$
map
(
\
g
->
(
getLevelParentId
g
,
[
g
]))
groups
$
fromListWith
(
++
)
$
map
(
\
g
->
(
getLevelParentId
g
,
[
g
]))
groups
...
@@ -199,8 +199,8 @@ synchronicClustering :: Phylo -> Phylo
...
@@ -199,8 +199,8 @@ synchronicClustering :: Phylo -> Phylo
synchronicClustering
phylo
=
synchronicClustering
phylo
=
let
prox
=
similarity
$
getConfig
phylo
let
prox
=
similarity
$
getConfig
phylo
sync
=
phyloSynchrony
$
getConfig
phylo
sync
=
phyloSynchrony
$
getConfig
phylo
docs
=
phylo
^.
phylo_timeDocs
docs
=
getDocsByDate
phylo
diagos
=
map
coocToDiago
$
phylo
^.
phylo_timeCooc
diagos
=
map
coocToDiago
$
getCoocByDate
phylo
newBranches
=
map
(
\
branch
->
reduceGroups
prox
sync
docs
diagos
branch
)
newBranches
=
map
(
\
branch
->
reduceGroups
prox
sync
docs
diagos
branch
)
$
map
processDynamics
$
map
processDynamics
$
chooseClusteringStrategy
sync
$
chooseClusteringStrategy
sync
...
...
src/Gargantext/Core/Viz/Phylo/TemporalMatching.hs
View file @
386ad673
...
@@ -56,7 +56,7 @@ sumInvLog' s nb diago = foldl (\mem occ -> mem + (1 / (log (occ + 1/ tan (s * pi
...
@@ -56,7 +56,7 @@ sumInvLog' s nb diago = foldl (\mem occ -> mem + (1 / (log (occ + 1/ tan (s * pi
-- process the sumLog
-- process the sumLog
-}
-}
sumLog'
::
Double
->
Double
->
[
Double
]
->
Double
sumLog'
::
Double
->
Double
->
[
Double
]
->
Double
sumLog'
s
nb
diago
=
foldl
(
\
mem
occ
->
mem
+
(
log
(
occ
+
1
/
tan
(
s
*
pi
/
2
))
/
log
(
nb
+
1
/
tan
(
s
*
pi
/
2
))))
0
diago
sumLog'
s
nb
diago
=
foldl
(
\
mem
occ
->
mem
+
(
log
(
occ
+
1
/
tan
(
s
*
pi
/
2
))
/
log
(
nb
+
1
/
tan
(
s
*
pi
/
2
))))
0
diago
{-
{-
...
@@ -695,14 +695,14 @@ temporalMatching ladder phylo = updatePhyloGroups 1
...
@@ -695,14 +695,14 @@ temporalMatching ladder phylo = updatePhyloGroups 1
sea
::
([(
Branch
,
ShouldTry
)],
FinalQuality
)
sea
::
([(
Branch
,
ShouldTry
)],
FinalQuality
)
sea
=
seaLevelRise
(
fromIntegral
$
Vector
.
length
$
getRoots
phylo
)
sea
=
seaLevelRise
(
fromIntegral
$
Vector
.
length
$
getRoots
phylo
)
(
similarity
$
getConfig
phylo
)
(
similarity
$
getConfig
phylo
)
(
_qua_granularity
$
phyloQuality
$
getConfig
phylo
)
(
getLevel
phylo
)
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
)
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
)
(
phylo
^.
phylo_termFreq
)
(
getRootsFreq
phylo
)
ladder
1
ladder
1
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
(
getPeriodIds
phylo
)
(
phylo
^.
phylo_timeDocs
)
(
getDocsByDate
phylo
)
(
phylo
^.
phylo_timeCooc
)
(
getCoocByDate
phylo
)
((
phylo
^.
phylo_foundations
)
^.
foundations_rootsInGroups
)
((
phylo
^.
phylo_foundations
)
^.
foundations_rootsInGroups
)
(
reverse
$
sortOn
(
length
.
fst
)
seabed
)
(
reverse
$
sortOn
(
length
.
fst
)
seabed
)
...
@@ -714,7 +714,7 @@ temporalMatching ladder phylo = updatePhyloGroups 1
...
@@ -714,7 +714,7 @@ temporalMatching ladder phylo = updatePhyloGroups 1
(
getPeriodIds
phylo
)
(
getPeriodIds
phylo
)
(
similarity
$
getConfig
phylo
)
(
similarity
$
getConfig
phylo
)
(
List
.
head
ladder
)
(
List
.
head
ladder
)
(
phylo
^.
phylo_timeDocs
)
(
getDocsByDate
phylo
)
(
phylo
^.
phylo_timeCooc
)
(
getCoocByDate
phylo
)
((
phylo
^.
phylo_foundations
)
^.
foundations_rootsInGroups
)
((
phylo
^.
phylo_foundations
)
^.
foundations_rootsInGroups
)
(
traceTemporalMatching
$
getGroupsFromScale
1
phylo
)
(
traceTemporalMatching
$
getGroupsFromScale
1
phylo
)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment