Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
153
Issues
153
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
386ad673
Commit
386ad673
authored
Mar 03, 2023
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
1 click phylo v1 is ok
parent
968d52b3
Pipeline
#3719
failed with stage
in 72 minutes
Changes
8
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
295 additions
and
114 deletions
+295
-114
Main.hs
bin/gargantext-phylo/Main.hs
+1
-0
Phylo.hs
src/Gargantext/Core/Viz/Phylo.hs
+24
-11
Example.hs
src/Gargantext/Core/Viz/Phylo/Example.hs
+4
-11
PhyloExport.hs
src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
+10
-8
PhyloMaker.hs
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
+143
-71
PhyloTools.hs
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
+103
-3
SynchronicClustering.hs
src/Gargantext/Core/Viz/Phylo/SynchronicClustering.hs
+3
-3
TemporalMatching.hs
src/Gargantext/Core/Viz/Phylo/TemporalMatching.hs
+7
-7
No files found.
bin/gargantext-phylo/Main.hs
View file @
386ad673
...
...
@@ -149,6 +149,7 @@ seaToLabel :: PhyloConfig -> [Char]
seaToLabel
config
=
case
(
seaElevation
config
)
of
Constante
start
step
->
(
"sea_cst_"
<>
(
show
start
)
<>
"_"
<>
(
show
step
))
Adaptative
granularity
->
(
"sea_adapt"
<>
(
show
granularity
))
Evolving
_
->
(
"sea_evolv"
)
sensToLabel
::
PhyloConfig
->
[
Char
]
...
...
src/Gargantext/Core/Viz/Phylo.hs
View file @
386ad673
...
...
@@ -31,7 +31,6 @@ import Control.Lens (makeLenses)
import
Data.Aeson
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Map
(
Map
)
import
Data.Set
(
Set
)
import
Data.Swagger
import
Data.Text
(
Text
,
pack
)
import
Data.Vector
(
Vector
)
...
...
@@ -66,6 +65,8 @@ data SeaElevation =
,
_cons_gap
::
Double
}
|
Adaptative
{
_adap_steps
::
Double
}
|
Evolving
{
_evol_neighborhood
::
Bool
}
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
SeaElevation
...
...
@@ -180,6 +181,7 @@ data PhyloConfig =
,
phyloScale
::
Int
,
similarity
::
Similarity
,
seaElevation
::
SeaElevation
,
defaultMode
::
Bool
,
findAncestors
::
Bool
,
phyloSynchrony
::
Synchrony
,
phyloQuality
::
Quality
...
...
@@ -224,6 +226,7 @@ defaultConfig =
,
phyloScale
=
2
,
similarity
=
WeightedLogJaccard
0.5
1
,
seaElevation
=
Constante
0.1
0.1
,
defaultMode
=
True
,
findAncestors
=
False
,
phyloSynchrony
=
ByProximityThreshold
0.5
0
AllBranches
MergeAllGroups
,
phyloQuality
=
Quality
0.5
1
...
...
@@ -365,14 +368,21 @@ data PhyloFoundations = PhyloFoundations
,
_foundations_rootsInGroups
::
Map
Int
[
PhyloGroupId
]
-- map of roots associated to groups
}
deriving
(
Generic
,
Show
,
Eq
)
instance
ToSchema
PhyloFoundations
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_foundations_"
)
data
PhyloCounts
=
PhyloCounts
{
coocByDate
::
!
(
Map
Date
Cooc
)
,
docsByDate
::
!
(
Map
Date
Double
)
,
rootsCount
::
!
(
Map
Int
Double
)
,
rootsFreq
::
!
(
Map
Int
Double
)
,
lastRootsFreq
::
!
(
Map
Int
Double
)
}
deriving
(
Generic
,
Show
,
Eq
)
data
PhyloSources
=
PhyloSources
{
_sources
::
!
(
Vector
Text
)
}
deriving
(
Generic
,
Show
,
Eq
)
instance
ToSchema
PhyloFoundations
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_foundations_"
)
instance
ToSchema
PhyloCounts
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_"
)
instance
ToSchema
PhyloSources
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_"
)
...
...
@@ -396,6 +406,8 @@ type Period = (Date,Date)
type
PeriodStr
=
(
DateStr
,
DateStr
)
-- | Phylo datatype of a phylomemy
-- foundations : the foundations of the phylo
-- timeCooc : a Map of coocurency by minimal unit of time (ex: by year)
...
...
@@ -405,14 +417,12 @@ type PeriodStr = (DateStr,DateStr)
data
Phylo
=
Phylo
{
_phylo_foundations
::
PhyloFoundations
,
_phylo_sources
::
PhyloSources
,
_phylo_timeCooc
::
!
(
Map
Date
Cooc
)
,
_phylo_timeDocs
::
!
(
Map
Date
Double
)
,
_phylo_termFreq
::
!
(
Map
Int
Double
)
,
_phylo_lastTermFreq
::
!
(
Map
Int
Double
)
,
_phylo_diaSimScan
::
Set
Double
,
_phylo_counts
::
PhyloCounts
,
_phylo_seaLadder
::
[
Double
]
,
_phylo_param
::
PhyloParam
,
_phylo_periods
::
Map
Period
PhyloPeriod
,
_phylo_quality
::
Double
,
_phylo_level
::
Double
}
deriving
(
Generic
,
Show
,
Eq
)
...
...
@@ -620,6 +630,9 @@ instance ToJSON PhyloSources
instance
FromJSON
PhyloParam
instance
ToJSON
PhyloParam
instance
FromJSON
PhyloCounts
instance
ToJSON
PhyloCounts
instance
FromJSON
PhyloPeriod
instance
ToJSON
PhyloPeriod
...
...
src/Gargantext/Core/Viz/Phylo/Example.hs
View file @
386ad673
...
...
@@ -30,7 +30,6 @@ import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import
Gargantext.Core.Viz.Phylo.TemporalMatching
(
temporalMatching
)
import
Gargantext.Prelude
import
qualified
Data.Vector
as
Vector
import
qualified
Data.Set
as
Set
import
qualified
Data.Map
as
Map
---------------------------------
...
...
@@ -55,15 +54,11 @@ phyloCleopatre = synchronicClustering $ toHorizon flatPhylo
-----------------------------------------------
flatPhylo
::
Phylo
flatPhylo
=
case
(
getSeaElevation
emptyPhylo
)
of
Constante
s
g
->
temporalMatching
(
constDiachronicLadder
s
g
Set
.
empty
)
$
scanSimilarity
1
$
appendGroups
clusterToGroup
1
seriesOfClustering
emptyPhylo
Adaptative
s
->
temporalMatching
(
adaptDiachronicLadder
s
(
emptyPhylo'
^.
phylo_diaSimScan
)
Set
.
empty
)
emptyPhylo'
flatPhylo
=
temporalMatching
(
getLadder
emptyPhylo'
)
emptyPhylo'
emptyPhylo'
::
Phylo
emptyPhylo'
=
scanSimilarity
1
$
joinRootsToGroups
emptyPhylo'
=
joinRoots
$
findSeaLadder
$
appendGroups
clusterToGroup
1
seriesOfClustering
emptyPhylo
---------------------------------------------
...
...
@@ -83,11 +78,9 @@ docsByPeriods = groupDocsByPeriod date periods docs
-- | STEP 1 | -- Init the Phylo
---------------------------------
emptyPhylo
::
Phylo
emptyPhylo
=
initPhylo
docs
config
phyloCooc
::
Map
Date
Cooc
phyloCooc
=
docsToTimeScaleCooc
docs
(
foundations
^.
foundations_roots
)
...
...
@@ -106,7 +99,7 @@ config :: PhyloConfig
config
=
defaultConfig
{
phyloName
=
"Cesar et Cleopatre"
,
phyloScale
=
2
,
seaElevation
=
Adaptative
4
,
seaElevation
=
Evolving
True
,
exportFilter
=
[
ByBranchSize
0
]
,
clique
=
MaxClique
0
15
ByNeighbours
}
...
...
src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
View file @
386ad673
...
...
@@ -214,13 +214,13 @@ exportToDot phylo export =
{-- home made attributes -}
<>
[(
toAttr
(
fromStrict
"phyloFoundations"
)
$
pack
$
show
(
length
$
Vector
.
toList
$
getRoots
phylo
))
,(
toAttr
(
fromStrict
"phyloTerms"
)
$
pack
$
show
(
length
$
nub
$
concat
$
map
(
\
g
->
g
^.
phylo_groupNgrams
)
$
export
^.
export_groups
))
,(
toAttr
(
fromStrict
"phyloDocs"
)
$
pack
$
show
(
sum
$
elems
$
phylo
^.
phylo_timeDocs
))
,(
toAttr
(
fromStrict
"phyloDocs"
)
$
pack
$
show
(
sum
$
elems
$
getDocsByDate
phylo
))
,(
toAttr
(
fromStrict
"phyloPeriods"
)
$
pack
$
show
(
length
$
elems
$
phylo
^.
phylo_periods
))
,(
toAttr
(
fromStrict
"phyloBranches"
)
$
pack
$
show
(
length
$
export
^.
export_branches
))
,(
toAttr
(
fromStrict
"phyloGroups"
)
$
pack
$
show
(
length
$
export
^.
export_groups
))
,(
toAttr
(
fromStrict
"phyloSources"
)
$
pack
$
show
(
Vector
.
toList
$
getSources
phylo
))
,(
toAttr
(
fromStrict
"phyloTimeScale"
)
$
pack
$
getTimeScale
phylo
)
,(
toAttr
(
fromStrict
"PhyloScale"
)
$
pack
$
show
(
_qua_granularity
$
phyloQuality
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"PhyloScale"
)
$
pack
$
show
(
getLevel
phylo
))
,(
toAttr
(
fromStrict
"phyloQuality"
)
$
pack
$
show
(
phylo
^.
phylo_quality
))
,(
toAttr
(
fromStrict
"phyloSeaRiseStart"
)
$
pack
$
show
(
getPhyloSeaRiseStart
phylo
))
,(
toAttr
(
fromStrict
"phyloSeaRiseSteps"
)
$
pack
$
show
(
getPhyloSeaRiseSteps
phylo
))
...
...
@@ -376,6 +376,7 @@ processSort sort' elev export = case sort' of
ByHierarchy
_
->
case
elev
of
Constante
s
s'
->
export
&
export_branches
.~
(
branchToIso'
s
s'
$
sortByHierarchy
0
(
export
^.
export_branches
))
Adaptative
_
->
export
&
export_branches
.~
(
branchToIso
$
sortByHierarchy
0
(
export
^.
export_branches
))
Evolving
_
->
export
&
export_branches
.~
(
branchToIso
$
sortByHierarchy
0
(
export
^.
export_branches
))
-----------------
-- | Metrics | --
...
...
@@ -416,7 +417,7 @@ ngramsMetrics phylo export =
&
phylo_groupMeta
%~
insert
"inclusion"
(
map
(
\
n
->
inclusion
(
g
^.
phylo_groupCooc
)
((
g
^.
phylo_groupNgrams
)
\\
[
n
])
n
)
$
g
^.
phylo_groupNgrams
)
&
phylo_groupMeta
%~
insert
"frequence"
(
map
(
\
n
->
getInMap
n
(
phylo
^.
phylo_lastTermFreq
))
$
g
^.
phylo_groupNgrams
)
(
map
(
\
n
->
getInMap
n
(
getLastRootsFreq
phylo
))
$
g
^.
phylo_groupNgrams
)
)
export
...
...
@@ -643,12 +644,13 @@ toHorizon phylo =
heads
=
filter
(
\
g
->
(
not
.
null
)
$
(
g
^.
phylo_groupPeriodChilds
))
$
filter
(
\
g
->
null
(
g
^.
phylo_groupPeriodParents
)
&&
(
notElem
(
getGroupId
g
)
childs
))
groups
noHeads
=
groups
\\
heads
nbDocs
=
sum
$
elems
$
filterDocs
(
phylo
^.
phylo_timeDocs
)
[
prd
]
diago
=
reduceDiagos
$
filterDiago
(
phylo
^.
phylo_timeCooc
)
[
prd
]
nbDocs
=
sum
$
elems
$
filterDocs
(
getDocsByDate
phylo
)
[
prd
]
diago
=
reduceDiagos
$
filterDiago
(
getCoocByDate
phylo
)
[
prd
]
sim
=
(
similarity
$
getConfig
phylo
)
step
=
case
getSeaElevation
phylo
of
Constante
_
s
->
s
Adaptative
_
->
0
Evolving
_
->
0
-- in headsToAncestors nbDocs diago Similarity heads groups []
in
map
(
\
ego
->
toAncestor
nbDocs
diago
sim
step
noHeads
ego
)
$
headsToAncestors
nbDocs
diago
sim
step
heads
[]
...
...
@@ -671,7 +673,7 @@ toPhyloExport :: Phylo -> DotGraph DotId
toPhyloExport
phylo
=
exportToDot
phylo
$
processFilters
(
exportFilter
$
getConfig
phylo
)
(
phyloQuality
$
getConfig
phylo
)
$
processSort
(
exportSort
$
getConfig
phylo
)
(
getSeaElevation
phylo
)
$
processLabels
(
exportLabel
$
getConfig
phylo
)
(
getRoots
phylo
)
(
_phylo_lastTerm
Freq
phylo
)
$
processLabels
(
exportLabel
$
getConfig
phylo
)
(
getRoots
phylo
)
(
getLastRoots
Freq
phylo
)
$
processMetrics
phylo
export
where
export
::
PhyloExport
...
...
@@ -711,7 +713,7 @@ tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map
tracePhyloInfo
::
Phylo
->
Phylo
tracePhyloInfo
phylo
=
trace
(
"
\n
"
<>
"##########################"
<>
"
\n\n
"
<>
"-- | Phylo with λ = "
<>
show
(
_qua_granularity
$
phyloQuality
$
getConfig
phylo
)
<>
" applied to "
<>
show
(
getLevel
phylo
)
<>
" applied to "
<>
show
(
length
$
Vector
.
toList
$
getRoots
phylo
)
<>
" foundations"
)
phylo
...
...
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
View file @
386ad673
...
...
@@ -16,7 +16,7 @@ import Control.DeepSeq (NFData)
import
Control.Lens
hiding
(
Level
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
import
Data.List
(
concat
,
nub
,
partition
,
sort
,
(
++
),
group
,
intersect
,
null
,
sortOn
,
groupBy
,
tail
)
import
Data.Map
(
Map
,
fromListWith
,
keys
,
unionWith
,
fromList
,
empty
,
toList
,
elems
,
(
!
),
restrictKeys
,
foldlWithKey
,
insert
)
import
Data.Map
(
Map
,
fromListWith
,
keys
,
unionWith
,
fromList
,
empty
,
toList
,
elems
,
(
!
),
restrictKeys
,
insert
)
import
Data.Set
(
Set
)
import
Data.Text
(
Text
)
import
Data.Vector
(
Vector
)
...
...
@@ -30,7 +30,7 @@ import Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toHorizon
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
import
Gargantext.Core.Viz.Phylo.SynchronicClustering
(
synchronicClustering
)
import
Gargantext.Core.Viz.Phylo.TemporalMatching
(
temporalMatching
,
getNextPeriods
,
filterDocs
,
filterDiago
,
reduceDiagos
,
toSimilarity
)
import
Gargantext.Core.Viz.Phylo.TemporalMatching
(
t
oPhyloQuality
,
t
emporalMatching
,
getNextPeriods
,
filterDocs
,
filterDiago
,
reduceDiagos
,
toSimilarity
)
import
Gargantext.Prelude
import
qualified
Data.Set
as
Set
...
...
@@ -55,8 +55,7 @@ toPhylo' (PhyloBase phylo) = toPhylo
-- TODO an adaptative synchronic clustering with a slider
toPhylo
::
Phylo
->
Phylo
toPhylo
phylowithoutLink
=
trace
(
"# flatPhylo groups "
<>
show
(
length
$
getGroupsFromScale
1
flatPhylo
))
$
traceToPhylo
(
phyloScale
$
getConfig
phylowithoutLink
)
$
toPhylo
phylowithoutLink
=
traceToPhylo
(
phyloScale
$
getConfig
phylowithoutLink
)
$
if
(
phyloScale
$
getConfig
phylowithoutLink
)
>
1
then
foldl'
(
\
phylo'
_
->
synchronicClustering
phylo'
)
phyloAncestors
[
2
..
(
phyloScale
$
getConfig
phylowithoutLink
)]
else
phyloAncestors
...
...
@@ -65,11 +64,11 @@ toPhylo phylowithoutLink = trace ("# flatPhylo groups " <> show(length $ getGrou
phyloAncestors
::
Phylo
phyloAncestors
=
if
(
findAncestors
$
getConfig
phylowithoutLink
)
then
toHorizon
flatPhylo
else
flatPhylo
then
toHorizon
phyloWithLinks
else
phyloWithLinks
--------------------------------------
flatPhylo
::
Phylo
flatPhylo
=
addTemporalLinksToPhylo
phylowithoutLink
phyloWithLinks
::
Phylo
phyloWithLinks
=
temporalMatching
(
getLadder
phylowithoutLink
)
phylowithoutLink
--------------------------------------
...
...
@@ -86,60 +85,111 @@ squareLadder ladder = List.map (\x -> x * x) ladder
{-
-- create an adaptative
diachronic
'sea elevation' ladder
-- create an adaptative 'sea elevation' ladder
-}
adapt
Diachronic
Ladder
::
Double
->
Set
Double
->
Set
Double
->
[
Double
]
adapt
Diachronic
Ladder
curr
similarities
ladder
=
adapt
Sea
Ladder
::
Double
->
Set
Double
->
Set
Double
->
[
Double
]
adapt
Sea
Ladder
curr
similarities
ladder
=
if
curr
<=
0
||
Set
.
null
similarities
then
Set
.
toList
ladder
else
let
idx
=
((
Set
.
size
similarities
)
`
div
`
(
floor
curr
))
-
1
thr
=
Set
.
elemAt
idx
similarities
-- we use a sliding methods 1/10, then 1/9, then ... 1/2
in
adapt
Diachronic
Ladder
(
curr
-
1
)
(
Set
.
filter
(
>
thr
)
similarities
)
(
Set
.
insert
thr
ladder
)
in
adapt
Sea
Ladder
(
curr
-
1
)
(
Set
.
filter
(
>
thr
)
similarities
)
(
Set
.
insert
thr
ladder
)
{-
-- create a constante
diachronic
'sea elevation' ladder
-- create a constante 'sea elevation' ladder
-}
const
Diachronic
Ladder
::
Double
->
Double
->
Set
Double
->
[
Double
]
const
Diachronic
Ladder
curr
step
ladder
=
const
Sea
Ladder
::
Double
->
Double
->
Set
Double
->
[
Double
]
const
Sea
Ladder
curr
step
ladder
=
if
curr
>
1
then
Set
.
toList
ladder
else
constDiachronicLadder
(
curr
+
step
)
step
(
Set
.
insert
curr
ladder
)
else
constSeaLadder
(
curr
+
step
)
step
(
Set
.
insert
curr
ladder
)
{-
--
process an initial scanning of the kinship links
--
create an evolving 'sea elevation' ladder based on estimated & local quality maxima
-}
scanSimilarity
::
Scale
->
Phylo
->
Phylo
scanSimilarity
lvl
phylo
=
let
proximity
=
similarity
$
getConfig
phylo
scanning
=
foldlWithKey
(
\
acc
pId
pds
->
-- 1) process period by period
let
egos
=
map
(
\
g
->
(
getGroupId
g
,
g
^.
phylo_groupNgrams
))
$
elems
$
view
(
phylo_periodScales
.
traverse
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_scaleScale
==
lvl
)
.
phylo_scaleGroups
)
pds
next
=
getNextPeriods
ToParents
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
pId
(
keys
$
phylo
^.
phylo_periods
)
targets
=
map
(
\
g
->
(
getGroupId
g
,
g
^.
phylo_groupNgrams
))
$
getGroupsFromScalePeriods
lvl
next
phylo
docs
=
filterDocs
(
phylo
^.
phylo_timeDocs
)
([
pId
]
++
next
)
diagos
=
filterDiago
(
phylo
^.
phylo_timeCooc
)
([
pId
]
++
next
)
-- 2) compute the pairs in parallel
pairs
=
map
(
\
(
id
,
ngrams
)
->
map
(
\
(
id'
,
ngrams'
)
->
let
nbDocs
=
(
sum
.
elems
)
$
filterDocs
docs
([
idToPrd
id
,
idToPrd
id'
])
diago
=
reduceDiagos
$
filterDiago
diagos
([
idToPrd
id
,
idToPrd
id'
])
in
((
id
,
id'
),
toSimilarity
nbDocs
diago
proximity
ngrams
ngrams'
ngrams'
)
)
$
filter
(
\
(
_
,
ngrams'
)
->
(
not
.
null
)
$
intersect
ngrams
ngrams'
)
targets
)
egos
pairs'
=
pairs
`
using
`
parList
rdeepseq
in
acc
++
(
concat
pairs'
)
)
[]
$
phylo
^.
phylo_periods
in
phylo
&
phylo_diaSimScan
.~
Set
.
fromList
(
traceGroupsProxi
$
map
snd
scanning
)
evolvSeaLadder
::
Double
->
Double
->
Map
Int
Double
->
Set
Double
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
->
[
Double
]
evolvSeaLadder
nbFdt
lambda
freq
similarities
graph
=
map
snd
$
filter
fst
$
zip
maxima
(
map
fst
qua'
)
-- 3) find the corresponding measures of similarity and create the ladder
where
--------
-- 2) find the local maxima in the quality distribution
maxima
::
[
Bool
]
maxima
=
[
snd
(
List
.
head
qua'
)
>
snd
(
List
.
head
$
List
.
tail
qua'
)]
++
(
findMaxima
qua'
)
++
[
snd
(
List
.
head
$
reverse
qua'
)
>
snd
(
List
.
head
$
List
.
tail
$
reverse
qua'
)]
--------
-- 1.2)
qua'
::
[(
Double
,
Double
)]
qua'
=
foldl
(
\
acc
(
s
,
q
)
->
if
length
acc
==
0
then
[(
s
,
q
)]
else
if
(
snd
(
List
.
last
acc
))
==
q
then
acc
else
acc
++
[(
s
,
q
)]
)
[]
$
zip
(
Set
.
toList
similarities
)
qua
--------
-- 1.1) for each measure of similarity, prune the flat phylo, compute the branches and estimate the quality
qua
::
[
Double
]
qua
=
map
(
\
thr
->
let
edges
=
filter
(
\
edge
->
snd
edge
>=
thr
)
graph
nodes
=
nub
$
concat
$
map
(
\
((
n
,
n'
),
_
)
->
[
n
,
n'
])
edges
branches
=
toRelatedComponents
nodes
edges
in
toPhyloQuality
nbFdt
lambda
freq
branches
)
$
(
Set
.
toList
similarities
)
{-
-- find a similarity ladder regarding the "sea elevation" strategy
-}
findSeaLadder
::
Phylo
->
Phylo
findSeaLadder
phylo
=
case
getSeaElevation
phylo
of
Constante
start
gap
->
phylo
&
phylo_seaLadder
.~
(
constSeaLadder
start
gap
Set
.
empty
)
Adaptative
steps
->
phylo
&
phylo_seaLadder
.~
(
squareLadder
$
adaptSeaLadder
steps
similarities
Set
.
empty
)
Evolving
_
->
let
ladder
=
evolvSeaLadder
(
fromIntegral
$
Vector
.
length
$
getRoots
phylo
)
(
getLevel
phylo
)
(
getRootsFreq
phylo
)
similarities
simGraph
in
phylo
&
phylo_seaLadder
.~
(
if
length
ladder
>
0
then
ladder
-- if we don't find any local maxima with the evolving strategy
else
constSeaLadder
0.1
0.1
Set
.
empty
)
where
--------
-- 2) extract the values of the kinship links
similarities
::
Set
Double
similarities
=
Set
.
fromList
$
sort
$
map
snd
simGraph
--------
-- 1) we process an initial calculation of the kinship links
-- this initial calculation is used to estimate the real sea ladder
simGraph
::
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
simGraph
=
foldl'
(
\
acc
period
->
-- 1.1) process period by period
let
sources
=
getGroupsFromScalePeriods
1
[
period
]
phylo
next
=
getNextPeriods
ToParents
3
period
(
keys
$
phylo
^.
phylo_periods
)
targets
=
getGroupsFromScalePeriods
1
next
phylo
docs
=
filterDocs
(
getDocsByDate
phylo
)
([
period
]
++
next
)
diagos
=
filterDiago
(
getCoocByDate
phylo
)
([
period
]
++
next
)
-- 1.2) compute the kinship similarities between pairs of source & target in parallel
pairs
=
map
(
\
source
->
let
candidates
=
filter
(
\
target
->
(
>
2
)
$
length
$
intersect
(
getGroupNgrams
source
)
(
getGroupNgrams
target
))
targets
in
map
(
\
target
->
let
nbDocs
=
(
sum
.
elems
)
$
filterDocs
docs
([
idToPrd
(
getGroupId
source
),
idToPrd
(
getGroupId
target
)])
diago
=
reduceDiagos
$
filterDiago
diagos
([
idToPrd
(
getGroupId
source
),
idToPrd
(
getGroupId
target
)])
in
((
source
,
target
),
toSimilarity
nbDocs
diago
(
getSimilarity
phylo
)
(
getGroupNgrams
source
)
(
getGroupNgrams
target
)
(
getGroupNgrams
target
))
)
candidates
)
sources
pairs'
=
pairs
`
using
`
parList
rdeepseq
in
acc
++
(
concat
pairs'
)
)
[]
$
keys
$
phylo
^.
phylo_periods
appendGroups
::
(
a
->
Period
->
(
Text
,
Text
)
->
Scale
->
Int
->
[
Cooc
]
->
PhyloGroup
)
->
Scale
->
Map
(
Date
,
Date
)
[
a
]
->
Phylo
->
Phylo
appendGroups
f
lvl
m
phylo
=
trace
(
"
\n
"
<>
"-- | Append "
<>
show
(
length
$
concat
$
elems
m
)
<>
" groups to Level "
<>
show
(
lvl
)
<>
"
\n
"
)
...
...
@@ -156,7 +206,7 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
&
phylo_scaleGroups
.~
(
fromList
$
foldl
(
\
groups
obj
->
groups
++
[
(((
pId
,
lvl
),
length
groups
)
,
f
obj
pId
pId'
lvl
(
length
groups
)
(
elems
$
restrictKeys
(
phylo
^.
phylo_timeCooc
)
$
periodsToYears
[
pId
]))
(
elems
$
restrictKeys
(
getCoocByDate
phylo
)
$
periodsToYears
[
pId
]))
]
)
[]
phyloCUnit
)
else
phyloLvl
)
...
...
@@ -174,16 +224,6 @@ clusterToGroup fis pId pId' lvl idx coocs = PhyloGroup pId pId' lvl idx ""
(
fromList
[(
"breaks"
,[
0
]),(
"seaLevels"
,[
0
])])
[]
[]
[]
[]
[]
[]
[]
{-
-- enhance the phylo with temporal links
-}
addTemporalLinksToPhylo
::
Phylo
->
Phylo
addTemporalLinksToPhylo
phylowithoutLink
=
case
strategy
of
Constante
start
gap
->
temporalMatching
(
constDiachronicLadder
start
gap
Set
.
empty
)
phylowithoutLink
Adaptative
steps
->
temporalMatching
(
squareLadder
$
adaptDiachronicLadder
steps
(
phylowithoutLink
^.
phylo_diaSimScan
)
Set
.
empty
)
phylowithoutLink
where
strategy
::
SeaElevation
strategy
=
getSeaElevation
phylowithoutLink
-----------------------
-- | To Phylo Step | --
...
...
@@ -203,8 +243,8 @@ indexDates' m = map (\docs ->
-- create a map of roots and group ids
joinRoots
ToGroups
::
Phylo
->
Phylo
joinRoots
ToGroups
phylo
=
set
(
phylo_foundations
.
foundations_rootsInGroups
)
rootsMap
phylo
joinRoots
::
Phylo
->
Phylo
joinRoots
phylo
=
set
(
phylo_foundations
.
foundations_rootsInGroups
)
rootsMap
phylo
where
--------------------------------------
rootsMap
::
Map
Int
[
PhyloGroupId
]
...
...
@@ -215,15 +255,19 @@ joinRootsToGroups phylo = set (phylo_foundations . foundations_rootsInGroups) ro
$
getGroupsFromScale
1
phylo
maybeDefaultParams
::
Phylo
->
Phylo
maybeDefaultParams
phylo
=
if
(
defaultMode
(
getConfig
phylo
))
then
findDefaultLevel
phylo
else
phylo
-- To build the first phylo step from docs and terms
-- QL: backend entre phyloBase et Clustering
-- tophylowithoutLink
toPhyloWithoutLink
::
[
Document
]
->
PhyloConfig
->
Phylo
toPhyloWithoutLink
docs
conf
=
case
(
getSeaElevation
phyloBase
)
of
Constante
_
_
->
joinRootsToGroups
$
appendGroups
clusterToGroup
1
seriesOfClustering
(
updatePeriods
(
indexDates'
docs'
)
phyloBase
)
Adaptative
_
->
joinRootsToGroups
$
scanSimilarity
1
toPhyloWithoutLink
docs
conf
=
joinRoots
$
findSeaLadder
$
maybeDefaultParams
$
appendGroups
clusterToGroup
1
seriesOfClustering
(
updatePeriods
(
indexDates'
docs'
)
phyloBase
)
where
--------------------------------------
...
...
@@ -340,6 +384,7 @@ docsToTimeScaleCooc docs fdt =
-----------------------
-- | to Phylo Base | --
-----------------------
-- TODO anoe
groupDocsByPeriodRec
::
(
NFData
doc
,
Ord
date
,
Enum
date
)
=>
(
doc
->
date
)
->
[(
date
,
date
)]
->
[
doc
]
->
Map
(
date
,
date
)
[
doc
]
->
Map
(
date
,
date
)
[
doc
]
groupDocsByPeriodRec
f
prds
docs
acc
=
...
...
@@ -394,6 +439,14 @@ docsToTermFreq docs fdt =
sumFreqs
=
sum
$
elems
freqs
in
map
(
/
sumFreqs
)
freqs
docsToTermCount
::
[
Document
]
->
Vector
Ngrams
->
Map
Int
Double
docsToTermCount
docs
roots
=
fromList
$
map
(
\
lst
->
(
head'
"docsToTermCount"
lst
,
fromIntegral
$
length
lst
))
$
group
$
sort
$
concat
$
map
(
\
d
->
nub
$
ngramsToIdx
(
text
d
)
roots
)
docs
docsToLastTermFreq
::
Int
->
[
Document
]
->
Vector
Ngrams
->
Map
Int
Double
docsToLastTermFreq
n
docs
fdt
=
let
last
=
take
n
$
reverse
$
sort
$
map
date
docs
...
...
@@ -420,6 +473,20 @@ initPhyloScales lvlMax pId =
fromList
$
map
(
\
lvl
->
((
pId
,
lvl
),
PhyloScale
pId
(
""
,
""
)
lvl
empty
))
[
1
..
lvlMax
]
setDefault
::
PhyloConfig
->
PhyloConfig
setDefault
conf
=
conf
{
phyloScale
=
2
,
similarity
=
WeightedLogJaccard
0.5
2
,
findAncestors
=
True
,
phyloSynchrony
=
ByProximityThreshold
0.6
0
SiblingBranches
MergeAllGroups
,
phyloQuality
=
Quality
0.5
3
,
timeUnit
=
Year
3
1
3
,
clique
=
MaxClique
5
30
ByNeighbours
,
exportLabel
=
[
BranchLabel
MostEmergentTfIdf
2
,
GroupLabel
MostEmergentInclusive
2
],
exportSort
=
ByHierarchy
Desc
,
exportFilter
=
[
ByBranchSize
3
]
}
-- Init the basic elements of a Phylo
--
...
...
@@ -428,16 +495,21 @@ initPhylo docs conf =
let
roots
=
Vector
.
fromList
$
nub
$
concat
$
map
text
docs
foundations
=
PhyloFoundations
roots
empty
docsSources
=
PhyloSources
(
Vector
.
fromList
$
nub
$
concat
$
map
sources
docs
)
params
=
defaultPhyloParam
{
_phyloParam_config
=
conf
}
docsCounts
=
PhyloCounts
(
docsToTimeScaleCooc
docs
(
foundations
^.
foundations_roots
))
(
docsToTimeScaleNb
docs
)
(
docsToTermCount
docs
(
foundations
^.
foundations_roots
))
(
docsToTermFreq
docs
(
foundations
^.
foundations_roots
))
(
docsToLastTermFreq
(
getTimePeriod
$
timeUnit
conf
)
docs
(
foundations
^.
foundations_roots
))
params
=
if
(
defaultMode
conf
)
then
defaultPhyloParam
{
_phyloParam_config
=
setDefault
conf
}
else
defaultPhyloParam
{
_phyloParam_config
=
conf
}
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
$
timeUnit
conf
)
(
getTimeStep
$
timeUnit
conf
)
in
trace
(
"
\n
"
<>
"-- | Init a phylo out of "
<>
show
(
length
docs
)
<>
" docs
\n
"
)
$
Phylo
foundations
docsSources
(
docsToTimeScaleCooc
docs
(
foundations
^.
foundations_roots
))
(
docsToTimeScaleNb
docs
)
(
docsToTermFreq
docs
(
foundations
^.
foundations_roots
))
(
docsToLastTermFreq
(
getTimePeriod
$
timeUnit
conf
)
docs
(
foundations
^.
foundations_roots
))
Set
.
empty
docsCounts
[]
params
(
fromList
$
map
(
\
prd
->
(
prd
,
PhyloPeriod
prd
(
""
,
""
)
(
initPhyloScales
1
prd
)))
periods
)
0
(
_qua_granularity
$
phyloQuality
$
conf
)
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
View file @
386ad673
...
...
@@ -13,7 +13,7 @@ Portability : POSIX
module
Gargantext.Core.Viz.Phylo.PhyloTools
where
import
Control.Lens
hiding
(
Level
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
,
init
,
tail
,
partition
,
tails
,
nubBy
,
group
,
notElem
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
,
init
,
tail
,
iterate
,
transpose
,
partition
,
tails
,
nubBy
,
group
,
notElem
,
(
!!
)
)
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
,
member
,
(
!
),
filterWithKey
,
fromListWith
,
empty
,
restrictKeys
)
import
Data.Set
(
Set
,
disjoint
)
import
Data.String
(
String
)
...
...
@@ -28,6 +28,7 @@ import qualified Data.List as List
import
qualified
Data.Set
as
Set
import
qualified
Data.Text
as
Text
import
qualified
Data.Vector
as
Vector
import
qualified
Data.Map
as
Map
------------
-- | Io | --
...
...
@@ -301,6 +302,9 @@ getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc
coocToDiago
::
Cooc
->
Cooc
coocToDiago
cooc
=
filterWithKey
(
\
(
k
,
k'
)
_
->
k
==
k'
)
cooc
coocToAdjacency
::
Cooc
->
Cooc
coocToAdjacency
cooc
=
Map
.
map
(
\
_
->
1
)
cooc
-- | To build the local cooc matrix of each phylogroup
ngramsToCooc
::
[
Int
]
->
[
Cooc
]
->
Cooc
ngramsToCooc
ngrams
coocs
=
...
...
@@ -309,6 +313,75 @@ ngramsToCooc ngrams coocs =
in
filterWithKey
(
\
k
_
->
elem
k
pairs
)
cooc
------------------
-- | Defaults | --
------------------
-- | find the local maxima in a list of values
findMaxima
::
[(
Double
,
Double
)]
->
[
Bool
]
findMaxima
lst
=
map
(
hasMax
)
$
toChunk
3
lst
where
------
hasMax
::
[(
Double
,
Double
)]
->
Bool
hasMax
chunk
=
if
(
length
chunk
)
/=
3
then
False
else
(
snd
(
chunk
!!
0
)
<
snd
(
chunk
!!
1
))
&&
(
snd
(
chunk
!!
2
)
<
snd
(
chunk
!!
1
))
-- | split a list into chunks of size n
toChunk
::
Int
->
[
a
]
->
[[
a
]]
toChunk
n
=
takeWhile
((
==
n
)
.
length
)
.
transpose
.
take
n
.
iterate
tail
-- | To compute the average degree from a cooc matrix
-- http://networksciencebook.com/chapter/2#degree
toAverageDegree
::
Cooc
->
Vector
Ngrams
->
Double
toAverageDegree
cooc
roots
=
2
*
(
fromIntegral
$
Map
.
size
cooc
)
/
(
fromIntegral
$
Vector
.
length
roots
)
-- | Use the giant component regime to estimate the default level
-- http://networksciencebook.com/chapter/3#networks-supercritical
regimeToDefaultLevel
::
Cooc
->
Vector
Ngrams
->
Double
regimeToDefaultLevel
cooc
roots
|
avg
==
0
=
1
|
avg
<
1
=
avg
*
0.6
|
avg
==
1
=
0.6
|
avg
<
lnN
=
(
avg
*
0.2
)
/
lnN
|
otherwise
=
0.2
where
avg
::
Double
avg
=
toAverageDegree
cooc
roots
lnN
::
Double
lnN
=
log
(
fromIntegral
$
Vector
.
length
roots
)
coocToConfidence
::
Phylo
->
Cooc
coocToConfidence
phylo
=
let
count
=
getRootsCount
phylo
cooc
=
foldl
(
\
acc
cooc'
->
sumCooc
acc
cooc'
)
empty
$
elems
$
getCoocByDate
phylo
in
Map
.
mapWithKey
(
\
(
a
,
b
)
w
->
confidence
a
b
w
count
)
cooc
where
----
-- confidence
confidence
::
Int
->
Int
->
Double
->
Map
Int
Double
->
Double
confidence
a
b
inter
card
=
maximum
[(
inter
/
card
!
a
),(
inter
/
card
!
b
)]
sumtest
::
[
Int
]
->
[
Int
]
->
Int
sumtest
l1
l2
=
(
head'
"test"
l1
)
+
(
head'
"test"
$
reverse
l2
)
findDefaultLevel
::
Phylo
->
Phylo
findDefaultLevel
phylo
=
let
confidence
=
Map
.
filterWithKey
(
\
(
a
,
b
)
_
->
a
/=
b
)
$
Map
.
filter
(
>
0.01
)
$
coocToConfidence
phylo
roots
=
getRoots
phylo
level
=
regimeToDefaultLevel
confidence
roots
in
updateLevel
level
phylo
--------------------
-- | PhyloGroup | --
--------------------
...
...
@@ -401,21 +474,46 @@ getScales phylo = nub
getSeaElevation
::
Phylo
->
SeaElevation
getSeaElevation
phylo
=
seaElevation
(
getConfig
phylo
)
getSimilarity
::
Phylo
->
Similarity
getSimilarity
phylo
=
similarity
(
getConfig
phylo
)
getPhyloSeaRiseStart
::
Phylo
->
Double
getPhyloSeaRiseStart
phylo
=
case
(
getSeaElevation
phylo
)
of
Constante
s
_
->
s
Adaptative
_
->
0
Evolving
_
->
0
getPhyloSeaRiseSteps
::
Phylo
->
Double
getPhyloSeaRiseSteps
phylo
=
case
(
getSeaElevation
phylo
)
of
Constante
_
s
->
s
Adaptative
s
->
s
Evolving
_
->
0.1
getConfig
::
Phylo
->
PhyloConfig
getConfig
phylo
=
(
phylo
^.
phylo_param
)
^.
phyloParam_config
getLevel
::
Phylo
->
Double
getLevel
phylo
=
_phylo_level
phylo
getLadder
::
Phylo
->
[
Double
]
getLadder
phylo
=
phylo
^.
phylo_seaLadder
getCoocByDate
::
Phylo
->
Map
Date
Cooc
getCoocByDate
phylo
=
coocByDate
(
phylo
^.
phylo_counts
)
getDocsByDate
::
Phylo
->
Map
Date
Double
getDocsByDate
phylo
=
docsByDate
(
phylo
^.
phylo_counts
)
getRootsCount
::
Phylo
->
Map
Int
Double
getRootsCount
phylo
=
rootsCount
(
phylo
^.
phylo_counts
)
getRootsFreq
::
Phylo
->
Map
Int
Double
getRootsFreq
phylo
=
rootsFreq
(
phylo
^.
phylo_counts
)
getLastRootsFreq
::
Phylo
->
Map
Int
Double
getLastRootsFreq
phylo
=
lastRootsFreq
(
phylo
^.
phylo_counts
)
setConfig
::
PhyloConfig
->
Phylo
->
Phylo
setConfig
config
phylo
=
phylo
...
...
@@ -503,6 +601,8 @@ updatePeriods periods' phylo =
updateQuality
::
Double
->
Phylo
->
Phylo
updateQuality
quality
phylo
=
phylo
{
_phylo_quality
=
quality
}
updateLevel
::
Double
->
Phylo
->
Phylo
updateLevel
level
phylo
=
phylo
{
_phylo_level
=
level
}
traceToPhylo
::
Scale
->
Phylo
->
Phylo
traceToPhylo
lvl
phylo
=
...
...
src/Gargantext/Core/Viz/Phylo/SynchronicClustering.hs
View file @
386ad673
...
...
@@ -76,7 +76,7 @@ toNextScale phylo groups =
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
foldlWithKey
(
\
acc
id
groups'
->
-- 4) create the parent group
let
parent
=
mergeGroups
(
elems
$
restrictKeys
(
phylo
^.
phylo_timeCooc
)
$
periodsToYears
[(
fst
.
fst
)
id
])
id
oldGroups
groups'
let
parent
=
mergeGroups
(
elems
$
restrictKeys
(
getCoocByDate
phylo
)
$
periodsToYears
[(
fst
.
fst
)
id
])
id
oldGroups
groups'
in
acc
++
[
parent
])
[]
-- 3) group the current groups by parentId
$
fromListWith
(
++
)
$
map
(
\
g
->
(
getLevelParentId
g
,
[
g
]))
groups
...
...
@@ -199,8 +199,8 @@ synchronicClustering :: Phylo -> Phylo
synchronicClustering
phylo
=
let
prox
=
similarity
$
getConfig
phylo
sync
=
phyloSynchrony
$
getConfig
phylo
docs
=
phylo
^.
phylo_timeDocs
diagos
=
map
coocToDiago
$
phylo
^.
phylo_timeCooc
docs
=
getDocsByDate
phylo
diagos
=
map
coocToDiago
$
getCoocByDate
phylo
newBranches
=
map
(
\
branch
->
reduceGroups
prox
sync
docs
diagos
branch
)
$
map
processDynamics
$
chooseClusteringStrategy
sync
...
...
src/Gargantext/Core/Viz/Phylo/TemporalMatching.hs
View file @
386ad673
...
...
@@ -56,7 +56,7 @@ sumInvLog' s nb diago = foldl (\mem occ -> mem + (1 / (log (occ + 1/ tan (s * pi
-- process the sumLog
-}
sumLog'
::
Double
->
Double
->
[
Double
]
->
Double
sumLog'
s
nb
diago
=
foldl
(
\
mem
occ
->
mem
+
(
log
(
occ
+
1
/
tan
(
s
*
pi
/
2
))
/
log
(
nb
+
1
/
tan
(
s
*
pi
/
2
))))
0
diago
sumLog'
s
nb
diago
=
foldl
(
\
mem
occ
->
mem
+
(
log
(
occ
+
1
/
tan
(
s
*
pi
/
2
))
/
log
(
nb
+
1
/
tan
(
s
*
pi
/
2
))))
0
diago
{-
...
...
@@ -695,14 +695,14 @@ temporalMatching ladder phylo = updatePhyloGroups 1
sea
::
([(
Branch
,
ShouldTry
)],
FinalQuality
)
sea
=
seaLevelRise
(
fromIntegral
$
Vector
.
length
$
getRoots
phylo
)
(
similarity
$
getConfig
phylo
)
(
_qua_granularity
$
phyloQuality
$
getConfig
phylo
)
(
getLevel
phylo
)
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
)
(
phylo
^.
phylo_termFreq
)
(
getRootsFreq
phylo
)
ladder
1
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
(
phylo
^.
phylo_timeDocs
)
(
phylo
^.
phylo_timeCooc
)
(
getDocsByDate
phylo
)
(
getCoocByDate
phylo
)
((
phylo
^.
phylo_foundations
)
^.
foundations_rootsInGroups
)
(
reverse
$
sortOn
(
length
.
fst
)
seabed
)
...
...
@@ -714,7 +714,7 @@ temporalMatching ladder phylo = updatePhyloGroups 1
(
getPeriodIds
phylo
)
(
similarity
$
getConfig
phylo
)
(
List
.
head
ladder
)
(
phylo
^.
phylo_timeDocs
)
(
phylo
^.
phylo_timeCooc
)
(
getDocsByDate
phylo
)
(
getCoocByDate
phylo
)
((
phylo
^.
phylo_foundations
)
^.
foundations_rootsInGroups
)
(
traceTemporalMatching
$
getGroupsFromScale
1
phylo
)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment