Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Julien Moutinho
haskell-gargantext
Commits
815ab543
Commit
815ab543
authored
Jun 14, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add parallelism
parent
7832afe9
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
211 additions
and
169 deletions
+211
-169
package.yaml
package.yaml
+2
-0
Phylo.hs
src/Gargantext/Viz/Phylo.hs
+6
-2
Cluster.hs
src/Gargantext/Viz/Phylo/Aggregates/Cluster.hs
+14
-12
BranchMaker.hs
src/Gargantext/Viz/Phylo/BranchMaker.hs
+21
-31
Example.hs
src/Gargantext/Viz/Phylo/Example.hs
+1
-1
LevelMaker.hs
src/Gargantext/Viz/Phylo/LevelMaker.hs
+28
-13
LinkMaker.hs
src/Gargantext/Viz/Phylo/LinkMaker.hs
+116
-82
Clustering.hs
src/Gargantext/Viz/Phylo/Metrics/Clustering.hs
+11
-27
Tools.hs
src/Gargantext/Viz/Phylo/Tools.hs
+11
-1
stack.yaml
stack.yaml
+1
-0
No files found.
package.yaml
View file @
815ab543
...
@@ -103,6 +103,7 @@ library:
...
@@ -103,6 +103,7 @@ library:
-
contravariant
-
contravariant
-
crawlerPubMed
-
crawlerPubMed
-
data-time-segment
-
data-time-segment
-
deepseq
-
directory
-
directory
-
duckling
-
duckling
-
exceptions
-
exceptions
...
@@ -138,6 +139,7 @@ library:
...
@@ -138,6 +139,7 @@ library:
-
natural-transformation
-
natural-transformation
-
opaleye
-
opaleye
-
pandoc
-
pandoc
-
parallel
-
parsec
-
parsec
-
patches-class
-
patches-class
-
patches-map
-
patches-map
...
...
src/Gargantext/Viz/Phylo.hs
View file @
815ab543
...
@@ -22,7 +22,7 @@ one 8, e54847.
...
@@ -22,7 +22,7 @@ one 8, e54847.
-}
-}
{-# LANGUAGE DeriveGeneric
#-}
{-# LANGUAGE DeriveGeneric
, DeriveAnyClass
#-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE MultiParamTypeClasses #-}
...
@@ -44,6 +44,8 @@ import Gargantext.Core.Utils.Prefix (unPrefix)
...
@@ -44,6 +44,8 @@ import Gargantext.Core.Utils.Prefix (unPrefix)
import
Gargantext.Text.Context
(
TermList
)
import
Gargantext.Text.Context
(
TermList
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Control.DeepSeq
--------------------
--------------------
-- | PhyloParam | --
-- | PhyloParam | --
--------------------
--------------------
...
@@ -161,7 +163,9 @@ data PhyloGroup =
...
@@ -161,7 +163,9 @@ data PhyloGroup =
,
_phylo_groupLevelParents
::
[
Pointer
]
,
_phylo_groupLevelParents
::
[
Pointer
]
,
_phylo_groupLevelChilds
::
[
Pointer
]
,
_phylo_groupLevelChilds
::
[
Pointer
]
}
}
deriving
(
Generic
,
Show
,
Eq
,
Ord
)
deriving
(
Generic
,
NFData
,
Show
,
Eq
,
Ord
)
-- instance NFData PhyloGroup
-- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
-- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
...
...
src/Gargantext/Viz/Phylo/Aggregates/Cluster.hs
View file @
815ab543
...
@@ -13,11 +13,13 @@ Portability : POSIX
...
@@ -13,11 +13,13 @@ Portability : POSIX
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Viz.Phylo.Aggregates.Cluster
module
Gargantext.Viz.Phylo.Aggregates.Cluster
where
where
import
Data.List
(
null
,
tail
,
concat
,
sort
,
intersect
)
import
Control.Parallel.Strategies
import
Data.List
(
null
,
concat
,
sort
,
intersect
,(
++
))
import
Data.Map
(
Map
)
import
Data.Map
(
Map
)
import
Data.Tuple
(
fst
)
import
Data.Tuple
(
fst
)
import
Gargantext.Prelude
import
Gargantext.Prelude
...
@@ -44,17 +46,19 @@ getCandidates gs = filter (\(g,g') -> (not . null) $ intersect (getGroupNgrams g
...
@@ -44,17 +46,19 @@ getCandidates gs = filter (\(g,g') -> (not . null) $ intersect (getGroupNgrams g
graphToClusters
::
Cluster
->
GroupGraph
->
[
PhyloCluster
]
graphToClusters
::
Cluster
->
GroupGraph
->
[
PhyloCluster
]
graphToClusters
clust
(
nodes
,
edges
)
=
case
clust
of
graphToClusters
clust
(
nodes
,
edges
)
=
case
clust
of
Louvain
(
LouvainParams
_
)
->
undefined
Louvain
(
LouvainParams
_
)
->
undefined
RelatedComponents
(
RCParams
_
)
->
relatedComp
0
(
head'
"graphToClusters"
nodes
)
(
tail
nodes
,
edges
)
[]
[]
RelatedComponents
(
RCParams
_
)
->
relatedComp
$
((
map
(
\
((
g
,
g'
),
_
)
->
[
g
,
g'
])
edges
)
++
(
map
(
\
g
->
[
g
])
nodes
))
_
->
panic
"[ERR][Viz.Phylo.Aggregates.Cluster.graphToClusters] not implemented"
_
->
panic
"[ERR][Viz.Phylo.Aggregates.Cluster.graphToClusters] not implemented"
-- | To transform a list of PhyloGroups into a Graph of Proximity
-- | To transform a list of PhyloGroups into a Graph of Proximity
groupsToGraph
::
Double
->
Proximity
->
[
PhyloGroup
]
->
([
GroupNode
],[
GroupEdge
])
groupsToGraph
::
Double
->
Proximity
->
[
PhyloGroup
]
->
([
GroupNode
],[
GroupEdge
])
groupsToGraph
nbDocs
prox
gs
=
case
prox
of
groupsToGraph
nbDocs
prox
gs
=
case
prox
of
WeightedLogJaccard
(
WLJParams
_
sens
)
->
(
gs
,
map
(
\
(
x
,
y
)
->
((
x
,
y
),
weightedLogJaccard
sens
nbDocs
(
getGroupCooc
x
)
(
getGroupCooc
y
)
(
getGroupNgrams
x
)
(
getGroupNgrams
y
)))
WeightedLogJaccard
(
WLJParams
_
sens
)
->
(
gs
,
let
candidates
=
map
(
\
(
x
,
y
)
->
((
x
,
y
),
weightedLogJaccard
sens
nbDocs
(
getGroupCooc
x
)
(
getGroupCooc
y
)
(
getGroupNgrams
x
)
(
getGroupNgrams
y
)))
$
getCandidates
gs
)
$
getCandidates
gs
candidates'
=
candidates
`
using
`
parList
rdeepseq
in
candidates'
)
Hamming
(
HammingParams
_
)
->
(
gs
,
map
(
\
(
x
,
y
)
->
((
x
,
y
),
hamming
(
getGroupCooc
x
)
(
getGroupCooc
y
)))
$
getCandidates
gs
)
Hamming
(
HammingParams
_
)
->
(
gs
,
map
(
\
(
x
,
y
)
->
((
x
,
y
),
hamming
(
getGroupCooc
x
)
(
getGroupCooc
y
)))
$
getCandidates
gs
)
_
->
undefined
_
->
undefined
-- | To filter a Graph of Proximity using a given threshold
-- | To filter a Graph of Proximity using a given threshold
...
@@ -78,9 +82,11 @@ phyloToClusters lvl clus p = Map.fromList
...
@@ -78,9 +82,11 @@ phyloToClusters lvl clus p = Map.fromList
graphs'
=
traceGraphFiltered
lvl
graphs'
=
traceGraphFiltered
lvl
$
map
(
\
g
->
filterGraph
prox
g
)
graphs
$
map
(
\
g
->
filterGraph
prox
g
)
graphs
--------------------------------------
--------------------------------------
graphs
::
[([
GroupNode
],[
GroupEdge
])]
graphs
::
[([
GroupNode
],[
GroupEdge
])]
graphs
=
traceGraph
lvl
(
getThreshold
prox
)
graphs
=
traceGraph
lvl
(
getThreshold
prox
)
$
map
(
\
prd
->
groupsToGraph
(
periodsToNbDocs
[
prd
]
p
)
prox
(
getGroupsWithFilters
lvl
prd
p
))
periods
$
let
gs
=
map
(
\
prd
->
groupsToGraph
(
periodsToNbDocs
[
prd
]
p
)
prox
(
getGroupsWithFilters
lvl
prd
p
))
periods
gs'
=
gs
`
using
`
parList
rdeepseq
in
gs'
--------------------------------------
--------------------------------------
prox
::
Proximity
prox
::
Proximity
prox
=
getProximity
clus
prox
=
getProximity
clus
...
@@ -115,7 +121,3 @@ traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <>
...
@@ -115,7 +121,3 @@ traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <>
<>
show
(
percentile
90
(
VS
.
fromList
lst
))
<>
" (90%)
\n
"
)
g
<>
show
(
percentile
90
(
VS
.
fromList
lst
))
<>
" (90%)
\n
"
)
g
where
where
lst
=
sort
$
map
snd
$
concat
$
map
snd
g
lst
=
sort
$
map
snd
$
concat
$
map
snd
g
-- traceSim :: PhyloGroup -> PhyloGroup -> Phylo -> Double -> Double
-- traceSim g g' p sim = trace (show (getGroupText g p) <> " [vs] " <> show (getGroupText g' p) <> " = " <> show (sim) <> "\n") sim
src/Gargantext/Viz/Phylo/BranchMaker.hs
View file @
815ab543
...
@@ -17,6 +17,7 @@ Portability : POSIX
...
@@ -17,6 +17,7 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.BranchMaker
module
Gargantext.Viz.Phylo.BranchMaker
where
where
import
Control.Parallel.Strategies
import
Control.Lens
hiding
(
both
,
Level
)
import
Control.Lens
hiding
(
both
,
Level
)
import
Data.List
(
concat
,
nub
,(
++
),
tail
,
sortOn
,
take
,
reverse
,
sort
,
null
,
intersect
,
union
)
import
Data.List
(
concat
,
nub
,(
++
),
tail
,
sortOn
,
take
,
reverse
,
sort
,
null
,
intersect
,
union
)
import
Data.Map
(
Map
)
import
Data.Map
(
Map
)
...
@@ -82,13 +83,14 @@ findSimBranches frame thr nth p (id,gs) bs
...
@@ -82,13 +83,14 @@ findSimBranches frame thr nth p (id,gs) bs
pks
=
getGroupsPeaks
gs
nth
p
pks
=
getGroupsPeaks
gs
nth
p
--------------------------------------
--------------------------------------
findBestPointer
::
Phylo
->
Proximity
->
[
PhyloGroup
]
->
[
PhyloGroup
]
->
[(
PhyloGroupId
,
Pointer
)]
findBestPointer
::
Phylo
->
Proximity
->
[
PhyloGroup
]
->
[
PhyloGroup
]
->
[(
PhyloGroupId
,
Pointer
)]
findBestPointer
p
prox
gs
gs'
=
take
1
findBestPointer
p
prox
gs
gs'
=
$
reverse
let
candidates
=
map
(
\
g
->
let
pts
=
findBestCandidates'
prox
gs'
g
p
$
sortOn
(
snd
.
snd
)
in
map
(
\
pt
->
(
getGroupId
g
,
pt
))
pts
)
gs
$
concat
candidates'
=
candidates
`
using
`
parList
rdeepseq
$
map
(
\
g
->
let
pts
=
findBestCandidates'
prox
gs'
g
p
in
take
1
$
reverse
$
sortOn
(
snd
.
snd
)
$
concat
candidates'
in
map
(
\
pt
->
(
getGroupId
g
,
pt
))
pts
)
gs
makeBranchLinks
::
Phylo
->
Proximity
->
(
PhyloBranchId
,[
PhyloGroup
])
->
[(
PhyloBranchId
,[
PhyloGroup
])]
->
[(
PhyloGroupId
,
Pointer
)]
->
[(
PhyloGroupId
,
Pointer
)]
makeBranchLinks
::
Phylo
->
Proximity
->
(
PhyloBranchId
,[
PhyloGroup
])
->
[(
PhyloBranchId
,[
PhyloGroup
])]
->
[(
PhyloGroupId
,
Pointer
)]
->
[(
PhyloGroupId
,
Pointer
)]
makeBranchLinks
p
prox
(
id
,
gs
)
bs
pts
makeBranchLinks
p
prox
(
id
,
gs
)
bs
pts
...
@@ -126,34 +128,22 @@ linkPhyloBranches lvl prox p = setPhyloBranches lvl
...
@@ -126,34 +128,22 @@ linkPhyloBranches lvl prox p = setPhyloBranches lvl
-- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
-- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
graphToBranches
::
Level
->
GroupGraph
->
Phylo
->
[(
Int
,
PhyloGroupId
)]
graphToBranches
::
[
PhyloGroup
]
->
Phylo
->
[(
Int
,
PhyloGroupId
)]
graphToBranches
_lvl
(
nodes
,
edges
)
_p
=
concat
graphToBranches
groups
p
=
concat
$
map
(
\
(
idx
,
gs
)
->
map
(
\
g
->
(
idx
,
getGroupId
g
))
gs
)
$
map
(
\
(
idx
,
gs
)
->
map
(
\
g
->
(
idx
,
getGroupId
g
))
gs
)
$
zip
[
1
..
]
$
zip
[
1
..
]
$
relatedComp
0
(
head'
"branchMaker"
nodes
)
(
tail
nodes
,
edges
)
[]
[]
$
relatedComp
$
map
(
\
g
->
nub
$
[
g
]
++
(
getGroupParents
g
p
)
++
(
getGroupChilds
g
p
))
groups
-- | To build a graph using the parents and childs pointers
makeGraph
::
[
PhyloGroup
]
->
Phylo
->
GroupGraph
makeGraph
gs
p
=
(
gs
,
edges
)
where
edges
::
[
GroupEdge
]
edges
=
(
nub
.
concat
)
$
map
(
\
g
->
(
map
(
\
g'
->
((
g'
,
g
),
1
))
$
getGroupParents
g
p
)
++
(
map
(
\
g'
->
((
g
,
g'
),
1
))
$
getGroupChilds
g
p
))
gs
-- | To set all the PhyloBranches for a given Level in a Phylo
-- | To set all the PhyloBranches for a given Level in a Phylo
setPhyloBranches
::
Level
->
Phylo
->
Phylo
setPhyloBranches
::
Level
->
Phylo
->
Phylo
setPhyloBranches
lvl
p
=
alterGroupWithLevel
(
\
g
->
let
bIdx
=
(
fst
$
head'
"branchMaker"
$
filter
(
\
b
->
snd
b
==
getGroupId
g
)
bs
)
setPhyloBranches
lvl
p
=
alterGroupWithLevel
(
\
g
->
in
over
(
phylo_groupBranchId
)
(
\
_
->
Just
(
lvl
,
bIdx
))
g
)
lvl
p
let
bIdx
=
(
fst
$
head'
"branchMaker"
where
$
filter
(
\
b
->
snd
b
==
getGroupId
g
)
branches
)
--------------------------------------
in
over
(
phylo_groupBranchId
)
(
\
_
->
Just
(
lvl
,
bIdx
))
g
)
lvl
p
bs
::
[(
Int
,
PhyloGroupId
)]
where
bs
=
graphToBranches
lvl
graph
p
--------------------------------------
--------------------------------------
graph
::
GroupGraph
branches
::
[(
Int
,
PhyloGroupId
)]
graph
=
makeGraph
(
getGroupsWithLevel
lvl
p
)
p
branches
=
graphToBranches
(
getGroupsWithLevel
lvl
p
)
p
--------------------------------------
--------------------------------------
src/Gargantext/Viz/Phylo/Example.hs
View file @
815ab543
...
@@ -105,7 +105,7 @@ queryEx = "title=Cesar et Cleôpatre"
...
@@ -105,7 +105,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild
::
PhyloQueryBuild
phyloQueryBuild
::
PhyloQueryBuild
phyloQueryBuild
=
PhyloQueryBuild
"Cesar et Cleôpatre"
"An example of Phylomemy (french without accent)"
phyloQueryBuild
=
PhyloQueryBuild
"Cesar et Cleôpatre"
"An example of Phylomemy (french without accent)"
5
3
defaultFis
[]
[]
(
WeightedLogJaccard
$
WLJParams
0.
1
20
)
5
0.5
4
2
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.3
0
)
5
3
defaultFis
[]
[]
(
WeightedLogJaccard
$
WLJParams
0.
6
20
)
5
0.5
4
2
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.4
0
)
...
...
src/Gargantext/Viz/Phylo/LevelMaker.hs
View file @
815ab543
...
@@ -19,6 +19,7 @@ Portability : POSIX
...
@@ -19,6 +19,7 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.LevelMaker
module
Gargantext.Viz.Phylo.LevelMaker
where
where
import
Control.Parallel.Strategies
import
Control.Lens
hiding
(
both
,
Level
)
import
Control.Lens
hiding
(
both
,
Level
)
import
Data.List
((
++
),
sort
,
concat
,
nub
,
zip
,
last
)
import
Data.List
((
++
),
sort
,
concat
,
nub
,
zip
,
last
)
import
Data.Map
(
Map
,
(
!
),
empty
,
singleton
)
import
Data.Map
(
Map
,
(
!
),
empty
,
singleton
)
...
@@ -61,7 +62,10 @@ instance PhyloLevelMaker PhyloCluster
...
@@ -61,7 +62,10 @@ instance PhyloLevelMaker PhyloCluster
|
otherwise
=
panic
(
"[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2"
)
|
otherwise
=
panic
(
"[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2"
)
--------------------------------------
--------------------------------------
-- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
-- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
toPhyloGroups
lvl
(
d
,
d'
)
l
m
p
=
map
(
\
(
idx
,
cluster
)
->
clusterToGroup
(
d
,
d'
)
lvl
idx
""
cluster
m
p
)
$
zip
[
1
..
]
l
toPhyloGroups
lvl
(
d
,
d'
)
l
m
p
=
let
clusters
=
map
(
\
(
idx
,
cluster
)
->
clusterToGroup
(
d
,
d'
)
lvl
idx
""
cluster
m
p
)
$
zip
[
1
..
]
l
clusters'
=
clusters
`
using
`
parList
rdeepseq
in
clusters'
--------------------------------------
--------------------------------------
...
@@ -74,7 +78,10 @@ instance PhyloLevelMaker PhyloFis
...
@@ -74,7 +78,10 @@ instance PhyloLevelMaker PhyloFis
|
otherwise
=
panic
(
"[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1"
)
|
otherwise
=
panic
(
"[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1"
)
--------------------------------------
--------------------------------------
-- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
-- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
toPhyloGroups
lvl
(
d
,
d'
)
l
_
p
=
map
(
\
(
idx
,
fis
)
->
cliqueToGroup
(
d
,
d'
)
lvl
idx
""
fis
p
)
$
zip
[
1
..
]
l
toPhyloGroups
lvl
(
d
,
d'
)
l
_
p
=
let
groups
=
map
(
\
(
idx
,
fis
)
->
cliqueToGroup
(
d
,
d'
)
lvl
idx
""
fis
p
)
$
zip
[
1
..
]
l
groups'
=
groups
`
using
`
parList
rdeepseq
in
groups'
--------------------------------------
--------------------------------------
...
@@ -87,8 +94,7 @@ instance PhyloLevelMaker Document
...
@@ -87,8 +94,7 @@ instance PhyloLevelMaker Document
|
otherwise
=
panic
(
"[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0"
)
|
otherwise
=
panic
(
"[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0"
)
--------------------------------------
--------------------------------------
-- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
-- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
toPhyloGroups
lvl
(
d
,
d'
)
l
_m
p
=
map
(
\
(
idx
,
ngram
)
->
ngramsToGroup
(
d
,
d'
)
lvl
idx
ngram
[
ngram
]
p
)
toPhyloGroups
lvl
(
d
,
d'
)
l
_m
p
=
map
(
\
ngram
->
ngramsToGroup
(
d
,
d'
)
lvl
(
getIdxInRoots
ngram
p
)
ngram
[
ngram
]
p
)
$
zip
[
1
..
]
$
(
nub
.
concat
)
$
(
nub
.
concat
)
$
map
text
l
$
map
text
l
--------------------------------------
--------------------------------------
...
@@ -100,11 +106,13 @@ clusterToGroup prd lvl idx lbl groups _m p =
...
@@ -100,11 +106,13 @@ clusterToGroup prd lvl idx lbl groups _m p =
PhyloGroup
((
prd
,
lvl
),
idx
)
lbl
ngrams
empty
PhyloGroup
((
prd
,
lvl
),
idx
)
lbl
ngrams
empty
Nothing
Nothing
(
getMiniCooc
(
listToFullCombi
ngrams
)
(
periodsToYears
[
prd
])
(
getPhyloCooc
p
))
(
getMiniCooc
(
listToFullCombi
ngrams
)
(
periodsToYears
[
prd
])
(
getPhyloCooc
p
))
[]
[]
[]
childs
ascLink
desLink
[]
childs
where
where
--------------------------------------
--------------------------------------
childs
::
[
Pointer
]
childs
::
[
Pointer
]
childs
=
map
(
\
g
->
(
getGroupId
g
,
1
))
groups
childs
=
map
(
\
g
->
(
getGroupId
g
,
1
))
groups
ascLink
=
concat
$
map
getGroupPeriodParents
groups
desLink
=
concat
$
map
getGroupPeriodChilds
groups
--------------------------------------
--------------------------------------
ngrams
::
[
Int
]
ngrams
::
[
Int
]
ngrams
=
(
sort
.
nub
.
concat
)
$
map
getGroupNgrams
groups
ngrams
=
(
sort
.
nub
.
concat
)
$
map
getGroupNgrams
groups
...
@@ -116,7 +124,7 @@ cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> P
...
@@ -116,7 +124,7 @@ cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> P
cliqueToGroup
prd
lvl
idx
lbl
fis
p
=
cliqueToGroup
prd
lvl
idx
lbl
fis
p
=
PhyloGroup
((
prd
,
lvl
),
idx
)
lbl
ngrams
(
singleton
"support"
(
fromIntegral
$
getSupport
fis
))
Nothing
PhyloGroup
((
prd
,
lvl
),
idx
)
lbl
ngrams
(
singleton
"support"
(
fromIntegral
$
getSupport
fis
))
Nothing
(
getMiniCooc
(
listToFullCombi
ngrams
)
(
periodsToYears
[
prd
])
(
getPhyloCooc
p
))
(
getMiniCooc
(
listToFullCombi
ngrams
)
(
periodsToYears
[
prd
])
(
getPhyloCooc
p
))
[]
[]
[]
[]
[]
[]
[]
childs
where
where
--------------------------------------
--------------------------------------
ngrams
::
[
Int
]
ngrams
::
[
Int
]
...
@@ -124,6 +132,9 @@ cliqueToGroup prd lvl idx lbl fis p =
...
@@ -124,6 +132,9 @@ cliqueToGroup prd lvl idx lbl fis p =
$
Set
.
toList
$
Set
.
toList
$
getClique
fis
$
getClique
fis
--------------------------------------
--------------------------------------
childs
::
[
Pointer
]
childs
=
map
(
\
n
->
(((
prd
,
lvl
-
1
),
n
),
1
))
ngrams
--------------------------------------
-- | To transform a list of Ngrams into a PhyloGroup
-- | To transform a list of Ngrams into a PhyloGroup
...
@@ -152,6 +163,7 @@ toNthLevel lvlMax prox clus p
...
@@ -152,6 +163,7 @@ toNthLevel lvlMax prox clus p
$
traceBranches
(
lvl
+
1
)
$
traceBranches
(
lvl
+
1
)
$
setPhyloBranches
(
lvl
+
1
)
$
setPhyloBranches
(
lvl
+
1
)
$
transposePeriodLinks
(
lvl
+
1
)
$
transposePeriodLinks
(
lvl
+
1
)
$
tracePhyloN
(
lvl
+
1
)
$
setLevelLinks
(
lvl
,
lvl
+
1
)
$
setLevelLinks
(
lvl
,
lvl
+
1
)
$
addPhyloLevel
(
lvl
+
1
)
$
addPhyloLevel
(
lvl
+
1
)
(
clusters
)
p
(
clusters
)
p
...
@@ -169,15 +181,15 @@ toNthLevel lvlMax prox clus p
...
@@ -169,15 +181,15 @@ toNthLevel lvlMax prox clus p
toPhylo1
::
Cluster
->
Proximity
->
Map
(
Date
,
Date
)
[
Document
]
->
Phylo
->
Phylo
toPhylo1
::
Cluster
->
Proximity
->
Map
(
Date
,
Date
)
[
Document
]
->
Phylo
->
Phylo
toPhylo1
clus
prox
d
p
=
case
clus
of
toPhylo1
clus
prox
d
p
=
case
clus
of
Fis
(
FisParams
k
s
t
)
->
traceReBranches
1
Fis
(
FisParams
k
s
t
)
->
traceReBranches
1
$
linkPhyloBranches
1
prox
--
$ linkPhyloBranches 1 prox
$
traceBranches
1
$
traceBranches
1
$
setPhyloBranches
1
$
setPhyloBranches
1
$
traceTempoMatching
Descendant
1
$
traceTempoMatching
Descendant
1
$
interTempoMatching
Descendant
1
prox
$
interTempoMatching
Descendant
1
prox
$
traceTempoMatching
Ascendant
1
$
traceTempoMatching
Ascendant
1
$
interTempoMatching
Ascendant
1
prox
$
interTempoMatching
Ascendant
1
prox
$
tracePhylo1
$
setLevelLinks
(
0
,
1
)
$
setLevelLinks
(
0
,
1
)
$
setLevelLinks
(
1
,
0
)
$
addPhyloLevel
1
phyloFis
phylo'
$
addPhyloLevel
1
phyloFis
phylo'
where
where
--------------------------------------
--------------------------------------
...
@@ -212,7 +224,7 @@ instance PhyloMaker [(Date, Text)]
...
@@ -212,7 +224,7 @@ instance PhyloMaker [(Date, Text)]
phylo1
=
toPhylo1
(
getContextualUnit
q
)
(
getInterTemporalMatching
q
)
phyloDocs
phylo0
phylo1
=
toPhylo1
(
getContextualUnit
q
)
(
getInterTemporalMatching
q
)
phyloDocs
phylo0
--------------------------------------
--------------------------------------
phylo0
::
Phylo
phylo0
::
Phylo
phylo0
=
toPhylo0
phyloDocs
phyloBase
phylo0
=
t
racePhylo0
$
t
oPhylo0
phyloDocs
phyloBase
--------------------------------------
--------------------------------------
phyloDocs
::
Map
(
Date
,
Date
)
[
Document
]
phyloDocs
::
Map
(
Date
,
Date
)
[
Document
]
phyloDocs
=
corpusToDocs
c
phyloBase
phyloDocs
=
corpusToDocs
c
phyloBase
...
@@ -251,7 +263,7 @@ instance PhyloMaker [Document]
...
@@ -251,7 +263,7 @@ instance PhyloMaker [Document]
phylo1
=
toPhylo1
(
getContextualUnit
q
)
(
getInterTemporalMatching
q
)
phyloDocs
phylo0
phylo1
=
toPhylo1
(
getContextualUnit
q
)
(
getInterTemporalMatching
q
)
phyloDocs
phylo0
--------------------------------------
--------------------------------------
phylo0
::
Phylo
phylo0
::
Phylo
phylo0
=
toPhylo0
phyloDocs
phyloBase
phylo0
=
t
racePhylo0
$
t
oPhylo0
phyloDocs
phyloBase
--------------------------------------
--------------------------------------
phyloDocs
::
Map
(
Date
,
Date
)
[
Document
]
phyloDocs
::
Map
(
Date
,
Date
)
[
Document
]
phyloDocs
=
corpusToDocs
c
phyloBase
phyloDocs
=
corpusToDocs
c
phyloBase
...
@@ -286,13 +298,16 @@ instance PhyloMaker [Document]
...
@@ -286,13 +298,16 @@ instance PhyloMaker [Document]
tracePhylo0
::
Phylo
->
Phylo
tracePhylo0
::
Phylo
->
Phylo
tracePhylo0
p
=
trace
(
"
\n
---------------
\n
--| Phylo 0 |--
\n
---------------
\n\n
"
)
p
tracePhylo0
p
=
trace
(
"
\n
---------------
\n
--| Phylo 0 |--
\n
---------------
\n\n
"
<>
show
(
length
$
getGroupsWithLevel
0
p
)
<>
" groups created
\n
"
)
p
tracePhylo1
::
Phylo
->
Phylo
tracePhylo1
::
Phylo
->
Phylo
tracePhylo1
p
=
trace
(
"
\n
---------------
\n
--| Phylo 1 |--
\n
---------------
\n\n
"
)
p
tracePhylo1
p
=
trace
(
"
\n
---------------
\n
--| Phylo 1 |--
\n
---------------
\n\n
"
<>
show
(
length
$
getGroupsWithLevel
1
p
)
<>
" groups created
\n
"
)
p
tracePhyloN
::
Level
->
Phylo
->
Phylo
tracePhyloN
::
Level
->
Phylo
->
Phylo
tracePhyloN
lvl
p
=
trace
(
"
\n
---------------
\n
--| Phylo "
<>
show
(
lvl
)
<>
" |--
\n
---------------
\n\n
"
)
p
tracePhyloN
lvl
p
=
trace
(
"
\n
---------------
\n
--| Phylo "
<>
show
(
lvl
)
<>
" |--
\n
---------------
\n\n
"
<>
show
(
length
$
getGroupsWithLevel
lvl
p
)
<>
" groups created
\n
"
)
p
tracePhyloBase
::
Phylo
->
Phylo
tracePhyloBase
::
Phylo
->
Phylo
...
...
src/Gargantext/Viz/Phylo/LinkMaker.hs
View file @
815ab543
...
@@ -17,8 +17,9 @@ Portability : POSIX
...
@@ -17,8 +17,9 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.LinkMaker
module
Gargantext.Viz.Phylo.LinkMaker
where
where
import
Control.Parallel.Strategies
import
Control.Lens
hiding
(
both
,
Level
)
import
Control.Lens
hiding
(
both
,
Level
)
import
Data.List
((
++
),
sortOn
,
null
,
tail
,
splitAt
,
elem
,
concat
,
sort
,
delete
,
intersect
,
nub
,
groupBy
,
union
)
import
Data.List
((
++
),
sortOn
,
null
,
tail
,
splitAt
,
elem
,
concat
,
delete
,
intersect
,
nub
,
groupBy
,
union
,
inits
,
scanl
,
find
)
import
Data.Tuple.Extra
import
Data.Tuple.Extra
import
Data.Map
(
Map
,(
!
),
fromListWith
,
elems
,
restrictKeys
,
unionWith
,
member
)
import
Data.Map
(
Map
,(
!
),
fromListWith
,
elems
,
restrictKeys
,
unionWith
,
member
)
import
Gargantext.Prelude
import
Gargantext.Prelude
...
@@ -38,43 +39,23 @@ import Numeric.Statistics (percentile)
...
@@ -38,43 +39,23 @@ import Numeric.Statistics (percentile)
-----------------------------
-----------------------------
-- | To choose a LevelLink strategy based an a given Level
shouldLink
::
(
Level
,
Level
)
->
PhyloGroup
->
PhyloGroup
->
Bool
shouldLink
(
lvl
,
lvl'
)
g
g'
|
(
lvl
<=
1
)
&&
(
lvl'
<=
1
)
=
doesContainsOrd
(
getGroupNgrams
g
)
(
getGroupNgrams
g'
)
|
otherwise
=
elem
(
getGroupId
g
)
(
getGroupLevelChildsId
g'
)
-- | To set the LevelLinks between a given PhyloGroup and a list of childs/parents PhyloGroups
-- | To set the LevelLinks between a given PhyloGroup and a list of childs/parents PhyloGroups
linkGroupToGroups
::
(
Level
,
Level
)
->
PhyloGroup
->
[
PhyloGroup
]
->
PhyloGroup
linkGroupToGroups
::
PhyloGroup
->
[
PhyloGroup
]
->
PhyloGroup
linkGroupToGroups
(
lvl
,
lvl'
)
current
targets
linkGroupToGroups
current
targets
=
over
(
phylo_groupLevelParents
)
addPointers
current
|
lvl
<
lvl'
=
setLevelParents
current
|
lvl
>
lvl'
=
setLevelChilds
current
|
otherwise
=
current
where
where
--------------------------------------
setLevelChilds
::
PhyloGroup
->
PhyloGroup
setLevelChilds
=
over
(
phylo_groupLevelChilds
)
addPointers
--------------------------------------
setLevelParents
::
PhyloGroup
->
PhyloGroup
setLevelParents
=
over
(
phylo_groupLevelParents
)
addPointers
--------------------------------------
--------------------------------------
addPointers
::
[
Pointer
]
->
[
Pointer
]
addPointers
::
[
Pointer
]
->
[
Pointer
]
addPointers
lp
=
lp
++
Maybe
.
mapMaybe
(
\
target
->
addPointers
lp
=
lp
++
Maybe
.
mapMaybe
(
\
target
->
if
shouldLink
(
lvl
,
lvl'
)
current
target
if
(
elem
(
getGroupId
current
)
(
getGroupLevelChildsId
target
))
then
Just
((
getGroupId
target
),
1
)
then
Just
((
getGroupId
target
),
1
)
else
Nothing
)
targets
else
Nothing
)
targets
--------------------------------------
--------------------------------------
-- | To set the LevelLink of all the PhyloGroups of a Phylo
setLevelLinks
::
(
Level
,
Level
)
->
Phylo
->
Phylo
setLevelLinks
::
(
Level
,
Level
)
->
Phylo
->
Phylo
setLevelLinks
(
lvl
,
lvl'
)
p
=
alterPhyloGroups
(
\
groups
->
setLevelLinks
(
lvl
,
lvl'
)
p
=
alterGroupWithLevel
(
\
group
->
linkGroupToGroups
group
map
(
\
group
->
if
getGroupLevel
group
==
lvl
$
filter
(
\
g'
->
(
not
.
null
)
$
intersect
(
getGroupNgrams
group
)
(
getGroupNgrams
g'
))
then
linkGroupToGroups
(
lvl
,
lvl'
)
group
$
getGroupsWithFilters
lvl'
(
getGroupPeriod
group
)
p
)
lvl
p
$
filterCandidates
group
$
getGroupsWithFilters
lvl'
(
getGroupPeriod
group
)
p
else
group
)
groups
)
p
-------------------------------
-------------------------------
...
@@ -83,10 +64,10 @@ setLevelLinks (lvl,lvl') p = alterPhyloGroups (\groups ->
...
@@ -83,10 +64,10 @@ setLevelLinks (lvl,lvl') p = alterPhyloGroups (\groups ->
-- | To get the next or previous PhyloPeriod based on a given PhyloPeriodId
-- | To get the next or previous PhyloPeriod based on a given PhyloPeriodId
getNextPeriods
::
Filiation
->
PhyloPeriodId
->
[
PhyloPeriodId
]
->
[
PhyloPeriodId
]
getNextPeriods
::
Filiation
->
Int
->
PhyloPeriodId
->
[
PhyloPeriodId
]
->
[
PhyloPeriodId
]
getNextPeriods
to'
id
l
=
case
to'
of
getNextPeriods
to'
limit
id
l
=
case
to'
of
Descendant
->
(
tail
.
snd
)
next
Descendant
->
take
limit
$
(
tail
.
snd
)
next
Ascendant
->
(
reverse
.
fst
)
next
Ascendant
->
take
limit
$
(
reverse
.
fst
)
next
_
->
panic
(
"[ERR][Viz.Phylo.Example.getNextPeriods] Filiation type not defined"
)
_
->
panic
(
"[ERR][Viz.Phylo.Example.getNextPeriods] Filiation type not defined"
)
where
where
--------------------------------------
--------------------------------------
...
@@ -115,45 +96,102 @@ processProximity proximity nbDocs cooc cooc' ngrams ngrams' = case proximity of
...
@@ -115,45 +96,102 @@ processProximity proximity nbDocs cooc cooc' ngrams ngrams' = case proximity of
_
->
panic
"[ERR][Viz.Phylo.LinkMaker.processProximity] Unknown proximity"
_
->
panic
"[ERR][Viz.Phylo.LinkMaker.processProximity] Unknown proximity"
filterProximity
::
Double
->
Proximity
->
Bool
filterProximity
score
prox
=
case
prox
of
WeightedLogJaccard
(
WLJParams
thr
_
)
->
score
>=
thr
Hamming
(
HammingParams
thr
)
->
score
<=
thr
_
->
panic
"[ERR][Viz.Phylo.LinkMaker.filterProximity] Unknown proximity"
makePairs
::
[(
Date
,
Date
)]
->
PhyloGroup
->
Phylo
->
[(
PhyloGroup
,
PhyloGroup
)]
makePairs
prds
g
p
=
filter
(
\
pair
->
((
last'
"makePairs"
prds
)
==
(
getGroupPeriod
$
fst
pair
))
||
((
last'
"makePairs"
prds
)
==
(
getGroupPeriod
$
snd
pair
)))
$
listToPairs
$
filter
(
\
g'
->
(
elem
(
getGroupPeriod
g'
)
prds
)
&&
((
not
.
null
)
$
intersect
(
getGroupNgrams
g
)
(
getGroupNgrams
g'
))
&&
(((
last'
"makePairs"
prds
)
==
(
getGroupPeriod
g
))
||
((
matchWithPairs
g
(
g
,
g'
)
p
)
>=
(
getThreshold
$
getPhyloProximity
p
))))
$
getGroupsWithLevel
(
getGroupLevel
g
)
p
-- | Find the best candidates to be time-linked with a group g1 (recursively until the limit of periods is reached)
-- | Find the best candidates to be time-linked with a group g1 (recursively until the limit of periods is reached)
-- | 1) find the next periods and get the mini cooc matrix of g1
-- | 1) find the next periods and get the mini cooc matrix of g1
-- | 2) build the pairs of candidates (single groups or tuples)
-- | 2) build the pairs of candidates (single groups or tuples)
-- | 3) process the proximity mesure and select the best ones to create the pointers (ie: all the max)
-- | 3) process the proximity mesure and select the best ones to create the pointers (ie: all the max)
findBestCandidates
::
Filiation
->
Int
->
Int
->
Proximity
->
[(
Date
,
Date
)]
->
[
PhyloGroup
]
->
PhyloGroup
->
Phylo
->
([
Pointer
],[
Double
])
findBestCandidates
::
Filiation
->
Int
->
Int
->
Proximity
->
[(
Date
,
Date
)]
->
PhyloGroup
->
Phylo
->
([
Pointer
],[
Double
])
findBestCandidates
filiation
depth
limit
proximity
periods
candidates
g1
phylo
findBestCandidates
filiation
depth
limit
proximity
periods
g1
phylo
|
depth
>
limit
||
null
nextPeriods
=
(
[]
,
[]
)
|
depth
>
limit
||
null
nextPeriods
=
(
[]
,
[]
)
|
(
not
.
null
)
pointers
=
(
head'
"findBestCandidates"
$
groupBy
(
\
x
y
->
snd
x
==
snd
y
)
pointers
|
(
not
.
null
)
pointers
=
(
head'
"findBestCandidates"
$
groupBy
(
\
x
y
->
snd
x
==
snd
y
)
pointers
,
map
snd
similarities
)
,
map
snd
similarities
)
|
otherwise
=
findBestCandidates
filiation
(
depth
+
1
)
limit
proximity
periods
candidates
g1
phylo
|
otherwise
=
findBestCandidates
filiation
(
depth
+
1
)
limit
proximity
periods
g1
phylo
where
where
--------------------------------------
--------------------------------------
pointers
::
[(
PhyloGroupId
,
Double
)]
pointers
::
[(
PhyloGroupId
,
Double
)]
pointers
=
reverse
$
sortOn
snd
$
filter
(
\
(
_
,
score
)
->
case
proximity
of
pointers
=
reverse
$
sortOn
snd
$
filter
(
\
(
_
,
score
)
->
filterProximity
score
proximity
)
similarities
WeightedLogJaccard
(
WLJParams
thr
_
)
->
score
>=
thr
Hamming
(
HammingParams
thr
)
->
score
<=
thr
_
->
panic
"[ERR][Viz.Phylo.LinkMaker.findBestCandidates] Unknown proximity"
)
similarities
--------------------------------------
--------------------------------------
similarities
::
[(
PhyloGroupId
,
Double
)]
similarities
::
[(
PhyloGroupId
,
Double
)]
similarities
=
concat
$
map
(
\
(
g2
,
g3
)
->
let
nbDocs
=
periodsToNbDocs
[(
getGroupPeriod
g1
),(
getGroupPeriod
g2
),(
getGroupPeriod
g3
)]
phylo
similarities
=
concat
$
map
(
\
(
g2
,
g3
)
->
cooc'
=
unionWith
(
+
)
(
getGroupCooc
g2
)
(
getGroupCooc
g3
)
let
nbDocs
=
periodsToNbDocs
[(
getGroupPeriod
g1
),(
getGroupPeriod
g2
),(
getGroupPeriod
g3
)]
phylo
ngrams'
=
union
(
getGroupNgrams
g2
)
(
getGroupNgrams
g3
)
cooc'
=
if
(
g2
==
g3
)
score
=
processProximity
proximity
nbDocs
cooc
cooc'
ngrams
ngrams'
then
getGroupCooc
g2
in
nub
$
[(
getGroupId
g2
,
score
),(
getGroupId
g3
,
score
)])
pairsOfCandidates
else
unionWith
(
+
)
(
getGroupCooc
g2
)
(
getGroupCooc
g3
)
ngrams'
=
if
(
g2
==
g3
)
then
getGroupNgrams
g2
else
union
(
getGroupNgrams
g2
)
(
getGroupNgrams
g3
)
score
=
processProximity
proximity
nbDocs
(
getGroupCooc
g1
)
cooc'
(
getGroupNgrams
g1
)
ngrams'
in
if
(
g2
==
g3
)
then
[(
getGroupId
g2
,
score
)]
else
[(
getGroupId
g2
,
score
),(
getGroupId
g3
,
score
)]
)
pairsOfCandidates
--------------------------------------
--------------------------------------
pairsOfCandidates
::
[(
PhyloGroup
,
PhyloGroup
)]
pairsOfCandidates
::
[(
PhyloGroup
,
PhyloGroup
)]
pairsOfCandidates
=
listToFullCombi
$
filter
(
\
g
->
elem
(
getGroupPeriod
g
)
nextPeriods
)
candidates
pairsOfCandidates
=
makePairs
nextPeriods
g1
phylo
--------------------------------------
cooc
::
Map
(
Int
,
Int
)
Double
cooc
=
getGroupCooc
g1
--------------------------------------
ngrams
::
[
Int
]
ngrams
=
getGroupNgrams
g1
--------------------------------------
--------------------------------------
nextPeriods
::
[(
Date
,
Date
)]
nextPeriods
::
[(
Date
,
Date
)]
nextPeriods
=
take
depth
periods
nextPeriods
=
take
depth
periods
--------------------------------------
--------------------------------------
matchWithPairs
::
PhyloGroup
->
(
PhyloGroup
,
PhyloGroup
)
->
Phylo
->
Double
matchWithPairs
g1
(
g2
,
g3
)
p
=
let
nbDocs
=
periodsToNbDocs
[(
getGroupPeriod
g1
),(
getGroupPeriod
g2
),(
getGroupPeriod
g3
)]
p
cooc
=
if
(
g2
==
g3
)
then
getGroupCooc
g2
else
unionWith
(
+
)
(
getGroupCooc
g2
)
(
getGroupCooc
g3
)
ngrams
=
if
(
g2
==
g3
)
then
getGroupNgrams
g2
else
union
(
getGroupNgrams
g2
)
(
getGroupNgrams
g3
)
in
processProximity
(
getPhyloProximity
p
)
nbDocs
(
getGroupCooc
g1
)
cooc
(
getGroupNgrams
g1
)
ngrams
phyloGroupMatching
::
[
PhyloPeriodId
]
->
PhyloGroup
->
Phylo
->
[
Pointer
]
phyloGroupMatching
periods
g
p
=
case
pointers
of
Nothing
->
[]
Just
pts
->
head'
"phyloGroupMatching"
-- | Keep only the best set of pointers grouped by proximity
$
groupBy
(
\
pt
pt'
->
snd
pt
==
snd
pt'
)
$
reverse
$
sortOn
snd
pts
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold
where
--------------------------------------
pointers
::
Maybe
[
Pointer
]
pointers
=
find
(
not
.
null
)
-- | For each time frame, process the Proximity on relevant pairs of targeted groups
$
scanl
(
\
acc
frame
->
let
pairs
=
makePairs
frame
g
p
in
acc
++
(
filter
(
\
(
_
,
proxi
)
->
filterProximity
proxi
(
getPhyloProximity
p
))
$
concat
$
map
(
\
(
t
,
t'
)
->
let
proxi
=
matchWithPairs
g
(
t
,
t'
)
p
in
if
(
t
==
t'
)
then
[(
getGroupId
t
,
proxi
)]
else
[(
getGroupId
t
,
proxi
),(
getGroupId
t'
,
proxi
)]
)
pairs
)
)
[]
-- | [[1900],[1900,1901],[1900,1901,1902],...] | length max => + 5 years
$
inits
periods
--------------------------------------
findBestCandidates'
::
Proximity
->
[
PhyloGroup
]
->
PhyloGroup
->
Phylo
->
[
Pointer
]
findBestCandidates'
::
Proximity
->
[
PhyloGroup
]
->
PhyloGroup
->
Phylo
->
[
Pointer
]
...
@@ -204,9 +242,10 @@ updateGroups fil lvl m p = alterPhyloGroups (\gs -> map (\g -> if ((getGroupLeve
...
@@ -204,9 +242,10 @@ updateGroups fil lvl m p = alterPhyloGroups (\gs -> map (\g -> if ((getGroupLeve
-- | Optimisation : to keep only the groups that have at least one ngrams in commons with the target
-- | Optimisation : to keep only the groups that have at least one ngrams in commons with the target
filterCandidates
::
PhyloGroup
->
[
PhyloGroup
]
->
[
PhyloGroup
]
initCandidates
::
PhyloGroup
->
[
PhyloPeriodId
]
->
[
PhyloGroup
]
->
[
PhyloGroup
]
filterCandidates
g
gs
=
filter
(
\
g'
->
(
not
.
null
)
$
intersect
(
getGroupNgrams
g
)
(
getGroupNgrams
g'
))
initCandidates
g
prds
gs
=
filter
(
\
g'
->
elem
(
getGroupPeriod
g'
)
prds
)
$
delete
g
gs
$
filter
(
\
g'
->
(
not
.
null
)
$
intersect
(
getGroupNgrams
g
)
(
getGroupNgrams
g'
))
$
delete
g
gs
-- | a init avec la [[head groups]] et la tail groups
-- | a init avec la [[head groups]] et la tail groups
...
@@ -236,26 +275,24 @@ toBranches mem gs
...
@@ -236,26 +275,24 @@ toBranches mem gs
-- | 2) for each branch, for each group find the best candidates (by Filiation and Proximity) and create the corresponding pointers
-- | 2) for each branch, for each group find the best candidates (by Filiation and Proximity) and create the corresponding pointers
-- | 3) update all the groups with the new pointers if they exist
-- | 3) update all the groups with the new pointers if they exist
interTempoMatching
::
Filiation
->
Level
->
Proximity
->
Phylo
->
Phylo
interTempoMatching
::
Filiation
->
Level
->
Proximity
->
Phylo
->
Phylo
interTempoMatching
fil
lvl
prox
p
=
traceMatching
fil
lvl
(
getThreshold
prox
)
debug
$
updateGroups
fil
lvl
pointersMap
p
interTempoMatching
fil
lvl
_
p
=
updateGroups
fil
lvl
(
Map
.
fromList
pointers
)
p
where
where
--------------------------------------
--------------------------------------
debug
::
[
Double
]
-- debug :: [Pointers
]
debug
=
sort
$
concat
$
map
(
snd
.
snd
)
pointers
-- debug = concat $ map (
snd) pointers
--------------------------------------
--------------------------------------
pointersMap
::
Map
PhyloGroupId
[
Pointer
]
--
pointersMap :: Map PhyloGroupId [Pointer]
pointersMap
=
Map
.
fromList
$
map
(
\
(
id
,
x
)
->
(
id
,
fst
x
))
pointers
--
pointersMap = Map.fromList $ map (\(id,x) -> (id,fst x)) pointers
--------------------------------------
--------------------------------------
pointers
::
[(
PhyloGroupId
,
([
Pointer
],[
Double
])
)]
pointers
::
[(
PhyloGroupId
,
[
Pointer
]
)]
pointers
=
concat
pointers
=
$
map
(
\
branche
->
let
pts
=
map
(
\
g
->
let
periods
=
getNextPeriods
fil
(
getPhyloMatchingFrame
p
)
(
getGroupPeriod
g
)
(
getPhyloPeriods
p
)
map
(
\
g
->
(
getGroupId
g
in
(
getGroupId
g
,
phyloGroupMatching
periods
g
p
))
groups
,
findBestCandidates
fil
1
(
getPhyloMatchingFrame
p
)
prox
(
getNextPeriods
fil
(
getGroupPeriod
g
)
(
getPhyloPeriods
p
))
(
filterCandidates
g
branche
)
g
p
)
pts'
=
pts
`
using
`
parList
rdeepseq
)
branche
)
branches
in
pts'
--------------------------------------
--------------------------------------
branches
::
[[
PhyloGroup
]]
groups
::
[
PhyloGroup
]
branches
=
tracePreBranches
groups
=
getGroupsWithLevel
lvl
p
$
toBranches
[[
head'
"interTempoMatching"
(
getGroupsWithLevel
lvl
p
)]]
$
tail
(
getGroupsWithLevel
lvl
p
)
--------------------------------------
--------------------------------------
...
@@ -265,12 +302,10 @@ interTempoMatching fil lvl prox p = traceMatching fil lvl (getThreshold prox) de
...
@@ -265,12 +302,10 @@ interTempoMatching fil lvl prox p = traceMatching fil lvl (getThreshold prox) de
toLevelUp
::
[
Pointer
]
->
Phylo
->
[
Pointer
]
toLevelUp
::
[
Pointer
]
->
Phylo
->
[
Pointer
]
toLevelUp
lst
p
=
Map
.
toList
toLevelUp
lst
p
=
Map
.
toList
$
map
(
\
ws
->
maximum
ws
)
$
map
(
\
ws
->
maximum
ws
)
$
fromListWith
(
++
)
[(
id
,
[
w
])
|
(
id
,
w
)
<-
pointers
]
$
fromListWith
(
++
)
[(
id
,
[
w
])
|
(
id
,
w
)
<-
where
let
pointers
=
map
(
\
(
id
,
v
)
->
(
getGroupLevelParentId
$
getGroupFromId
id
p
,
v
))
lst
--------------------------------------
pointers'
=
pointers
`
using
`
parList
rdeepseq
pointers
::
[
Pointer
]
in
pointers'
]
pointers
=
map
(
\
(
id
,
v
)
->
(
getGroupLevelParentId
$
getGroupFromId
id
p
,
v
))
lst
--------------------------------------
-- | Transpose the parent/child pointers from one level to another
-- | Transpose the parent/child pointers from one level to another
...
@@ -278,16 +313,14 @@ transposePeriodLinks :: Level -> Phylo -> Phylo
...
@@ -278,16 +313,14 @@ transposePeriodLinks :: Level -> Phylo -> Phylo
transposePeriodLinks
lvl
p
=
alterGroupWithLevel
transposePeriodLinks
lvl
p
=
alterGroupWithLevel
(
\
g
->
(
\
g
->
--------------------------------------
--------------------------------------
let
childs
=
getGroupsFromIds
(
map
fst
$
getGroupLevelChilds
g
)
p
let
ascLink
=
toLevelUp
(
getGroupPeriodParents
g
)
p
ascLink
=
toLevelUp
(
concat
$
map
getGroupPeriodParents
childs
)
p
desLink
=
toLevelUp
(
getGroupPeriodChilds
g
)
p
desLink
=
toLevelUp
(
concat
$
map
getGroupPeriodChilds
childs
)
p
--------------------------------------
--------------------------------------
in
g
&
phylo_groupPeriodParents
%~
(
++
ascLink
)
in
g
&
phylo_groupPeriodParents
.~
ascLink
&
phylo_groupPeriodChilds
%~
(
++
desLink
)
&
phylo_groupPeriodChilds
.~
desLink
--------------------------------------
--------------------------------------
)
lvl
p
)
lvl
p
----------------
----------------
-- | Tracer | --
-- | Tracer | --
----------------
----------------
...
@@ -301,6 +334,7 @@ traceMatching fil lvl thr lst p = trace ( "----\n" <> show (fil) <> " unfiltered
...
@@ -301,6 +334,7 @@ traceMatching fil lvl thr lst p = trace ( "----\n" <> show (fil) <> " unfiltered
<>
show
(
percentile
75
(
VS
.
fromList
lst
))
<>
" (75%) "
<>
show
(
percentile
75
(
VS
.
fromList
lst
))
<>
" (75%) "
<>
show
(
percentile
90
(
VS
.
fromList
lst
))
<>
" (90%)
\n
"
)
p
<>
show
(
percentile
90
(
VS
.
fromList
lst
))
<>
" (90%)
\n
"
)
p
tracePreBranches
::
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
tracePreBranches
::
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
tracePreBranches
bs
=
trace
(
show
(
length
bs
)
<>
" pre-branches"
<>
"
\n
"
tracePreBranches
bs
=
trace
(
show
(
length
bs
)
<>
" pre-branches"
<>
"
\n
"
<>
"with sizes : "
<>
show
(
map
length
bs
)
<>
"
\n
"
)
bs
<>
"with sizes : "
<>
show
(
map
length
bs
)
<>
"
\n
"
)
bs
...
...
src/Gargantext/Viz/Phylo/Metrics/Clustering.hs
View file @
815ab543
...
@@ -18,36 +18,20 @@ module Gargantext.Viz.Phylo.Metrics.Clustering
...
@@ -18,36 +18,20 @@ module Gargantext.Viz.Phylo.Metrics.Clustering
where
where
import
Data.Graph.Clustering.Louvain.CplusPlus
import
Data.Graph.Clustering.Louvain.CplusPlus
import
Data.List
(
last
,
concat
,
null
,
nub
,(
++
),
init
,
tail
,
elemIndex
,
groupBy
,(
!!
)
)
import
Data.List
(
concat
,
null
,
nub
,(
++
),
elemIndex
,
groupBy
,(
!!
),
(
\\
),
union
,
intersect
)
import
Data.Map
(
fromList
,
mapKeys
)
import
Data.Map
(
fromList
,
mapKeys
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Tools
relatedComp
::
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
-- | To apply the related components method to a PhyloGraph
relatedComp
graphs
=
foldl'
(
\
mem
groups
->
-- curr = the current PhyloGroup
if
(
null
mem
)
-- (nodes,edges) = the initial PhyloGraph minus the current PhyloGroup
then
mem
++
[
groups
]
-- next = the next PhyloGroups to be added in the cluster
else
-- memo = the memory of the allready created clusters
let
related
=
filter
(
\
groups'
->
(
not
.
null
)
$
intersect
groups
groups'
)
mem
relatedComp
::
Int
->
PhyloGroup
->
GroupGraph
->
[
PhyloGroup
]
->
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
in
if
(
null
related
)
relatedComp
idx
curr
(
nodes
,
edges
)
next
memo
then
mem
++
[
groups
]
|
null
nodes'
&&
null
next'
=
memo'
else
(
mem
\\
related
)
++
[
union
groups
(
nub
$
concat
related
)]
)
[]
graphs
|
(
not
.
null
)
next'
=
relatedComp
idx
(
head'
"relatedComp1"
next'
)
(
nodes'
,
edges
)
(
tail
next'
)
memo'
|
otherwise
=
relatedComp
(
idx
+
1
)
(
head'
"relatedComp2"
nodes'
)
(
tail
nodes'
,
edges
)
[]
memo'
where
--------------------------------------
memo'
::
[[
PhyloGroup
]]
memo'
|
null
memo
=
[[
curr
]]
|
idx
==
((
length
memo
)
-
1
)
=
(
init
memo
)
++
[(
last
memo
)
++
[
curr
]]
|
otherwise
=
memo
++
[[
curr
]]
--------------------------------------
next'
::
[
PhyloGroup
]
next'
=
filter
(
\
x
->
not
$
elem
x
$
concat
memo
)
$
nub
$
next
++
(
getNeighbours
False
curr
edges
)
--------------------------------------
nodes'
::
[
PhyloGroup
]
nodes'
=
filter
(
\
x
->
not
$
elem
x
next'
)
nodes
--------------------------------------
louvain
::
([
GroupNode
],[
GroupEdge
])
->
IO
[[
PhyloGroup
]]
louvain
::
([
GroupNode
],[
GroupEdge
])
->
IO
[[
PhyloGroup
]]
...
...
src/Gargantext/Viz/Phylo/Tools.hs
View file @
815ab543
...
@@ -110,6 +110,13 @@ listToDirectedCombi :: Eq a => [a] -> [(a,a)]
...
@@ -110,6 +110,13 @@ listToDirectedCombi :: Eq a => [a] -> [(a,a)]
listToDirectedCombi
l
=
[(
x
,
y
)
|
x
<-
l
,
y
<-
l
,
x
/=
y
]
listToDirectedCombi
l
=
[(
x
,
y
)
|
x
<-
l
,
y
<-
l
,
x
/=
y
]
listToEqualCombi
::
Eq
a
=>
[
a
]
->
[(
a
,
a
)]
listToEqualCombi
l
=
[(
x
,
y
)
|
x
<-
l
,
y
<-
l
,
x
==
y
]
listToPairs
::
Eq
a
=>
[
a
]
->
[(
a
,
a
)]
listToPairs
l
=
(
listToEqualCombi
l
)
++
(
listToUnDirectedCombi
l
)
-- | To get all combinations of a list and apply a function to the resulting list of pairs
-- | To get all combinations of a list and apply a function to the resulting list of pairs
listToDirectedCombiWith
::
Eq
a
=>
forall
b
.
(
a
->
b
)
->
[
a
]
->
[(
b
,
b
)]
listToDirectedCombiWith
::
Eq
a
=>
forall
b
.
(
a
->
b
)
->
[
a
]
->
[(
b
,
b
)]
listToDirectedCombiWith
f
l
=
[(
f
x
,
f
y
)
|
x
<-
l
,
y
<-
l
,
x
/=
y
]
listToDirectedCombiWith
f
l
=
[(
f
x
,
f
y
)
|
x
<-
l
,
y
<-
l
,
x
/=
y
]
...
@@ -195,6 +202,9 @@ getPhyloDescription p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p
...
@@ -195,6 +202,9 @@ getPhyloDescription p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p
getPhyloMatchingFrame
::
Phylo
->
Int
getPhyloMatchingFrame
::
Phylo
->
Int
getPhyloMatchingFrame
p
=
_q_interTemporalMatchingFrame
$
_phyloParam_query
$
getPhyloParams
p
getPhyloMatchingFrame
p
=
_q_interTemporalMatchingFrame
$
_phyloParam_query
$
getPhyloParams
p
getPhyloProximity
::
Phylo
->
Proximity
getPhyloProximity
p
=
_q_interTemporalMatching
$
_phyloParam_query
$
getPhyloParams
p
getPhyloReBranchThr
::
Phylo
->
Double
getPhyloReBranchThr
::
Phylo
->
Double
getPhyloReBranchThr
p
=
_q_reBranchThr
$
_phyloParam_query
$
getPhyloParams
p
getPhyloReBranchThr
p
=
_q_reBranchThr
$
_phyloParam_query
$
getPhyloParams
p
...
@@ -243,7 +253,7 @@ alterGroupWithLevel f lvl p = over ( phylo_periods
...
@@ -243,7 +253,7 @@ alterGroupWithLevel f lvl p = over ( phylo_periods
.
traverse
.
traverse
)
(
\
g
->
if
getGroupLevel
g
==
lvl
)
(
\
g
->
if
getGroupLevel
g
==
lvl
then
f
g
then
f
g
else
g
)
p
else
g
)
p
-- | To alter each list of PhyloGroups following a given function
-- | To alter each list of PhyloGroups following a given function
...
...
stack.yaml
View file @
815ab543
...
@@ -35,6 +35,7 @@ extra-deps:
...
@@ -35,6 +35,7 @@ extra-deps:
-
KMP-0.1.0.2
-
KMP-0.1.0.2
-
accelerate-1.2.0.0
-
accelerate-1.2.0.0
-
aeson-lens-0.5.0.0
-
aeson-lens-0.5.0.0
-
deepseq-th-0.1.0.4
-
duckling-0.1.3.0
-
duckling-0.1.3.0
-
full-text-search-0.2.1.4
-
full-text-search-0.2.1.4
-
fullstop-0.1.4
-
fullstop-0.1.4
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment