Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Christian Merten
haskell-gargantext
Commits
815ab543
Commit
815ab543
authored
Jun 14, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add parallelism
parent
7832afe9
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
211 additions
and
169 deletions
+211
-169
package.yaml
package.yaml
+2
-0
Phylo.hs
src/Gargantext/Viz/Phylo.hs
+6
-2
Cluster.hs
src/Gargantext/Viz/Phylo/Aggregates/Cluster.hs
+14
-12
BranchMaker.hs
src/Gargantext/Viz/Phylo/BranchMaker.hs
+21
-31
Example.hs
src/Gargantext/Viz/Phylo/Example.hs
+1
-1
LevelMaker.hs
src/Gargantext/Viz/Phylo/LevelMaker.hs
+28
-13
LinkMaker.hs
src/Gargantext/Viz/Phylo/LinkMaker.hs
+116
-82
Clustering.hs
src/Gargantext/Viz/Phylo/Metrics/Clustering.hs
+11
-27
Tools.hs
src/Gargantext/Viz/Phylo/Tools.hs
+11
-1
stack.yaml
stack.yaml
+1
-0
No files found.
package.yaml
View file @
815ab543
...
...
@@ -103,6 +103,7 @@ library:
-
contravariant
-
crawlerPubMed
-
data-time-segment
-
deepseq
-
directory
-
duckling
-
exceptions
...
...
@@ -138,6 +139,7 @@ library:
-
natural-transformation
-
opaleye
-
pandoc
-
parallel
-
parsec
-
patches-class
-
patches-map
...
...
src/Gargantext/Viz/Phylo.hs
View file @
815ab543
...
...
@@ -22,7 +22,7 @@ one 8, e54847.
-}
{-# LANGUAGE DeriveGeneric
#-}
{-# LANGUAGE DeriveGeneric
, DeriveAnyClass
#-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE MultiParamTypeClasses #-}
...
...
@@ -44,6 +44,8 @@ import Gargantext.Core.Utils.Prefix (unPrefix)
import
Gargantext.Text.Context
(
TermList
)
import
Gargantext.Prelude
import
Control.DeepSeq
--------------------
-- | PhyloParam | --
--------------------
...
...
@@ -161,7 +163,9 @@ data PhyloGroup =
,
_phylo_groupLevelParents
::
[
Pointer
]
,
_phylo_groupLevelChilds
::
[
Pointer
]
}
deriving
(
Generic
,
Show
,
Eq
,
Ord
)
deriving
(
Generic
,
NFData
,
Show
,
Eq
,
Ord
)
-- instance NFData PhyloGroup
-- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
...
...
src/Gargantext/Viz/Phylo/Aggregates/Cluster.hs
View file @
815ab543
...
...
@@ -13,11 +13,13 @@ Portability : POSIX
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Viz.Phylo.Aggregates.Cluster
where
import
Data.List
(
null
,
tail
,
concat
,
sort
,
intersect
)
import
Control.Parallel.Strategies
import
Data.List
(
null
,
concat
,
sort
,
intersect
,(
++
))
import
Data.Map
(
Map
)
import
Data.Tuple
(
fst
)
import
Gargantext.Prelude
...
...
@@ -44,17 +46,19 @@ getCandidates gs = filter (\(g,g') -> (not . null) $ intersect (getGroupNgrams g
graphToClusters
::
Cluster
->
GroupGraph
->
[
PhyloCluster
]
graphToClusters
clust
(
nodes
,
edges
)
=
case
clust
of
Louvain
(
LouvainParams
_
)
->
undefined
RelatedComponents
(
RCParams
_
)
->
relatedComp
0
(
head'
"graphToClusters"
nodes
)
(
tail
nodes
,
edges
)
[]
[]
RelatedComponents
(
RCParams
_
)
->
relatedComp
$
((
map
(
\
((
g
,
g'
),
_
)
->
[
g
,
g'
])
edges
)
++
(
map
(
\
g
->
[
g
])
nodes
))
_
->
panic
"[ERR][Viz.Phylo.Aggregates.Cluster.graphToClusters] not implemented"
-- | To transform a list of PhyloGroups into a Graph of Proximity
groupsToGraph
::
Double
->
Proximity
->
[
PhyloGroup
]
->
([
GroupNode
],[
GroupEdge
])
groupsToGraph
nbDocs
prox
gs
=
case
prox
of
WeightedLogJaccard
(
WLJParams
_
sens
)
->
(
gs
,
map
(
\
(
x
,
y
)
->
((
x
,
y
),
weightedLogJaccard
sens
nbDocs
(
getGroupCooc
x
)
(
getGroupCooc
y
)
(
getGroupNgrams
x
)
(
getGroupNgrams
y
)))
$
getCandidates
gs
)
WeightedLogJaccard
(
WLJParams
_
sens
)
->
(
gs
,
let
candidates
=
map
(
\
(
x
,
y
)
->
((
x
,
y
),
weightedLogJaccard
sens
nbDocs
(
getGroupCooc
x
)
(
getGroupCooc
y
)
(
getGroupNgrams
x
)
(
getGroupNgrams
y
)))
$
getCandidates
gs
candidates'
=
candidates
`
using
`
parList
rdeepseq
in
candidates'
)
Hamming
(
HammingParams
_
)
->
(
gs
,
map
(
\
(
x
,
y
)
->
((
x
,
y
),
hamming
(
getGroupCooc
x
)
(
getGroupCooc
y
)))
$
getCandidates
gs
)
_
->
undefined
_
->
undefined
-- | To filter a Graph of Proximity using a given threshold
...
...
@@ -78,9 +82,11 @@ phyloToClusters lvl clus p = Map.fromList
graphs'
=
traceGraphFiltered
lvl
$
map
(
\
g
->
filterGraph
prox
g
)
graphs
--------------------------------------
graphs
::
[([
GroupNode
],[
GroupEdge
])]
graphs
=
traceGraph
lvl
(
getThreshold
prox
)
$
map
(
\
prd
->
groupsToGraph
(
periodsToNbDocs
[
prd
]
p
)
prox
(
getGroupsWithFilters
lvl
prd
p
))
periods
graphs
::
[([
GroupNode
],[
GroupEdge
])]
graphs
=
traceGraph
lvl
(
getThreshold
prox
)
$
let
gs
=
map
(
\
prd
->
groupsToGraph
(
periodsToNbDocs
[
prd
]
p
)
prox
(
getGroupsWithFilters
lvl
prd
p
))
periods
gs'
=
gs
`
using
`
parList
rdeepseq
in
gs'
--------------------------------------
prox
::
Proximity
prox
=
getProximity
clus
...
...
@@ -115,7 +121,3 @@ traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <>
<>
show
(
percentile
90
(
VS
.
fromList
lst
))
<>
" (90%)
\n
"
)
g
where
lst
=
sort
$
map
snd
$
concat
$
map
snd
g
-- traceSim :: PhyloGroup -> PhyloGroup -> Phylo -> Double -> Double
-- traceSim g g' p sim = trace (show (getGroupText g p) <> " [vs] " <> show (getGroupText g' p) <> " = " <> show (sim) <> "\n") sim
src/Gargantext/Viz/Phylo/BranchMaker.hs
View file @
815ab543
...
...
@@ -17,6 +17,7 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.BranchMaker
where
import
Control.Parallel.Strategies
import
Control.Lens
hiding
(
both
,
Level
)
import
Data.List
(
concat
,
nub
,(
++
),
tail
,
sortOn
,
take
,
reverse
,
sort
,
null
,
intersect
,
union
)
import
Data.Map
(
Map
)
...
...
@@ -82,13 +83,14 @@ findSimBranches frame thr nth p (id,gs) bs
pks
=
getGroupsPeaks
gs
nth
p
--------------------------------------
findBestPointer
::
Phylo
->
Proximity
->
[
PhyloGroup
]
->
[
PhyloGroup
]
->
[(
PhyloGroupId
,
Pointer
)]
findBestPointer
p
prox
gs
gs'
=
take
1
$
reverse
$
sortOn
(
snd
.
snd
)
$
concat
$
map
(
\
g
->
let
pts
=
findBestCandidates'
prox
gs'
g
p
in
map
(
\
pt
->
(
getGroupId
g
,
pt
))
pts
)
gs
findBestPointer
p
prox
gs
gs'
=
let
candidates
=
map
(
\
g
->
let
pts
=
findBestCandidates'
prox
gs'
g
p
in
map
(
\
pt
->
(
getGroupId
g
,
pt
))
pts
)
gs
candidates'
=
candidates
`
using
`
parList
rdeepseq
in
take
1
$
reverse
$
sortOn
(
snd
.
snd
)
$
concat
candidates'
makeBranchLinks
::
Phylo
->
Proximity
->
(
PhyloBranchId
,[
PhyloGroup
])
->
[(
PhyloBranchId
,[
PhyloGroup
])]
->
[(
PhyloGroupId
,
Pointer
)]
->
[(
PhyloGroupId
,
Pointer
)]
makeBranchLinks
p
prox
(
id
,
gs
)
bs
pts
...
...
@@ -126,34 +128,22 @@ linkPhyloBranches lvl prox p = setPhyloBranches lvl
-- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
graphToBranches
::
Level
->
GroupGraph
->
Phylo
->
[(
Int
,
PhyloGroupId
)]
graphToBranches
_lvl
(
nodes
,
edges
)
_p
=
concat
$
map
(
\
(
idx
,
gs
)
->
map
(
\
g
->
(
idx
,
getGroupId
g
))
gs
)
$
zip
[
1
..
]
$
relatedComp
0
(
head'
"branchMaker"
nodes
)
(
tail
nodes
,
edges
)
[]
[]
-- | To build a graph using the parents and childs pointers
makeGraph
::
[
PhyloGroup
]
->
Phylo
->
GroupGraph
makeGraph
gs
p
=
(
gs
,
edges
)
where
edges
::
[
GroupEdge
]
edges
=
(
nub
.
concat
)
$
map
(
\
g
->
(
map
(
\
g'
->
((
g'
,
g
),
1
))
$
getGroupParents
g
p
)
++
(
map
(
\
g'
->
((
g
,
g'
),
1
))
$
getGroupChilds
g
p
))
gs
graphToBranches
::
[
PhyloGroup
]
->
Phylo
->
[(
Int
,
PhyloGroupId
)]
graphToBranches
groups
p
=
concat
$
map
(
\
(
idx
,
gs
)
->
map
(
\
g
->
(
idx
,
getGroupId
g
))
gs
)
$
zip
[
1
..
]
$
relatedComp
$
map
(
\
g
->
nub
$
[
g
]
++
(
getGroupParents
g
p
)
++
(
getGroupChilds
g
p
))
groups
-- | To set all the PhyloBranches for a given Level in a Phylo
setPhyloBranches
::
Level
->
Phylo
->
Phylo
setPhyloBranches
lvl
p
=
alterGroupWithLevel
(
\
g
->
let
bIdx
=
(
fst
$
head'
"branchMaker"
$
filter
(
\
b
->
snd
b
==
getGroupId
g
)
bs
)
in
over
(
phylo_groupBranchId
)
(
\
_
->
Just
(
lvl
,
bIdx
))
g
)
lvl
p
where
--------------------------------------
bs
::
[(
Int
,
PhyloGroupId
)]
bs
=
graphToBranches
lvl
graph
p
setPhyloBranches
lvl
p
=
alterGroupWithLevel
(
\
g
->
let
bIdx
=
(
fst
$
head'
"branchMaker"
$
filter
(
\
b
->
snd
b
==
getGroupId
g
)
branches
)
in
over
(
phylo_groupBranchId
)
(
\
_
->
Just
(
lvl
,
bIdx
))
g
)
lvl
p
where
--------------------------------------
graph
::
GroupGraph
graph
=
makeGraph
(
getGroupsWithLevel
lvl
p
)
p
branches
::
[(
Int
,
PhyloGroupId
)]
branches
=
graphToBranches
(
getGroupsWithLevel
lvl
p
)
p
--------------------------------------
src/Gargantext/Viz/Phylo/Example.hs
View file @
815ab543
...
...
@@ -105,7 +105,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild
::
PhyloQueryBuild
phyloQueryBuild
=
PhyloQueryBuild
"Cesar et Cleôpatre"
"An example of Phylomemy (french without accent)"
5
3
defaultFis
[]
[]
(
WeightedLogJaccard
$
WLJParams
0.
1
20
)
5
0.5
4
2
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.3
0
)
5
3
defaultFis
[]
[]
(
WeightedLogJaccard
$
WLJParams
0.
6
20
)
5
0.5
4
2
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.4
0
)
...
...
src/Gargantext/Viz/Phylo/LevelMaker.hs
View file @
815ab543
...
...
@@ -19,6 +19,7 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.LevelMaker
where
import
Control.Parallel.Strategies
import
Control.Lens
hiding
(
both
,
Level
)
import
Data.List
((
++
),
sort
,
concat
,
nub
,
zip
,
last
)
import
Data.Map
(
Map
,
(
!
),
empty
,
singleton
)
...
...
@@ -61,7 +62,10 @@ instance PhyloLevelMaker PhyloCluster
|
otherwise
=
panic
(
"[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2"
)
--------------------------------------
-- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
toPhyloGroups
lvl
(
d
,
d'
)
l
m
p
=
map
(
\
(
idx
,
cluster
)
->
clusterToGroup
(
d
,
d'
)
lvl
idx
""
cluster
m
p
)
$
zip
[
1
..
]
l
toPhyloGroups
lvl
(
d
,
d'
)
l
m
p
=
let
clusters
=
map
(
\
(
idx
,
cluster
)
->
clusterToGroup
(
d
,
d'
)
lvl
idx
""
cluster
m
p
)
$
zip
[
1
..
]
l
clusters'
=
clusters
`
using
`
parList
rdeepseq
in
clusters'
--------------------------------------
...
...
@@ -74,7 +78,10 @@ instance PhyloLevelMaker PhyloFis
|
otherwise
=
panic
(
"[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1"
)
--------------------------------------
-- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
toPhyloGroups
lvl
(
d
,
d'
)
l
_
p
=
map
(
\
(
idx
,
fis
)
->
cliqueToGroup
(
d
,
d'
)
lvl
idx
""
fis
p
)
$
zip
[
1
..
]
l
toPhyloGroups
lvl
(
d
,
d'
)
l
_
p
=
let
groups
=
map
(
\
(
idx
,
fis
)
->
cliqueToGroup
(
d
,
d'
)
lvl
idx
""
fis
p
)
$
zip
[
1
..
]
l
groups'
=
groups
`
using
`
parList
rdeepseq
in
groups'
--------------------------------------
...
...
@@ -87,8 +94,7 @@ instance PhyloLevelMaker Document
|
otherwise
=
panic
(
"[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0"
)
--------------------------------------
-- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
toPhyloGroups
lvl
(
d
,
d'
)
l
_m
p
=
map
(
\
(
idx
,
ngram
)
->
ngramsToGroup
(
d
,
d'
)
lvl
idx
ngram
[
ngram
]
p
)
$
zip
[
1
..
]
toPhyloGroups
lvl
(
d
,
d'
)
l
_m
p
=
map
(
\
ngram
->
ngramsToGroup
(
d
,
d'
)
lvl
(
getIdxInRoots
ngram
p
)
ngram
[
ngram
]
p
)
$
(
nub
.
concat
)
$
map
text
l
--------------------------------------
...
...
@@ -100,11 +106,13 @@ clusterToGroup prd lvl idx lbl groups _m p =
PhyloGroup
((
prd
,
lvl
),
idx
)
lbl
ngrams
empty
Nothing
(
getMiniCooc
(
listToFullCombi
ngrams
)
(
periodsToYears
[
prd
])
(
getPhyloCooc
p
))
[]
[]
[]
childs
ascLink
desLink
[]
childs
where
--------------------------------------
childs
::
[
Pointer
]
childs
=
map
(
\
g
->
(
getGroupId
g
,
1
))
groups
ascLink
=
concat
$
map
getGroupPeriodParents
groups
desLink
=
concat
$
map
getGroupPeriodChilds
groups
--------------------------------------
ngrams
::
[
Int
]
ngrams
=
(
sort
.
nub
.
concat
)
$
map
getGroupNgrams
groups
...
...
@@ -116,7 +124,7 @@ cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> P
cliqueToGroup
prd
lvl
idx
lbl
fis
p
=
PhyloGroup
((
prd
,
lvl
),
idx
)
lbl
ngrams
(
singleton
"support"
(
fromIntegral
$
getSupport
fis
))
Nothing
(
getMiniCooc
(
listToFullCombi
ngrams
)
(
periodsToYears
[
prd
])
(
getPhyloCooc
p
))
[]
[]
[]
[]
[]
[]
[]
childs
where
--------------------------------------
ngrams
::
[
Int
]
...
...
@@ -124,6 +132,9 @@ cliqueToGroup prd lvl idx lbl fis p =
$
Set
.
toList
$
getClique
fis
--------------------------------------
childs
::
[
Pointer
]
childs
=
map
(
\
n
->
(((
prd
,
lvl
-
1
),
n
),
1
))
ngrams
--------------------------------------
-- | To transform a list of Ngrams into a PhyloGroup
...
...
@@ -152,6 +163,7 @@ toNthLevel lvlMax prox clus p
$
traceBranches
(
lvl
+
1
)
$
setPhyloBranches
(
lvl
+
1
)
$
transposePeriodLinks
(
lvl
+
1
)
$
tracePhyloN
(
lvl
+
1
)
$
setLevelLinks
(
lvl
,
lvl
+
1
)
$
addPhyloLevel
(
lvl
+
1
)
(
clusters
)
p
...
...
@@ -169,15 +181,15 @@ toNthLevel lvlMax prox clus p
toPhylo1
::
Cluster
->
Proximity
->
Map
(
Date
,
Date
)
[
Document
]
->
Phylo
->
Phylo
toPhylo1
clus
prox
d
p
=
case
clus
of
Fis
(
FisParams
k
s
t
)
->
traceReBranches
1
$
linkPhyloBranches
1
prox
--
$ linkPhyloBranches 1 prox
$
traceBranches
1
$
setPhyloBranches
1
$
traceTempoMatching
Descendant
1
$
interTempoMatching
Descendant
1
prox
$
traceTempoMatching
Ascendant
1
$
interTempoMatching
Ascendant
1
prox
$
tracePhylo1
$
setLevelLinks
(
0
,
1
)
$
setLevelLinks
(
1
,
0
)
$
addPhyloLevel
1
phyloFis
phylo'
where
--------------------------------------
...
...
@@ -212,7 +224,7 @@ instance PhyloMaker [(Date, Text)]
phylo1
=
toPhylo1
(
getContextualUnit
q
)
(
getInterTemporalMatching
q
)
phyloDocs
phylo0
--------------------------------------
phylo0
::
Phylo
phylo0
=
toPhylo0
phyloDocs
phyloBase
phylo0
=
t
racePhylo0
$
t
oPhylo0
phyloDocs
phyloBase
--------------------------------------
phyloDocs
::
Map
(
Date
,
Date
)
[
Document
]
phyloDocs
=
corpusToDocs
c
phyloBase
...
...
@@ -251,7 +263,7 @@ instance PhyloMaker [Document]
phylo1
=
toPhylo1
(
getContextualUnit
q
)
(
getInterTemporalMatching
q
)
phyloDocs
phylo0
--------------------------------------
phylo0
::
Phylo
phylo0
=
toPhylo0
phyloDocs
phyloBase
phylo0
=
t
racePhylo0
$
t
oPhylo0
phyloDocs
phyloBase
--------------------------------------
phyloDocs
::
Map
(
Date
,
Date
)
[
Document
]
phyloDocs
=
corpusToDocs
c
phyloBase
...
...
@@ -286,13 +298,16 @@ instance PhyloMaker [Document]
tracePhylo0
::
Phylo
->
Phylo
tracePhylo0
p
=
trace
(
"
\n
---------------
\n
--| Phylo 0 |--
\n
---------------
\n\n
"
)
p
tracePhylo0
p
=
trace
(
"
\n
---------------
\n
--| Phylo 0 |--
\n
---------------
\n\n
"
<>
show
(
length
$
getGroupsWithLevel
0
p
)
<>
" groups created
\n
"
)
p
tracePhylo1
::
Phylo
->
Phylo
tracePhylo1
p
=
trace
(
"
\n
---------------
\n
--| Phylo 1 |--
\n
---------------
\n\n
"
)
p
tracePhylo1
p
=
trace
(
"
\n
---------------
\n
--| Phylo 1 |--
\n
---------------
\n\n
"
<>
show
(
length
$
getGroupsWithLevel
1
p
)
<>
" groups created
\n
"
)
p
tracePhyloN
::
Level
->
Phylo
->
Phylo
tracePhyloN
lvl
p
=
trace
(
"
\n
---------------
\n
--| Phylo "
<>
show
(
lvl
)
<>
" |--
\n
---------------
\n\n
"
)
p
tracePhyloN
lvl
p
=
trace
(
"
\n
---------------
\n
--| Phylo "
<>
show
(
lvl
)
<>
" |--
\n
---------------
\n\n
"
<>
show
(
length
$
getGroupsWithLevel
lvl
p
)
<>
" groups created
\n
"
)
p
tracePhyloBase
::
Phylo
->
Phylo
...
...
src/Gargantext/Viz/Phylo/LinkMaker.hs
View file @
815ab543
...
...
@@ -17,8 +17,9 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.LinkMaker
where
import
Control.Parallel.Strategies
import
Control.Lens
hiding
(
both
,
Level
)
import
Data.List
((
++
),
sortOn
,
null
,
tail
,
splitAt
,
elem
,
concat
,
sort
,
delete
,
intersect
,
nub
,
groupBy
,
union
)
import
Data.List
((
++
),
sortOn
,
null
,
tail
,
splitAt
,
elem
,
concat
,
delete
,
intersect
,
nub
,
groupBy
,
union
,
inits
,
scanl
,
find
)
import
Data.Tuple.Extra
import
Data.Map
(
Map
,(
!
),
fromListWith
,
elems
,
restrictKeys
,
unionWith
,
member
)
import
Gargantext.Prelude
...
...
@@ -38,43 +39,23 @@ import Numeric.Statistics (percentile)
-----------------------------
-- | To choose a LevelLink strategy based an a given Level
shouldLink
::
(
Level
,
Level
)
->
PhyloGroup
->
PhyloGroup
->
Bool
shouldLink
(
lvl
,
lvl'
)
g
g'
|
(
lvl
<=
1
)
&&
(
lvl'
<=
1
)
=
doesContainsOrd
(
getGroupNgrams
g
)
(
getGroupNgrams
g'
)
|
otherwise
=
elem
(
getGroupId
g
)
(
getGroupLevelChildsId
g'
)
-- | To set the LevelLinks between a given PhyloGroup and a list of childs/parents PhyloGroups
linkGroupToGroups
::
(
Level
,
Level
)
->
PhyloGroup
->
[
PhyloGroup
]
->
PhyloGroup
linkGroupToGroups
(
lvl
,
lvl'
)
current
targets
|
lvl
<
lvl'
=
setLevelParents
current
|
lvl
>
lvl'
=
setLevelChilds
current
|
otherwise
=
current
linkGroupToGroups
::
PhyloGroup
->
[
PhyloGroup
]
->
PhyloGroup
linkGroupToGroups
current
targets
=
over
(
phylo_groupLevelParents
)
addPointers
current
where
--------------------------------------
setLevelChilds
::
PhyloGroup
->
PhyloGroup
setLevelChilds
=
over
(
phylo_groupLevelChilds
)
addPointers
--------------------------------------
setLevelParents
::
PhyloGroup
->
PhyloGroup
setLevelParents
=
over
(
phylo_groupLevelParents
)
addPointers
--------------------------------------
addPointers
::
[
Pointer
]
->
[
Pointer
]
addPointers
lp
=
lp
++
Maybe
.
mapMaybe
(
\
target
->
if
shouldLink
(
lvl
,
lvl'
)
current
target
if
(
elem
(
getGroupId
current
)
(
getGroupLevelChildsId
target
))
then
Just
((
getGroupId
target
),
1
)
else
Nothing
)
targets
--------------------------------------
-- | To set the LevelLink of all the PhyloGroups of a Phylo
setLevelLinks
::
(
Level
,
Level
)
->
Phylo
->
Phylo
setLevelLinks
(
lvl
,
lvl'
)
p
=
alterPhyloGroups
(
\
groups
->
map
(
\
group
->
if
getGroupLevel
group
==
lvl
then
linkGroupToGroups
(
lvl
,
lvl'
)
group
$
filterCandidates
group
$
getGroupsWithFilters
lvl'
(
getGroupPeriod
group
)
p
else
group
)
groups
)
p
setLevelLinks
(
lvl
,
lvl'
)
p
=
alterGroupWithLevel
(
\
group
->
linkGroupToGroups
group
$
filter
(
\
g'
->
(
not
.
null
)
$
intersect
(
getGroupNgrams
group
)
(
getGroupNgrams
g'
))
$
getGroupsWithFilters
lvl'
(
getGroupPeriod
group
)
p
)
lvl
p
-------------------------------
...
...
@@ -83,10 +64,10 @@ setLevelLinks (lvl,lvl') p = alterPhyloGroups (\groups ->
-- | To get the next or previous PhyloPeriod based on a given PhyloPeriodId
getNextPeriods
::
Filiation
->
PhyloPeriodId
->
[
PhyloPeriodId
]
->
[
PhyloPeriodId
]
getNextPeriods
to'
id
l
=
case
to'
of
Descendant
->
(
tail
.
snd
)
next
Ascendant
->
(
reverse
.
fst
)
next
getNextPeriods
::
Filiation
->
Int
->
PhyloPeriodId
->
[
PhyloPeriodId
]
->
[
PhyloPeriodId
]
getNextPeriods
to'
limit
id
l
=
case
to'
of
Descendant
->
take
limit
$
(
tail
.
snd
)
next
Ascendant
->
take
limit
$
(
reverse
.
fst
)
next
_
->
panic
(
"[ERR][Viz.Phylo.Example.getNextPeriods] Filiation type not defined"
)
where
--------------------------------------
...
...
@@ -115,45 +96,102 @@ processProximity proximity nbDocs cooc cooc' ngrams ngrams' = case proximity of
_
->
panic
"[ERR][Viz.Phylo.LinkMaker.processProximity] Unknown proximity"
filterProximity
::
Double
->
Proximity
->
Bool
filterProximity
score
prox
=
case
prox
of
WeightedLogJaccard
(
WLJParams
thr
_
)
->
score
>=
thr
Hamming
(
HammingParams
thr
)
->
score
<=
thr
_
->
panic
"[ERR][Viz.Phylo.LinkMaker.filterProximity] Unknown proximity"
makePairs
::
[(
Date
,
Date
)]
->
PhyloGroup
->
Phylo
->
[(
PhyloGroup
,
PhyloGroup
)]
makePairs
prds
g
p
=
filter
(
\
pair
->
((
last'
"makePairs"
prds
)
==
(
getGroupPeriod
$
fst
pair
))
||
((
last'
"makePairs"
prds
)
==
(
getGroupPeriod
$
snd
pair
)))
$
listToPairs
$
filter
(
\
g'
->
(
elem
(
getGroupPeriod
g'
)
prds
)
&&
((
not
.
null
)
$
intersect
(
getGroupNgrams
g
)
(
getGroupNgrams
g'
))
&&
(((
last'
"makePairs"
prds
)
==
(
getGroupPeriod
g
))
||
((
matchWithPairs
g
(
g
,
g'
)
p
)
>=
(
getThreshold
$
getPhyloProximity
p
))))
$
getGroupsWithLevel
(
getGroupLevel
g
)
p
-- | Find the best candidates to be time-linked with a group g1 (recursively until the limit of periods is reached)
-- | 1) find the next periods and get the mini cooc matrix of g1
-- | 2) build the pairs of candidates (single groups or tuples)
-- | 3) process the proximity mesure and select the best ones to create the pointers (ie: all the max)
findBestCandidates
::
Filiation
->
Int
->
Int
->
Proximity
->
[(
Date
,
Date
)]
->
[
PhyloGroup
]
->
PhyloGroup
->
Phylo
->
([
Pointer
],[
Double
])
findBestCandidates
filiation
depth
limit
proximity
periods
candidates
g1
phylo
findBestCandidates
::
Filiation
->
Int
->
Int
->
Proximity
->
[(
Date
,
Date
)]
->
PhyloGroup
->
Phylo
->
([
Pointer
],[
Double
])
findBestCandidates
filiation
depth
limit
proximity
periods
g1
phylo
|
depth
>
limit
||
null
nextPeriods
=
(
[]
,
[]
)
|
(
not
.
null
)
pointers
=
(
head'
"findBestCandidates"
$
groupBy
(
\
x
y
->
snd
x
==
snd
y
)
pointers
,
map
snd
similarities
)
|
otherwise
=
findBestCandidates
filiation
(
depth
+
1
)
limit
proximity
periods
candidates
g1
phylo
|
otherwise
=
findBestCandidates
filiation
(
depth
+
1
)
limit
proximity
periods
g1
phylo
where
--------------------------------------
pointers
::
[(
PhyloGroupId
,
Double
)]
pointers
=
reverse
$
sortOn
snd
$
filter
(
\
(
_
,
score
)
->
case
proximity
of
WeightedLogJaccard
(
WLJParams
thr
_
)
->
score
>=
thr
Hamming
(
HammingParams
thr
)
->
score
<=
thr
_
->
panic
"[ERR][Viz.Phylo.LinkMaker.findBestCandidates] Unknown proximity"
)
similarities
pointers
=
reverse
$
sortOn
snd
$
filter
(
\
(
_
,
score
)
->
filterProximity
score
proximity
)
similarities
--------------------------------------
similarities
::
[(
PhyloGroupId
,
Double
)]
similarities
=
concat
$
map
(
\
(
g2
,
g3
)
->
let
nbDocs
=
periodsToNbDocs
[(
getGroupPeriod
g1
),(
getGroupPeriod
g2
),(
getGroupPeriod
g3
)]
phylo
cooc'
=
unionWith
(
+
)
(
getGroupCooc
g2
)
(
getGroupCooc
g3
)
ngrams'
=
union
(
getGroupNgrams
g2
)
(
getGroupNgrams
g3
)
score
=
processProximity
proximity
nbDocs
cooc
cooc'
ngrams
ngrams'
in
nub
$
[(
getGroupId
g2
,
score
),(
getGroupId
g3
,
score
)])
pairsOfCandidates
similarities
=
concat
$
map
(
\
(
g2
,
g3
)
->
let
nbDocs
=
periodsToNbDocs
[(
getGroupPeriod
g1
),(
getGroupPeriod
g2
),(
getGroupPeriod
g3
)]
phylo
cooc'
=
if
(
g2
==
g3
)
then
getGroupCooc
g2
else
unionWith
(
+
)
(
getGroupCooc
g2
)
(
getGroupCooc
g3
)
ngrams'
=
if
(
g2
==
g3
)
then
getGroupNgrams
g2
else
union
(
getGroupNgrams
g2
)
(
getGroupNgrams
g3
)
score
=
processProximity
proximity
nbDocs
(
getGroupCooc
g1
)
cooc'
(
getGroupNgrams
g1
)
ngrams'
in
if
(
g2
==
g3
)
then
[(
getGroupId
g2
,
score
)]
else
[(
getGroupId
g2
,
score
),(
getGroupId
g3
,
score
)]
)
pairsOfCandidates
--------------------------------------
pairsOfCandidates
::
[(
PhyloGroup
,
PhyloGroup
)]
pairsOfCandidates
=
listToFullCombi
$
filter
(
\
g
->
elem
(
getGroupPeriod
g
)
nextPeriods
)
candidates
--------------------------------------
cooc
::
Map
(
Int
,
Int
)
Double
cooc
=
getGroupCooc
g1
--------------------------------------
ngrams
::
[
Int
]
ngrams
=
getGroupNgrams
g1
pairsOfCandidates
=
makePairs
nextPeriods
g1
phylo
--------------------------------------
nextPeriods
::
[(
Date
,
Date
)]
nextPeriods
=
take
depth
periods
--------------------------------------
--------------------------------------
matchWithPairs
::
PhyloGroup
->
(
PhyloGroup
,
PhyloGroup
)
->
Phylo
->
Double
matchWithPairs
g1
(
g2
,
g3
)
p
=
let
nbDocs
=
periodsToNbDocs
[(
getGroupPeriod
g1
),(
getGroupPeriod
g2
),(
getGroupPeriod
g3
)]
p
cooc
=
if
(
g2
==
g3
)
then
getGroupCooc
g2
else
unionWith
(
+
)
(
getGroupCooc
g2
)
(
getGroupCooc
g3
)
ngrams
=
if
(
g2
==
g3
)
then
getGroupNgrams
g2
else
union
(
getGroupNgrams
g2
)
(
getGroupNgrams
g3
)
in
processProximity
(
getPhyloProximity
p
)
nbDocs
(
getGroupCooc
g1
)
cooc
(
getGroupNgrams
g1
)
ngrams
phyloGroupMatching
::
[
PhyloPeriodId
]
->
PhyloGroup
->
Phylo
->
[
Pointer
]
phyloGroupMatching
periods
g
p
=
case
pointers
of
Nothing
->
[]
Just
pts
->
head'
"phyloGroupMatching"
-- | Keep only the best set of pointers grouped by proximity
$
groupBy
(
\
pt
pt'
->
snd
pt
==
snd
pt'
)
$
reverse
$
sortOn
snd
pts
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold
where
--------------------------------------
pointers
::
Maybe
[
Pointer
]
pointers
=
find
(
not
.
null
)
-- | For each time frame, process the Proximity on relevant pairs of targeted groups
$
scanl
(
\
acc
frame
->
let
pairs
=
makePairs
frame
g
p
in
acc
++
(
filter
(
\
(
_
,
proxi
)
->
filterProximity
proxi
(
getPhyloProximity
p
))
$
concat
$
map
(
\
(
t
,
t'
)
->
let
proxi
=
matchWithPairs
g
(
t
,
t'
)
p
in
if
(
t
==
t'
)
then
[(
getGroupId
t
,
proxi
)]
else
[(
getGroupId
t
,
proxi
),(
getGroupId
t'
,
proxi
)]
)
pairs
)
)
[]
-- | [[1900],[1900,1901],[1900,1901,1902],...] | length max => + 5 years
$
inits
periods
--------------------------------------
findBestCandidates'
::
Proximity
->
[
PhyloGroup
]
->
PhyloGroup
->
Phylo
->
[
Pointer
]
...
...
@@ -204,9 +242,10 @@ updateGroups fil lvl m p = alterPhyloGroups (\gs -> map (\g -> if ((getGroupLeve
-- | Optimisation : to keep only the groups that have at least one ngrams in commons with the target
filterCandidates
::
PhyloGroup
->
[
PhyloGroup
]
->
[
PhyloGroup
]
filterCandidates
g
gs
=
filter
(
\
g'
->
(
not
.
null
)
$
intersect
(
getGroupNgrams
g
)
(
getGroupNgrams
g'
))
$
delete
g
gs
initCandidates
::
PhyloGroup
->
[
PhyloPeriodId
]
->
[
PhyloGroup
]
->
[
PhyloGroup
]
initCandidates
g
prds
gs
=
filter
(
\
g'
->
elem
(
getGroupPeriod
g'
)
prds
)
$
filter
(
\
g'
->
(
not
.
null
)
$
intersect
(
getGroupNgrams
g
)
(
getGroupNgrams
g'
))
$
delete
g
gs
-- | a init avec la [[head groups]] et la tail groups
...
...
@@ -236,26 +275,24 @@ toBranches mem gs
-- | 2) for each branch, for each group find the best candidates (by Filiation and Proximity) and create the corresponding pointers
-- | 3) update all the groups with the new pointers if they exist
interTempoMatching
::
Filiation
->
Level
->
Proximity
->
Phylo
->
Phylo
interTempoMatching
fil
lvl
prox
p
=
traceMatching
fil
lvl
(
getThreshold
prox
)
debug
$
updateGroups
fil
lvl
pointersMap
p
interTempoMatching
fil
lvl
_
p
=
updateGroups
fil
lvl
(
Map
.
fromList
pointers
)
p
where
--------------------------------------
debug
::
[
Double
]
debug
=
sort
$
concat
$
map
(
snd
.
snd
)
pointers
-- debug :: [Pointers
]
-- debug = concat $ map (
snd) pointers
--------------------------------------
pointersMap
::
Map
PhyloGroupId
[
Pointer
]
pointersMap
=
Map
.
fromList
$
map
(
\
(
id
,
x
)
->
(
id
,
fst
x
))
pointers
--
pointersMap :: Map PhyloGroupId [Pointer]
--
pointersMap = Map.fromList $ map (\(id,x) -> (id,fst x)) pointers
--------------------------------------
pointers
::
[(
PhyloGroupId
,
([
Pointer
],[
Double
])
)]
pointers
=
concat
$
map
(
\
branche
->
map
(
\
g
->
(
getGroupId
g
,
findBestCandidates
fil
1
(
getPhyloMatchingFrame
p
)
prox
(
getNextPeriods
fil
(
getGroupPeriod
g
)
(
getPhyloPeriods
p
))
(
filterCandidates
g
branche
)
g
p
)
)
branche
)
branches
pointers
::
[(
PhyloGroupId
,
[
Pointer
]
)]
pointers
=
let
pts
=
map
(
\
g
->
let
periods
=
getNextPeriods
fil
(
getPhyloMatchingFrame
p
)
(
getGroupPeriod
g
)
(
getPhyloPeriods
p
)
in
(
getGroupId
g
,
phyloGroupMatching
periods
g
p
))
groups
pts'
=
pts
`
using
`
parList
rdeepseq
in
pts'
--------------------------------------
branches
::
[[
PhyloGroup
]]
branches
=
tracePreBranches
$
toBranches
[[
head'
"interTempoMatching"
(
getGroupsWithLevel
lvl
p
)]]
$
tail
(
getGroupsWithLevel
lvl
p
)
groups
::
[
PhyloGroup
]
groups
=
getGroupsWithLevel
lvl
p
--------------------------------------
...
...
@@ -265,12 +302,10 @@ interTempoMatching fil lvl prox p = traceMatching fil lvl (getThreshold prox) de
toLevelUp
::
[
Pointer
]
->
Phylo
->
[
Pointer
]
toLevelUp
lst
p
=
Map
.
toList
$
map
(
\
ws
->
maximum
ws
)
$
fromListWith
(
++
)
[(
id
,
[
w
])
|
(
id
,
w
)
<-
pointers
]
where
--------------------------------------
pointers
::
[
Pointer
]
pointers
=
map
(
\
(
id
,
v
)
->
(
getGroupLevelParentId
$
getGroupFromId
id
p
,
v
))
lst
--------------------------------------
$
fromListWith
(
++
)
[(
id
,
[
w
])
|
(
id
,
w
)
<-
let
pointers
=
map
(
\
(
id
,
v
)
->
(
getGroupLevelParentId
$
getGroupFromId
id
p
,
v
))
lst
pointers'
=
pointers
`
using
`
parList
rdeepseq
in
pointers'
]
-- | Transpose the parent/child pointers from one level to another
...
...
@@ -278,16 +313,14 @@ transposePeriodLinks :: Level -> Phylo -> Phylo
transposePeriodLinks
lvl
p
=
alterGroupWithLevel
(
\
g
->
--------------------------------------
let
childs
=
getGroupsFromIds
(
map
fst
$
getGroupLevelChilds
g
)
p
ascLink
=
toLevelUp
(
concat
$
map
getGroupPeriodParents
childs
)
p
desLink
=
toLevelUp
(
concat
$
map
getGroupPeriodChilds
childs
)
p
let
ascLink
=
toLevelUp
(
getGroupPeriodParents
g
)
p
desLink
=
toLevelUp
(
getGroupPeriodChilds
g
)
p
--------------------------------------
in
g
&
phylo_groupPeriodParents
%~
(
++
ascLink
)
&
phylo_groupPeriodChilds
%~
(
++
desLink
)
in
g
&
phylo_groupPeriodParents
.~
ascLink
&
phylo_groupPeriodChilds
.~
desLink
--------------------------------------
)
lvl
p
----------------
-- | Tracer | --
----------------
...
...
@@ -301,6 +334,7 @@ traceMatching fil lvl thr lst p = trace ( "----\n" <> show (fil) <> " unfiltered
<>
show
(
percentile
75
(
VS
.
fromList
lst
))
<>
" (75%) "
<>
show
(
percentile
90
(
VS
.
fromList
lst
))
<>
" (90%)
\n
"
)
p
tracePreBranches
::
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
tracePreBranches
bs
=
trace
(
show
(
length
bs
)
<>
" pre-branches"
<>
"
\n
"
<>
"with sizes : "
<>
show
(
map
length
bs
)
<>
"
\n
"
)
bs
...
...
src/Gargantext/Viz/Phylo/Metrics/Clustering.hs
View file @
815ab543
...
...
@@ -18,36 +18,20 @@ module Gargantext.Viz.Phylo.Metrics.Clustering
where
import
Data.Graph.Clustering.Louvain.CplusPlus
import
Data.List
(
last
,
concat
,
null
,
nub
,(
++
),
init
,
tail
,
elemIndex
,
groupBy
,(
!!
)
)
import
Data.List
(
concat
,
null
,
nub
,(
++
),
elemIndex
,
groupBy
,(
!!
),
(
\\
),
union
,
intersect
)
import
Data.Map
(
fromList
,
mapKeys
)
import
Gargantext.Prelude
import
Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Tools
-- | To apply the related components method to a PhyloGraph
-- curr = the current PhyloGroup
-- (nodes,edges) = the initial PhyloGraph minus the current PhyloGroup
-- next = the next PhyloGroups to be added in the cluster
-- memo = the memory of the allready created clusters
relatedComp
::
Int
->
PhyloGroup
->
GroupGraph
->
[
PhyloGroup
]
->
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
relatedComp
idx
curr
(
nodes
,
edges
)
next
memo
|
null
nodes'
&&
null
next'
=
memo'
|
(
not
.
null
)
next'
=
relatedComp
idx
(
head'
"relatedComp1"
next'
)
(
nodes'
,
edges
)
(
tail
next'
)
memo'
|
otherwise
=
relatedComp
(
idx
+
1
)
(
head'
"relatedComp2"
nodes'
)
(
tail
nodes'
,
edges
)
[]
memo'
where
--------------------------------------
memo'
::
[[
PhyloGroup
]]
memo'
|
null
memo
=
[[
curr
]]
|
idx
==
((
length
memo
)
-
1
)
=
(
init
memo
)
++
[(
last
memo
)
++
[
curr
]]
|
otherwise
=
memo
++
[[
curr
]]
--------------------------------------
next'
::
[
PhyloGroup
]
next'
=
filter
(
\
x
->
not
$
elem
x
$
concat
memo
)
$
nub
$
next
++
(
getNeighbours
False
curr
edges
)
--------------------------------------
nodes'
::
[
PhyloGroup
]
nodes'
=
filter
(
\
x
->
not
$
elem
x
next'
)
nodes
--------------------------------------
relatedComp
::
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
relatedComp
graphs
=
foldl'
(
\
mem
groups
->
if
(
null
mem
)
then
mem
++
[
groups
]
else
let
related
=
filter
(
\
groups'
->
(
not
.
null
)
$
intersect
groups
groups'
)
mem
in
if
(
null
related
)
then
mem
++
[
groups
]
else
(
mem
\\
related
)
++
[
union
groups
(
nub
$
concat
related
)]
)
[]
graphs
louvain
::
([
GroupNode
],[
GroupEdge
])
->
IO
[[
PhyloGroup
]]
...
...
src/Gargantext/Viz/Phylo/Tools.hs
View file @
815ab543
...
...
@@ -110,6 +110,13 @@ listToDirectedCombi :: Eq a => [a] -> [(a,a)]
listToDirectedCombi
l
=
[(
x
,
y
)
|
x
<-
l
,
y
<-
l
,
x
/=
y
]
listToEqualCombi
::
Eq
a
=>
[
a
]
->
[(
a
,
a
)]
listToEqualCombi
l
=
[(
x
,
y
)
|
x
<-
l
,
y
<-
l
,
x
==
y
]
listToPairs
::
Eq
a
=>
[
a
]
->
[(
a
,
a
)]
listToPairs
l
=
(
listToEqualCombi
l
)
++
(
listToUnDirectedCombi
l
)
-- | To get all combinations of a list and apply a function to the resulting list of pairs
listToDirectedCombiWith
::
Eq
a
=>
forall
b
.
(
a
->
b
)
->
[
a
]
->
[(
b
,
b
)]
listToDirectedCombiWith
f
l
=
[(
f
x
,
f
y
)
|
x
<-
l
,
y
<-
l
,
x
/=
y
]
...
...
@@ -195,6 +202,9 @@ getPhyloDescription p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p
getPhyloMatchingFrame
::
Phylo
->
Int
getPhyloMatchingFrame
p
=
_q_interTemporalMatchingFrame
$
_phyloParam_query
$
getPhyloParams
p
getPhyloProximity
::
Phylo
->
Proximity
getPhyloProximity
p
=
_q_interTemporalMatching
$
_phyloParam_query
$
getPhyloParams
p
getPhyloReBranchThr
::
Phylo
->
Double
getPhyloReBranchThr
p
=
_q_reBranchThr
$
_phyloParam_query
$
getPhyloParams
p
...
...
@@ -243,7 +253,7 @@ alterGroupWithLevel f lvl p = over ( phylo_periods
.
traverse
)
(
\
g
->
if
getGroupLevel
g
==
lvl
then
f
g
else
g
)
p
else
g
)
p
-- | To alter each list of PhyloGroups following a given function
...
...
stack.yaml
View file @
815ab543
...
...
@@ -35,6 +35,7 @@ extra-deps:
-
KMP-0.1.0.2
-
accelerate-1.2.0.0
-
aeson-lens-0.5.0.0
-
deepseq-th-0.1.0.4
-
duckling-0.1.3.0
-
full-text-search-0.2.1.4
-
fullstop-0.1.4
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment