Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
145
Issues
145
List
Board
Labels
Milestones
Merge Requests
6
Merge Requests
6
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
815ab543
Commit
815ab543
authored
Jun 14, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add parallelism
parent
7832afe9
Pipeline
#473
failed with stage
Changes
10
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
211 additions
and
169 deletions
+211
-169
package.yaml
package.yaml
+2
-0
Phylo.hs
src/Gargantext/Viz/Phylo.hs
+6
-2
Cluster.hs
src/Gargantext/Viz/Phylo/Aggregates/Cluster.hs
+14
-12
BranchMaker.hs
src/Gargantext/Viz/Phylo/BranchMaker.hs
+21
-31
Example.hs
src/Gargantext/Viz/Phylo/Example.hs
+1
-1
LevelMaker.hs
src/Gargantext/Viz/Phylo/LevelMaker.hs
+28
-13
LinkMaker.hs
src/Gargantext/Viz/Phylo/LinkMaker.hs
+116
-82
Clustering.hs
src/Gargantext/Viz/Phylo/Metrics/Clustering.hs
+11
-27
Tools.hs
src/Gargantext/Viz/Phylo/Tools.hs
+11
-1
stack.yaml
stack.yaml
+1
-0
No files found.
package.yaml
View file @
815ab543
...
...
@@ -103,6 +103,7 @@ library:
-
contravariant
-
crawlerPubMed
-
data-time-segment
-
deepseq
-
directory
-
duckling
-
exceptions
...
...
@@ -138,6 +139,7 @@ library:
-
natural-transformation
-
opaleye
-
pandoc
-
parallel
-
parsec
-
patches-class
-
patches-map
...
...
src/Gargantext/Viz/Phylo.hs
View file @
815ab543
...
...
@@ -22,7 +22,7 @@ one 8, e54847.
-}
{-# LANGUAGE DeriveGeneric
#-}
{-# LANGUAGE DeriveGeneric
, DeriveAnyClass
#-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE MultiParamTypeClasses #-}
...
...
@@ -44,6 +44,8 @@ import Gargantext.Core.Utils.Prefix (unPrefix)
import
Gargantext.Text.Context
(
TermList
)
import
Gargantext.Prelude
import
Control.DeepSeq
--------------------
-- | PhyloParam | --
--------------------
...
...
@@ -161,7 +163,9 @@ data PhyloGroup =
,
_phylo_groupLevelParents
::
[
Pointer
]
,
_phylo_groupLevelChilds
::
[
Pointer
]
}
deriving
(
Generic
,
Show
,
Eq
,
Ord
)
deriving
(
Generic
,
NFData
,
Show
,
Eq
,
Ord
)
-- instance NFData PhyloGroup
-- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
...
...
src/Gargantext/Viz/Phylo/Aggregates/Cluster.hs
View file @
815ab543
...
...
@@ -13,11 +13,13 @@ Portability : POSIX
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Viz.Phylo.Aggregates.Cluster
where
import
Data.List
(
null
,
tail
,
concat
,
sort
,
intersect
)
import
Control.Parallel.Strategies
import
Data.List
(
null
,
concat
,
sort
,
intersect
,(
++
))
import
Data.Map
(
Map
)
import
Data.Tuple
(
fst
)
import
Gargantext.Prelude
...
...
@@ -44,17 +46,19 @@ getCandidates gs = filter (\(g,g') -> (not . null) $ intersect (getGroupNgrams g
graphToClusters
::
Cluster
->
GroupGraph
->
[
PhyloCluster
]
graphToClusters
clust
(
nodes
,
edges
)
=
case
clust
of
Louvain
(
LouvainParams
_
)
->
undefined
RelatedComponents
(
RCParams
_
)
->
relatedComp
0
(
head'
"graphToClusters"
nodes
)
(
tail
nodes
,
edges
)
[]
[]
RelatedComponents
(
RCParams
_
)
->
relatedComp
$
((
map
(
\
((
g
,
g'
),
_
)
->
[
g
,
g'
])
edges
)
++
(
map
(
\
g
->
[
g
])
nodes
))
_
->
panic
"[ERR][Viz.Phylo.Aggregates.Cluster.graphToClusters] not implemented"
-- | To transform a list of PhyloGroups into a Graph of Proximity
groupsToGraph
::
Double
->
Proximity
->
[
PhyloGroup
]
->
([
GroupNode
],[
GroupEdge
])
groupsToGraph
nbDocs
prox
gs
=
case
prox
of
WeightedLogJaccard
(
WLJParams
_
sens
)
->
(
gs
,
map
(
\
(
x
,
y
)
->
((
x
,
y
),
weightedLogJaccard
sens
nbDocs
(
getGroupCooc
x
)
(
getGroupCooc
y
)
(
getGroupNgrams
x
)
(
getGroupNgrams
y
)))
$
getCandidates
gs
)
WeightedLogJaccard
(
WLJParams
_
sens
)
->
(
gs
,
let
candidates
=
map
(
\
(
x
,
y
)
->
((
x
,
y
),
weightedLogJaccard
sens
nbDocs
(
getGroupCooc
x
)
(
getGroupCooc
y
)
(
getGroupNgrams
x
)
(
getGroupNgrams
y
)))
$
getCandidates
gs
candidates'
=
candidates
`
using
`
parList
rdeepseq
in
candidates'
)
Hamming
(
HammingParams
_
)
->
(
gs
,
map
(
\
(
x
,
y
)
->
((
x
,
y
),
hamming
(
getGroupCooc
x
)
(
getGroupCooc
y
)))
$
getCandidates
gs
)
_
->
undefined
_
->
undefined
-- | To filter a Graph of Proximity using a given threshold
...
...
@@ -78,9 +82,11 @@ phyloToClusters lvl clus p = Map.fromList
graphs'
=
traceGraphFiltered
lvl
$
map
(
\
g
->
filterGraph
prox
g
)
graphs
--------------------------------------
graphs
::
[([
GroupNode
],[
GroupEdge
])]
graphs
=
traceGraph
lvl
(
getThreshold
prox
)
$
map
(
\
prd
->
groupsToGraph
(
periodsToNbDocs
[
prd
]
p
)
prox
(
getGroupsWithFilters
lvl
prd
p
))
periods
graphs
::
[([
GroupNode
],[
GroupEdge
])]
graphs
=
traceGraph
lvl
(
getThreshold
prox
)
$
let
gs
=
map
(
\
prd
->
groupsToGraph
(
periodsToNbDocs
[
prd
]
p
)
prox
(
getGroupsWithFilters
lvl
prd
p
))
periods
gs'
=
gs
`
using
`
parList
rdeepseq
in
gs'
--------------------------------------
prox
::
Proximity
prox
=
getProximity
clus
...
...
@@ -115,7 +121,3 @@ traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <>
<>
show
(
percentile
90
(
VS
.
fromList
lst
))
<>
" (90%)
\n
"
)
g
where
lst
=
sort
$
map
snd
$
concat
$
map
snd
g
-- traceSim :: PhyloGroup -> PhyloGroup -> Phylo -> Double -> Double
-- traceSim g g' p sim = trace (show (getGroupText g p) <> " [vs] " <> show (getGroupText g' p) <> " = " <> show (sim) <> "\n") sim
src/Gargantext/Viz/Phylo/BranchMaker.hs
View file @
815ab543
...
...
@@ -17,6 +17,7 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.BranchMaker
where
import
Control.Parallel.Strategies
import
Control.Lens
hiding
(
both
,
Level
)
import
Data.List
(
concat
,
nub
,(
++
),
tail
,
sortOn
,
take
,
reverse
,
sort
,
null
,
intersect
,
union
)
import
Data.Map
(
Map
)
...
...
@@ -82,13 +83,14 @@ findSimBranches frame thr nth p (id,gs) bs
pks
=
getGroupsPeaks
gs
nth
p
--------------------------------------
findBestPointer
::
Phylo
->
Proximity
->
[
PhyloGroup
]
->
[
PhyloGroup
]
->
[(
PhyloGroupId
,
Pointer
)]
findBestPointer
p
prox
gs
gs'
=
take
1
$
reverse
$
sortOn
(
snd
.
snd
)
$
concat
$
map
(
\
g
->
let
pts
=
findBestCandidates'
prox
gs'
g
p
in
map
(
\
pt
->
(
getGroupId
g
,
pt
))
pts
)
gs
findBestPointer
p
prox
gs
gs'
=
let
candidates
=
map
(
\
g
->
let
pts
=
findBestCandidates'
prox
gs'
g
p
in
map
(
\
pt
->
(
getGroupId
g
,
pt
))
pts
)
gs
candidates'
=
candidates
`
using
`
parList
rdeepseq
in
take
1
$
reverse
$
sortOn
(
snd
.
snd
)
$
concat
candidates'
makeBranchLinks
::
Phylo
->
Proximity
->
(
PhyloBranchId
,[
PhyloGroup
])
->
[(
PhyloBranchId
,[
PhyloGroup
])]
->
[(
PhyloGroupId
,
Pointer
)]
->
[(
PhyloGroupId
,
Pointer
)]
makeBranchLinks
p
prox
(
id
,
gs
)
bs
pts
...
...
@@ -126,34 +128,22 @@ linkPhyloBranches lvl prox p = setPhyloBranches lvl
-- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
graphToBranches
::
Level
->
GroupGraph
->
Phylo
->
[(
Int
,
PhyloGroupId
)]
graphToBranches
_lvl
(
nodes
,
edges
)
_p
=
concat
$
map
(
\
(
idx
,
gs
)
->
map
(
\
g
->
(
idx
,
getGroupId
g
))
gs
)
$
zip
[
1
..
]
$
relatedComp
0
(
head'
"branchMaker"
nodes
)
(
tail
nodes
,
edges
)
[]
[]
-- | To build a graph using the parents and childs pointers
makeGraph
::
[
PhyloGroup
]
->
Phylo
->
GroupGraph
makeGraph
gs
p
=
(
gs
,
edges
)
where
edges
::
[
GroupEdge
]
edges
=
(
nub
.
concat
)
$
map
(
\
g
->
(
map
(
\
g'
->
((
g'
,
g
),
1
))
$
getGroupParents
g
p
)
++
(
map
(
\
g'
->
((
g
,
g'
),
1
))
$
getGroupChilds
g
p
))
gs
graphToBranches
::
[
PhyloGroup
]
->
Phylo
->
[(
Int
,
PhyloGroupId
)]
graphToBranches
groups
p
=
concat
$
map
(
\
(
idx
,
gs
)
->
map
(
\
g
->
(
idx
,
getGroupId
g
))
gs
)
$
zip
[
1
..
]
$
relatedComp
$
map
(
\
g
->
nub
$
[
g
]
++
(
getGroupParents
g
p
)
++
(
getGroupChilds
g
p
))
groups
-- | To set all the PhyloBranches for a given Level in a Phylo
setPhyloBranches
::
Level
->
Phylo
->
Phylo
setPhyloBranches
lvl
p
=
alterGroupWithLevel
(
\
g
->
let
bIdx
=
(
fst
$
head'
"branchMaker"
$
filter
(
\
b
->
snd
b
==
getGroupId
g
)
bs
)
in
over
(
phylo_groupBranchId
)
(
\
_
->
Just
(
lvl
,
bIdx
))
g
)
lvl
p
where
--------------------------------------
bs
::
[(
Int
,
PhyloGroupId
)]
bs
=
graphToBranches
lvl
graph
p
setPhyloBranches
lvl
p
=
alterGroupWithLevel
(
\
g
->
let
bIdx
=
(
fst
$
head'
"branchMaker"
$
filter
(
\
b
->
snd
b
==
getGroupId
g
)
branches
)
in
over
(
phylo_groupBranchId
)
(
\
_
->
Just
(
lvl
,
bIdx
))
g
)
lvl
p
where
--------------------------------------
graph
::
GroupGraph
graph
=
makeGraph
(
getGroupsWithLevel
lvl
p
)
p
branches
::
[(
Int
,
PhyloGroupId
)]
branches
=
graphToBranches
(
getGroupsWithLevel
lvl
p
)
p
--------------------------------------
src/Gargantext/Viz/Phylo/Example.hs
View file @
815ab543
...
...
@@ -105,7 +105,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild
::
PhyloQueryBuild
phyloQueryBuild
=
PhyloQueryBuild
"Cesar et Cleôpatre"
"An example of Phylomemy (french without accent)"
5
3
defaultFis
[]
[]
(
WeightedLogJaccard
$
WLJParams
0.
1
20
)
5
0.5
4
2
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.3
0
)
5
3
defaultFis
[]
[]
(
WeightedLogJaccard
$
WLJParams
0.
6
20
)
5
0.5
4
2
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.4
0
)
...
...
src/Gargantext/Viz/Phylo/LevelMaker.hs
View file @
815ab543
...
...
@@ -19,6 +19,7 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.LevelMaker
where
import
Control.Parallel.Strategies
import
Control.Lens
hiding
(
both
,
Level
)
import
Data.List
((
++
),
sort
,
concat
,
nub
,
zip
,
last
)
import
Data.Map
(
Map
,
(
!
),
empty
,
singleton
)
...
...
@@ -61,7 +62,10 @@ instance PhyloLevelMaker PhyloCluster
|
otherwise
=
panic
(
"[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2"
)
--------------------------------------
-- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
toPhyloGroups
lvl
(
d
,
d'
)
l
m
p
=
map
(
\
(
idx
,
cluster
)
->
clusterToGroup
(
d
,
d'
)
lvl
idx
""
cluster
m
p
)
$
zip
[
1
..
]
l
toPhyloGroups
lvl
(
d
,
d'
)
l
m
p
=
let
clusters
=
map
(
\
(
idx
,
cluster
)
->
clusterToGroup
(
d
,
d'
)
lvl
idx
""
cluster
m
p
)
$
zip
[
1
..
]
l
clusters'
=
clusters
`
using
`
parList
rdeepseq
in
clusters'
--------------------------------------
...
...
@@ -74,7 +78,10 @@ instance PhyloLevelMaker PhyloFis
|
otherwise
=
panic
(
"[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1"
)
--------------------------------------
-- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
toPhyloGroups
lvl
(
d
,
d'
)
l
_
p
=
map
(
\
(
idx
,
fis
)
->
cliqueToGroup
(
d
,
d'
)
lvl
idx
""
fis
p
)
$
zip
[
1
..
]
l
toPhyloGroups
lvl
(
d
,
d'
)
l
_
p
=
let
groups
=
map
(
\
(
idx
,
fis
)
->
cliqueToGroup
(
d
,
d'
)
lvl
idx
""
fis
p
)
$
zip
[
1
..
]
l
groups'
=
groups
`
using
`
parList
rdeepseq
in
groups'
--------------------------------------
...
...
@@ -87,8 +94,7 @@ instance PhyloLevelMaker Document
|
otherwise
=
panic
(
"[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0"
)
--------------------------------------
-- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
toPhyloGroups
lvl
(
d
,
d'
)
l
_m
p
=
map
(
\
(
idx
,
ngram
)
->
ngramsToGroup
(
d
,
d'
)
lvl
idx
ngram
[
ngram
]
p
)
$
zip
[
1
..
]
toPhyloGroups
lvl
(
d
,
d'
)
l
_m
p
=
map
(
\
ngram
->
ngramsToGroup
(
d
,
d'
)
lvl
(
getIdxInRoots
ngram
p
)
ngram
[
ngram
]
p
)
$
(
nub
.
concat
)
$
map
text
l
--------------------------------------
...
...
@@ -100,11 +106,13 @@ clusterToGroup prd lvl idx lbl groups _m p =
PhyloGroup
((
prd
,
lvl
),
idx
)
lbl
ngrams
empty
Nothing
(
getMiniCooc
(
listToFullCombi
ngrams
)
(
periodsToYears
[
prd
])
(
getPhyloCooc
p
))
[]
[]
[]
childs
ascLink
desLink
[]
childs
where
--------------------------------------
childs
::
[
Pointer
]
childs
=
map
(
\
g
->
(
getGroupId
g
,
1
))
groups
ascLink
=
concat
$
map
getGroupPeriodParents
groups
desLink
=
concat
$
map
getGroupPeriodChilds
groups
--------------------------------------
ngrams
::
[
Int
]
ngrams
=
(
sort
.
nub
.
concat
)
$
map
getGroupNgrams
groups
...
...
@@ -116,7 +124,7 @@ cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> P
cliqueToGroup
prd
lvl
idx
lbl
fis
p
=
PhyloGroup
((
prd
,
lvl
),
idx
)
lbl
ngrams
(
singleton
"support"
(
fromIntegral
$
getSupport
fis
))
Nothing
(
getMiniCooc
(
listToFullCombi
ngrams
)
(
periodsToYears
[
prd
])
(
getPhyloCooc
p
))
[]
[]
[]
[]
[]
[]
[]
childs
where
--------------------------------------
ngrams
::
[
Int
]
...
...
@@ -124,6 +132,9 @@ cliqueToGroup prd lvl idx lbl fis p =
$
Set
.
toList
$
getClique
fis
--------------------------------------
childs
::
[
Pointer
]
childs
=
map
(
\
n
->
(((
prd
,
lvl
-
1
),
n
),
1
))
ngrams
--------------------------------------
-- | To transform a list of Ngrams into a PhyloGroup
...
...
@@ -152,6 +163,7 @@ toNthLevel lvlMax prox clus p
$
traceBranches
(
lvl
+
1
)
$
setPhyloBranches
(
lvl
+
1
)
$
transposePeriodLinks
(
lvl
+
1
)
$
tracePhyloN
(
lvl
+
1
)
$
setLevelLinks
(
lvl
,
lvl
+
1
)
$
addPhyloLevel
(
lvl
+
1
)
(
clusters
)
p
...
...
@@ -169,15 +181,15 @@ toNthLevel lvlMax prox clus p
toPhylo1
::
Cluster
->
Proximity
->
Map
(
Date
,
Date
)
[
Document
]
->
Phylo
->
Phylo
toPhylo1
clus
prox
d
p
=
case
clus
of
Fis
(
FisParams
k
s
t
)
->
traceReBranches
1
$
linkPhyloBranches
1
prox
--
$ linkPhyloBranches 1 prox
$
traceBranches
1
$
setPhyloBranches
1
$
traceTempoMatching
Descendant
1
$
interTempoMatching
Descendant
1
prox
$
traceTempoMatching
Ascendant
1
$
interTempoMatching
Ascendant
1
prox
$
tracePhylo1
$
setLevelLinks
(
0
,
1
)
$
setLevelLinks
(
1
,
0
)
$
addPhyloLevel
1
phyloFis
phylo'
where
--------------------------------------
...
...
@@ -212,7 +224,7 @@ instance PhyloMaker [(Date, Text)]
phylo1
=
toPhylo1
(
getContextualUnit
q
)
(
getInterTemporalMatching
q
)
phyloDocs
phylo0
--------------------------------------
phylo0
::
Phylo
phylo0
=
toPhylo0
phyloDocs
phyloBase
phylo0
=
t
racePhylo0
$
t
oPhylo0
phyloDocs
phyloBase
--------------------------------------
phyloDocs
::
Map
(
Date
,
Date
)
[
Document
]
phyloDocs
=
corpusToDocs
c
phyloBase
...
...
@@ -251,7 +263,7 @@ instance PhyloMaker [Document]
phylo1
=
toPhylo1
(
getContextualUnit
q
)
(
getInterTemporalMatching
q
)
phyloDocs
phylo0
--------------------------------------
phylo0
::
Phylo
phylo0
=
toPhylo0
phyloDocs
phyloBase
phylo0
=
t
racePhylo0
$
t
oPhylo0
phyloDocs
phyloBase
--------------------------------------
phyloDocs
::
Map
(
Date
,
Date
)
[
Document
]
phyloDocs
=
corpusToDocs
c
phyloBase
...
...
@@ -286,13 +298,16 @@ instance PhyloMaker [Document]
tracePhylo0
::
Phylo
->
Phylo
tracePhylo0
p
=
trace
(
"
\n
---------------
\n
--| Phylo 0 |--
\n
---------------
\n\n
"
)
p
tracePhylo0
p
=
trace
(
"
\n
---------------
\n
--| Phylo 0 |--
\n
---------------
\n\n
"
<>
show
(
length
$
getGroupsWithLevel
0
p
)
<>
" groups created
\n
"
)
p
tracePhylo1
::
Phylo
->
Phylo
tracePhylo1
p
=
trace
(
"
\n
---------------
\n
--| Phylo 1 |--
\n
---------------
\n\n
"
)
p
tracePhylo1
p
=
trace
(
"
\n
---------------
\n
--| Phylo 1 |--
\n
---------------
\n\n
"
<>
show
(
length
$
getGroupsWithLevel
1
p
)
<>
" groups created
\n
"
)
p
tracePhyloN
::
Level
->
Phylo
->
Phylo
tracePhyloN
lvl
p
=
trace
(
"
\n
---------------
\n
--| Phylo "
<>
show
(
lvl
)
<>
" |--
\n
---------------
\n\n
"
)
p
tracePhyloN
lvl
p
=
trace
(
"
\n
---------------
\n
--| Phylo "
<>
show
(
lvl
)
<>
" |--
\n
---------------
\n\n
"
<>
show
(
length
$
getGroupsWithLevel
lvl
p
)
<>
" groups created
\n
"
)
p
tracePhyloBase
::
Phylo
->
Phylo
...
...
src/Gargantext/Viz/Phylo/LinkMaker.hs
View file @
815ab543
...
...
@@ -17,8 +17,9 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.LinkMaker
where
import
Control.Parallel.Strategies
import
Control.Lens
hiding
(
both
,
Level
)
import
Data.List
((
++
),
sortOn
,
null
,
tail
,
splitAt
,
elem
,
concat
,
sort
,
delete
,
intersect
,
nub
,
groupBy
,
union
)
import
Data.List
((
++
),
sortOn
,
null
,
tail
,
splitAt
,
elem
,
concat
,
delete
,
intersect
,
nub
,
groupBy
,
union
,
inits
,
scanl
,
find
)
import
Data.Tuple.Extra
import
Data.Map
(
Map
,(
!
),
fromListWith
,
elems
,
restrictKeys
,
unionWith
,
member
)
import
Gargantext.Prelude
...
...
@@ -38,43 +39,23 @@ import Numeric.Statistics (percentile)
-----------------------------
-- | To choose a LevelLink strategy based an a given Level
shouldLink
::
(
Level
,
Level
)
->
PhyloGroup
->
PhyloGroup
->
Bool
shouldLink
(
lvl
,
lvl'
)
g
g'
|
(
lvl
<=
1
)
&&
(
lvl'
<=
1
)
=
doesContainsOrd
(
getGroupNgrams
g
)
(
getGroupNgrams
g'
)
|
otherwise
=
elem
(
getGroupId
g
)
(
getGroupLevelChildsId
g'
)
-- | To set the LevelLinks between a given PhyloGroup and a list of childs/parents PhyloGroups
linkGroupToGroups
::
(
Level
,
Level
)
->
PhyloGroup
->
[
PhyloGroup
]
->
PhyloGroup
linkGroupToGroups
(
lvl
,
lvl'
)
current
targets
|
lvl
<
lvl'
=
setLevelParents
current
|
lvl
>
lvl'
=
setLevelChilds
current
|
otherwise
=
current
linkGroupToGroups
::
PhyloGroup
->
[
PhyloGroup
]
->
PhyloGroup
linkGroupToGroups
current
targets
=
over
(
phylo_groupLevelParents
)
addPointers
current
where
--------------------------------------
setLevelChilds
::
PhyloGroup
->
PhyloGroup
setLevelChilds
=
over
(
phylo_groupLevelChilds
)
addPointers
--------------------------------------
setLevelParents
::
PhyloGroup
->
PhyloGroup
setLevelParents
=
over
(
phylo_groupLevelParents
)
addPointers
--------------------------------------
addPointers
::
[
Pointer
]
->
[
Pointer
]
addPointers
lp
=
lp
++
Maybe
.
mapMaybe
(
\
target
->
if
shouldLink
(
lvl
,
lvl'
)
current
target
if
(
elem
(
getGroupId
current
)
(
getGroupLevelChildsId
target
))
then
Just
((
getGroupId
target
),
1
)
else
Nothing
)
targets
--------------------------------------
-- | To set the LevelLink of all the PhyloGroups of a Phylo
setLevelLinks
::
(
Level
,
Level
)
->
Phylo
->
Phylo
setLevelLinks
(
lvl
,
lvl'
)
p
=
alterPhyloGroups
(
\
groups
->
map
(
\
group
->
if
getGroupLevel
group
==
lvl
then
linkGroupToGroups
(
lvl
,
lvl'
)
group
$
filterCandidates
group
$
getGroupsWithFilters
lvl'
(
getGroupPeriod
group
)
p
else
group
)
groups
)
p
setLevelLinks
(
lvl
,
lvl'
)
p
=
alterGroupWithLevel
(
\
group
->
linkGroupToGroups
group
$
filter
(
\
g'
->
(
not
.
null
)
$
intersect
(
getGroupNgrams
group
)
(
getGroupNgrams
g'
))
$
getGroupsWithFilters
lvl'
(
getGroupPeriod
group
)
p
)
lvl
p
-------------------------------
...
...
@@ -83,10 +64,10 @@ setLevelLinks (lvl,lvl') p = alterPhyloGroups (\groups ->
-- | To get the next or previous PhyloPeriod based on a given PhyloPeriodId
getNextPeriods
::
Filiation
->
PhyloPeriodId
->
[
PhyloPeriodId
]
->
[
PhyloPeriodId
]
getNextPeriods
to'
id
l
=
case
to'
of
Descendant
->
(
tail
.
snd
)
next
Ascendant
->
(
reverse
.
fst
)
next
getNextPeriods
::
Filiation
->
Int
->
PhyloPeriodId
->
[
PhyloPeriodId
]
->
[
PhyloPeriodId
]
getNextPeriods
to'
limit
id
l
=
case
to'
of
Descendant
->
take
limit
$
(
tail
.
snd
)
next
Ascendant
->
take
limit
$
(
reverse
.
fst
)
next
_
->
panic
(
"[ERR][Viz.Phylo.Example.getNextPeriods] Filiation type not defined"
)
where
--------------------------------------
...
...
@@ -115,45 +96,102 @@ processProximity proximity nbDocs cooc cooc' ngrams ngrams' = case proximity of
_
->
panic
"[ERR][Viz.Phylo.LinkMaker.processProximity] Unknown proximity"
filterProximity
::
Double
->
Proximity
->
Bool
filterProximity
score
prox
=
case
prox
of
WeightedLogJaccard
(
WLJParams
thr
_
)
->
score
>=
thr
Hamming
(
HammingParams
thr
)
->
score
<=
thr
_
->
panic
"[ERR][Viz.Phylo.LinkMaker.filterProximity] Unknown proximity"
makePairs
::
[(
Date
,
Date
)]
->
PhyloGroup
->
Phylo
->
[(
PhyloGroup
,
PhyloGroup
)]
makePairs
prds
g
p
=
filter
(
\
pair
->
((
last'
"makePairs"
prds
)
==
(
getGroupPeriod
$
fst
pair
))
||
((
last'
"makePairs"
prds
)
==
(
getGroupPeriod
$
snd
pair
)))
$
listToPairs
$
filter
(
\
g'
->
(
elem
(
getGroupPeriod
g'
)
prds
)
&&
((
not
.
null
)
$
intersect
(
getGroupNgrams
g
)
(
getGroupNgrams
g'
))
&&
(((
last'
"makePairs"
prds
)
==
(
getGroupPeriod
g
))
||
((
matchWithPairs
g
(
g
,
g'
)
p
)
>=
(
getThreshold
$
getPhyloProximity
p
))))
$
getGroupsWithLevel
(
getGroupLevel
g
)
p
-- | Find the best candidates to be time-linked with a group g1 (recursively until the limit of periods is reached)
-- | 1) find the next periods and get the mini cooc matrix of g1
-- | 2) build the pairs of candidates (single groups or tuples)
-- | 3) process the proximity mesure and select the best ones to create the pointers (ie: all the max)
findBestCandidates
::
Filiation
->
Int
->
Int
->
Proximity
->
[(
Date
,
Date
)]
->
[
PhyloGroup
]
->
PhyloGroup
->
Phylo
->
([
Pointer
],[
Double
])
findBestCandidates
filiation
depth
limit
proximity
periods
candidates
g1
phylo
findBestCandidates
::
Filiation
->
Int
->
Int
->
Proximity
->
[(
Date
,
Date
)]
->
PhyloGroup
->
Phylo
->
([
Pointer
],[
Double
])
findBestCandidates
filiation
depth
limit
proximity
periods
g1
phylo
|
depth
>
limit
||
null
nextPeriods
=
(
[]
,
[]
)
|
(
not
.
null
)
pointers
=
(
head'
"findBestCandidates"
$
groupBy
(
\
x
y
->
snd
x
==
snd
y
)
pointers
,
map
snd
similarities
)
|
otherwise
=
findBestCandidates
filiation
(
depth
+
1
)
limit
proximity
periods
candidates
g1
phylo
|
otherwise
=
findBestCandidates
filiation
(
depth
+
1
)
limit
proximity
periods
g1
phylo
where
--------------------------------------
pointers
::
[(
PhyloGroupId
,
Double
)]
pointers
=
reverse
$
sortOn
snd
$
filter
(
\
(
_
,
score
)
->
case
proximity
of
WeightedLogJaccard
(
WLJParams
thr
_
)
->
score
>=
thr
Hamming
(
HammingParams
thr
)
->
score
<=
thr
_
->
panic
"[ERR][Viz.Phylo.LinkMaker.findBestCandidates] Unknown proximity"
)
similarities
pointers
=
reverse
$
sortOn
snd
$
filter
(
\
(
_
,
score
)
->
filterProximity
score
proximity
)
similarities
--------------------------------------
similarities
::
[(
PhyloGroupId
,
Double
)]
similarities
=
concat
$
map
(
\
(
g2
,
g3
)
->
let
nbDocs
=
periodsToNbDocs
[(
getGroupPeriod
g1
),(
getGroupPeriod
g2
),(
getGroupPeriod
g3
)]
phylo
cooc'
=
unionWith
(
+
)
(
getGroupCooc
g2
)
(
getGroupCooc
g3
)
ngrams'
=
union
(
getGroupNgrams
g2
)
(
getGroupNgrams
g3
)
score
=
processProximity
proximity
nbDocs
cooc
cooc'
ngrams
ngrams'
in
nub
$
[(
getGroupId
g2
,
score
),(
getGroupId
g3
,
score
)])
pairsOfCandidates
similarities
=
concat
$
map
(
\
(
g2
,
g3
)
->
let
nbDocs
=
periodsToNbDocs
[(
getGroupPeriod
g1
),(
getGroupPeriod
g2
),(
getGroupPeriod
g3
)]
phylo
cooc'
=
if
(
g2
==
g3
)
then
getGroupCooc
g2
else
unionWith
(
+
)
(
getGroupCooc
g2
)
(
getGroupCooc
g3
)
ngrams'
=
if
(
g2
==
g3
)
then
getGroupNgrams
g2
else
union
(
getGroupNgrams
g2
)
(
getGroupNgrams
g3
)
score
=
processProximity
proximity
nbDocs
(
getGroupCooc
g1
)
cooc'
(
getGroupNgrams
g1
)
ngrams'
in
if
(
g2
==
g3
)
then
[(
getGroupId
g2
,
score
)]
else
[(
getGroupId
g2
,
score
),(
getGroupId
g3
,
score
)]
)
pairsOfCandidates
--------------------------------------
pairsOfCandidates
::
[(
PhyloGroup
,
PhyloGroup
)]
pairsOfCandidates
=
listToFullCombi
$
filter
(
\
g
->
elem
(
getGroupPeriod
g
)
nextPeriods
)
candidates
--------------------------------------
cooc
::
Map
(
Int
,
Int
)
Double
cooc
=
getGroupCooc
g1
--------------------------------------
ngrams
::
[
Int
]
ngrams
=
getGroupNgrams
g1
pairsOfCandidates
=
makePairs
nextPeriods
g1
phylo
--------------------------------------
nextPeriods
::
[(
Date
,
Date
)]
nextPeriods
=
take
depth
periods
--------------------------------------
--------------------------------------
matchWithPairs
::
PhyloGroup
->
(
PhyloGroup
,
PhyloGroup
)
->
Phylo
->
Double
matchWithPairs
g1
(
g2
,
g3
)
p
=
let
nbDocs
=
periodsToNbDocs
[(
getGroupPeriod
g1
),(
getGroupPeriod
g2
),(
getGroupPeriod
g3
)]
p
cooc
=
if
(
g2
==
g3
)
then
getGroupCooc
g2
else
unionWith
(
+
)
(
getGroupCooc
g2
)
(
getGroupCooc
g3
)
ngrams
=
if
(
g2
==
g3
)
then
getGroupNgrams
g2
else
union
(
getGroupNgrams
g2
)
(
getGroupNgrams
g3
)
in
processProximity
(
getPhyloProximity
p
)
nbDocs
(
getGroupCooc
g1
)
cooc
(
getGroupNgrams
g1
)
ngrams
phyloGroupMatching
::
[
PhyloPeriodId
]
->
PhyloGroup
->
Phylo
->
[
Pointer
]
phyloGroupMatching
periods
g
p
=
case
pointers
of
Nothing
->
[]
Just
pts
->
head'
"phyloGroupMatching"
-- | Keep only the best set of pointers grouped by proximity
$
groupBy
(
\
pt
pt'
->
snd
pt
==
snd
pt'
)
$
reverse
$
sortOn
snd
pts
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold
where
--------------------------------------
pointers
::
Maybe
[
Pointer
]
pointers
=
find
(
not
.
null
)
-- | For each time frame, process the Proximity on relevant pairs of targeted groups
$
scanl
(
\
acc
frame
->
let
pairs
=
makePairs
frame
g
p
in
acc
++
(
filter
(
\
(
_
,
proxi
)
->
filterProximity
proxi
(
getPhyloProximity
p
))
$
concat
$
map
(
\
(
t
,
t'
)
->
let
proxi
=
matchWithPairs
g
(
t
,
t'
)
p
in
if
(
t
==
t'
)
then
[(
getGroupId
t
,
proxi
)]
else
[(
getGroupId
t
,
proxi
),(
getGroupId
t'
,
proxi
)]
)
pairs
)
)
[]
-- | [[1900],[1900,1901],[1900,1901,1902],...] | length max => + 5 years
$
inits
periods
--------------------------------------
findBestCandidates'
::
Proximity
->
[
PhyloGroup
]
->
PhyloGroup
->
Phylo
->
[
Pointer
]
...
...
@@ -204,9 +242,10 @@ updateGroups fil lvl m p = alterPhyloGroups (\gs -> map (\g -> if ((getGroupLeve
-- | Optimisation : to keep only the groups that have at least one ngrams in commons with the target
filterCandidates
::
PhyloGroup
->
[
PhyloGroup
]
->
[
PhyloGroup
]
filterCandidates
g
gs
=
filter
(
\
g'
->
(
not
.
null
)
$
intersect
(
getGroupNgrams
g
)
(
getGroupNgrams
g'
))
$
delete
g
gs
initCandidates
::
PhyloGroup
->
[
PhyloPeriodId
]
->
[
PhyloGroup
]
->
[
PhyloGroup
]
initCandidates
g
prds
gs
=
filter
(
\
g'
->
elem
(
getGroupPeriod
g'
)
prds
)
$
filter
(
\
g'
->
(
not
.
null
)
$
intersect
(
getGroupNgrams
g
)
(
getGroupNgrams
g'
))
$
delete
g
gs
-- | a init avec la [[head groups]] et la tail groups
...
...
@@ -236,26 +275,24 @@ toBranches mem gs
-- | 2) for each branch, for each group find the best candidates (by Filiation and Proximity) and create the corresponding pointers
-- | 3) update all the groups with the new pointers if they exist
interTempoMatching
::
Filiation
->
Level
->
Proximity
->
Phylo
->
Phylo
interTempoMatching
fil
lvl
prox
p
=
traceMatching
fil
lvl
(
getThreshold
prox
)
debug
$
updateGroups
fil
lvl
pointersMap
p
interTempoMatching
fil
lvl
_
p
=
updateGroups
fil
lvl
(
Map
.
fromList
pointers
)
p
where
--------------------------------------
debug
::
[
Double
]
debug
=
sort
$
concat
$
map
(
snd
.
snd
)
pointers
-- debug :: [Pointers
]
-- debug = concat $ map (
snd) pointers
--------------------------------------
pointersMap
::
Map
PhyloGroupId
[
Pointer
]
pointersMap
=
Map
.
fromList
$
map
(
\
(
id
,
x
)
->
(
id
,
fst
x
))
pointers
--
pointersMap :: Map PhyloGroupId [Pointer]
--
pointersMap = Map.fromList $ map (\(id,x) -> (id,fst x)) pointers
--------------------------------------
pointers
::
[(
PhyloGroupId
,
([
Pointer
],[
Double
])
)]
pointers
=
concat
$
map
(
\
branche
->
map
(
\
g
->
(
getGroupId
g
,
findBestCandidates
fil
1
(
getPhyloMatchingFrame
p
)
prox
(
getNextPeriods
fil
(
getGroupPeriod
g
)
(
getPhyloPeriods
p
))
(
filterCandidates
g
branche
)
g
p
)
)
branche
)
branches
pointers
::
[(
PhyloGroupId
,
[
Pointer
]
)]
pointers
=
let
pts
=
map
(
\
g
->
let
periods
=
getNextPeriods
fil
(
getPhyloMatchingFrame
p
)
(
getGroupPeriod
g
)
(
getPhyloPeriods
p
)
in
(
getGroupId
g
,
phyloGroupMatching
periods
g
p
))
groups
pts'
=
pts
`
using
`
parList
rdeepseq
in
pts'
--------------------------------------
branches
::
[[
PhyloGroup
]]
branches
=
tracePreBranches
$
toBranches
[[
head'
"interTempoMatching"
(
getGroupsWithLevel
lvl
p
)]]
$
tail
(
getGroupsWithLevel
lvl
p
)
groups
::
[
PhyloGroup
]
groups
=
getGroupsWithLevel
lvl
p
--------------------------------------
...
...
@@ -265,12 +302,10 @@ interTempoMatching fil lvl prox p = traceMatching fil lvl (getThreshold prox) de
toLevelUp
::
[
Pointer
]
->
Phylo
->
[
Pointer
]
toLevelUp
lst
p
=
Map
.
toList
$
map
(
\
ws
->
maximum
ws
)
$
fromListWith
(
++
)
[(
id
,
[
w
])
|
(
id
,
w
)
<-
pointers
]
where
--------------------------------------
pointers
::
[
Pointer
]
pointers
=
map
(
\
(
id
,
v
)
->
(
getGroupLevelParentId
$
getGroupFromId
id
p
,
v
))
lst
--------------------------------------
$
fromListWith
(
++
)
[(
id
,
[
w
])
|
(
id
,
w
)
<-
let
pointers
=
map
(
\
(
id
,
v
)
->
(
getGroupLevelParentId
$
getGroupFromId
id
p
,
v
))
lst
pointers'
=
pointers
`
using
`
parList
rdeepseq
in
pointers'
]
-- | Transpose the parent/child pointers from one level to another
...
...
@@ -278,16 +313,14 @@ transposePeriodLinks :: Level -> Phylo -> Phylo
transposePeriodLinks
lvl
p
=
alterGroupWithLevel
(
\
g
->
--------------------------------------
let
childs
=
getGroupsFromIds
(
map
fst
$
getGroupLevelChilds
g
)
p
ascLink
=
toLevelUp
(
concat
$
map
getGroupPeriodParents
childs
)
p
desLink
=
toLevelUp
(
concat
$
map
getGroupPeriodChilds
childs
)
p
let
ascLink
=
toLevelUp
(
getGroupPeriodParents
g
)
p
desLink
=
toLevelUp
(
getGroupPeriodChilds
g
)
p
--------------------------------------
in
g
&
phylo_groupPeriodParents
%~
(
++
ascLink
)
&
phylo_groupPeriodChilds
%~
(
++
desLink
)
in
g
&
phylo_groupPeriodParents
.~
ascLink
&
phylo_groupPeriodChilds
.~
desLink
--------------------------------------
)
lvl
p
----------------
-- | Tracer | --
----------------
...
...
@@ -301,6 +334,7 @@ traceMatching fil lvl thr lst p = trace ( "----\n" <> show (fil) <> " unfiltered
<>
show
(
percentile
75
(
VS
.
fromList
lst
))
<>
" (75%) "
<>
show
(
percentile
90
(
VS
.
fromList
lst
))
<>
" (90%)
\n
"
)
p
tracePreBranches
::
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
tracePreBranches
bs
=
trace
(
show
(
length
bs
)
<>
" pre-branches"
<>
"
\n
"
<>
"with sizes : "
<>
show
(
map
length
bs
)
<>
"
\n
"
)
bs
...
...
src/Gargantext/Viz/Phylo/Metrics/Clustering.hs
View file @
815ab543
...
...
@@ -18,36 +18,20 @@ module Gargantext.Viz.Phylo.Metrics.Clustering
where
import
Data.Graph.Clustering.Louvain.CplusPlus
import
Data.List
(
last
,
concat
,
null
,
nub
,(
++
),
init
,
tail
,
elemIndex
,
groupBy
,(
!!
)
)
import
Data.List
(
concat
,
null
,
nub
,(
++
),
elemIndex
,
groupBy
,(
!!
),
(
\\
),
union
,
intersect
)
import
Data.Map
(
fromList
,
mapKeys
)
import
Gargantext.Prelude
import
Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Tools
-- | To apply the related components method to a PhyloGraph
-- curr = the current PhyloGroup
-- (nodes,edges) = the initial PhyloGraph minus the current PhyloGroup
-- next = the next PhyloGroups to be added in the cluster
-- memo = the memory of the allready created clusters
relatedComp
::
Int
->
PhyloGroup
->
GroupGraph
->
[
PhyloGroup
]
->
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
relatedComp
idx
curr
(
nodes
,
edges
)
next
memo
|
null
nodes'
&&
null
next'
=
memo'
|
(
not
.
null
)
next'
=
relatedComp
idx
(
head'
"relatedComp1"
next'
)
(
nodes'
,
edges
)
(
tail
next'
)
memo'
|
otherwise
=
relatedComp
(
idx
+
1
)
(
head'
"relatedComp2"
nodes'
)
(
tail
nodes'
,
edges
)
[]
memo'
where
--------------------------------------
memo'
::
[[
PhyloGroup
]]
memo'
|
null
memo
=
[[
curr
]]
|
idx
==
((
length
memo
)
-
1
)
=
(
init
memo
)
++
[(
last
memo
)
++
[
curr
]]
|
otherwise
=
memo
++
[[
curr
]]
--------------------------------------
next'
::
[
PhyloGroup
]
next'
=
filter
(
\
x
->
not
$
elem
x
$
concat
memo
)
$
nub
$
next
++
(
getNeighbours
False
curr
edges
)
--------------------------------------
nodes'
::
[
PhyloGroup
]
nodes'
=
filter
(
\
x
->
not
$
elem
x
next'
)
nodes
--------------------------------------
relatedComp
::
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
relatedComp
graphs
=
foldl'
(
\
mem
groups
->
if
(
null
mem
)
then
mem
++
[
groups
]
else
let
related
=
filter
(
\
groups'
->
(
not
.
null
)
$
intersect
groups
groups'
)
mem
in
if
(
null
related
)
then
mem
++
[
groups
]
else
(
mem
\\
related
)
++
[
union
groups
(
nub
$
concat
related
)]
)
[]
graphs
louvain
::
([
GroupNode
],[
GroupEdge
])
->
IO
[[
PhyloGroup
]]
...
...
src/Gargantext/Viz/Phylo/Tools.hs
View file @
815ab543
...
...
@@ -110,6 +110,13 @@ listToDirectedCombi :: Eq a => [a] -> [(a,a)]
listToDirectedCombi
l
=
[(
x
,
y
)
|
x
<-
l
,
y
<-
l
,
x
/=
y
]
listToEqualCombi
::
Eq
a
=>
[
a
]
->
[(
a
,
a
)]
listToEqualCombi
l
=
[(
x
,
y
)
|
x
<-
l
,
y
<-
l
,
x
==
y
]
listToPairs
::
Eq
a
=>
[
a
]
->
[(
a
,
a
)]
listToPairs
l
=
(
listToEqualCombi
l
)
++
(
listToUnDirectedCombi
l
)
-- | To get all combinations of a list and apply a function to the resulting list of pairs
listToDirectedCombiWith
::
Eq
a
=>
forall
b
.
(
a
->
b
)
->
[
a
]
->
[(
b
,
b
)]
listToDirectedCombiWith
f
l
=
[(
f
x
,
f
y
)
|
x
<-
l
,
y
<-
l
,
x
/=
y
]
...
...
@@ -195,6 +202,9 @@ getPhyloDescription p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p
getPhyloMatchingFrame
::
Phylo
->
Int
getPhyloMatchingFrame
p
=
_q_interTemporalMatchingFrame
$
_phyloParam_query
$
getPhyloParams
p
getPhyloProximity
::
Phylo
->
Proximity
getPhyloProximity
p
=
_q_interTemporalMatching
$
_phyloParam_query
$
getPhyloParams
p
getPhyloReBranchThr
::
Phylo
->
Double
getPhyloReBranchThr
p
=
_q_reBranchThr
$
_phyloParam_query
$
getPhyloParams
p
...
...
@@ -243,7 +253,7 @@ alterGroupWithLevel f lvl p = over ( phylo_periods
.
traverse
)
(
\
g
->
if
getGroupLevel
g
==
lvl
then
f
g
else
g
)
p
else
g
)
p
-- | To alter each list of PhyloGroups following a given function
...
...
stack.yaml
View file @
815ab543
...
...
@@ -35,6 +35,7 @@ extra-deps:
-
KMP-0.1.0.2
-
accelerate-1.2.0.0
-
aeson-lens-0.5.0.0
-
deepseq-th-0.1.0.4
-
duckling-0.1.3.0
-
full-text-search-0.2.1.4
-
fullstop-0.1.4
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment