1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
{-|
Module : Gargantext.Core.Viz.Graph.Bridgeness
Description : Bridgeness filter
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Let be a graph Bridgeness filters inter-communities links in two ways.
If the partitions are known, filtering is uniform to expose the communities clearly for the beginners.
But
uniformly
filters inter-communities links.
TODO use Map LouvainNodeId (Map LouvainNodeId)
-}
{-# LANGUAGE BangPatterns #-}
module Gargantext.Core.Viz.Graph.Bridgeness -- (bridgeness)
where
import Data.Map.Strict (Map, fromListWith, lookup, toList, mapWithKey, elems)
import Data.Maybe (catMaybes)
import Data.Ord (Down(..))
import Data.Set (Set)
import Data.Tuple.Extra (swap)
import Debug.Trace (trace)
import Gargantext.Core.Methods.Similarities (Similarity(..))
import Gargantext.Prelude
import Prelude (pi)
import Graph.Types (ClusterNode(..))
import qualified Data.List as List
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import qualified Data.Tuple.Extra as Tuple
import qualified Data.IntMap as Dico
----------------------------------------------------------------------
type Partitions = Map (Int, Int) Double -> IO [ClusterNode]
type Partitions' = Map (Int, Int) Double -> IO [Set NodeId]
----------------------------------------------------------------------
nodeId2comId :: ClusterNode -> (NodeId, CommunityId)
nodeId2comId (ClusterNode i1 i2) = (i1, i2)
type NodeId = Int
type CommunityId = Int
----------------------------------------------------------------------
-- recursiveClustering : to get more granularity of a given clustering
-- tested with spinglass clustering only (WIP)
recursiveClustering' :: Partitions' -> Map (Int, Int) Double -> IO [[Set NodeId]]
recursiveClustering' f mp = do
let
n :: Double
n = fromIntegral $ Set.size
$ Set.unions $ List.concat
$ map (\(k1,k2) -> map Set.singleton [k1, k2])
$ Map.keys mp
t :: Int
t = round $ 2 * n / sqrt n
ss <- f mp
mapM (\s -> if Set.size s > t then f (removeNodes s mp) else pure [s]) ss
----------------------------------------------------------------------
recursiveClustering :: Partitions -> Map (Int, Int) Double -> IO [ClusterNode]
recursiveClustering f mp = do
let
n :: Double
n = fromIntegral $ Set.size
$ Set.unions $ List.concat
$ map (\(k1,k2) -> map Set.singleton [k1, k2])
$ Map.keys mp
t :: Int
t = round $ 2 * n / sqrt n
(toSplit,others) <- List.span (\a -> Set.size a > t) <$> clusterNodes2sets <$> f mp
cls' <- mapM f $ map (\s -> removeNodes s mp) toSplit
pure $ setNodes2clusterNodes $ others <> (List.concat $ map clusterNodes2sets cls')
----------------------------------------------------------------------
setNodes2clusterNodes :: [Set NodeId] -> [ClusterNode]
setNodes2clusterNodes ns = List.concat $ map (\(n,ns') -> toCluster n ns') $ zip [1..] ns
where
toCluster :: CommunityId -> Set NodeId -> [ClusterNode]
toCluster cId setNodeId = map (\n -> ClusterNode n cId) (Set.toList setNodeId)
clusterNodes2map :: [ClusterNode] -> Map NodeId Int
clusterNodes2map = Map.fromList . map (\(ClusterNode nId cId) -> (nId, cId))
removeNodes :: Set NodeId
-> Map (NodeId, NodeId) Double
-> Map (NodeId, NodeId) Double
removeNodes s = Map.filterWithKey (\(n1,n2) _v -> Set.member n1 s && Set.member n2 s)
clusterNodes2sets :: [ClusterNode] -> [Set NodeId]
clusterNodes2sets = Dico.elems
. Dico.fromListWith (<>)
. (map ((Tuple.second Set.singleton) . swap . nodeId2comId))
----------------------------------------------------------------------
data Bridgeness = Bridgeness_Basic { bridgeness_partitions :: [ClusterNode]
, bridgeness_filter :: Double
}
| Bridgeness_Advanced { bridgeness_similarity :: Similarity
, bridgness_confluence :: Confluence
}
| Bridgeness_Recursive { br_partitions :: [[Set NodeId]]
, br_filter :: Double
, br_similarity :: Similarity
}
type Confluence = Map (NodeId, NodeId) Double
-- Filter Links between the Clusters
-- Links: Map (NodeId, NodeId) Double
-- List of Clusters: [Set NodeId]
bridgeness :: Bridgeness
-> Map (NodeId, NodeId) Double
-> Map (NodeId, NodeId) Double
bridgeness (Bridgeness_Recursive sn f sim) m =
Map.unions $ [linksBetween] <> map (\s -> bridgeness (Bridgeness_Basic (setNodes2clusterNodes s) (if sim == Conditional then pi*f else f)) m') sn
where
(linksBetween, m') = Map.partitionWithKey (\(n1,n2) _v -> Map.lookup n1 mapNodeIdClusterId
/= Map.lookup n2 mapNodeIdClusterId
) $ bridgeness (Bridgeness_Basic clusters f) m
clusters = setNodes2clusterNodes (map Set.unions sn)
mapNodeIdClusterId = clusterNodes2map clusters
bridgeness (Bridgeness_Advanced sim c) m = Map.fromList
$ List.filter (\x -> if sim == Conditional then snd x > 0.2 else snd x > 0.02)
$ map (\(ks, (v1,_v2)) -> (ks,v1))
$ Map.toList
$ Map.intersectionWithKey
(\k v1 v2 -> trace ("intersectionWithKey " <> (show (k, v1, v2))) (v1, v2)) m c
bridgeness (Bridgeness_Basic ns b) m = Map.fromList
$ List.concat
$ Map.elems
$ filterComs (round b)
$ groupEdges (Map.fromList $ map nodeId2comId ns) m
groupEdges :: (Ord comId, Ord nodeId)
=> Map nodeId comId
-> Map (nodeId, nodeId) value
-> Map (comId, comId) [((nodeId, nodeId), value)]
groupEdges m = fromListWith (<>)
. catMaybes
. map (\((n1,n2), d)
-> let
n1n2_m = (,) <$> lookup n1 m <*> lookup n2 m
n1n2_d = Just [((n1,n2),d)]
in (,) <$> n1n2_m <*> n1n2_d
)
. toList
filterComs :: (Ord n1, Eq n2)
=> Int
-> Map (n2, n2) [(a3, n1)]
-> Map (n2, n2) [(a3, n1)]
filterComs b m = Map.filter (\n -> length n > 0) $ mapWithKey filter' m
where
filter' (c1,c2) a
| c1 == c2 = a
-- TODO use n here
| otherwise = take (b * 2*n) $ List.sortOn (Down . snd) a
where
n :: Int
n = round $ 100 * a' / t
a'= fromIntegral $ length a
t :: Double
t = fromIntegral $ length $ List.concat $ elems m
--------------------------------------------------------------
-- Utils
{--
map2intMap :: Map (Int, Int) a -> IntMap (IntMap a)
map2intMap m = IntMap.fromListWith (<>)
$ map (\((k1,k2), v) -> if k1 < k2
then (k1, IntMap.singleton k2 v)
else (k2, IntMap.singleton k1 v)
)
$ Map.toList m
look :: (Int,Int) -> IntMap (IntMap a) -> Maybe a
look (k1,k2) m = if k1 < k2
then case (IntMap.lookup k1 m) of
Just m' -> IntMap.lookup k2 m'
_ -> Nothing
else look (k2,k1) m
{-
Compute the median of a list
From: https://hackage.haskell.org/package/dsp-0.2.5.1/docs/src/Numeric.Statistics.Median.html
Compute the center of the list in a more lazy manner
and thus halves memory requirement.
-}
median :: (Ord a, Fractional a) => [a] -> a
median [] = panic "medianFast: empty list has no median"
median zs =
let recurse (x0:_) (_:[]) = x0
recurse (x0:x1:_) (_:_:[]) = (x0+x1)/2
recurse (_:xs) (_:_:ys) = recurse xs ys
recurse _ _ =
panic "median: this error cannot occur in the way 'recurse' is called"
in recurse zs zs
-}