[flouvain] more work on the algorithm

4cc94395 · Przemyslaw Kaminski · 14159f5a · 4cc94395
Commit 4cc94395 authored Mar 21, 2020 by Przemyslaw Kaminski
Hide whitespace changes
Inline Side-by-side

Showing with 49 additions and 10 deletions

FLouvain.hs src/Data/Graph/Clustering/FLouvain.hs +49 -10

No files found.
--- a/src/Data/Graph/Clustering/FLouvain.hs
+++ b/src/Data/Graph/Clustering/FLouvain.hs
@@ -88,13 +88,26 @@ fedgeWeight :: FEdge b -> Double
 fedgeWeight = unWeight . fst
 type FGraph a b = Gr a (FEdge b)
-- | This is the \Sum_in in formula (2) of Louvain paper
+-- Used for k_i in formula (2)
+newtype NodeWeightSum = NodeWeightSum { unNodeWeightSum :: Double }
+-- Used for k_i,in in formula (2)
+newtype NodeComWeightSum = NodeComWeightSum { unNodeComWeightSum :: Double }
+-- Probably this structure is better to reduce the number of computations
+-- (precompute sum of node weights, which is the k_i variable in formula (2)).
+-- type FNode a = (NodeWeightSum, a)
+-- fnodeWeightSum :: FNode a -> Double
+-- fnodeWeightSum = unNodeWeightSum . fst
+-- | This is the m variable in formula (2) of the Louvain paper
+newtype GraphWeightSum = GraphWeightSum { unGraphWeightSum :: Double }
+-- | This is the \Sum_in in formula (2) of the Louvain paper
 newtype InWeightSum = InWeightSum { unInWeightSum :: Double }
-instance Num InWeightSum where
+-- | This is the \Sum_tot in formula (2) of the Louvain paper
-  (InWeightSum w1) + (InWeightSum w2) = InWeightSum (w1 + w2)
-  (InWeightSum w1) * (InWeightSum w2) = InWeightSum (w1 * w2)
-- | This is the \Sum_tot in formula (2) of Louvain paper
 newtype TotWeightSum = TotWeightSum { unTotWeightSum :: Double }
+-- | Computed Delta_Q value in (2)
+newtype DeltaQ = DeltaQ { unDeltaQ :: Double }
+-- | Type for the clusters we will be creating.
 newtype Community = Community { unCommunity ::  ([Node], InWeightSum, TotWeightSum) }
 comNodes :: Community -> [Node]
 comNodes (Community (ns, _, _)) = ns
@@ -104,6 +117,11 @@ type CGrEdge = (InWeightSum, TotWeightSum)
 type CGr = Gr Community CGrEdge
+graphWeight :: FGraph a b -> GraphWeightSum
+graphWeight gr = GraphWeightSum $ ufold weight' 0 gr
+  where
+    weight' (p, _, _, s) acc = acc + (sum $ map (fedgeWeight . fst) $ p <> s)
 -- ALGORITHM
@@ -111,26 +129,36 @@ type CGr = Gr Community CGrEdge
 modularity :: Gr a b -> CGr -> Double
 modularity gr cgr = 0.0
-type Delta a b = Gr a b -> Node -> Community -> Double
+type Delta a b = Community -> NodeWeightSum -> NodeComWeightSum -> GraphWeightSum -> DeltaQ
 -- | Delta Q function from Louvain paper (2).
 delta :: Delta a b
-delta gr n (Community (ns, inWeightSum, totWeightSum)) = 0.0
+delta com nws ncws gws = DeltaQ 0.0
 -- | One iteration step takes the graph and existing communities as a graph and
 -- computes new community graph
+-- NOTE: xdfsFoldWith only iterates with the defined Context. In each step of
+-- the algorithm, we need full node information however (i.e. all edges
+-- connected to a node), so instead of calling just 'step' we are forced to call via:
+-- 'step . context gr . node''
+-- We could avoid the higher complexity, eg. by precomputing the whole graph
+-- into a HashMap Node [Edge].
 iteration :: FGraph a b -> CGr -> CGr
-iteration gr cs = xdfsFoldWith suc' step cs (nodes gr) gr
+iteration gr cs = xdfsFoldWith suc' (\(_, v, _, _) -> step gw $ context gr $ v) cs (nodes gr) gr
  where
+    gw = graphWeight gr
    --weightSum = ufold weightSum' 0 gr
    --weightSum' (p, v, l, s) acc = acc + (sum )
 -- TODO Remember to filter out empty Communities
 -- | Step for one node. We try re-assign it to a neighbouring community, where
 -- the increase of modularity for graph will be the largest
-step :: CFunFold a (FEdge b) CGr
+step :: GraphWeightSum -> CFunFold a (FEdge b) CGr
-step (p, v, l, s) cgr = cgr
+step gw (p, v, l, s) cgr = cgr
  where
+    mNc :: Maybe (LNode Community)
    mNc = nodeCommunity  v cgr
+    ncs :: [LNode Community]
    ncs = nodeNeighbours v cgr
    -- We move node from community nc into ncs
    moves :: Maybe (LNode Community, [LNode Community])
@@ -142,6 +170,17 @@ step (p, v, l, s) cgr = cgr
    makeMove :: Direction -> LNode Community -> LNode Community
    makeMove direction (cn, c) = (cn, moveNodeWithNeighbours (p <> s) v direction c)
+    -- k_i variable in formula (2)
+    ki :: NodeWeightSum
+    ki = NodeWeightSum $ sum $ map (fedgeWeight . fst) $ p <> s
+    -- k_i,in variable in formula (2)
+    kiin :: Maybe NodeComWeightSum
+    kiin = case mNc of
+      Nothing -> Nothing
+      Just (_, com) -> Just $
+          NodeComWeightSum $ sum $ map (fedgeWeight . fst) $ filter (\(l, n) -> n `elem` (comNodes com)) s
    --bestFit = maximumBy
    -- TODO Compute \Delta Q (gain of moving node v into Community C) which consists of: