Commit 4cc94395 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

[flouvain] more work on the algorithm

parent 14159f5a
...@@ -88,13 +88,26 @@ fedgeWeight :: FEdge b -> Double ...@@ -88,13 +88,26 @@ fedgeWeight :: FEdge b -> Double
fedgeWeight = unWeight . fst fedgeWeight = unWeight . fst
type FGraph a b = Gr a (FEdge b) type FGraph a b = Gr a (FEdge b)
-- | This is the \Sum_in in formula (2) of Louvain paper -- Used for k_i in formula (2)
newtype NodeWeightSum = NodeWeightSum { unNodeWeightSum :: Double }
-- Used for k_i,in in formula (2)
newtype NodeComWeightSum = NodeComWeightSum { unNodeComWeightSum :: Double }
-- Probably this structure is better to reduce the number of computations
-- (precompute sum of node weights, which is the k_i variable in formula (2)).
-- type FNode a = (NodeWeightSum, a)
-- fnodeWeightSum :: FNode a -> Double
-- fnodeWeightSum = unNodeWeightSum . fst
-- | This is the m variable in formula (2) of the Louvain paper
newtype GraphWeightSum = GraphWeightSum { unGraphWeightSum :: Double }
-- | This is the \Sum_in in formula (2) of the Louvain paper
newtype InWeightSum = InWeightSum { unInWeightSum :: Double } newtype InWeightSum = InWeightSum { unInWeightSum :: Double }
instance Num InWeightSum where -- | This is the \Sum_tot in formula (2) of the Louvain paper
(InWeightSum w1) + (InWeightSum w2) = InWeightSum (w1 + w2)
(InWeightSum w1) * (InWeightSum w2) = InWeightSum (w1 * w2)
-- | This is the \Sum_tot in formula (2) of Louvain paper
newtype TotWeightSum = TotWeightSum { unTotWeightSum :: Double } newtype TotWeightSum = TotWeightSum { unTotWeightSum :: Double }
-- | Computed Delta_Q value in (2)
newtype DeltaQ = DeltaQ { unDeltaQ :: Double }
-- | Type for the clusters we will be creating.
newtype Community = Community { unCommunity :: ([Node], InWeightSum, TotWeightSum) } newtype Community = Community { unCommunity :: ([Node], InWeightSum, TotWeightSum) }
comNodes :: Community -> [Node] comNodes :: Community -> [Node]
comNodes (Community (ns, _, _)) = ns comNodes (Community (ns, _, _)) = ns
...@@ -104,6 +117,11 @@ type CGrEdge = (InWeightSum, TotWeightSum) ...@@ -104,6 +117,11 @@ type CGrEdge = (InWeightSum, TotWeightSum)
type CGr = Gr Community CGrEdge type CGr = Gr Community CGrEdge
graphWeight :: FGraph a b -> GraphWeightSum
graphWeight gr = GraphWeightSum $ ufold weight' 0 gr
where
weight' (p, _, _, s) acc = acc + (sum $ map (fedgeWeight . fst) $ p <> s)
-- ALGORITHM -- ALGORITHM
...@@ -111,26 +129,36 @@ type CGr = Gr Community CGrEdge ...@@ -111,26 +129,36 @@ type CGr = Gr Community CGrEdge
modularity :: Gr a b -> CGr -> Double modularity :: Gr a b -> CGr -> Double
modularity gr cgr = 0.0 modularity gr cgr = 0.0
type Delta a b = Gr a b -> Node -> Community -> Double type Delta a b = Community -> NodeWeightSum -> NodeComWeightSum -> GraphWeightSum -> DeltaQ
-- | Delta Q function from Louvain paper (2). -- | Delta Q function from Louvain paper (2).
delta :: Delta a b delta :: Delta a b
delta gr n (Community (ns, inWeightSum, totWeightSum)) = 0.0 delta com nws ncws gws = DeltaQ 0.0
-- | One iteration step takes the graph and existing communities as a graph and -- | One iteration step takes the graph and existing communities as a graph and
-- computes new community graph -- computes new community graph
-- NOTE: xdfsFoldWith only iterates with the defined Context. In each step of
-- the algorithm, we need full node information however (i.e. all edges
-- connected to a node), so instead of calling just 'step' we are forced to call via:
-- 'step . context gr . node''
-- We could avoid the higher complexity, eg. by precomputing the whole graph
-- into a HashMap Node [Edge].
iteration :: FGraph a b -> CGr -> CGr iteration :: FGraph a b -> CGr -> CGr
iteration gr cs = xdfsFoldWith suc' step cs (nodes gr) gr iteration gr cs = xdfsFoldWith suc' (\(_, v, _, _) -> step gw $ context gr $ v) cs (nodes gr) gr
where where
gw = graphWeight gr
--weightSum = ufold weightSum' 0 gr --weightSum = ufold weightSum' 0 gr
--weightSum' (p, v, l, s) acc = acc + (sum ) --weightSum' (p, v, l, s) acc = acc + (sum )
-- TODO Remember to filter out empty Communities -- TODO Remember to filter out empty Communities
-- | Step for one node. We try re-assign it to a neighbouring community, where -- | Step for one node. We try re-assign it to a neighbouring community, where
-- the increase of modularity for graph will be the largest -- the increase of modularity for graph will be the largest
step :: CFunFold a (FEdge b) CGr step :: GraphWeightSum -> CFunFold a (FEdge b) CGr
step (p, v, l, s) cgr = cgr step gw (p, v, l, s) cgr = cgr
where where
mNc :: Maybe (LNode Community)
mNc = nodeCommunity v cgr mNc = nodeCommunity v cgr
ncs :: [LNode Community]
ncs = nodeNeighbours v cgr ncs = nodeNeighbours v cgr
-- We move node from community nc into ncs -- We move node from community nc into ncs
moves :: Maybe (LNode Community, [LNode Community]) moves :: Maybe (LNode Community, [LNode Community])
...@@ -142,6 +170,17 @@ step (p, v, l, s) cgr = cgr ...@@ -142,6 +170,17 @@ step (p, v, l, s) cgr = cgr
makeMove :: Direction -> LNode Community -> LNode Community makeMove :: Direction -> LNode Community -> LNode Community
makeMove direction (cn, c) = (cn, moveNodeWithNeighbours (p <> s) v direction c) makeMove direction (cn, c) = (cn, moveNodeWithNeighbours (p <> s) v direction c)
-- k_i variable in formula (2)
ki :: NodeWeightSum
ki = NodeWeightSum $ sum $ map (fedgeWeight . fst) $ p <> s
-- k_i,in variable in formula (2)
kiin :: Maybe NodeComWeightSum
kiin = case mNc of
Nothing -> Nothing
Just (_, com) -> Just $
NodeComWeightSum $ sum $ map (fedgeWeight . fst) $ filter (\(l, n) -> n `elem` (comNodes com)) s
--bestFit = maximumBy --bestFit = maximumBy
-- TODO Compute \Delta Q (gain of moving node v into Community C) which consists of: -- TODO Compute \Delta Q (gain of moving node v into Community C) which consists of:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment