Commit 70e38ea9 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

[FLouvain] second louvain step and runFLouvain cycle

parent 1acd124c
......@@ -4,7 +4,7 @@ cabal-version: 1.12
--
-- see: https://github.com/sol/hpack
--
-- hash: e1e53adbd24148d990b70f078530a01370effd8531413470ea9947aa095bf247
-- hash: 70b34ec62fea08353f69f29cc103940feb22097a32b1f061446843b348e5f727
name: clustering-louvain
version: 0.1.0.0
......@@ -30,6 +30,7 @@ library
Data.Graph.Clustering.Example
Data.Graph.Clustering.HLouvain
Data.Graph.Clustering.ILouvain
Data.Graph.Clustering.Louvain.Types
Paths_clustering_louvain
hs-source-dirs:
src
......
......@@ -4,28 +4,42 @@ import Protolude
import Control.Monad (foldM_)
import Data.List (nub, sort)
import Data.Graph.Clustering.Louvain.Utils
import Data.Graph.FGL
import Data.Graph.Inductive
import Data.Graph.Clustering.FLouvain
import qualified Data.Text as T
import qualified Text.ParserCombinators.Parsec as P
import Text.Parsec.Language (haskellStyle)
import qualified Text.Parsec.Token as PT
import Data.Graph.Clustering.Louvain.Utils
import Data.Graph.Clustering.Louvain.Types
import Data.Graph.Clustering.FLouvain
-- | Run FLouvain.iterate on an example graph
-- Example call:
-- putStrLn $ prettify $ iterateOnce cuiller
-- Prelude.map (fst3 . unCommunity . snd) $ labNodes $ iterateOnce karate
iterateOnce :: Gr () Double -> CGr
iterateOnce :: Gr a Double -> CGr a
iterateOnce gr = iteration fgr cgr
where
fgr = toFGraph gr
cgr = initialCGr fgr
runIterations :: Int -> Gr () Double -> IO ()
runIterations n gr = do
let fgr = toFGraph gr
runFLouvain :: (Show a, Eq a) => Int -> Int -> FGraph a () -> IO ()
runFLouvain 0 _ fgr = return ()
runFLouvain cycles n fgr = do
cgr <- runFIterations n fgr
let fgrNext = louvainSecondStep fgr cgr
putStrLn ("-----------------" :: Text)
putStrLn ("New FGraph:" :: Text)
putStrLn $ prettify fgrNext
runFLouvain (cycles - 1) n fgrNext
runIterations :: Show a => Int -> Gr a Double -> IO (CGr a)
runIterations n gr = runFIterations n $ toFGraph gr
runFIterations :: Show a => Int -> FGraph a () -> IO (CGr a)
runFIterations n fgr = do
let fgrWeight = graphWeight fgr
let initCgr = initialCGr fgr
......@@ -41,6 +55,8 @@ runIterations n gr = do
putStrLn ("Non-empty communities: " :: Text)
mapM_ (\c -> putStrLn (show c :: Text)) coms
return lastCgr
where
runIteration fgr fgrWeight iterCgr i = do
let iterNextCgr = iteration fgr iterCgr
......@@ -50,7 +66,7 @@ runIterations n gr = do
putStrLn $ T.unpack $ show $ modularity fgr iterNextCgr fgrWeight
return iterNextCgr
runLouvainFirstStepIterate :: Int -> Gr () Double -> (Modularity, CGr)
runLouvainFirstStepIterate :: Int -> Gr a Double -> (Modularity, CGr a)
runLouvainFirstStepIterate n gr = (modularity fgr cgr m, cgr)
where
fgr = toFGraph gr
......
......@@ -40,29 +40,13 @@ import Data.Graph.Inductive
import qualified Data.List as DL
import Data.Graph.FGL
-- "glue" : function to gather/merge communities
-- "klue" : function to split communities
data ClusteringMethod = Glue | Klue
deriving (Eq)
import Data.Graph.Clustering.Louvain.Utils (fixPt, mkFGraph)
import Data.Graph.Clustering.Louvain.Types
------------------------------------------------------------------------
-- | Fixed point with at most n iterations
-- 'Int' argument is the maximal number of iterations to make
-- 'a -> a' is the iterator function
-- 'a -> Bool' is the condition checking function ('True' continues looping, 'False' breaks it)
-- 'a' is the initial value
fixPt :: Int -> (a -> a) -> (a -> Bool) -> a -> a
fixPt 0 iterator _ init = iterator init
fixPt n iterator cond init =
if cond next
then fixPt (n - 1) iterator cond init
else next
where
next = iterator init
-- | Main Louvain first step iteration function
louvainFirstStepIterate :: Int -> FGraph a b -> CGr
louvainFirstStepIterate :: Int -> FGraph a b -> CGr a
louvainFirstStepIterate n gr = fixPt n iterator cond initCGr
where
initCGr = initialCGr gr
......@@ -72,95 +56,44 @@ louvainFirstStepIterate n gr = fixPt n iterator cond initCGr
-- | Second step from the Louvain paper -- given a clustering, create new graph
-- of clusters
louvainSecondStep :: FGraph a b -> CGr -> FGraph a b
louvainSecondStep gr cgr = gr
louvainSecondStep :: forall a b c. Eq c => FGraph a b -> CGr c -> FGraph (Community c) ()
louvainSecondStep gr cgr = mkFGraph nodes edges
where
nodes :: [LNode (Community c)]
--nodes = filter (\(_, com) -> length (comNodes com) > 0) $ labNodes cgr
nodes = labNodes cgr
edges :: [(Node, Node, Double)]
edges = concatMap comEdges $ labNodes cgr
comEdges :: LNode (Community c) -> [(Node, Node, Double)]
comEdges lnCom = mapMaybe (comToComEdge lnCom) $ labNodes cgr
comToComEdge :: LNode (Community c) -> LNode (Community c) -> Maybe (Node, Node, Double)
-- No self-edges
comToComEdge lnCom1 lnCom2 | lnCom1 == lnCom2 = Nothing
comToComEdge (_, com1) _ | length (comNodes com1) == 0 = Nothing
comToComEdge _ (_, com2) | length (comNodes com2) == 0 = Nothing
comToComEdge (com1N, com1) (com2N, com2) = Just (com1N, com2N, comToComWeight gr com1 com2)
-- | Weight between communities. Base graph is needed to fetch weights between
-- individual nodes.
comToComWeight :: FGraph a b -> Community c -> Community c -> Double
comToComWeight gr com1 com2 = weight
where
weight :: Double
weight = sum $ map (\n -> unNodeComWeightSum $ nodeComWeightSum com2 $ context gr n) $ comNodes com1
------------------------------------------------------------------------
-- Our basic graph. Nodes have custom labels. Edges have weight assigned to them.
newtype Weight = Weight { unWeight :: Double }
deriving (Show, Eq, Ord)
type FEdge b = (Weight, b)
fedgeWeight :: FEdge b -> Double
fedgeWeight = unWeight . fst
sumEdgeWeights :: Adj (FEdge b) -> Double
sumEdgeWeights es = sum $ map (fedgeWeight . fst) es
type FGraph a b = Gr a (FEdge b)
-- | Used for k_i in formula (2)
-- (sum of the weights of the links incident to node i)
newtype NodeWeightSum = NodeWeightSum { unNodeWeightSum :: Double }
deriving (Show, Eq, Ord)
nodeWeightSum :: Context a (FEdge b) -> NodeWeightSum
nodeWeightSum (p, _, _, s) = NodeWeightSum $ sumEdgeWeights $ p <> s
-- |Used for k_i,in in formula (2)
-- (Sum of weights of links from a given 'Node' to nodes in a given 'Community')
newtype NodeComWeightSum = NodeComWeightSum { unNodeComWeightSum :: Double }
deriving (Show, Eq, Ord)
nodeComWeightSum :: Community -> Context a (FEdge b) -> NodeComWeightSum
nodeComWeightSum com (p, _, _, s) =
NodeComWeightSum $ sumEdgeWeights $ filter (\(_, n) -> n `elem` comNodes com) $ p <> s
newtype NodeNonComWeightSum = NodeNonComWeightSum { unNodeNonComWeightSum :: Double }
deriving (Show, Eq, Ord)
nodeNonComWeightSum :: Community -> Context a (FEdge b) -> NodeNonComWeightSum
nodeNonComWeightSum com (p, _, _, s) =
NodeNonComWeightSum $ sumEdgeWeights $ filter (\(_, n) -> n `notElem` comNodes com) $ p <> s
-- Probably this structure is better to reduce the number of computations
-- (precompute sum of node weights, which is the k_i variable in formula (2)).
-- type FNode a = (NodeWeightSum, a)
-- fnodeWeightSum :: FNode a -> Double
-- fnodeWeightSum = unNodeWeightSum . fst
-- | This is the m variable in formula (2) of the Louvain paper
newtype GraphWeightSum = GraphWeightSum { unGraphWeightSum :: Double }
deriving (Show, Eq, Ord)
-- | This is the \Sum_in in formula (2) of the Louvain paper
-- (sum of the weights of the links inside C)
newtype InWeightSum = InWeightSum { unInWeightSum :: Double }
deriving (Show, Eq, Ord)
-- | This is the \Sum_tot in formula (2) of the Louvain paper
-- (sum of the weights of the links incident to nodes in C)
newtype TotWeightSum = TotWeightSum { unTotWeightSum :: Double }
deriving (Show, Eq, Ord)
-- | Computed Delta_Q value in (2)
newtype DeltaQ = DeltaQ { unDeltaQ :: Double }
deriving (Show, Eq, Ord)
newtype Modularity = Modularity { unModularity :: Double }
deriving (Show, Eq, Ord)
-- | Type for the clusters we will be creating.
newtype Community = Community { unCommunity :: ([Node], InWeightSum, TotWeightSum) }
deriving (Show, Eq, Ord)
comNodes :: Community -> [Node]
comNodes (Community (ns, _, _)) = ns
comInWeightSum :: Community -> InWeightSum
comInWeightSum (Community (_, inWeightSum, _)) = inWeightSum
comTotWeightSum :: Community -> TotWeightSum
comTotWeightSum (Community (_, _, totWeightSum)) = totWeightSum
type CGrNode = Node
type CGrEdge = (InWeightSum, TotWeightSum)
type CGr = Gr Community ()
graphWeight :: FGraph a b -> GraphWeightSum
graphWeight gr = GraphWeightSum $ 0.5 * ufold (\(_, n, _, _) -> weight' $ context gr n) 0 gr
where
weight' (p, _, _, s) acc = acc + (sumEdgeWeights $ p <> s)
-- | Compute initial 'CGr' for a given 'FGraph a b'. This means, put each node
-- in a separate community.
initialCGr :: FGraph a b -> CGr
initialCGr :: FGraph a b -> CGr a
initialCGr gr = gmap singletonCom gr
where
-- A singleton community is given:
-- the same node id for a community
-- same incoming/outgoing edges
-- custom Community label
singletonCom (p, v, _, s) = (p', v, Community ([v], iws, tws), s')
singletonCom (p, v, l, s) = (p', v, Community ([v], iws, tws, l), s')
where
p' = map edgeComRemap p
s' = map edgeComRemap s
......@@ -177,7 +110,7 @@ initialCGr gr = gmap singletonCom gr
-- | Q function from Louvain paper (1).
-- We just fold over the communities (this is because of the delta(c_i, c_j)
-- param)
modularity :: FGraph a b -> CGr -> GraphWeightSum -> Modularity
modularity :: FGraph a b -> CGr c -> GraphWeightSum -> Modularity
modularity gr cgr m = Modularity $ coeff * ( ufold modularity' 0.0 cgr )
where
coeff = 0.5 / (unGraphWeightSum m)
......@@ -194,9 +127,9 @@ modularity gr cgr m = Modularity $ coeff * ( ufold modularity' 0.0 cgr )
ki :: Node -> Double
ki n = unNodeWeightSum $ nodeWeightSum $ context gr n
type Delta = Community -> NodeWeightSum -> NodeComWeightSum -> GraphWeightSum -> DeltaQ
type Delta c = Community c -> NodeWeightSum -> NodeComWeightSum -> GraphWeightSum -> DeltaQ
-- | Delta Q function from Louvain paper (2).
delta :: Delta
delta :: Delta c
delta com ki kiin m = DeltaQ $ acc - dec
where
inWeightSum = comInWeightSum com
......@@ -218,7 +151,7 @@ delta com ki kiin m = DeltaQ $ acc - dec
-- 'step . context gr . node''
-- We could avoid the higher complexity, eg. by precomputing the whole graph
-- into a HashMap Node [Edge].
iteration :: FGraph a b -> CGr -> CGr
iteration :: FGraph a b -> CGr c -> CGr c
iteration gr cs = xdfsFoldWith suc' (\(_, v, _, _)
-> step gw $ context gr $ v) cs (nodes gr) gr
where
......@@ -229,7 +162,7 @@ iteration gr cs = xdfsFoldWith suc' (\(_, v, _, _)
-- TODO Remember to filter out empty Communities
-- | Step for one node. We try re-assign it to a neighbouring community, where
-- the increase of modularity for graph will be the largest
step :: GraphWeightSum -> CFunFold a (FEdge b) CGr
step :: forall a b c. GraphWeightSum -> CFunFold a (FEdge b) (CGr c)
step gw ctx@(_, v, _, _) cgr = newCgr
where
newCgr = case mNc of
......@@ -246,9 +179,9 @@ step gw ctx@(_, v, _, _) cgr = newCgr
(bestFitCom, DeltaQ bestFitdq) =
maximumBy (\(_, deltaq1) (_, deltaq2) -> compare deltaq1 deltaq2) deltas
mNc :: Maybe (LNode Community)
mNc :: Maybe (LNode (Community c))
mNc = nodeCommunity v cgr
ncs :: [LNode Community]
ncs :: [LNode (Community c)]
ncs = nodeNeighbours v cgr
-- We move node from community nc into ncs
......@@ -258,17 +191,17 @@ step gw ctx@(_, v, _, _) cgr = newCgr
-- Just nc -> Just ( makeMove OutOf nc
-- , map (makeMove Into) ncs )
makeMove :: Direction -> LNode Community -> LNode Community
makeMove :: Direction -> LNode (Community c) -> LNode (Community c)
makeMove direction (cn, c) = (cn, moveNodeWithContext ctx direction c)
-- k_i variable in formula (2)
ki :: NodeWeightSum
ki = nodeWeightSum ctx
deltas :: [(LNode Community, DeltaQ)]
deltas :: [(LNode (Community c), DeltaQ)]
deltas = map (\c -> (c, delta' c)) ncs
delta' :: LNode Community -> DeltaQ
delta' :: LNode (Community c) -> DeltaQ
delta' com = delta (llab com) ki kiin gw
where
-- k_i,in variable in formula (2)
......@@ -291,14 +224,14 @@ data Direction = Into | OutOf
-- | Given 'Node' and 'Community' graph, find the 'LNode' of 'Community' which
-- contains the node
nodeCommunity :: Node -> CGr -> Maybe (LNode Community)
nodeCommunity :: Node -> CGr c -> Maybe (LNode (Community c))
nodeCommunity n cgr = head (filter f $ labNodes cgr)
where
f :: (a, Community) -> Bool
f :: (a, Community c) -> Bool
f (_, com) = n `elem` comNodes com
-- | Find 'LNode's of 'Community' graph neighbouring a given node
nodeNeighbours :: Node -> CGr -> [LNode Community]
nodeNeighbours :: Node -> CGr c -> [LNode (Community c)]
nodeNeighbours n cgr =
case nodeCommunity n cgr of
Nothing -> []
......@@ -312,16 +245,16 @@ nodeNeighbours n cgr =
-- Just (cn, _) -> lneighbors cgr cn
-- | Moves 'Node' in the 'Direction' of 'Community' and recomputes 'Community''s weights
moveNode :: forall a b. FGraph a b -> Node -> Direction -> Community -> Community
moveNode :: forall a b c. FGraph a b -> Node -> Direction -> Community c -> Community c
moveNode gr n direction c = moveNodeWithContext ctx direction c
where
ctx :: Context a (FEdge b)
ctx = context gr n
-- | Same as 'moveNode' above but with only node context, not whole graph
moveNodeWithContext :: forall a b. Context a (FEdge b) -> Direction -> Community -> Community
moveNodeWithContext ctx@(_, n, _, _) direction com@(Community (ns, inwsum, totwsum)) =
Community (newNs, InWeightSum newInWsum, TotWeightSum newTotWsum)
moveNodeWithContext :: forall a b c. Context a (FEdge b) -> Direction -> Community c -> Community c
moveNodeWithContext ctx@(_, n, _, _) direction com@(Community (ns, inwsum, totwsum, l)) =
Community (newNs, InWeightSum newInWsum, TotWeightSum newTotWsum, l)
where
newNs = case direction of
Into -> sort (n:ns)
......
......@@ -19,11 +19,11 @@ References:
module Data.Graph.Clustering.Louvain
where
import Data.Tuple.Extra (fst3)
import Data.List (maximumBy, nub, intersect, foldl', zipWith, concat)
import Data.Graph.Inductive
import Data.Graph.Clustering.Louvain.Utils (LouvainNode(..), toFGraph)
import Data.Graph.Clustering.FLouvain (louvainFirstStepIterate, Community(..), initialCGr)
import Data.Graph.Clustering.FLouvain (louvainFirstStepIterate, initialCGr)
import Data.Graph.Clustering.Louvain.Types (Community(..), comNodes)
------------------------------------------------------------------------
-- | Definitions
------------------------------------------------------------------------
......@@ -35,7 +35,7 @@ type Reverse = Bool
------------------------------------------------------------------------
flouvain :: Int -> Gr () Double -> [[Node]]
flouvain n g = map (fst3 . unCommunity . snd) $ labNodes g'
flouvain n g = map (comNodes . snd) $ labNodes g'
where
g' = louvainFirstStepIterate n (toFGraph g)
------------------------------------------------------------------------
......
module Data.Graph.Clustering.Louvain.Types where
import Protolude
import Data.Graph.Inductive
-- "glue" : function to gather/merge communities
-- "klue" : function to split communities
data ClusteringMethod = Glue | Klue
deriving (Eq)
newtype Weight = Weight { unWeight :: Double }
deriving (Show, Eq, Ord)
type FEdge b = (Weight, b)
fedgeWeight :: FEdge b -> Double
fedgeWeight = unWeight . fst
sumEdgeWeights :: Adj (FEdge b) -> Double
sumEdgeWeights es = sum $ map (fedgeWeight . fst) es
-- Our basic graph. Nodes have custom labels. Edges have weight assigned to them.
type FGraph a b = Gr a (FEdge b)
-- | Used for k_i in formula (2)
-- (sum of the weights of the links incident to node i)
newtype NodeWeightSum = NodeWeightSum { unNodeWeightSum :: Double }
deriving (Show, Eq, Ord)
nodeWeightSum :: Context a (FEdge b) -> NodeWeightSum
nodeWeightSum (p, _, _, s) = NodeWeightSum $ sumEdgeWeights $ p <> s
-- | Used for k_i,in in formula (2)
-- (Sum of weights of links from a given 'Node' to nodes in a given 'Community')
newtype NodeComWeightSum = NodeComWeightSum { unNodeComWeightSum :: Double }
deriving (Show, Eq, Ord)
nodeComWeightSum :: Community c -> Context a (FEdge b) -> NodeComWeightSum
nodeComWeightSum com (p, _, _, s) =
NodeComWeightSum $ sumEdgeWeights $ filter (\(_, n) -> n `elem` comNodes com) $ p <> s
newtype NodeNonComWeightSum = NodeNonComWeightSum { unNodeNonComWeightSum :: Double }
deriving (Show, Eq, Ord)
nodeNonComWeightSum :: Community c -> Context a (FEdge b) -> NodeNonComWeightSum
nodeNonComWeightSum com (p, _, _, s) =
NodeNonComWeightSum $ sumEdgeWeights $ filter (\(_, n) -> n `notElem` comNodes com) $ p <> s
-- Probably this structure is better to reduce the number of computations
-- (precompute sum of node weights, which is the k_i variable in formula (2)).
-- type FNode a = (NodeWeightSum, a)
-- fnodeWeightSum :: FNode a -> Double
-- fnodeWeightSum = unNodeWeightSum . fst
-- | This is the m variable in formula (2) of the Louvain paper
newtype GraphWeightSum = GraphWeightSum { unGraphWeightSum :: Double }
deriving (Show, Eq, Ord)
-- | This is the \Sum_in in formula (2) of the Louvain paper
-- (sum of the weights of the links inside C)
newtype InWeightSum = InWeightSum { unInWeightSum :: Double }
deriving (Show, Eq, Ord)
-- | This is the \Sum_tot in formula (2) of the Louvain paper
-- (sum of the weights of the links incident to nodes in C)
newtype TotWeightSum = TotWeightSum { unTotWeightSum :: Double }
deriving (Show, Eq, Ord)
-- | Computed Delta_Q value in (2)
newtype DeltaQ = DeltaQ { unDeltaQ :: Double }
deriving (Show, Eq, Ord)
-- | Computed modularity in (1)
newtype Modularity = Modularity { unModularity :: Double }
deriving (Show, Eq, Ord)
-- | Type for the clusters we will be creating.
newtype Community a = Community { unCommunity :: ([Node], InWeightSum, TotWeightSum, a) }
deriving (Show, Eq, Ord)
comNodes :: Community c -> [Node]
comNodes (Community (ns, _, _, _)) = ns
comInWeightSum :: Community c -> InWeightSum
comInWeightSum (Community (_, inWeightSum, _, _)) = inWeightSum
comTotWeightSum :: Community c -> TotWeightSum
comTotWeightSum (Community (_, _, totWeightSum, _)) = totWeightSum
comLabel :: Community c -> c
comLabel (Community (_, _, _, c)) = c
type CGrNode = Node
type CGrEdge = (InWeightSum, TotWeightSum)
type CGr a = Gr (Community a) ()
graphWeight :: FGraph a b -> GraphWeightSum
graphWeight gr = GraphWeightSum $ 0.5 * ufold (\(_, n, _, _) -> weight' $ context gr n) 0 gr
where
weight' (p, _, _, s) acc = acc + (sumEdgeWeights $ p <> s)
......@@ -19,7 +19,7 @@ import Data.Graph.Inductive
import Data.List (lookup, nub)
import qualified Data.Map.Strict as Map
import Data.Graph.Clustering.FLouvain (FGraph, Weight(..))
import Data.Graph.Clustering.Louvain.Types
data LouvainNode = LouvainNode { l_node_id :: Int
, l_community_id :: Int
......@@ -63,3 +63,17 @@ toFGraph gr = gmap remap gr
edgeMap (w, n) = ((Weight w, ()), n)
p' = map edgeMap p
s' = map edgeMap s
-- | Fixed point with at most n iterations
-- 'Int' argument is the maximal number of iterations to make
-- 'a -> a' is the iterator function
-- 'a -> Bool' is the condition checking function ('True' continues looping, 'False' breaks it)
-- 'a' is the initial value
fixPt :: Int -> (a -> a) -> (a -> Bool) -> a -> a
fixPt 0 iterator _ init = iterator init
fixPt n iterator cond init =
if cond next
then fixPt (n - 1) iterator cond init
else next
where
next = iterator init
......@@ -22,7 +22,7 @@ replaceLNode gr (n, ln) = gmap replacer gr
-- | Find LNode of a node (i.e. a node with label)
lnode :: (Graph gr) => gr a b -> Node -> Maybe (LNode a)
lnode cgr n = case lab cgr n of
lnode gr n = case lab gr n of
Nothing -> Nothing
Just l -> Just (n, l)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment