Commit 1dd094f7 authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge branch 'flouvain' of ssh://gitlab.iscpif.fr:20022/gargantext/clustering-louvain into flouvain

parents 2659cf0e b97763c7
......@@ -88,13 +88,26 @@ fedgeWeight :: FEdge b -> Double
fedgeWeight = unWeight . fst
type FGraph a b = Gr a (FEdge b)
-- | This is the \Sum_in in formula (2) of Louvain paper
-- Used for k_i in formula (2)
newtype NodeWeightSum = NodeWeightSum { unNodeWeightSum :: Double }
-- Used for k_i,in in formula (2)
newtype NodeComWeightSum = NodeComWeightSum { unNodeComWeightSum :: Double }
-- Probably this structure is better to reduce the number of computations
-- (precompute sum of node weights, which is the k_i variable in formula (2)).
-- type FNode a = (NodeWeightSum, a)
-- fnodeWeightSum :: FNode a -> Double
-- fnodeWeightSum = unNodeWeightSum . fst
-- | This is the m variable in formula (2) of the Louvain paper
newtype GraphWeightSum = GraphWeightSum { unGraphWeightSum :: Double }
-- | This is the \Sum_in in formula (2) of the Louvain paper
newtype InWeightSum = InWeightSum { unInWeightSum :: Double }
instance Num InWeightSum where
(InWeightSum w1) + (InWeightSum w2) = InWeightSum (w1 + w2)
(InWeightSum w1) * (InWeightSum w2) = InWeightSum (w1 * w2)
-- | This is the \Sum_tot in formula (2) of Louvain paper
-- | This is the \Sum_tot in formula (2) of the Louvain paper
newtype TotWeightSum = TotWeightSum { unTotWeightSum :: Double }
-- | Computed Delta_Q value in (2)
newtype DeltaQ = DeltaQ { unDeltaQ :: Double }
-- | Type for the clusters we will be creating.
newtype Community = Community { unCommunity :: ([Node], InWeightSum, TotWeightSum) }
comNodes :: Community -> [Node]
comNodes (Community (ns, _, _)) = ns
......@@ -108,6 +121,11 @@ type CGrEdge = (InWeightSum, TotWeightSum)
type CGr = Gr Community CGrEdge
graphWeight :: FGraph a b -> GraphWeightSum
graphWeight gr = GraphWeightSum $ ufold weight' 0 gr
where
weight' (p, _, _, s) acc = acc + (sum $ map (fedgeWeight . fst) $ p <> s)
-- ALGORITHM
......@@ -115,36 +133,46 @@ type CGr = Gr Community CGrEdge
modularity :: Gr a b -> CGr -> Double
modularity gr cgr = 0.0
type Delta = Community -> Weight -> Weight -> Weight -> Double
type Delta = Community -> NodeWeightSum -> NodeComWeightSum -> GraphWeightSum -> DeltaQ
-- | Delta Q function from Louvain paper (2).
delta :: Delta
delta com ki kin m = acc - dec
delta com ki kin m = DeltaQ $ acc - dec
where
inWeightSum = comInWeightSum com
totWeightSum = comTotWeightSum com
acc = accL - accR*accR
accL = 0.5*(unInWeightSum inWeightSum + 2.0*(unWeight kin)) / (unWeight m)
accR = 0.5*(unTotWeightSum totWeightSum + unWeight ki) / (unWeight m)
dec = decL - decM*decM - decR*decR
decL = 0.5*(unInWeightSum inWeightSum) / (unWeight m)
decM = 0.5*(unTotWeightSum totWeightSum) / (unWeight m)
decR = 0.5*(unWeight ki) / (unWeight m)
accL = 0.5 * (unInWeightSum inWeightSum + 2.0 * (unNodeComWeightSum kin)) / (unGraphWeightSum m)
accR = 0.5 * (unTotWeightSum totWeightSum + unNodeWeightSum ki) / (unGraphWeightSum m)
dec = decL - decM * decM - decR * decR
decL = 0.5 * (unInWeightSum inWeightSum) / (unGraphWeightSum m)
decM = 0.5 * (unTotWeightSum totWeightSum) / (unGraphWeightSum m)
decR = 0.5 * (unNodeWeightSum ki) / (unGraphWeightSum m)
-- | One iteration step takes the graph and existing communities as a graph and
-- computes new community graph
-- NOTE: xdfsFoldWith only iterates with the defined Context. In each step of
-- the algorithm, we need full node information however (i.e. all edges
-- connected to a node), so instead of calling just 'step' we are forced to call via:
-- 'step . context gr . node''
-- We could avoid the higher complexity, eg. by precomputing the whole graph
-- into a HashMap Node [Edge].
iteration :: FGraph a b -> CGr -> CGr
iteration gr cs = xdfsFoldWith suc' step cs (nodes gr) gr
iteration gr cs = xdfsFoldWith suc' (\(_, v, _, _) -> step gw $ context gr $ v) cs (nodes gr) gr
where
gw = graphWeight gr
--weightSum = ufold weightSum' 0 gr
--weightSum' (p, v, l, s) acc = acc + (sum )
-- TODO Remember to filter out empty Communities
-- | Step for one node. We try re-assign it to a neighbouring community, where
-- the increase of modularity for graph will be the largest
step :: CFunFold a (FEdge b) CGr
step (p, v, l, s) cgr = cgr
step :: GraphWeightSum -> CFunFold a (FEdge b) CGr
step gw (p, v, l, s) cgr = cgr
where
mNc :: Maybe (LNode Community)
mNc = nodeCommunity v cgr
ncs :: [LNode Community]
ncs = nodeNeighbours v cgr
-- We move node from community nc into ncs
moves :: Maybe (LNode Community, [LNode Community])
......@@ -156,6 +184,17 @@ step (p, v, l, s) cgr = cgr
makeMove :: Direction -> LNode Community -> LNode Community
makeMove direction (cn, c) = (cn, moveNodeWithNeighbours (p <> s) v direction c)
-- k_i variable in formula (2)
ki :: NodeWeightSum
ki = NodeWeightSum $ sum $ map (fedgeWeight . fst) $ p <> s
-- k_i,in variable in formula (2)
kiin :: Maybe NodeComWeightSum
kiin = case mNc of
Nothing -> Nothing
Just (_, com) -> Just $
NodeComWeightSum $ sum $ map (fedgeWeight . fst) $ filter (\(l, n) -> n `elem` (comNodes com)) s
--bestFit = maximumBy
-- TODO Compute \Delta Q (gain of moving node v into Community C) which consists of:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment