Commit 0115f3de authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

[temp] tried out to fix algorithm, according to the python code

parent 6d939c69
......@@ -12,3 +12,10 @@ git clone ssh://git@gitlab.iscpif.fr:20022/gargantext/clustering-louvain-cpluspl
cd clustering-louvain-cplusplus
./install
```
## Running
``` bash
stack --nix build --profile
stack exec --profile -- run-example +RTS -xc
```
......@@ -4,7 +4,7 @@ cabal-version: 1.12
--
-- see: https://github.com/sol/hpack
--
-- hash: d5768235cb674d3617c860c0be6df203e51373bd36995a8e7df1f612778cdeee
-- hash: e7eca8a6c90593bd7b67a5ba8bc4ece3253f1ff4f46353aa6aa75dbc4b6ee829
name: clustering-louvain
version: 0.1.0.0
......
......@@ -19,7 +19,7 @@ import Data.Graph.Clustering.FLouvain
-- Example call:
-- putStrLn $ prettify $ iterateOnce cuiller
-- Prelude.map (fst3 . unCommunity . snd) $ labNodes $ iterateOnce karate
iterateOnce :: Show a => Gr a Double -> CGr a
iterateOnce :: (Show a, Eq a) => Gr a Double -> CGr a
iterateOnce gr = iteration fgr cgr
where
fgr = toFGraph gr
......@@ -38,10 +38,10 @@ runFLouvain cycles n fgr = do
putStrLn (show fgrNext :: Text)
runFLouvain (cycles - 1) n fgrNext
runIterations :: Show a => Int -> Gr a Double -> IO (CGr a)
runIterations :: (Show a, Eq a) => Int -> Gr a Double -> IO (CGr a)
runIterations n gr = runFIterations n $ toFGraph gr
runFIterations :: Show a => Int -> FGraph a () -> IO (CGr a)
runFIterations :: (Show a, Eq a) => Int -> FGraph a () -> IO (CGr a)
runFIterations n fgr = do
let fgrWeight = graphWeight fgr
let initCgr = initialCGr fgr
......@@ -70,7 +70,7 @@ runFIterations n fgr = do
putStrLn $ T.unpack $ show $ modularity fgr iterNextCgr fgrWeight
return iterNextCgr
runLouvainFirstStepIterate :: Show a => Int -> Gr a Double -> (Modularity, CGr a)
runLouvainFirstStepIterate :: (Show a, Eq a) => Int -> Gr a Double -> (Modularity, CGr a)
runLouvainFirstStepIterate n gr = (modularity fgr cgr m, cgr)
where
fgr = toFGraph gr
......
......@@ -32,6 +32,8 @@ doi:10.1088/1742-5468/2008/10/P10008.
-}
-- TODO Try modularity instead of delta in the step function
module Data.Graph.Clustering.FLouvain
where
......@@ -46,12 +48,13 @@ import Data.Graph.Clustering.Louvain.Types
------------------------------------------------------------------------
-- | Main Louvain first step iteration function
louvainFirstStepIterate :: Int -> FGraph a b -> CGr a
louvainFirstStepIterate :: (Show a, Eq a) => Int -> FGraph a b -> CGr a
louvainFirstStepIterate n gr = fixPt n iterator cond initCGr
where
initCGr = initialCGr gr
grWeight = graphWeight gr
iterator cgr = iteration gr cgr
-- modularity in [-1, 1]
cond cgr = (unModularity $ modularity gr cgr grWeight) < 0.1
-- | Second step from the Louvain paper -- given a clustering, create new graph
......@@ -60,10 +63,11 @@ louvainSecondStep :: forall a b c. Eq c => FGraph a b -> CGr c -> FGraph (Commun
louvainSecondStep gr cgr = mkFGraph nodes edges
where
nodes :: [LNode (Community c)]
--nodes = filter (\(_, com) -> length (comNodes com) > 0) $ labNodes cgr
nodes = labNodes cgr
nodes = filter (\(_, com) -> length (comNodes com) > 0) $ labNodes cgr
--nodes = labNodes cgr
edges :: [(Node, Node, Double)]
edges = concatMap comEdges nodes
--edges = concatMap comEdges nodes
edges = filter (\(_, _, w) -> w > 0.0) $ concatMap comEdges nodes
comEdges :: LNode (Community c) -> [(Node, Node, Double)]
comEdges lnCom = mapMaybe (comToComEdge lnCom) nodes
comToComEdge :: LNode (Community c) -> LNode (Community c) -> Maybe (Node, Node, Double)
......@@ -121,26 +125,34 @@ modularity gr cgr m = Modularity $ coeff * ( ufold modularity' 0.0 cgr )
modularity' (_, _, com, _) acc = acc + component
where
component = (unInWeightSum $ comInWeightSum com) - weightsMul
weightsMul = coeff * ( sum $ map weightsMul' $ comNodes com )
weightsMul' n = (ki n) * (sum $ map ki $ comNodes com)
weightsMul = coeff * (unTotWeightSum $ comTotWeightSum com) * (unTotWeightSum $ comTotWeightSum com)
--weightsMul = coeff * ( sum $ map weightsMul' $ comNodes com )
--weightsMul' n = (ki n) * (sum $ map ki $ comNodes com)
-- k_i variable in formula (1)
ki :: Node -> Double
ki n = unNodeWeightSum $ nodeWeightSum $ context gr n
-- ki :: Node -> Double
-- ki n = unNodeWeightSum $ nodeWeightSum $ context gr n
type Delta c = Community c -> NodeWeightSum -> NodeComWeightSum -> GraphWeightSum -> DeltaQ
-- | Delta Q function from Louvain paper (2).
delta :: Delta c
delta com ki kiin m = DeltaQ $ acc - dec
-- delta com ki kiin m = DeltaQ $ acc - dec
-- where
-- inWeightSum = comInWeightSum com
-- totWeightSum = comTotWeightSum com
-- acc = accL - accR * accR
-- accL = 0.5 * (unInWeightSum inWeightSum + 2.0 * (unNodeComWeightSum kiin)) / (unGraphWeightSum m)
-- accR = 0.5 * (unTotWeightSum totWeightSum + unNodeWeightSum ki) / (unGraphWeightSum m)
-- dec = decL - decM * decM - decR * decR
-- decL = 0.5 * (unInWeightSum inWeightSum) / (unGraphWeightSum m)
-- decM = 0.5 * (unTotWeightSum totWeightSum) / (unGraphWeightSum m)
-- decR = 0.5 * (unNodeWeightSum ki) / (unGraphWeightSum m)
delta com ki kiin m = DeltaQ $ 2.0 * kiin' - totWeightSum * ki' / m'
where
inWeightSum = comInWeightSum com
totWeightSum = comTotWeightSum com
acc = accL - accR * accR
accL = 0.5 * (unInWeightSum inWeightSum + 2.0 * (unNodeComWeightSum kiin)) / (unGraphWeightSum m)
accR = 0.5 * (unTotWeightSum totWeightSum + unNodeWeightSum ki) / (unGraphWeightSum m)
dec = decL - decM * decM - decR * decR
decL = 0.5 * (unInWeightSum inWeightSum) / (unGraphWeightSum m)
decM = 0.5 * (unTotWeightSum totWeightSum) / (unGraphWeightSum m)
decR = 0.5 * (unNodeWeightSum ki) / (unGraphWeightSum m)
totWeightSum = unTotWeightSum $ comTotWeightSum com
kiin' = unNodeComWeightSum kiin
ki' = unNodeWeightSum ki
m' = unGraphWeightSum m
-- | One iteration step takes the graph and existing communities as a graph and
-- computes new community graph
......@@ -151,7 +163,7 @@ delta com ki kiin m = DeltaQ $ acc - dec
-- 'step . context gr . node''
-- We could avoid the higher complexity, eg. by precomputing the whole graph
-- into a HashMap Node [Edge].
iteration :: FGraph a b -> CGr c -> CGr c
iteration :: (Show c, Eq c) => FGraph a b -> CGr c -> CGr c
iteration gr cs = xdfsFoldWith suc' (\(_, v, _, _)
-> step gw $ context gr $ v) cs (nodes gr) gr
where
......@@ -162,25 +174,36 @@ iteration gr cs = xdfsFoldWith suc' (\(_, v, _, _)
-- TODO Remember to filter out empty Communities
-- | Step for one node. We try re-assign it to a neighbouring community, where
-- the increase of modularity for graph will be the largest
step :: forall a b c. GraphWeightSum -> CFunFold a (FEdge b) (CGr c)
step gw ctx@(_, v, _, _) cgr = newCgr
step :: forall a b c. (Show c, Eq c) => GraphWeightSum -> CFunFold a (FEdge b) (CGr c)
step gw ctx@(_, v, _, _) cgr = trace ("step v: " <> show v :: Text) $ newCgr
where
stepStr = "[step, node: " <> show v <> "]"
newCgr = case mNc of
Nothing -> cgr
Just nc ->
if bestFitdq > 0.0 then
let newBestFitCom = makeMove Into bestFitCom
newNc = makeMove OutOf nc
in
replaceLNode (replaceLNode cgr newNc) newBestFitCom
else
cgr
case ncs of
[] -> cgr
_ ->
let (DeltaQ deltaCurrent) = delta' nc
in
if bestFitdq > 0.0 && bestFitdq > deltaCurrent then
let newBestFitCom = makeMove Into bestFitCom
newNc = makeMove OutOf nc
cgrWithNewNc = replaceLNode cgr newNc
replaced = replaceLNode cgrWithNewNc newBestFitCom
in
trace (stepStr <> " replaced: " <> show replaced <> "\n" <>
stepStr <> " deltaCurrent: " <> show deltaCurrent :: Text) $ replaced
else
cgr
(bestFitCom, DeltaQ bestFitdq) =
trace (stepStr <> " bestFit, deltas: " <> show (map (\((n, _), d) -> "Com: " <> show n <> ", delta: " <> show d :: Text) deltas) :: Text) $
maximumBy (\(_, deltaq1) (_, deltaq2) -> compare deltaq1 deltaq2) deltas
mNc :: Maybe (LNode (Community c))
mNc = nodeCommunity v cgr
mNc = nodeCommunity v cgr
ncs :: [LNode (Community c)]
ncs = nodeNeighbours v cgr
......@@ -202,12 +225,18 @@ step gw ctx@(_, v, _, _) cgr = newCgr
deltas = map (\c -> (c, delta' c)) ncs
delta' :: LNode (Community c) -> DeltaQ
delta' com = delta (llab com) ki kiin gw
delta' com = trace (stepStr <> " com: " <> show com <>
", ki: " <> show ki <>
", kiin: " <> show kiin <>
", gw: " <> show gw <>
", delta: " <> show d) d
where
-- k_i,in variable in formula (2)
kiin :: NodeComWeightSum
kiin = nodeComWeightSum (llab com) ctx
d = delta (llab com) ki kiin gw
-- TODO Compute \Delta Q (gain of moving node v into Community C) which consists of:
-- - Community InWeightSum
-- - Community TotWeightSum
......@@ -231,11 +260,11 @@ nodeCommunity n cgr = head (filter f $ labNodes cgr)
f (_, com) = n `elem` comNodes com
-- | Find 'LNode's of 'Community' graph neighbouring a given node
nodeNeighbours :: Node -> CGr c -> [LNode (Community c)]
nodeNeighbours :: Eq c => Node -> CGr c -> [LNode (Community c)]
nodeNeighbours n cgr =
case nodeCommunity n cgr of
Nothing -> []
Just (cn, _) -> mapMaybe (lnode cgr) (neighbors cgr cn)
Just (cn, _) -> DL.nub $ mapMaybe (lnode cgr) (neighbors cgr cn)
-- | Find 'Ajd CGrEdge's of 'Community' graph neighbouring a given node
-- nodeLNeighbours :: Node -> CGr -> Adj CGrEdge
......@@ -262,15 +291,20 @@ moveNodeWithContext ctx@(_, n, _, _) direction com@(Community (ns, inwsum, totws
(newInWsum, newTotWsum) =
case direction of
Into -> (unInWeightSum inwsum + unNodeComWeightSum cws, unTotWeightSum totwsum - unNodeComWeightSum cws + unNodeNonComWeightSum nws)
OutOf -> (unInWeightSum inwsum - unNodeComWeightSum cws, unTotWeightSum totwsum + unNodeComWeightSum cws - unNodeNonComWeightSum nws)
--Into -> (unInWeightSum inwsum + unNodeComWeightSum cws, unTotWeightSum totwsum - unNodeComWeightSum cws + unNodeNonComWeightSum nws)
--OutOf -> (unInWeightSum inwsum - unNodeComWeightSum cws, unTotWeightSum totwsum + unNodeComWeightSum cws - unNodeNonComWeightSum nws)
Into -> (unInWeightSum inwsum + unNodeComWeightSum cws, unTotWeightSum totwsum + unNodeWeightSum nws)
OutOf -> (unInWeightSum inwsum - unNodeComWeightSum cws, unTotWeightSum totwsum - unNodeWeightSum nws)
nws :: NodeWeightSum
nws = nodeWeightSum ctx
-- sum of edge weights inside community
cws :: NodeComWeightSum
cws = nodeComWeightSum com ctx
nws :: NodeNonComWeightSum
nws = nodeNonComWeightSum com ctx
-- nws :: NodeNonComWeightSum
-- nws = nodeNonComWeightSum com ctx
{-
......
......@@ -20,6 +20,11 @@
# resolver: https://example.com/snapshots/2018-01-01.yaml
resolver: lts-14.27
nix:
enable: true
add-gc-roots: true
shell-file: build-shell.nix
# User packages to be built.
# Various formats can be used as shown in the example below.
#
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment