Commit 0115f3de authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

[temp] tried out to fix algorithm, according to the python code

parent 6d939c69
...@@ -12,3 +12,10 @@ git clone ssh://git@gitlab.iscpif.fr:20022/gargantext/clustering-louvain-cpluspl ...@@ -12,3 +12,10 @@ git clone ssh://git@gitlab.iscpif.fr:20022/gargantext/clustering-louvain-cpluspl
cd clustering-louvain-cplusplus cd clustering-louvain-cplusplus
./install ./install
``` ```
## Running
``` bash
stack --nix build --profile
stack exec --profile -- run-example +RTS -xc
```
...@@ -4,7 +4,7 @@ cabal-version: 1.12 ...@@ -4,7 +4,7 @@ cabal-version: 1.12
-- --
-- see: https://github.com/sol/hpack -- see: https://github.com/sol/hpack
-- --
-- hash: d5768235cb674d3617c860c0be6df203e51373bd36995a8e7df1f612778cdeee -- hash: e7eca8a6c90593bd7b67a5ba8bc4ece3253f1ff4f46353aa6aa75dbc4b6ee829
name: clustering-louvain name: clustering-louvain
version: 0.1.0.0 version: 0.1.0.0
......
...@@ -19,7 +19,7 @@ import Data.Graph.Clustering.FLouvain ...@@ -19,7 +19,7 @@ import Data.Graph.Clustering.FLouvain
-- Example call: -- Example call:
-- putStrLn $ prettify $ iterateOnce cuiller -- putStrLn $ prettify $ iterateOnce cuiller
-- Prelude.map (fst3 . unCommunity . snd) $ labNodes $ iterateOnce karate -- Prelude.map (fst3 . unCommunity . snd) $ labNodes $ iterateOnce karate
iterateOnce :: Show a => Gr a Double -> CGr a iterateOnce :: (Show a, Eq a) => Gr a Double -> CGr a
iterateOnce gr = iteration fgr cgr iterateOnce gr = iteration fgr cgr
where where
fgr = toFGraph gr fgr = toFGraph gr
...@@ -38,10 +38,10 @@ runFLouvain cycles n fgr = do ...@@ -38,10 +38,10 @@ runFLouvain cycles n fgr = do
putStrLn (show fgrNext :: Text) putStrLn (show fgrNext :: Text)
runFLouvain (cycles - 1) n fgrNext runFLouvain (cycles - 1) n fgrNext
runIterations :: Show a => Int -> Gr a Double -> IO (CGr a) runIterations :: (Show a, Eq a) => Int -> Gr a Double -> IO (CGr a)
runIterations n gr = runFIterations n $ toFGraph gr runIterations n gr = runFIterations n $ toFGraph gr
runFIterations :: Show a => Int -> FGraph a () -> IO (CGr a) runFIterations :: (Show a, Eq a) => Int -> FGraph a () -> IO (CGr a)
runFIterations n fgr = do runFIterations n fgr = do
let fgrWeight = graphWeight fgr let fgrWeight = graphWeight fgr
let initCgr = initialCGr fgr let initCgr = initialCGr fgr
...@@ -70,7 +70,7 @@ runFIterations n fgr = do ...@@ -70,7 +70,7 @@ runFIterations n fgr = do
putStrLn $ T.unpack $ show $ modularity fgr iterNextCgr fgrWeight putStrLn $ T.unpack $ show $ modularity fgr iterNextCgr fgrWeight
return iterNextCgr return iterNextCgr
runLouvainFirstStepIterate :: Show a => Int -> Gr a Double -> (Modularity, CGr a) runLouvainFirstStepIterate :: (Show a, Eq a) => Int -> Gr a Double -> (Modularity, CGr a)
runLouvainFirstStepIterate n gr = (modularity fgr cgr m, cgr) runLouvainFirstStepIterate n gr = (modularity fgr cgr m, cgr)
where where
fgr = toFGraph gr fgr = toFGraph gr
......
...@@ -32,6 +32,8 @@ doi:10.1088/1742-5468/2008/10/P10008. ...@@ -32,6 +32,8 @@ doi:10.1088/1742-5468/2008/10/P10008.
-} -}
-- TODO Try modularity instead of delta in the step function
module Data.Graph.Clustering.FLouvain module Data.Graph.Clustering.FLouvain
where where
...@@ -46,12 +48,13 @@ import Data.Graph.Clustering.Louvain.Types ...@@ -46,12 +48,13 @@ import Data.Graph.Clustering.Louvain.Types
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Main Louvain first step iteration function -- | Main Louvain first step iteration function
louvainFirstStepIterate :: Int -> FGraph a b -> CGr a louvainFirstStepIterate :: (Show a, Eq a) => Int -> FGraph a b -> CGr a
louvainFirstStepIterate n gr = fixPt n iterator cond initCGr louvainFirstStepIterate n gr = fixPt n iterator cond initCGr
where where
initCGr = initialCGr gr initCGr = initialCGr gr
grWeight = graphWeight gr grWeight = graphWeight gr
iterator cgr = iteration gr cgr iterator cgr = iteration gr cgr
-- modularity in [-1, 1]
cond cgr = (unModularity $ modularity gr cgr grWeight) < 0.1 cond cgr = (unModularity $ modularity gr cgr grWeight) < 0.1
-- | Second step from the Louvain paper -- given a clustering, create new graph -- | Second step from the Louvain paper -- given a clustering, create new graph
...@@ -60,10 +63,11 @@ louvainSecondStep :: forall a b c. Eq c => FGraph a b -> CGr c -> FGraph (Commun ...@@ -60,10 +63,11 @@ louvainSecondStep :: forall a b c. Eq c => FGraph a b -> CGr c -> FGraph (Commun
louvainSecondStep gr cgr = mkFGraph nodes edges louvainSecondStep gr cgr = mkFGraph nodes edges
where where
nodes :: [LNode (Community c)] nodes :: [LNode (Community c)]
--nodes = filter (\(_, com) -> length (comNodes com) > 0) $ labNodes cgr nodes = filter (\(_, com) -> length (comNodes com) > 0) $ labNodes cgr
nodes = labNodes cgr --nodes = labNodes cgr
edges :: [(Node, Node, Double)] edges :: [(Node, Node, Double)]
edges = concatMap comEdges nodes --edges = concatMap comEdges nodes
edges = filter (\(_, _, w) -> w > 0.0) $ concatMap comEdges nodes
comEdges :: LNode (Community c) -> [(Node, Node, Double)] comEdges :: LNode (Community c) -> [(Node, Node, Double)]
comEdges lnCom = mapMaybe (comToComEdge lnCom) nodes comEdges lnCom = mapMaybe (comToComEdge lnCom) nodes
comToComEdge :: LNode (Community c) -> LNode (Community c) -> Maybe (Node, Node, Double) comToComEdge :: LNode (Community c) -> LNode (Community c) -> Maybe (Node, Node, Double)
...@@ -121,26 +125,34 @@ modularity gr cgr m = Modularity $ coeff * ( ufold modularity' 0.0 cgr ) ...@@ -121,26 +125,34 @@ modularity gr cgr m = Modularity $ coeff * ( ufold modularity' 0.0 cgr )
modularity' (_, _, com, _) acc = acc + component modularity' (_, _, com, _) acc = acc + component
where where
component = (unInWeightSum $ comInWeightSum com) - weightsMul component = (unInWeightSum $ comInWeightSum com) - weightsMul
weightsMul = coeff * ( sum $ map weightsMul' $ comNodes com ) weightsMul = coeff * (unTotWeightSum $ comTotWeightSum com) * (unTotWeightSum $ comTotWeightSum com)
weightsMul' n = (ki n) * (sum $ map ki $ comNodes com) --weightsMul = coeff * ( sum $ map weightsMul' $ comNodes com )
--weightsMul' n = (ki n) * (sum $ map ki $ comNodes com)
-- k_i variable in formula (1) -- k_i variable in formula (1)
ki :: Node -> Double -- ki :: Node -> Double
ki n = unNodeWeightSum $ nodeWeightSum $ context gr n -- ki n = unNodeWeightSum $ nodeWeightSum $ context gr n
type Delta c = Community c -> NodeWeightSum -> NodeComWeightSum -> GraphWeightSum -> DeltaQ type Delta c = Community c -> NodeWeightSum -> NodeComWeightSum -> GraphWeightSum -> DeltaQ
-- | Delta Q function from Louvain paper (2). -- | Delta Q function from Louvain paper (2).
delta :: Delta c delta :: Delta c
delta com ki kiin m = DeltaQ $ acc - dec -- delta com ki kiin m = DeltaQ $ acc - dec
-- where
-- inWeightSum = comInWeightSum com
-- totWeightSum = comTotWeightSum com
-- acc = accL - accR * accR
-- accL = 0.5 * (unInWeightSum inWeightSum + 2.0 * (unNodeComWeightSum kiin)) / (unGraphWeightSum m)
-- accR = 0.5 * (unTotWeightSum totWeightSum + unNodeWeightSum ki) / (unGraphWeightSum m)
-- dec = decL - decM * decM - decR * decR
-- decL = 0.5 * (unInWeightSum inWeightSum) / (unGraphWeightSum m)
-- decM = 0.5 * (unTotWeightSum totWeightSum) / (unGraphWeightSum m)
-- decR = 0.5 * (unNodeWeightSum ki) / (unGraphWeightSum m)
delta com ki kiin m = DeltaQ $ 2.0 * kiin' - totWeightSum * ki' / m'
where where
inWeightSum = comInWeightSum com totWeightSum = unTotWeightSum $ comTotWeightSum com
totWeightSum = comTotWeightSum com kiin' = unNodeComWeightSum kiin
acc = accL - accR * accR ki' = unNodeWeightSum ki
accL = 0.5 * (unInWeightSum inWeightSum + 2.0 * (unNodeComWeightSum kiin)) / (unGraphWeightSum m) m' = unGraphWeightSum m
accR = 0.5 * (unTotWeightSum totWeightSum + unNodeWeightSum ki) / (unGraphWeightSum m)
dec = decL - decM * decM - decR * decR
decL = 0.5 * (unInWeightSum inWeightSum) / (unGraphWeightSum m)
decM = 0.5 * (unTotWeightSum totWeightSum) / (unGraphWeightSum m)
decR = 0.5 * (unNodeWeightSum ki) / (unGraphWeightSum m)
-- | One iteration step takes the graph and existing communities as a graph and -- | One iteration step takes the graph and existing communities as a graph and
-- computes new community graph -- computes new community graph
...@@ -151,7 +163,7 @@ delta com ki kiin m = DeltaQ $ acc - dec ...@@ -151,7 +163,7 @@ delta com ki kiin m = DeltaQ $ acc - dec
-- 'step . context gr . node'' -- 'step . context gr . node''
-- We could avoid the higher complexity, eg. by precomputing the whole graph -- We could avoid the higher complexity, eg. by precomputing the whole graph
-- into a HashMap Node [Edge]. -- into a HashMap Node [Edge].
iteration :: FGraph a b -> CGr c -> CGr c iteration :: (Show c, Eq c) => FGraph a b -> CGr c -> CGr c
iteration gr cs = xdfsFoldWith suc' (\(_, v, _, _) iteration gr cs = xdfsFoldWith suc' (\(_, v, _, _)
-> step gw $ context gr $ v) cs (nodes gr) gr -> step gw $ context gr $ v) cs (nodes gr) gr
where where
...@@ -162,21 +174,32 @@ iteration gr cs = xdfsFoldWith suc' (\(_, v, _, _) ...@@ -162,21 +174,32 @@ iteration gr cs = xdfsFoldWith suc' (\(_, v, _, _)
-- TODO Remember to filter out empty Communities -- TODO Remember to filter out empty Communities
-- | Step for one node. We try re-assign it to a neighbouring community, where -- | Step for one node. We try re-assign it to a neighbouring community, where
-- the increase of modularity for graph will be the largest -- the increase of modularity for graph will be the largest
step :: forall a b c. GraphWeightSum -> CFunFold a (FEdge b) (CGr c) step :: forall a b c. (Show c, Eq c) => GraphWeightSum -> CFunFold a (FEdge b) (CGr c)
step gw ctx@(_, v, _, _) cgr = newCgr step gw ctx@(_, v, _, _) cgr = trace ("step v: " <> show v :: Text) $ newCgr
where where
stepStr = "[step, node: " <> show v <> "]"
newCgr = case mNc of newCgr = case mNc of
Nothing -> cgr Nothing -> cgr
Just nc -> Just nc ->
if bestFitdq > 0.0 then case ncs of
[] -> cgr
_ ->
let (DeltaQ deltaCurrent) = delta' nc
in
if bestFitdq > 0.0 && bestFitdq > deltaCurrent then
let newBestFitCom = makeMove Into bestFitCom let newBestFitCom = makeMove Into bestFitCom
newNc = makeMove OutOf nc newNc = makeMove OutOf nc
cgrWithNewNc = replaceLNode cgr newNc
replaced = replaceLNode cgrWithNewNc newBestFitCom
in in
replaceLNode (replaceLNode cgr newNc) newBestFitCom trace (stepStr <> " replaced: " <> show replaced <> "\n" <>
stepStr <> " deltaCurrent: " <> show deltaCurrent :: Text) $ replaced
else else
cgr cgr
(bestFitCom, DeltaQ bestFitdq) = (bestFitCom, DeltaQ bestFitdq) =
trace (stepStr <> " bestFit, deltas: " <> show (map (\((n, _), d) -> "Com: " <> show n <> ", delta: " <> show d :: Text) deltas) :: Text) $
maximumBy (\(_, deltaq1) (_, deltaq2) -> compare deltaq1 deltaq2) deltas maximumBy (\(_, deltaq1) (_, deltaq2) -> compare deltaq1 deltaq2) deltas
mNc :: Maybe (LNode (Community c)) mNc :: Maybe (LNode (Community c))
...@@ -202,12 +225,18 @@ step gw ctx@(_, v, _, _) cgr = newCgr ...@@ -202,12 +225,18 @@ step gw ctx@(_, v, _, _) cgr = newCgr
deltas = map (\c -> (c, delta' c)) ncs deltas = map (\c -> (c, delta' c)) ncs
delta' :: LNode (Community c) -> DeltaQ delta' :: LNode (Community c) -> DeltaQ
delta' com = delta (llab com) ki kiin gw delta' com = trace (stepStr <> " com: " <> show com <>
", ki: " <> show ki <>
", kiin: " <> show kiin <>
", gw: " <> show gw <>
", delta: " <> show d) d
where where
-- k_i,in variable in formula (2) -- k_i,in variable in formula (2)
kiin :: NodeComWeightSum kiin :: NodeComWeightSum
kiin = nodeComWeightSum (llab com) ctx kiin = nodeComWeightSum (llab com) ctx
d = delta (llab com) ki kiin gw
-- TODO Compute \Delta Q (gain of moving node v into Community C) which consists of: -- TODO Compute \Delta Q (gain of moving node v into Community C) which consists of:
-- - Community InWeightSum -- - Community InWeightSum
-- - Community TotWeightSum -- - Community TotWeightSum
...@@ -231,11 +260,11 @@ nodeCommunity n cgr = head (filter f $ labNodes cgr) ...@@ -231,11 +260,11 @@ nodeCommunity n cgr = head (filter f $ labNodes cgr)
f (_, com) = n `elem` comNodes com f (_, com) = n `elem` comNodes com
-- | Find 'LNode's of 'Community' graph neighbouring a given node -- | Find 'LNode's of 'Community' graph neighbouring a given node
nodeNeighbours :: Node -> CGr c -> [LNode (Community c)] nodeNeighbours :: Eq c => Node -> CGr c -> [LNode (Community c)]
nodeNeighbours n cgr = nodeNeighbours n cgr =
case nodeCommunity n cgr of case nodeCommunity n cgr of
Nothing -> [] Nothing -> []
Just (cn, _) -> mapMaybe (lnode cgr) (neighbors cgr cn) Just (cn, _) -> DL.nub $ mapMaybe (lnode cgr) (neighbors cgr cn)
-- | Find 'Ajd CGrEdge's of 'Community' graph neighbouring a given node -- | Find 'Ajd CGrEdge's of 'Community' graph neighbouring a given node
-- nodeLNeighbours :: Node -> CGr -> Adj CGrEdge -- nodeLNeighbours :: Node -> CGr -> Adj CGrEdge
...@@ -262,15 +291,20 @@ moveNodeWithContext ctx@(_, n, _, _) direction com@(Community (ns, inwsum, totws ...@@ -262,15 +291,20 @@ moveNodeWithContext ctx@(_, n, _, _) direction com@(Community (ns, inwsum, totws
(newInWsum, newTotWsum) = (newInWsum, newTotWsum) =
case direction of case direction of
Into -> (unInWeightSum inwsum + unNodeComWeightSum cws, unTotWeightSum totwsum - unNodeComWeightSum cws + unNodeNonComWeightSum nws) --Into -> (unInWeightSum inwsum + unNodeComWeightSum cws, unTotWeightSum totwsum - unNodeComWeightSum cws + unNodeNonComWeightSum nws)
OutOf -> (unInWeightSum inwsum - unNodeComWeightSum cws, unTotWeightSum totwsum + unNodeComWeightSum cws - unNodeNonComWeightSum nws) --OutOf -> (unInWeightSum inwsum - unNodeComWeightSum cws, unTotWeightSum totwsum + unNodeComWeightSum cws - unNodeNonComWeightSum nws)
Into -> (unInWeightSum inwsum + unNodeComWeightSum cws, unTotWeightSum totwsum + unNodeWeightSum nws)
OutOf -> (unInWeightSum inwsum - unNodeComWeightSum cws, unTotWeightSum totwsum - unNodeWeightSum nws)
nws :: NodeWeightSum
nws = nodeWeightSum ctx
-- sum of edge weights inside community -- sum of edge weights inside community
cws :: NodeComWeightSum cws :: NodeComWeightSum
cws = nodeComWeightSum com ctx cws = nodeComWeightSum com ctx
nws :: NodeNonComWeightSum -- nws :: NodeNonComWeightSum
nws = nodeNonComWeightSum com ctx -- nws = nodeNonComWeightSum com ctx
{- {-
......
...@@ -20,6 +20,11 @@ ...@@ -20,6 +20,11 @@
# resolver: https://example.com/snapshots/2018-01-01.yaml # resolver: https://example.com/snapshots/2018-01-01.yaml
resolver: lts-14.27 resolver: lts-14.27
nix:
enable: true
add-gc-roots: true
shell-file: build-shell.nix
# User packages to be built. # User packages to be built.
# Various formats can be used as shown in the example below. # Various formats can be used as shown in the example below.
# #
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment