Commit cdba50dc authored by Alexandre Delanoë's avatar Alexandre Delanoë

[NEW] using more rec with FGL.

parent f9292434
......@@ -4,7 +4,7 @@ cabal-version: 1.12
--
-- see: https://github.com/sol/hpack
--
-- hash: f5efad8af0a555a91b2aa7b955eb00996a2391daa5222b63ae306ad40e22121c
-- hash: 92999c9be9048a97745703a9575dcb38cea579137bd2a893acf77c708290b84e
name: clustering-louvain
version: 0.1.0.0
......@@ -13,7 +13,7 @@ description: Please see README.md
category: Data
author: Alexandre Delanoë
maintainer: alexandre.delanoe+louvain@iscpif.fr
copyright: Copyright: (c) 2017-2018: see git logs and README
copyright: Copyright: (c) 2017-present: see git logs and README
license: BSD3
license-file: LICENSE
build-type: Simple
......@@ -29,8 +29,10 @@ library
, fgl
, foldl
, hxt
, protolude
, text
, turtle
, vector
exposed-modules:
Data.Graph.Clustering.Louvain
Data.Graph.Clustering.Louvain.Utils
......
......@@ -6,7 +6,7 @@ category: Data
author: Alexandre Delanoë
maintainer: alexandre.delanoe+louvain@iscpif.fr
copyright:
- ! 'Copyright: (c) 2017-2018: see git logs and README'
- ! 'Copyright: (c) 2017-present: see git logs and README'
license: BSD3
homepage:
ghc-options: -Wall
......@@ -14,6 +14,10 @@ dependencies:
- extra
- text
- containers
- vector
- protolude
#- union-find
#- mtl
library:
source-dirs: src
ghc-options:
......
......@@ -30,6 +30,7 @@ eU = [
,(4,5,1)
,(5,4,1)
-- ,(6,7,1)
]
eD :: [LEdge Double]
......@@ -55,6 +56,10 @@ gU = mkGraph' eU
-- 4:()->[(1,1),(1,3),(1,5)]
-- 5:()->[(1,4)]
cuiller :: Gr () Double
cuiller = gU
-- Visual representation:
--
-- 2
......
......@@ -9,84 +9,229 @@ Portability : POSIX
-}
{-# LANGUAGE ConstrainedClassMethods #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE NoImplicitPrelude #-}
module Data.Graph.Clustering.HLouvain
module Data.Graph.Clustering.HLouvain
where
import Data.Ord (Down(..))
import Data.List (sortOn, foldl')
import Data.Graph.Inductive
-- import Data.Graph.Clustering.Louvain
import Protolude
import Data.Set (Set)
import qualified Data.Set as Set
import Data.Ord (Down(..), compare)
import Data.List (foldl', maximumBy)
import Data.Graph.Inductive (DynGraph, Node, Graph)
import qualified Data.Graph.Inductive as G
------------------------------------------------------------------------
-- | Main Tools
------------------------------------------------------------------------
-- | Glustering (instead of clustering) since we are working on the glue
-- between nodes to create "glusters" of nodes
glustering :: DynGraph gr
-- between nodes to create "glusters" of nodes: this API will enable the
-- choice of "glue" (as a function) for the glustering.
glustering :: (DynGraph gr, Eq b)
=> gr a b
-> [Module]
glustering g = foldl' (\a b -> modulare g b a) [] modules
where
modules :: [Module]
modules = map (\n -> Module n [] (modularity' g [n])) ns
-> [[Set Node]]
glustering g = map (map nodesD) $ glusteringD g
glusteringD :: (DynGraph gr, Eq b)
=> gr a b
-> [[Dendogram]]
glusteringD g = map (glusteringC g) (comp g)
glusteringC :: DynGraph gr
=> gr a b
-> [Node]
-> [Dendogram]
glusteringC g ns = foldl' (\ds n -> insert g (toDendo g n) ds) [] ns
------------------------------------------------------------------------
type Component = [Node]
ns :: [Node]
ns = sortOn (Down . (deg g)) $ nodes g
comp :: (DynGraph gr, Eq b) => gr a b -> [Component]
comp g' = map sortNodes cs
where
sortNodes ns = case head $ sortOn (Down . (G.deg g)) ns of
Nothing -> []
-- sort nodes by depth first to speed up the glustering
Just n -> G.dfs [n] g
cs = G.components g
g = G.undir g'
------------------------------------------------------------------------
-- | Definitions
------------------------------------------------------------------------
data Module = Module { node :: !Node
, modules :: ![Module]
, score :: !Double
}
deriving (Show)
-- | Divide en conquer method
modulare :: DynGraph gr
=> gr a b
-> Module
-> [Module]
-> [Module]
modulare _ m [] = [m]
modulare _ m [n] = sortOn (Down . score) [m,n]
modulare g m0@(Module n ms0 _) m1@(m@(Module n1 _ _):ms)
| score1 > score2 = [Module n1 (modulare g m0 ms0) score1] ++ ms
| otherwise = [m] ++ (modulare g m0 ms)
where
score1 = sum $ map (modularity g) [m0, m]
score2 = sum $ map (modularity g) ([m0]++ms)
-- | need to flatten the Tree of modules to get a list of nodes
flat :: Module -> [Node]
flat (Module n [] _) = [n]
flat (Module n ns _) = [n] ++ (concat $ map flat ns)
-- | TODO Sum or Union -> QuickCheck
modularity :: DynGraph gr => gr a b -> Module -> Double
modularity g m = modularity' g (flat m)
modularity' :: DynGraph gr => gr a b -> [Node] -> Double
modularity' gr ns = coverage - edgeDensity
where
coverage :: Double
coverage = sizeSubGraph / sizeAllGraph
where
sizeSubGraph :: Double
sizeSubGraph = fromIntegral ( size $ subgraph ns gr )
sizeAllGraph :: Double
sizeAllGraph = fromIntegral (size gr)
edgeDensity :: Double
edgeDensity = (sum (Prelude.map (\node -> (degree node) / links ) ns)) ** 2
where
degree :: Node -> Double
degree node = fromIntegral (deg gr node)
links :: Double
links = fromIntegral (2 * (size gr))
-- | A dendogram is a Tree whose leaf is one gram only and parents nodes
-- are glusters with score of glue to build it.
data Dendogram = Gram Node
| Dendogram { gluster :: ![Dendogram]
, score :: !Double
}
deriving (Show, Eq)
--------------------------------------------------------------------------
insert :: DynGraph gr
=> gr n e
-> Dendogram
-> [Dendogram]
-> [Dendogram]
insert g d ds = maximumBy (\a b -> compare (hasScore g a) (hasScore g b))
$ insertD g d ds
insertD :: DynGraph gr
=> gr n e
-> Dendogram
-> [Dendogram]
-> [[Dendogram]]
insertD _ d [] = [[d]]
insertD g d (x:xs) = [[inDendo g d x] ++ xs]
++ (map (\y -> [x] ++ y) $ insertD g d xs)
--------------------------------------------------------------------------
toDendo :: DynGraph gr
=> gr n e
-> Node
-> Dendogram
toDendo g n = Dendogram [Gram n] s
where
s = modularity g (Set.singleton n)
toDendoD :: DynGraph gr
=> gr n e
-> Dendogram
-> Dendogram
toDendoD g d@(Gram _) = Dendogram [d] (hasScore g d)
toDendoD _ d@(Dendogram _ _) = d
--------------------------------------------------------------------------
inDendo :: DynGraph gr
=> gr n e
-> Dendogram
-> Dendogram
-> Dendogram
inDendo g d1@(Gram _) d2@(Gram _) = Dendogram n' (hasScore g n')
where
ds = inDendo g d1 (toDendoD g d2)
ss = hasScore g ds
n' | hasScore g d1 + hasScore g d2 > ss = [d1,d2]
| otherwise = [ds]
inDendo g n@(Gram _) d@(Dendogram _ _) = inDendo g (toDendoD g n) d
inDendo g d@(Dendogram _ _) n@(Gram _) = inDendo g n d
inDendo g d1@(Dendogram _ _) d2@(Dendogram _ _) = Dendogram n' (hasScore g n')
where
ds = inDendo g d1 d2
ss = hasScore g ds
n' | hasScore g [d1, d2] > ss = [d1,d2]
| otherwise = [ds]
-------------------------------------------------------------------
nodesD :: Dendogram -> Set Node
nodesD (Gram n) = Set.singleton n
nodesD (Dendogram [] _) = Set.empty
nodesD (Dendogram s _) = Set.unions $ map nodesD s
deg :: Graph gr => gr a b -> Dendogram -> Int
deg g d = Set.size (neighbors g d)
neighbors :: Graph gr => gr a b -> Dendogram -> Set Node
neighbors g d = exclusion inNodes ouNodes
where
inNodes :: Set Node
inNodes = nodesD d
ouNodes :: Set Node
ouNodes = Set.unions $ map (neighbors' g) (Set.toList inNodes)
neighbors' :: Graph gr => gr a b -> Node -> Set Node
neighbors' g' n' = Set.fromList $ G.neighbors g' n'
class HasScore a
where
hasScore :: DynGraph gr => gr n e -> a -> Double
instance HasScore Dendogram
where
hasScore :: DynGraph gr => gr a b -> Dendogram -> Double
hasScore g m = modularity g (nodesD m)
instance HasScore [Dendogram]
where
hasScore g ds = modularity g (Set.unions $ map nodesD ds)
modulare :: DynGraph gr => gr a b -> Set Node -> Double
modulare = modularity
modularity :: DynGraph gr => gr a b -> Set Node -> Double
modularity gr ns = coverage - edgeDensity
where
coverage :: Double
coverage = sizeSubGraph / sizeAllGraph
where
sizeSubGraph :: Double
sizeSubGraph = fromIntegral ( G.size $ subgraph ns gr )
sizeAllGraph :: Double
sizeAllGraph = fromIntegral (G.size gr)
edgeDensity :: Double
edgeDensity = (sum (Set.map (\node -> (degree node) / links ) ns)) ** 2
where
degree :: Node -> Double
degree node = fromIntegral (G.deg gr node)
links :: Double
links = fromIntegral (2 * (G.size gr))
subgraph :: DynGraph gr => Set Node -> gr a b -> gr a b
subgraph ns = G.subgraph (Set.toList ns)
exclusion :: Ord a => Set a -> Set a -> Set a
exclusion a b = (Set.\\) b a
{-
Union Find algorithm could be used but Inductive Functional Graph
exactly have been created by Martin Erwig to avoid such style of
programming to enable clearer Graphs algorithms
import Control.Monad (mapM)
-- import Data.UnionFind.ST as ST
import Control.Monad.Trans.UnionFind as ST
-- import Control.Monad.ST as ST
import Control.Monad.State as ST
data UnionFindT p m a
runUnionFind :: Monad m => UnionFindT p m a -> m a
data Point a
fresh :: Monad m => p -> UnionFindT p m (Point p)
repr :: Monad m => Point p -> UnionFindT p m (Point p)
descriptor :: Monad m => Point p -> UnionFindT p m p
union :: Monad m => Point p -> Point p -> UnionFindT p m ()
equivalent :: Monad m => Point p -> Point p -> UnionFindT p m Bool
unionFind :: IO [Int]
unionFind = ST.runUnionFind $ do
ns <- mapM ST.fresh ([1..10] :: [Int])
ST.union (ns !! 1) (ns !! 2)
ST.union (ns !! 3) (ns !! 4)
ST.union (ns !! 5) (ns !! 6)
t1 <- ST.equivalent (ns !! 1) (ns !! 2)
ST.lift $ putStrLn $ show t1
t2 <- ST.equivalent (ns !! 4) (ns !! 5)
n <- mapM ST.repr ns
n' <- mapM ST.descriptor n
pure n'
-}
......@@ -7,7 +7,7 @@ Maintainer : alexandre.delanoe+louvain@iscpif.fr
Stability : experimental
Portability : POSIX
Tools to clustering Graphs with louvain clustering algorithm.
Tools to clustering Graphs with louvain clustering algorithm (not optimized version).
References:
* Blondel, Vincent D; Guillaume, Jean-Loup; Lambiotte, Renaud; Lefebvre, Etienne (9 October 2008). "Fast unfolding of communities in large networks". Journal of Statistical Mechanics: Theory and Experiment. 2008 (10): P10008. arXiv:0803.0476 Freely accessible. doi:10.1088/1742-5468/2008/10/P10008.
......@@ -64,7 +64,7 @@ dendogram :: (Eq b, DynGraph gr) => gr a b -> Int -> Reverse -> [[Node]]
dendogram gr n r = stepscom gr n (start gr r)
start :: DynGraph gr => gr a b -> Reverse -> [[Node]]
start gr r = order' $ Prelude.map (\x -> [] ++ [x]) ( nodes gr )
start gr r = order' $ Prelude.map (\x -> [x]) ( nodes gr )
where
order' = case r of
True -> reverse
......@@ -81,7 +81,8 @@ stepscom gr n ns = foldl' (\xs _ -> stepcom gr' (smallCom xs) xs) ns [1..n]
------------------------------------------------------------------------
stepcom :: DynGraph gr => gr a b -> [Node] -> [[Node]] -> [[Node]]
stepcom gr n ns = bestModularities gr $ [ns] ++ Prelude.map (\x -> x ++ neighout) (addcom n neighin)
stepcom gr n ns = bestModularity gr
$ [ns] ++ Prelude.map (\x -> x ++ neighout) (addcom n neighin)
where
-- | First remove the node (n) of the current partition (ns)
ns' = filter (/= n) ns
......@@ -101,33 +102,32 @@ neighcom :: DynGraph gr => gr a b -> [Node] -> [Node]
neighcom gr ns = ( nub . filter (not . (`elem` ns)) . concat ) ns'
where ns' = Prelude.map (neighbors gr) ns
rotate :: Int -> [a] -> [a]
rotate _ [] = []
rotate n xs = zipWith const (drop n (cycle xs)) xs
------------------------------------------------------------------------
data Module = Node | Module { modules :: [Module]
, score :: Double
}
------------------------------------------------------------------------
-- | Computing modularity of the partition
------------------------------------------------------------------------
modularities :: DynGraph gr => gr a b -> [[Node]] -> Double
modularities gr xs = sum $ Prelude.map (\y -> modularity gr y) xs
compareModularities :: DynGraph gr => gr a b -> [[Node]] -> [[Node]] -> Ordering
bestModularity :: DynGraph gr
=> gr a b
-> [[[Node]]]
-> [[Node]]
bestModularity gr ns = maximumBy (compareModularities gr) ns
compareModularities :: DynGraph gr
=> gr a b
-> [[Node]]
-> [[Node]]
-> Ordering
compareModularities gr xs ys
| modularities gr xs < modularities gr ys = LT
| modularities gr xs > modularities gr ys = GT
| otherwise = EQ
bestModularities :: DynGraph gr => gr a b -> [[[Node]]] -> [[Node]]
bestModularities gr ns = maximumBy (compareModularities gr) ns
modularities :: DynGraph gr => gr a b -> [[Node]] -> Double
modularities gr xs = sum $ Prelude.map (\y -> modularity gr y) xs
modularity :: DynGraph gr => gr a b -> [Node] -> Double
modularity gr ns = coverage - edgeDensity
......@@ -151,37 +151,34 @@ modularity gr ns = coverage - edgeDensity
links = fromIntegral (2 * (size gr))
----------------------------------------------------------
-- | Discover what NP complete means:
-- | Exampel to discover what NP complete means
----------------------------------------------------------
bestPartition' :: DynGraph gr => gr a b -> [[Node]]
bestPartition' gr = maximumBy (compareModularities gr) $ gpartition gr
gpartition :: DynGraph gr => gr a b -> [[[Node]]]
gpartition gr = separate (nodes gr)
takeDrop :: Int -> [a] -> [[a]]
takeDrop n xs = [ (take n xs), drop n xs]
-- | Naive description of the algo
-- separate' :: forall a. [a] -> [[[a]]]
-- separate' xs = [ takeDrop t (rotate r xs)
-- | t <- [1.. fromIntegral (length xs) - 1 ]
-- , r <- [0.. fromIntegral (length xs) ]
-- ]
-- Optimization
-- http://stackoverflow.com/questions/35388734/list-partitioning-implemented-recursively
separate :: [a] -> [[[a]]]
separate [] = [[]]
separate (x:xs) = let recur = separate xs
split = do
partition <- recur
return $ [x] : partition
pure $ [x] : partition
noSplit = do
(y:ys) <- recur
return $ (x:y):ys
pure $ (x:y):ys
in split ++ noSplit
-- separate' :: forall a. [a] -> [[[a]]]
-- separate' xs = [ takeDrop t (rotate r xs)
-- | t <- [1.. fromIntegral (length xs) - 1 ]
-- , r <- [0.. fromIntegral (length xs) ]
-- ]
gpartition :: DynGraph gr => gr a b -> [[[Node]]]
gpartition gr = separate (nodes gr)
bestPartition' :: DynGraph gr => gr a b -> [[Node]]
bestPartition' gr = maximumBy (compareModularities gr) $ gpartition gr
----------------------------------------------------------
......@@ -32,7 +32,6 @@ import qualified Data.Text.IO as T
import qualified Turtle as TU
cLouvain :: Text -> Map (Int, Int) Double -> IO [LouvainNode]
cLouvain _params ms = do
let inFileD = "/tmp/louvainData.txt"
......@@ -53,8 +52,8 @@ cLouvain _params ms = do
putStrLn "cmdHierarchy"
let cmdHierarchy = hierarchy <> " " <> outTree <> " -l 1 > " <> outRes
--pure cmdLouvain
-- _ <- inMVarIO $ shell (T.pack cmdLouvain)
-- _ <- inMVarIO $ shell (T.pack cmdHierarchy)
_ <- inMVarIO $ shell (T.pack cmdLouvain)
_ <- inMVarIO $ shell (T.pack cmdHierarchy)
putStrLn "myResult start"
myResult <- inMVarIO $ readOutput outRes
......
......@@ -21,7 +21,6 @@ data LouvainNode = LouvainNode { l_node_id :: Int
, l_community_id :: Int
} deriving (Show)
label' :: (Graph gr) => gr a b -> Edge -> Maybe b
label' gr (u,v) = lookup v (lsuc gr u)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment