Commit cdba50dc authored by Alexandre Delanoë's avatar Alexandre Delanoë

[NEW] using more rec with FGL.

parent f9292434
...@@ -4,7 +4,7 @@ cabal-version: 1.12 ...@@ -4,7 +4,7 @@ cabal-version: 1.12
-- --
-- see: https://github.com/sol/hpack -- see: https://github.com/sol/hpack
-- --
-- hash: f5efad8af0a555a91b2aa7b955eb00996a2391daa5222b63ae306ad40e22121c -- hash: 92999c9be9048a97745703a9575dcb38cea579137bd2a893acf77c708290b84e
name: clustering-louvain name: clustering-louvain
version: 0.1.0.0 version: 0.1.0.0
...@@ -13,7 +13,7 @@ description: Please see README.md ...@@ -13,7 +13,7 @@ description: Please see README.md
category: Data category: Data
author: Alexandre Delanoë author: Alexandre Delanoë
maintainer: alexandre.delanoe+louvain@iscpif.fr maintainer: alexandre.delanoe+louvain@iscpif.fr
copyright: Copyright: (c) 2017-2018: see git logs and README copyright: Copyright: (c) 2017-present: see git logs and README
license: BSD3 license: BSD3
license-file: LICENSE license-file: LICENSE
build-type: Simple build-type: Simple
...@@ -29,8 +29,10 @@ library ...@@ -29,8 +29,10 @@ library
, fgl , fgl
, foldl , foldl
, hxt , hxt
, protolude
, text , text
, turtle , turtle
, vector
exposed-modules: exposed-modules:
Data.Graph.Clustering.Louvain Data.Graph.Clustering.Louvain
Data.Graph.Clustering.Louvain.Utils Data.Graph.Clustering.Louvain.Utils
......
...@@ -6,7 +6,7 @@ category: Data ...@@ -6,7 +6,7 @@ category: Data
author: Alexandre Delanoë author: Alexandre Delanoë
maintainer: alexandre.delanoe+louvain@iscpif.fr maintainer: alexandre.delanoe+louvain@iscpif.fr
copyright: copyright:
- ! 'Copyright: (c) 2017-2018: see git logs and README' - ! 'Copyright: (c) 2017-present: see git logs and README'
license: BSD3 license: BSD3
homepage: homepage:
ghc-options: -Wall ghc-options: -Wall
...@@ -14,6 +14,10 @@ dependencies: ...@@ -14,6 +14,10 @@ dependencies:
- extra - extra
- text - text
- containers - containers
- vector
- protolude
#- union-find
#- mtl
library: library:
source-dirs: src source-dirs: src
ghc-options: ghc-options:
......
...@@ -30,6 +30,7 @@ eU = [ ...@@ -30,6 +30,7 @@ eU = [
,(4,5,1) ,(4,5,1)
,(5,4,1) ,(5,4,1)
-- ,(6,7,1)
] ]
eD :: [LEdge Double] eD :: [LEdge Double]
...@@ -55,6 +56,10 @@ gU = mkGraph' eU ...@@ -55,6 +56,10 @@ gU = mkGraph' eU
-- 4:()->[(1,1),(1,3),(1,5)] -- 4:()->[(1,1),(1,3),(1,5)]
-- 5:()->[(1,4)] -- 5:()->[(1,4)]
cuiller :: Gr () Double
cuiller = gU
-- Visual representation: -- Visual representation:
-- --
-- 2 -- 2
......
...@@ -9,84 +9,229 @@ Portability : POSIX ...@@ -9,84 +9,229 @@ Portability : POSIX
-} -}
{-# LANGUAGE ConstrainedClassMethods #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE NoImplicitPrelude #-}
module Data.Graph.Clustering.HLouvain module Data.Graph.Clustering.HLouvain
where where
import Data.Ord (Down(..)) import Protolude
import Data.List (sortOn, foldl') import Data.Set (Set)
import Data.Graph.Inductive import qualified Data.Set as Set
-- import Data.Graph.Clustering.Louvain import Data.Ord (Down(..), compare)
import Data.List (foldl', maximumBy)
import Data.Graph.Inductive (DynGraph, Node, Graph)
import qualified Data.Graph.Inductive as G
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Main Tools -- | Main Tools
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Glustering (instead of clustering) since we are working on the glue -- | Glustering (instead of clustering) since we are working on the glue
-- between nodes to create "glusters" of nodes -- between nodes to create "glusters" of nodes: this API will enable the
glustering :: DynGraph gr -- choice of "glue" (as a function) for the glustering.
glustering :: (DynGraph gr, Eq b)
=> gr a b => gr a b
-> [Module] -> [[Set Node]]
glustering g = foldl' (\a b -> modulare g b a) [] modules glustering g = map (map nodesD) $ glusteringD g
where
modules :: [Module] glusteringD :: (DynGraph gr, Eq b)
modules = map (\n -> Module n [] (modularity' g [n])) ns => gr a b
-> [[Dendogram]]
glusteringD g = map (glusteringC g) (comp g)
glusteringC :: DynGraph gr
=> gr a b
-> [Node]
-> [Dendogram]
glusteringC g ns = foldl' (\ds n -> insert g (toDendo g n) ds) [] ns
------------------------------------------------------------------------
type Component = [Node]
ns :: [Node] comp :: (DynGraph gr, Eq b) => gr a b -> [Component]
ns = sortOn (Down . (deg g)) $ nodes g comp g' = map sortNodes cs
where
sortNodes ns = case head $ sortOn (Down . (G.deg g)) ns of
Nothing -> []
-- sort nodes by depth first to speed up the glustering
Just n -> G.dfs [n] g
cs = G.components g
g = G.undir g'
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Definitions -- | Definitions
------------------------------------------------------------------------ ------------------------------------------------------------------------
data Module = Module { node :: !Node -- | A dendogram is a Tree whose leaf is one gram only and parents nodes
, modules :: ![Module] -- are glusters with score of glue to build it.
data Dendogram = Gram Node
| Dendogram { gluster :: ![Dendogram]
, score :: !Double , score :: !Double
} }
deriving (Show) deriving (Show, Eq)
--------------------------------------------------------------------------
insert :: DynGraph gr
=> gr n e
-> Dendogram
-> [Dendogram]
-> [Dendogram]
insert g d ds = maximumBy (\a b -> compare (hasScore g a) (hasScore g b))
$ insertD g d ds
insertD :: DynGraph gr
=> gr n e
-> Dendogram
-> [Dendogram]
-> [[Dendogram]]
insertD _ d [] = [[d]]
insertD g d (x:xs) = [[inDendo g d x] ++ xs]
++ (map (\y -> [x] ++ y) $ insertD g d xs)
--------------------------------------------------------------------------
toDendo :: DynGraph gr
=> gr n e
-> Node
-> Dendogram
toDendo g n = Dendogram [Gram n] s
where
s = modularity g (Set.singleton n)
toDendoD :: DynGraph gr
=> gr n e
-> Dendogram
-> Dendogram
toDendoD g d@(Gram _) = Dendogram [d] (hasScore g d)
toDendoD _ d@(Dendogram _ _) = d
--------------------------------------------------------------------------
inDendo :: DynGraph gr
=> gr n e
-> Dendogram
-> Dendogram
-> Dendogram
inDendo g d1@(Gram _) d2@(Gram _) = Dendogram n' (hasScore g n')
where
ds = inDendo g d1 (toDendoD g d2)
ss = hasScore g ds
n' | hasScore g d1 + hasScore g d2 > ss = [d1,d2]
| otherwise = [ds]
-- | Divide en conquer method inDendo g n@(Gram _) d@(Dendogram _ _) = inDendo g (toDendoD g n) d
modulare :: DynGraph gr
=> gr a b inDendo g d@(Dendogram _ _) n@(Gram _) = inDendo g n d
-> Module
-> [Module] inDendo g d1@(Dendogram _ _) d2@(Dendogram _ _) = Dendogram n' (hasScore g n')
-> [Module]
modulare _ m [] = [m]
modulare _ m [n] = sortOn (Down . score) [m,n]
modulare g m0@(Module n ms0 _) m1@(m@(Module n1 _ _):ms)
| score1 > score2 = [Module n1 (modulare g m0 ms0) score1] ++ ms
| otherwise = [m] ++ (modulare g m0 ms)
where where
score1 = sum $ map (modularity g) [m0, m] ds = inDendo g d1 d2
score2 = sum $ map (modularity g) ([m0]++ms) ss = hasScore g ds
n' | hasScore g [d1, d2] > ss = [d1,d2]
| otherwise = [ds]
-- | need to flatten the Tree of modules to get a list of nodes
flat :: Module -> [Node]
flat (Module n [] _) = [n]
flat (Module n ns _) = [n] ++ (concat $ map flat ns)
-- | TODO Sum or Union -> QuickCheck -------------------------------------------------------------------
modularity :: DynGraph gr => gr a b -> Module -> Double nodesD :: Dendogram -> Set Node
modularity g m = modularity' g (flat m) nodesD (Gram n) = Set.singleton n
nodesD (Dendogram [] _) = Set.empty
nodesD (Dendogram s _) = Set.unions $ map nodesD s
modularity' :: DynGraph gr => gr a b -> [Node] -> Double deg :: Graph gr => gr a b -> Dendogram -> Int
modularity' gr ns = coverage - edgeDensity deg g d = Set.size (neighbors g d)
neighbors :: Graph gr => gr a b -> Dendogram -> Set Node
neighbors g d = exclusion inNodes ouNodes
where
inNodes :: Set Node
inNodes = nodesD d
ouNodes :: Set Node
ouNodes = Set.unions $ map (neighbors' g) (Set.toList inNodes)
neighbors' :: Graph gr => gr a b -> Node -> Set Node
neighbors' g' n' = Set.fromList $ G.neighbors g' n'
class HasScore a
where
hasScore :: DynGraph gr => gr n e -> a -> Double
instance HasScore Dendogram
where
hasScore :: DynGraph gr => gr a b -> Dendogram -> Double
hasScore g m = modularity g (nodesD m)
instance HasScore [Dendogram]
where
hasScore g ds = modularity g (Set.unions $ map nodesD ds)
modulare :: DynGraph gr => gr a b -> Set Node -> Double
modulare = modularity
modularity :: DynGraph gr => gr a b -> Set Node -> Double
modularity gr ns = coverage - edgeDensity
where where
coverage :: Double coverage :: Double
coverage = sizeSubGraph / sizeAllGraph coverage = sizeSubGraph / sizeAllGraph
where where
sizeSubGraph :: Double sizeSubGraph :: Double
sizeSubGraph = fromIntegral ( size $ subgraph ns gr ) sizeSubGraph = fromIntegral ( G.size $ subgraph ns gr )
sizeAllGraph :: Double sizeAllGraph :: Double
sizeAllGraph = fromIntegral (size gr) sizeAllGraph = fromIntegral (G.size gr)
edgeDensity :: Double edgeDensity :: Double
edgeDensity = (sum (Prelude.map (\node -> (degree node) / links ) ns)) ** 2 edgeDensity = (sum (Set.map (\node -> (degree node) / links ) ns)) ** 2
where where
degree :: Node -> Double degree :: Node -> Double
degree node = fromIntegral (deg gr node) degree node = fromIntegral (G.deg gr node)
links :: Double links :: Double
links = fromIntegral (2 * (size gr)) links = fromIntegral (2 * (G.size gr))
subgraph :: DynGraph gr => Set Node -> gr a b -> gr a b
subgraph ns = G.subgraph (Set.toList ns)
exclusion :: Ord a => Set a -> Set a -> Set a
exclusion a b = (Set.\\) b a
{-
Union Find algorithm could be used but Inductive Functional Graph
exactly have been created by Martin Erwig to avoid such style of
programming to enable clearer Graphs algorithms
import Control.Monad (mapM)
-- import Data.UnionFind.ST as ST
import Control.Monad.Trans.UnionFind as ST
-- import Control.Monad.ST as ST
import Control.Monad.State as ST
data UnionFindT p m a
runUnionFind :: Monad m => UnionFindT p m a -> m a
data Point a
fresh :: Monad m => p -> UnionFindT p m (Point p)
repr :: Monad m => Point p -> UnionFindT p m (Point p)
descriptor :: Monad m => Point p -> UnionFindT p m p
union :: Monad m => Point p -> Point p -> UnionFindT p m ()
equivalent :: Monad m => Point p -> Point p -> UnionFindT p m Bool
unionFind :: IO [Int]
unionFind = ST.runUnionFind $ do
ns <- mapM ST.fresh ([1..10] :: [Int])
ST.union (ns !! 1) (ns !! 2)
ST.union (ns !! 3) (ns !! 4)
ST.union (ns !! 5) (ns !! 6)
t1 <- ST.equivalent (ns !! 1) (ns !! 2)
ST.lift $ putStrLn $ show t1
t2 <- ST.equivalent (ns !! 4) (ns !! 5)
n <- mapM ST.repr ns
n' <- mapM ST.descriptor n
pure n'
-}
...@@ -7,7 +7,7 @@ Maintainer : alexandre.delanoe+louvain@iscpif.fr ...@@ -7,7 +7,7 @@ Maintainer : alexandre.delanoe+louvain@iscpif.fr
Stability : experimental Stability : experimental
Portability : POSIX Portability : POSIX
Tools to clustering Graphs with louvain clustering algorithm. Tools to clustering Graphs with louvain clustering algorithm (not optimized version).
References: References:
* Blondel, Vincent D; Guillaume, Jean-Loup; Lambiotte, Renaud; Lefebvre, Etienne (9 October 2008). "Fast unfolding of communities in large networks". Journal of Statistical Mechanics: Theory and Experiment. 2008 (10): P10008. arXiv:0803.0476 Freely accessible. doi:10.1088/1742-5468/2008/10/P10008. * Blondel, Vincent D; Guillaume, Jean-Loup; Lambiotte, Renaud; Lefebvre, Etienne (9 October 2008). "Fast unfolding of communities in large networks". Journal of Statistical Mechanics: Theory and Experiment. 2008 (10): P10008. arXiv:0803.0476 Freely accessible. doi:10.1088/1742-5468/2008/10/P10008.
...@@ -64,7 +64,7 @@ dendogram :: (Eq b, DynGraph gr) => gr a b -> Int -> Reverse -> [[Node]] ...@@ -64,7 +64,7 @@ dendogram :: (Eq b, DynGraph gr) => gr a b -> Int -> Reverse -> [[Node]]
dendogram gr n r = stepscom gr n (start gr r) dendogram gr n r = stepscom gr n (start gr r)
start :: DynGraph gr => gr a b -> Reverse -> [[Node]] start :: DynGraph gr => gr a b -> Reverse -> [[Node]]
start gr r = order' $ Prelude.map (\x -> [] ++ [x]) ( nodes gr ) start gr r = order' $ Prelude.map (\x -> [x]) ( nodes gr )
where where
order' = case r of order' = case r of
True -> reverse True -> reverse
...@@ -81,7 +81,8 @@ stepscom gr n ns = foldl' (\xs _ -> stepcom gr' (smallCom xs) xs) ns [1..n] ...@@ -81,7 +81,8 @@ stepscom gr n ns = foldl' (\xs _ -> stepcom gr' (smallCom xs) xs) ns [1..n]
------------------------------------------------------------------------ ------------------------------------------------------------------------
stepcom :: DynGraph gr => gr a b -> [Node] -> [[Node]] -> [[Node]] stepcom :: DynGraph gr => gr a b -> [Node] -> [[Node]] -> [[Node]]
stepcom gr n ns = bestModularities gr $ [ns] ++ Prelude.map (\x -> x ++ neighout) (addcom n neighin) stepcom gr n ns = bestModularity gr
$ [ns] ++ Prelude.map (\x -> x ++ neighout) (addcom n neighin)
where where
-- | First remove the node (n) of the current partition (ns) -- | First remove the node (n) of the current partition (ns)
ns' = filter (/= n) ns ns' = filter (/= n) ns
...@@ -101,33 +102,32 @@ neighcom :: DynGraph gr => gr a b -> [Node] -> [Node] ...@@ -101,33 +102,32 @@ neighcom :: DynGraph gr => gr a b -> [Node] -> [Node]
neighcom gr ns = ( nub . filter (not . (`elem` ns)) . concat ) ns' neighcom gr ns = ( nub . filter (not . (`elem` ns)) . concat ) ns'
where ns' = Prelude.map (neighbors gr) ns where ns' = Prelude.map (neighbors gr) ns
rotate :: Int -> [a] -> [a] rotate :: Int -> [a] -> [a]
rotate _ [] = [] rotate _ [] = []
rotate n xs = zipWith const (drop n (cycle xs)) xs rotate n xs = zipWith const (drop n (cycle xs)) xs
------------------------------------------------------------------------
data Module = Node | Module { modules :: [Module]
, score :: Double
}
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Computing modularity of the partition -- | Computing modularity of the partition
------------------------------------------------------------------------ ------------------------------------------------------------------------
modularities :: DynGraph gr => gr a b -> [[Node]] -> Double bestModularity :: DynGraph gr
modularities gr xs = sum $ Prelude.map (\y -> modularity gr y) xs => gr a b
-> [[[Node]]]
-> [[Node]]
bestModularity gr ns = maximumBy (compareModularities gr) ns
compareModularities :: DynGraph gr => gr a b -> [[Node]] -> [[Node]] -> Ordering compareModularities :: DynGraph gr
=> gr a b
-> [[Node]]
-> [[Node]]
-> Ordering
compareModularities gr xs ys compareModularities gr xs ys
| modularities gr xs < modularities gr ys = LT | modularities gr xs < modularities gr ys = LT
| modularities gr xs > modularities gr ys = GT | modularities gr xs > modularities gr ys = GT
| otherwise = EQ | otherwise = EQ
bestModularities :: DynGraph gr => gr a b -> [[[Node]]] -> [[Node]] modularities :: DynGraph gr => gr a b -> [[Node]] -> Double
bestModularities gr ns = maximumBy (compareModularities gr) ns modularities gr xs = sum $ Prelude.map (\y -> modularity gr y) xs
modularity :: DynGraph gr => gr a b -> [Node] -> Double modularity :: DynGraph gr => gr a b -> [Node] -> Double
modularity gr ns = coverage - edgeDensity modularity gr ns = coverage - edgeDensity
...@@ -151,37 +151,34 @@ modularity gr ns = coverage - edgeDensity ...@@ -151,37 +151,34 @@ modularity gr ns = coverage - edgeDensity
links = fromIntegral (2 * (size gr)) links = fromIntegral (2 * (size gr))
---------------------------------------------------------- ----------------------------------------------------------
-- | Discover what NP complete means: -- | Exampel to discover what NP complete means
---------------------------------------------------------- ----------------------------------------------------------
bestPartition' :: DynGraph gr => gr a b -> [[Node]]
bestPartition' gr = maximumBy (compareModularities gr) $ gpartition gr
gpartition :: DynGraph gr => gr a b -> [[[Node]]]
gpartition gr = separate (nodes gr)
takeDrop :: Int -> [a] -> [[a]] takeDrop :: Int -> [a] -> [[a]]
takeDrop n xs = [ (take n xs), drop n xs] takeDrop n xs = [ (take n xs), drop n xs]
-- | Naive description of the algo
-- separate' :: forall a. [a] -> [[[a]]]
-- separate' xs = [ takeDrop t (rotate r xs)
-- | t <- [1.. fromIntegral (length xs) - 1 ]
-- , r <- [0.. fromIntegral (length xs) ]
-- ]
-- Optimization
-- http://stackoverflow.com/questions/35388734/list-partitioning-implemented-recursively -- http://stackoverflow.com/questions/35388734/list-partitioning-implemented-recursively
separate :: [a] -> [[[a]]] separate :: [a] -> [[[a]]]
separate [] = [[]] separate [] = [[]]
separate (x:xs) = let recur = separate xs separate (x:xs) = let recur = separate xs
split = do split = do
partition <- recur partition <- recur
return $ [x] : partition pure $ [x] : partition
noSplit = do noSplit = do
(y:ys) <- recur (y:ys) <- recur
return $ (x:y):ys pure $ (x:y):ys
in split ++ noSplit in split ++ noSplit
-- separate' :: forall a. [a] -> [[[a]]]
-- separate' xs = [ takeDrop t (rotate r xs)
-- | t <- [1.. fromIntegral (length xs) - 1 ]
-- , r <- [0.. fromIntegral (length xs) ]
-- ]
gpartition :: DynGraph gr => gr a b -> [[[Node]]]
gpartition gr = separate (nodes gr)
bestPartition' :: DynGraph gr => gr a b -> [[Node]]
bestPartition' gr = maximumBy (compareModularities gr) $ gpartition gr
---------------------------------------------------------- ----------------------------------------------------------
...@@ -32,7 +32,6 @@ import qualified Data.Text.IO as T ...@@ -32,7 +32,6 @@ import qualified Data.Text.IO as T
import qualified Turtle as TU import qualified Turtle as TU
cLouvain :: Text -> Map (Int, Int) Double -> IO [LouvainNode] cLouvain :: Text -> Map (Int, Int) Double -> IO [LouvainNode]
cLouvain _params ms = do cLouvain _params ms = do
let inFileD = "/tmp/louvainData.txt" let inFileD = "/tmp/louvainData.txt"
...@@ -53,8 +52,8 @@ cLouvain _params ms = do ...@@ -53,8 +52,8 @@ cLouvain _params ms = do
putStrLn "cmdHierarchy" putStrLn "cmdHierarchy"
let cmdHierarchy = hierarchy <> " " <> outTree <> " -l 1 > " <> outRes let cmdHierarchy = hierarchy <> " " <> outTree <> " -l 1 > " <> outRes
--pure cmdLouvain --pure cmdLouvain
-- _ <- inMVarIO $ shell (T.pack cmdLouvain) _ <- inMVarIO $ shell (T.pack cmdLouvain)
-- _ <- inMVarIO $ shell (T.pack cmdHierarchy) _ <- inMVarIO $ shell (T.pack cmdHierarchy)
putStrLn "myResult start" putStrLn "myResult start"
myResult <- inMVarIO $ readOutput outRes myResult <- inMVarIO $ readOutput outRes
......
...@@ -21,7 +21,6 @@ data LouvainNode = LouvainNode { l_node_id :: Int ...@@ -21,7 +21,6 @@ data LouvainNode = LouvainNode { l_node_id :: Int
, l_community_id :: Int , l_community_id :: Int
} deriving (Show) } deriving (Show)
label' :: (Graph gr) => gr a b -> Edge -> Maybe b label' :: (Graph gr) => gr a b -> Edge -> Maybe b
label' gr (u,v) = lookup v (lsuc gr u) label' gr (u,v) = lookup v (lsuc gr u)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment