[NEW] using more rec with FGL.

cdba50dc · Alexandre Delanoë · f9292434 · cdba50dc · cdba50dc · cdba50dc
Commit cdba50dc authored Mar 12, 2020 by Alexandre Delanoë
7 changed files
--- a/clustering-louvain.cabal
+++ b/clustering-louvain.cabal
@@ -4,7 +4,7 @@ cabal-version: 1.12
 --
 -- see: https://github.com/sol/hpack
 --
-- hash: f5efad8af0a555a91b2aa7b955eb00996a2391daa5222b63ae306ad40e22121c
+-- hash: 92999c9be9048a97745703a9575dcb38cea579137bd2a893acf77c708290b84e
 name:           clustering-louvain
 version:        0.1.0.0
@@ -13,7 +13,7 @@ description:    Please see README.md
 category:       Data
 author:         Alexandre Delanoë
 maintainer:     alexandre.delanoe+louvain@iscpif.fr
-copyright:      Copyright: (c) 2017-2018: see git logs and README
+copyright:      Copyright: (c) 2017-present: see git logs and README
 license:        BSD3
 license-file:   LICENSE
 build-type:     Simple
@@ -29,8 +29,10 @@ library
    , fgl
    , foldl
    , hxt
+    , protolude
    , text
    , turtle
+    , vector
  exposed-modules:
      Data.Graph.Clustering.Louvain
      Data.Graph.Clustering.Louvain.Utils

--- a/package.yaml
+++ b/package.yaml
@@ -6,7 +6,7 @@ category: Data
 author: Alexandre Delanoë
 maintainer: alexandre.delanoe+louvain@iscpif.fr
 copyright:
-    - ! 'Copyright: (c) 2017-2018: see git logs and README'
+    - ! 'Copyright: (c) 2017-present: see git logs and README'
 license: BSD3
 homepage: 
 ghc-options: -Wall
@@ -14,6 +14,10 @@ dependencies:
 - extra
 - text
 - containers
+- vector
+- protolude
+  #- union-find
+  #- mtl
 library:
  source-dirs: src
  ghc-options:

--- a/src/Data/Graph/Clustering/Example.hs
+++ b/src/Data/Graph/Clustering/Example.hs
@@ -30,6 +30,7 @@ eU = [
    ,(4,5,1)
    ,(5,4,1)
+--    ,(6,7,1)
    ]
 eD :: [LEdge Double]
@@ -55,6 +56,10 @@ gU = mkGraph' eU
 -- 4:()->[(1,1),(1,3),(1,5)]
 -- 5:()->[(1,4)]
+cuiller :: Gr () Double
+cuiller = gU
 -- Visual representation:
 -- 
 --    2

--- a/src/Data/Graph/Clustering/HLouvain.hs
+++ b/src/Data/Graph/Clustering/HLouvain.hs
@@ -9,84 +9,229 @@ Portability : POSIX
 -}
+{-# LANGUAGE ConstrainedClassMethods #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE InstanceSigs      #-}
+{-# LANGUAGE NoImplicitPrelude #-}
-module Data.Graph.Clustering.HLouvain 
+module Data.Graph.Clustering.HLouvain
  where
-import Data.Ord (Down(..))
+import Protolude
-import Data.List (sortOn, foldl')
+import Data.Set (Set)
-import Data.Graph.Inductive
+import qualified Data.Set as Set
-- import Data.Graph.Clustering.Louvain
+import Data.Ord (Down(..), compare)
+import Data.List (foldl', maximumBy)
+import Data.Graph.Inductive (DynGraph, Node, Graph)
+import qualified Data.Graph.Inductive as G
 ------------------------------------------------------------------------
 -- | Main Tools
 ------------------------------------------------------------------------
 -- | Glustering (instead of clustering) since we are working on the glue
-- between nodes to create "glusters" of nodes
+-- between nodes to create "glusters" of nodes: this API will enable the 
-glustering :: DynGraph gr
+-- choice of "glue" (as a function) for the glustering.
+glustering :: (DynGraph gr, Eq b)
           => gr a b
-           -> [Module]
+           -> [[Set Node]]
-glustering g = foldl' (\a b -> modulare g b a) [] modules
+glustering g = map (map nodesD) $ glusteringD g
-  where
-    modules :: [Module]
+glusteringD :: (DynGraph gr, Eq b)
-    modules = map (\n -> Module n [] (modularity' g [n])) ns
+           => gr a b
+           -> [[Dendogram]]
+glusteringD g = map (glusteringC g) (comp g)
+glusteringC :: DynGraph gr
+           => gr a b
+           -> [Node]
+           -> [Dendogram]
+glusteringC g ns = foldl' (\ds n -> insert g (toDendo g n) ds) [] ns
+------------------------------------------------------------------------
+type Component = [Node]
-    ns :: [Node]
+comp :: (DynGraph gr, Eq b) => gr a b -> [Component]
-    ns = sortOn (Down . (deg g)) $ nodes g
+comp g' = map sortNodes cs
+  where
+    sortNodes ns = case head $ sortOn (Down . (G.deg g)) ns of
+      Nothing -> []
+      -- sort nodes by depth first to speed up the glustering
+      Just  n -> G.dfs [n] g
+    cs = G.components g
+    g  = G.undir g'
 ------------------------------------------------------------------------
 -- | Definitions
 ------------------------------------------------------------------------
-data Module = Module { node    :: !Node
+-- | A dendogram is a Tree whose leaf is one gram only and parents nodes
-                     , modules :: ![Module]
+-- are glusters with score of glue to build it.
-                     , score   :: !Double
-                     }
+data Dendogram  = Gram Node
-  deriving (Show)
+                | Dendogram { gluster :: ![Dendogram]
+                            , score   :: !Double
-- | Divide en conquer method
+                            }
-modulare :: DynGraph gr
+  deriving (Show, Eq)
-         => gr a b
-         ->  Module
+--------------------------------------------------------------------------
-         -> [Module]
+insert :: DynGraph gr
-         -> [Module]
+       => gr n e
-modulare _ m []  = [m]
+       ->  Dendogram
-modulare _ m [n] = sortOn (Down . score) [m,n]
+       -> [Dendogram]
-modulare g m0@(Module n ms0 _) m1@(m@(Module n1 _ _):ms)
+       -> [Dendogram]
-  | score1 > score2 = [Module n1 (modulare g m0 ms0) score1] ++ ms
+insert g d ds = maximumBy (\a b -> compare (hasScore g a) (hasScore g b))
-  | otherwise       = [m]  ++ (modulare g m0 ms)
+              $ insertD g d ds
-    where
-      score1 = sum $ map (modularity g) [m0, m]
+insertD :: DynGraph gr
-      score2 = sum $ map (modularity g) ([m0]++ms)
+       => gr n e
+       ->   Dendogram
-- | need to flatten the Tree of modules to get a list of nodes
+       ->  [Dendogram]
-flat :: Module -> [Node]
+       -> [[Dendogram]]
-flat (Module n [] _) = [n]
+insertD _ d []     = [[d]]
-flat (Module n ns _) = [n] ++ (concat $ map flat ns)
+insertD g d (x:xs) = [[inDendo g d x] ++ xs]
+                  ++ (map (\y -> [x] ++ y) $ insertD g d xs)
-- | TODO Sum or Union -> QuickCheck
-modularity :: DynGraph gr => gr a b -> Module -> Double
+--------------------------------------------------------------------------
-modularity g m = modularity' g (flat m)
+toDendo :: DynGraph gr
+        => gr n e
-modularity' :: DynGraph gr => gr a b -> [Node] -> Double
+        -> Node
-modularity' gr ns = coverage - edgeDensity
+        -> Dendogram
-    where
+toDendo g n = Dendogram [Gram n] s
-        coverage :: Double
+  where
-        coverage  = sizeSubGraph / sizeAllGraph
+    s = modularity g (Set.singleton n)
-            where
-                sizeSubGraph :: Double
+toDendoD :: DynGraph gr
-                sizeSubGraph = fromIntegral ( size $ subgraph ns gr )
+         => gr n e
+         -> Dendogram
-                sizeAllGraph :: Double
+         -> Dendogram
-                sizeAllGraph = fromIntegral (size gr)
+toDendoD g d@(Gram _) = Dendogram [d] (hasScore g d)
+toDendoD _ d@(Dendogram _ _) = d
-        edgeDensity :: Double
-        edgeDensity = (sum (Prelude.map (\node -> (degree node) / links ) ns)) ** 2
+--------------------------------------------------------------------------
-            where
+inDendo :: DynGraph gr
-                degree :: Node -> Double
+        => gr n e
-                degree node = fromIntegral (deg gr node)
+        -> Dendogram
+        -> Dendogram
-                links :: Double
+        -> Dendogram
-                links       = fromIntegral (2 * (size gr))
+inDendo g d1@(Gram _) d2@(Gram _)   = Dendogram n' (hasScore g n')
+  where
+    ds = inDendo g d1 (toDendoD g d2)
+    ss = hasScore g ds
+    n' | hasScore g d1 + hasScore g d2 > ss = [d1,d2]
+       | otherwise = [ds]
+inDendo g n@(Gram _) d@(Dendogram _ _) = inDendo g (toDendoD g n) d
+inDendo g d@(Dendogram _ _) n@(Gram _) = inDendo g n d
+inDendo g d1@(Dendogram _ _) d2@(Dendogram _ _) = Dendogram n' (hasScore g n')
+  where
+    ds = inDendo g d1 d2
+    ss = hasScore g ds
+    n' | hasScore g [d1, d2] > ss = [d1,d2]
+       | otherwise = [ds]
+-------------------------------------------------------------------
+nodesD :: Dendogram -> Set Node
+nodesD (Gram n) = Set.singleton n
+nodesD (Dendogram [] _) = Set.empty
+nodesD (Dendogram s _)  = Set.unions $ map nodesD s
+deg :: Graph gr => gr a b -> Dendogram -> Int
+deg g d = Set.size (neighbors g d)
+neighbors :: Graph gr => gr a b -> Dendogram -> Set Node
+neighbors g d = exclusion inNodes ouNodes
+  where
+    inNodes :: Set Node
+    inNodes = nodesD d
+    ouNodes :: Set Node
+    ouNodes = Set.unions $ map (neighbors' g) (Set.toList inNodes)
+    neighbors' :: Graph gr => gr a b -> Node -> Set Node
+    neighbors' g' n' = Set.fromList $ G.neighbors g' n'
+class HasScore a
+  where
+    hasScore :: DynGraph gr => gr n e -> a -> Double
+instance HasScore Dendogram
+  where
+    hasScore :: DynGraph gr => gr a b -> Dendogram -> Double
+    hasScore g m = modularity g (nodesD m)
+instance HasScore [Dendogram]
+  where
+    hasScore g ds = modularity g (Set.unions $ map nodesD ds)
+modulare :: DynGraph gr => gr a b -> Set Node -> Double
+modulare = modularity
+modularity :: DynGraph gr => gr a b -> Set Node -> Double
+modularity gr ns = coverage - edgeDensity
+  where
+    coverage :: Double
+    coverage  = sizeSubGraph / sizeAllGraph 
+      where
+          sizeSubGraph :: Double
+          sizeSubGraph = fromIntegral ( G.size $ subgraph ns gr )
+          sizeAllGraph :: Double
+          sizeAllGraph = fromIntegral (G.size gr)
+    edgeDensity :: Double
+    edgeDensity = (sum (Set.map (\node -> (degree node) / links ) ns)) ** 2
+      where
+          degree :: Node -> Double
+          degree node = fromIntegral (G.deg gr node)
+          links :: Double
+          links       = fromIntegral (2 * (G.size gr))
+subgraph :: DynGraph gr => Set Node -> gr a b -> gr a b
+subgraph ns = G.subgraph (Set.toList ns)
+exclusion :: Ord a => Set a -> Set a -> Set a
+exclusion a b = (Set.\\) b a
+{-
+Union Find algorithm could be used but Inductive Functional Graph
+exactly have been created by Martin Erwig to avoid such style of
+programming to enable clearer Graphs algorithms
+import Control.Monad (mapM)
+-- import Data.UnionFind.ST as ST
+import Control.Monad.Trans.UnionFind as ST
+-- import Control.Monad.ST  as ST
+import Control.Monad.State as ST
+    data UnionFindT p m a
+    runUnionFind :: Monad m => UnionFindT p m a -> m a
+    data Point a
+    fresh :: Monad m => p -> UnionFindT p m (Point p)
+    repr :: Monad m => Point p -> UnionFindT p m (Point p)
+    descriptor :: Monad m => Point p -> UnionFindT p m p
+    union :: Monad m => Point p -> Point p -> UnionFindT p m ()
+    equivalent :: Monad m => Point p -> Point p -> UnionFindT p m Bool
+unionFind :: IO [Int]
+unionFind = ST.runUnionFind $ do
+  ns <- mapM ST.fresh ([1..10] :: [Int])
+  ST.union (ns !! 1) (ns !! 2)
+  ST.union (ns !! 3) (ns !! 4)
+  ST.union (ns !! 5) (ns !! 6)
+  t1 <- ST.equivalent (ns !! 1) (ns !! 2)
+  ST.lift $ putStrLn $ show t1
+  t2 <- ST.equivalent (ns !! 4) (ns !! 5)
+  n  <- mapM ST.repr ns
+  n' <- mapM ST.descriptor n
+  pure n'
+-}
--- a/src/Data/Graph/Clustering/Louvain.hs
+++ b/src/Data/Graph/Clustering/Louvain.hs
@@ -7,7 +7,7 @@ Maintainer  : alexandre.delanoe+louvain@iscpif.fr
 Stability   : experimental
 Portability : POSIX
-Tools to clustering Graphs with louvain clustering algorithm.
+Tools to clustering Graphs with louvain clustering algorithm (not optimized version).
 References:
 * Blondel, Vincent D; Guillaume, Jean-Loup; Lambiotte, Renaud; Lefebvre, Etienne (9 October 2008). "Fast unfolding of communities in large networks". Journal of Statistical Mechanics: Theory and Experiment. 2008 (10): P10008. arXiv:0803.0476 Freely accessible. doi:10.1088/1742-5468/2008/10/P10008.
@@ -64,7 +64,7 @@ dendogram :: (Eq b, DynGraph gr) => gr a b -> Int -> Reverse -> [[Node]]
 dendogram gr n r = stepscom gr n (start gr r)
 start :: DynGraph gr => gr a b -> Reverse -> [[Node]]
-start gr r = order' $ Prelude.map (\x -> [] ++ [x]) ( nodes gr )
+start gr r = order' $ Prelude.map (\x -> [x]) ( nodes gr )
    where
        order' = case r of
                  True  -> reverse
@@ -81,7 +81,8 @@ stepscom gr n ns = foldl' (\xs _ -> stepcom gr' (smallCom xs) xs) ns [1..n]
 ------------------------------------------------------------------------
 stepcom :: DynGraph gr => gr a b -> [Node] -> [[Node]] -> [[Node]]
-stepcom gr n ns = bestModularities gr $ [ns] ++ Prelude.map (\x -> x ++ neighout) (addcom n neighin)
+stepcom gr n ns = bestModularity gr
+                $ [ns] ++ Prelude.map (\x -> x ++ neighout) (addcom n neighin)
    where
        -- | First remove the node (n) of the current partition (ns)
        ns' = filter (/= n) ns
@@ -101,33 +102,32 @@ neighcom :: DynGraph gr => gr a b -> [Node] -> [Node]
 neighcom gr ns = ( nub . filter (not . (`elem` ns)) . concat ) ns'
    where ns' = Prelude.map (neighbors gr) ns
 rotate :: Int -> [a] -> [a]
 rotate _ [] = []
 rotate n xs = zipWith const (drop n (cycle xs)) xs
------------------------------------------------------------------------
-data Module = Node | Module { modules :: [Module]
-                            , score   :: Double
-                            }
 ------------------------------------------------------------------------
 -- | Computing modularity of the partition
 ------------------------------------------------------------------------
-modularities :: DynGraph gr => gr a b -> [[Node]] -> Double
+bestModularity :: DynGraph gr
-modularities gr xs = sum $ Prelude.map (\y -> modularity gr y) xs
+                 => gr a b
+                 -> [[[Node]]]
-compareModularities :: DynGraph gr => gr a b -> [[Node]] -> [[Node]] -> Ordering
+                 ->  [[Node]]
+bestModularity gr ns = maximumBy (compareModularities gr) ns
+compareModularities :: DynGraph gr
+                    => gr a b
+                    -> [[Node]]
+                    -> [[Node]]
+                    -> Ordering
 compareModularities gr xs ys
    | modularities gr xs < modularities gr ys = LT
    | modularities gr xs > modularities gr ys = GT
    | otherwise = EQ
-bestModularities :: DynGraph gr => gr a b -> [[[Node]]] -> [[Node]]
+modularities :: DynGraph gr => gr a b -> [[Node]] -> Double
-bestModularities gr ns = maximumBy (compareModularities gr) ns
+modularities gr xs = sum $ Prelude.map (\y -> modularity gr y) xs
 modularity :: DynGraph gr => gr a b -> [Node] -> Double
 modularity gr ns = coverage - edgeDensity
@@ -151,37 +151,34 @@ modularity gr ns = coverage - edgeDensity
                links       = fromIntegral (2 * (size gr))
 ----------------------------------------------------------
-- | Discover what NP complete means:
+-- | Exampel to discover what NP complete means
 ----------------------------------------------------------
+bestPartition' :: DynGraph gr => gr a b -> [[Node]]
+bestPartition' gr = maximumBy (compareModularities gr) $ gpartition gr
+gpartition :: DynGraph gr => gr a b -> [[[Node]]]
+gpartition gr = separate (nodes gr)
 takeDrop :: Int -> [a] -> [[a]]
 takeDrop n xs = [ (take n xs), drop n xs]
+-- | Naive description of the algo
+-- separate' :: forall a. [a] -> [[[a]]]
+-- separate' xs = [ takeDrop t (rotate r xs)
+--                | t <- [1.. fromIntegral (length xs) - 1 ]
+--                , r <- [0.. fromIntegral (length xs)     ]
+--                ]
+-- Optimization
 -- http://stackoverflow.com/questions/35388734/list-partitioning-implemented-recursively
 separate :: [a] -> [[[a]]]
 separate [] = [[]]
 separate (x:xs) = let recur = separate xs
                      split = do
                        partition <- recur
-                        return $ [x] : partition
+                        pure $ [x] : partition
                      noSplit = do
                        (y:ys) <- recur
-                        return $ (x:y):ys
+                        pure $ (x:y):ys
                  in split ++ noSplit
-- separate' :: forall a. [a] -> [[[a]]]
-- separate' xs = [ takeDrop t (rotate r xs) 
--                | t <- [1.. fromIntegral (length xs) - 1 ]
--                , r <- [0.. fromIntegral (length xs)     ]
--                ]
-gpartition :: DynGraph gr => gr a b -> [[[Node]]]
-gpartition gr = separate (nodes gr)
-bestPartition' :: DynGraph gr => gr a b -> [[Node]]
-bestPartition' gr = maximumBy (compareModularities gr) $ gpartition gr
 ----------------------------------------------------------
--- a/src/Data/Graph/Clustering/Louvain/CplusPlus.hs
+++ b/src/Data/Graph/Clustering/Louvain/CplusPlus.hs
@@ -32,7 +32,6 @@ import qualified Data.Text.IO as T
 import qualified Turtle as TU
 cLouvain :: Text -> Map (Int, Int) Double -> IO [LouvainNode]
 cLouvain _params ms = do
  let inFileD = "/tmp/louvainData.txt"
@@ -53,8 +52,8 @@ cLouvain _params ms = do
  putStrLn "cmdHierarchy"
  let cmdHierarchy = hierarchy <> " " <> outTree <> " -l 1 > " <> outRes
  --pure cmdLouvain
-  -- _ <- inMVarIO $ shell (T.pack cmdLouvain)
+  _ <- inMVarIO $ shell (T.pack cmdLouvain)
-  -- _ <- inMVarIO $ shell (T.pack cmdHierarchy)
+  _ <- inMVarIO $ shell (T.pack cmdHierarchy)
  putStrLn "myResult start"
  myResult <- inMVarIO $ readOutput outRes

--- a/src/Data/Graph/Clustering/Louvain/Utils.hs
+++ b/src/Data/Graph/Clustering/Louvain/Utils.hs
@@ -21,7 +21,6 @@ data LouvainNode = LouvainNode { l_node_id :: Int
                               , l_community_id :: Int
                               } deriving (Show)
 label' :: (Graph gr) => gr a b -> Edge -> Maybe b
 label' gr (u,v) = lookup v (lsuc gr u)