Commit d51bbdff authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FEAT] new clustering algo by default (Spinglass)

parent 05a29380
{-|
Module : Gargantext.Core.Viz.Graph.Louvain
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Methods.Graph.Louvain
where
import Gargantext.Prelude
import Data.Map (Map, fromList)
import Data.Graph.Clustering.Louvain.Utils (LouvainNode(..))
type LouvainNodeId = Int
type CommunityId = Int
nodeId2comId :: [LouvainNode] -> Map LouvainNodeId CommunityId
nodeId2comId ns = fromList [(nId,cId) | LouvainNode nId cId <- ns]
comId2nodeId :: [LouvainNode] -> Map CommunityId LouvainNodeId
comId2nodeId ns = fromList [(cId,nId) | LouvainNode nId cId <- ns]
...@@ -154,7 +154,7 @@ computeGraph cId d nt repo = do ...@@ -154,7 +154,7 @@ computeGraph cId d nt repo = do
<$> groupNodesByNgrams ngs <$> groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs) <$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)
graph <- liftBase $ cooc2graph d 0 myCooc graph <- liftBase $ cooc2graphWith Spinglass d 0 myCooc
pure graph pure graph
......
...@@ -11,39 +11,65 @@ Let be a graph with partitions (from Louvain algo), Bridgeness uniformly ...@@ -11,39 +11,65 @@ Let be a graph with partitions (from Louvain algo), Bridgeness uniformly
filters inter-communities links. filters inter-communities links.
TODO rewrite Bridgeness with "equivalence structurale" metrics (Confluence) TODO rewrite Bridgeness with "equivalence structurale" metrics (Confluence)
TODO use Map LouvainNodeId (Map LouvainNodeId) TODO use Map LouvainNodeId (Map LouvainNodeId)
-} -}
module Gargantext.Core.Viz.Graph.Bridgeness (bridgeness) module Gargantext.Core.Viz.Graph.Bridgeness -- (bridgeness)
where where
import Data.Graph.Clustering.Louvain.Utils (LouvainNode(..))
import Data.List (concat, sortOn)
import Data.Map (Map, fromListWith, lookup, toList, mapWithKey, elems)
import Data.Maybe (catMaybes)
import Data.Ord (Down(..)) import Data.Ord (Down(..))
import Gargantext.Prelude import Gargantext.Prelude
import Data.Map (Map, fromListWith, lookup, toList, mapWithKey, elems)
import qualified Data.Map as DM import qualified Data.Map as DM
import Data.Maybe (catMaybes) import Gargantext.Core.Viz.Graph.Tools.IGraph (ClusterNode(..))
import Data.List (concat, sortOn)
import Data.Graph.Clustering.Louvain.Utils (LouvainNode(..))
import Gargantext.Core.Methods.Graph.Louvain (LouvainNodeId, CommunityId, nodeId2comId)
----------------------------------------------------------------------
type Partitions a = Map (Int, Int) Double -> IO [a]
----------------------------------------------------------------------
class ToComId a where
nodeId2comId :: a -> (NodeId,CommunityId)
type NodeId = Int
type CommunityId = Int
----------------------------------------------------------------------
instance ToComId LouvainNode where
nodeId2comId (LouvainNode i1 i2) = (i1, i2)
instance ToComId ClusterNode where
nodeId2comId (ClusterNode i1 i2) = (i1, i2)
----------------------------------------------------------------------
----------------------------------------------------------------------
type Bridgeness = Double type Bridgeness = Double
bridgeness :: Bridgeness bridgeness :: ToComId a => Bridgeness
-> [LouvainNode] -> [a]
-> Map (LouvainNodeId, LouvainNodeId) Double -> Map (NodeId, NodeId) Double
-> Map (LouvainNodeId, LouvainNodeId) Double -> Map (NodeId, NodeId) Double
bridgeness b ns = DM.fromList bridgeness = bridgeness' nodeId2comId
. concat
. DM.elems
. filterComs b bridgeness' :: (a -> (Int, Int))
. groupEdges (nodeId2comId ns) -> Bridgeness
-> [a]
groupEdges :: Map LouvainNodeId CommunityId -> Map (Int, Int) Double
-> Map (LouvainNodeId, LouvainNodeId) Double -> Map (Int, Int) Double
-> Map (CommunityId, CommunityId) [((LouvainNodeId, LouvainNodeId), Double)] bridgeness' f b ns = DM.fromList
. concat
. DM.elems
. filterComs b
. groupEdges (DM.fromList $ map f ns)
groupEdges :: (Ord a, Ord b1)
=> Map b1 a
-> Map (b1, b1) b2
-> Map (a, a) [((b1, b1), b2)]
groupEdges m = fromListWith (<>) groupEdges m = fromListWith (<>)
. catMaybes . catMaybes
. map (\((n1,n2), d) . map (\((n1,n2), d)
...@@ -55,9 +81,11 @@ groupEdges m = fromListWith (<>) ...@@ -55,9 +81,11 @@ groupEdges m = fromListWith (<>)
. toList . toList
-- | TODO : sortOn Confluence -- | TODO : sortOn Confluence
filterComs :: Bridgeness
-> Map (CommunityId, CommunityId) [((LouvainNodeId, LouvainNodeId), Double)] filterComs :: (Ord n1, Eq n2)
-> Map (CommunityId, CommunityId) [((LouvainNodeId, LouvainNodeId), Double)] => p
-> Map (n2, n2) [(a3, n1)]
-> Map (n2, n2) [(a3, n1)]
filterComs _b m = DM.filter (\n -> length n > 0) $ mapWithKey filter' m filterComs _b m = DM.filter (\n -> length n > 0) $ mapWithKey filter' m
where where
filter' (c1,c2) a filter' (c1,c2) a
......
...@@ -80,7 +80,8 @@ fromIndex :: Ord t => Map Index t -> Map (Index, Index) a -> Map (t,t) a ...@@ -80,7 +80,8 @@ fromIndex :: Ord t => Map Index t -> Map (Index, Index) a -> Map (t,t) a
fromIndex ni ns = indexConversion ni ns fromIndex ni ns = indexConversion ni ns
indexConversion :: (Ord b, Ord k) => Map k b -> Map (k,k) a -> Map (b, b) a indexConversion :: (Ord b, Ord k) => Map k b -> Map (k,k) a -> Map (b, b) a
indexConversion index ms = M.fromList $ map (\((k1,k2),c) -> ( ((M.!) index k1, (M.!) index k2), c)) (M.toList ms) indexConversion index ms = M.fromList
$ map (\((k1,k2),c) -> ( ((M.!) index k1, (M.!) index k2), c)) (M.toList ms)
--------------------------------------------------------------------------------- ---------------------------------------------------------------------------------
------------------------------------------------------------------------------- -------------------------------------------------------------------------------
......
...@@ -9,13 +9,11 @@ Portability : POSIX ...@@ -9,13 +9,11 @@ Portability : POSIX
-} -}
module Gargantext.Core.Viz.Graph.Tools module Gargantext.Core.Viz.Graph.Tools
where where
-- import Data.Graph.Clustering.Louvain (hLouvain, {-iLouvainMap-}) -- import Data.Graph.Clustering.Louvain (hLouvain, {-iLouvainMap-})
import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain) import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
import Data.Graph.Clustering.Louvain.Utils (LouvainNode(..))
import Data.Map (Map) import Data.Map (Map)
import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict (HashMap)
import Data.Text (Text) import Data.Text (Text)
...@@ -26,8 +24,8 @@ import Gargantext.Core.Methods.Distances (Distance(..), measure) ...@@ -26,8 +24,8 @@ import Gargantext.Core.Methods.Distances (Distance(..), measure)
import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence) import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
import Gargantext.Core.Statistics import Gargantext.Core.Statistics
import Gargantext.Core.Viz.Graph import Gargantext.Core.Viz.Graph
import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness) import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges) import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index) import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index)
import Gargantext.Prelude import Gargantext.Prelude
import IGraph.Random -- (Gen(..)) import IGraph.Random -- (Gen(..))
...@@ -54,12 +52,24 @@ cooc2graph' distance threshold myCooc = distanceMap ...@@ -54,12 +52,24 @@ cooc2graph' distance threshold myCooc = distanceMap
distanceMat = measure distance matCooc distanceMat = measure distance matCooc
distanceMap = Map.filter (> threshold) $ mat2map distanceMat distanceMap = Map.filter (> threshold) $ mat2map distanceMat
data PartitionMethod = Louvain | Spinglass
cooc2graphWith :: PartitionMethod
-> Distance
-> Threshold
-> HashMap (NgramsTerm, NgramsTerm) Int
-> IO Graph
cooc2graphWith Louvain = cooc2graphWith' (cLouvain "1")
cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
cooc2graph :: Distance
-> Threshold cooc2graphWith' :: ToComId a
-> HashMap (NgramsTerm, NgramsTerm) Int => Partitions a
-> IO Graph -> Distance
cooc2graph distance threshold myCooc = do -> Threshold
-> HashMap (NgramsTerm, NgramsTerm) Int
-> IO Graph
cooc2graphWith' doPartitions distance threshold myCooc = do
printDebug "cooc2graph" distance printDebug "cooc2graph" distance
let let
-- TODO remove below -- TODO remove below
...@@ -79,12 +89,13 @@ cooc2graph distance threshold myCooc = do ...@@ -79,12 +89,13 @@ cooc2graph distance threshold myCooc = do
n' = Set.size $ Set.fromList $ as <> bs n' = Set.size $ Set.fromList $ as <> bs
ClustersParams rivers _level = clustersParams nodesApprox ClustersParams rivers _level = clustersParams nodesApprox
printDebug "Start" ("partitions" :: Text)
partitions <- if (Map.size distanceMap > 0) partitions <- if (Map.size distanceMap > 0)
-- then iLouvainMap 100 10 distanceMap -- then iLouvainMap 100 10 distanceMap
-- then hLouvain distanceMap -- then hLouvain distanceMap
then cLouvain "1" distanceMap then doPartitions distanceMap
else panic "Text.Flow: DistanceMap is empty" else panic "Text.Flow: DistanceMap is empty"
printDebug "End" ("partitions" :: Text)
let let
-- bridgeness' = distanceMap -- bridgeness' = distanceMap
...@@ -92,10 +103,11 @@ cooc2graph distance threshold myCooc = do ...@@ -92,10 +103,11 @@ cooc2graph distance threshold myCooc = do
$ bridgeness rivers partitions distanceMap $ bridgeness rivers partitions distanceMap
confluence' = confluence (Map.keys bridgeness') 3 True False confluence' = confluence (Map.keys bridgeness') 3 True False
pure $ data2graph (Map.toList $ Map.mapKeys unNgramsTerm ti) myCooc' bridgeness' confluence' partitions pure $ data2graph (Map.toList $ Map.mapKeys unNgramsTerm ti)
myCooc' bridgeness' confluence' partitions
------------------------------------------------------------------------
------------------------------------------------------------------------
data ClustersParams = ClustersParams { bridgness :: Double data ClustersParams = ClustersParams { bridgness :: Double
, louvain :: Text , louvain :: Text
} deriving (Show) } deriving (Show)
...@@ -112,16 +124,17 @@ clustersParams x = ClustersParams (fromIntegral x) "0.00000001" -- y ...@@ -112,16 +124,17 @@ clustersParams x = ClustersParams (fromIntegral x) "0.00000001" -- y
---------------------------------------------------------- ----------------------------------------------------------
-- | From data to Graph -- | From data to Graph
data2graph :: [(Text, Int)] data2graph :: ToComId a
=> [(Text, Int)]
-> Map (Int, Int) Int -> Map (Int, Int) Int
-> Map (Int, Int) Double -> Map (Int, Int) Double
-> Map (Int, Int) Double -> Map (Int, Int) Double
-> [LouvainNode] -> [a]
-> Graph -> Graph
data2graph labels coocs bridge conf partitions = Graph nodes edges Nothing data2graph labels coocs bridge conf partitions = Graph nodes edges Nothing
where where
community_id_by_node_id = Map.fromList [ (n, c) | LouvainNode n c <- partitions ] community_id_by_node_id = Map.fromList $ map nodeId2comId partitions
nodes = map (setCoord ForceAtlas labels bridge) nodes = map (setCoord ForceAtlas labels bridge)
[ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) coocs) [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) coocs)
...@@ -146,8 +159,11 @@ data2graph labels coocs bridge conf partitions = Graph nodes edges Nothing ...@@ -146,8 +159,11 @@ data2graph labels coocs bridge conf partitions = Graph nodes edges Nothing
, edge_weight = d , edge_weight = d
, edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
-- , edge_confluence = maybe (panic "E: data2graph edges") identity $ Map.lookup (s,t) conf -- , edge_confluence = maybe (panic "E: data2graph edges") identity $ Map.lookup (s,t) conf
, edge_id = cs (show i) } , edge_id = cs (show i)
| (i, ((s,t), d)) <- zip ([0..]::[Integer]) (Map.toList bridge), s /= t, d > 0 }
| (i, ((s,t), d)) <- zip ([0..]::[Integer] )
(Map.toList bridge)
, s /= t, d > 0
] ]
......
...@@ -19,11 +19,13 @@ import Data.Serialize ...@@ -19,11 +19,13 @@ import Data.Serialize
import Data.Singletons (SingI) import Data.Singletons (SingI)
import IGraph hiding (mkGraph, neighbors, edges, nodes, Node, Graph) import IGraph hiding (mkGraph, neighbors, edges, nodes, Node, Graph)
import Protolude import Protolude
import Gargantext.Core.Viz.Graph.Index
import qualified Data.List as List import qualified Data.List as List
import qualified IGraph as IG import qualified IGraph as IG
import qualified IGraph.Algorithms.Clique as IG import qualified IGraph.Algorithms.Clique as IG
import qualified IGraph.Algorithms.Community as IG import qualified IGraph.Algorithms.Community as IG
import qualified IGraph.Random as IG import qualified IGraph.Random as IG
import qualified Data.Map as Map
------------------------------------------------------------------ ------------------------------------------------------------------
-- | Main Types -- | Main Types
...@@ -55,21 +57,39 @@ maximalCliques g = IG.maximalCliques g (min',max') ...@@ -55,21 +57,39 @@ maximalCliques g = IG.maximalCliques g (min',max')
------------------------------------------------------------------ ------------------------------------------------------------------
type Seed = Int type Seed = Int
spinglass :: Seed -> Map (Int, Int) Double -> IO [ClusterNode]
spinglass s g = toClusterNode
<$> map catMaybes
<$> map (map (\n -> Map.lookup n fromI))
<$> partitions_spinglass' s g''
where
g'' = mkGraphUfromEdges (Map.keys g')
(toI, fromI) = createIndices g
g' = toIndex toI g
-- | Tools to analyze graphs -- | Tools to analyze graphs
partitions_spinglass :: (Serialize v, Serialize e) partitions_spinglass' :: (Serialize v, Serialize e)
=> Seed -> IG.Graph 'U v e -> IO [[Int]] => Seed -> IG.Graph 'U v e -> IO [[Int]]
partitions_spinglass s g = do partitions_spinglass' s g = do
gen <- IG.withSeed s pure gen <- IG.withSeed s pure
pure $ IG.findCommunity g Nothing Nothing IG.spinglass gen pure $ IG.findCommunity g Nothing Nothing IG.spinglass gen
------------------------------------------------------------------
data ClusterNode = ClusterNode { cl_node_id :: Int
, cl_community_id :: Int
}
toClusterNode :: [[Int]] -> [ClusterNode]
toClusterNode ns = List.concat
$ map (\(cId, ns') -> map (\n -> ClusterNode n cId) ns')
$ List.zip [1..] ns
------------------------------------------------------------------
mkGraph :: (SingI d, Ord v, mkGraph :: (SingI d, Ord v,
Serialize v, Serialize e) => Serialize v, Serialize e) =>
[v] -> [LEdge e] -> IG.Graph d v e [v] -> [LEdge e] -> IG.Graph d v e
mkGraph = IG.mkGraph mkGraph = IG.mkGraph
------------------------------------------------------------------ ------------------------------------------------------------------
mkGraphUfromEdges :: [(Int, Int)] -> Graph_Undirected mkGraphUfromEdges :: [(Int, Int)] -> Graph_Undirected
mkGraphUfromEdges es = mkGraph (List.replicate n ()) $ zip es $ repeat () mkGraphUfromEdges es = mkGraph (List.replicate n ()) $ zip es $ repeat ()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment