Commit d51bbdff authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FEAT] new clustering algo by default (Spinglass)

parent 05a29380
{-|
Module : Gargantext.Core.Viz.Graph.Louvain
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Methods.Graph.Louvain
where
import Gargantext.Prelude
import Data.Map (Map, fromList)
import Data.Graph.Clustering.Louvain.Utils (LouvainNode(..))
type LouvainNodeId = Int
type CommunityId = Int
nodeId2comId :: [LouvainNode] -> Map LouvainNodeId CommunityId
nodeId2comId ns = fromList [(nId,cId) | LouvainNode nId cId <- ns]
comId2nodeId :: [LouvainNode] -> Map CommunityId LouvainNodeId
comId2nodeId ns = fromList [(cId,nId) | LouvainNode nId cId <- ns]
......@@ -154,7 +154,7 @@ computeGraph cId d nt repo = do
<$> groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)
graph <- liftBase $ cooc2graph d 0 myCooc
graph <- liftBase $ cooc2graphWith Spinglass d 0 myCooc
pure graph
......
......@@ -11,39 +11,65 @@ Let be a graph with partitions (from Louvain algo), Bridgeness uniformly
filters inter-communities links.
TODO rewrite Bridgeness with "equivalence structurale" metrics (Confluence)
TODO use Map LouvainNodeId (Map LouvainNodeId)
-}
module Gargantext.Core.Viz.Graph.Bridgeness (bridgeness)
module Gargantext.Core.Viz.Graph.Bridgeness -- (bridgeness)
where
import Data.Graph.Clustering.Louvain.Utils (LouvainNode(..))
import Data.List (concat, sortOn)
import Data.Map (Map, fromListWith, lookup, toList, mapWithKey, elems)
import Data.Maybe (catMaybes)
import Data.Ord (Down(..))
import Gargantext.Prelude
import Data.Map (Map, fromListWith, lookup, toList, mapWithKey, elems)
import qualified Data.Map as DM
import Data.Maybe (catMaybes)
import Data.List (concat, sortOn)
import Data.Graph.Clustering.Louvain.Utils (LouvainNode(..))
import Gargantext.Core.Methods.Graph.Louvain (LouvainNodeId, CommunityId, nodeId2comId)
import Gargantext.Core.Viz.Graph.Tools.IGraph (ClusterNode(..))
----------------------------------------------------------------------
type Partitions a = Map (Int, Int) Double -> IO [a]
----------------------------------------------------------------------
class ToComId a where
nodeId2comId :: a -> (NodeId,CommunityId)
type NodeId = Int
type CommunityId = Int
----------------------------------------------------------------------
instance ToComId LouvainNode where
nodeId2comId (LouvainNode i1 i2) = (i1, i2)
instance ToComId ClusterNode where
nodeId2comId (ClusterNode i1 i2) = (i1, i2)
----------------------------------------------------------------------
----------------------------------------------------------------------
type Bridgeness = Double
bridgeness :: Bridgeness
-> [LouvainNode]
-> Map (LouvainNodeId, LouvainNodeId) Double
-> Map (LouvainNodeId, LouvainNodeId) Double
bridgeness b ns = DM.fromList
. concat
. DM.elems
. filterComs b
. groupEdges (nodeId2comId ns)
groupEdges :: Map LouvainNodeId CommunityId
-> Map (LouvainNodeId, LouvainNodeId) Double
-> Map (CommunityId, CommunityId) [((LouvainNodeId, LouvainNodeId), Double)]
bridgeness :: ToComId a => Bridgeness
-> [a]
-> Map (NodeId, NodeId) Double
-> Map (NodeId, NodeId) Double
bridgeness = bridgeness' nodeId2comId
bridgeness' :: (a -> (Int, Int))
-> Bridgeness
-> [a]
-> Map (Int, Int) Double
-> Map (Int, Int) Double
bridgeness' f b ns = DM.fromList
. concat
. DM.elems
. filterComs b
. groupEdges (DM.fromList $ map f ns)
groupEdges :: (Ord a, Ord b1)
=> Map b1 a
-> Map (b1, b1) b2
-> Map (a, a) [((b1, b1), b2)]
groupEdges m = fromListWith (<>)
. catMaybes
. map (\((n1,n2), d)
......@@ -55,9 +81,11 @@ groupEdges m = fromListWith (<>)
. toList
-- | TODO : sortOn Confluence
filterComs :: Bridgeness
-> Map (CommunityId, CommunityId) [((LouvainNodeId, LouvainNodeId), Double)]
-> Map (CommunityId, CommunityId) [((LouvainNodeId, LouvainNodeId), Double)]
filterComs :: (Ord n1, Eq n2)
=> p
-> Map (n2, n2) [(a3, n1)]
-> Map (n2, n2) [(a3, n1)]
filterComs _b m = DM.filter (\n -> length n > 0) $ mapWithKey filter' m
where
filter' (c1,c2) a
......
......@@ -80,7 +80,8 @@ fromIndex :: Ord t => Map Index t -> Map (Index, Index) a -> Map (t,t) a
fromIndex ni ns = indexConversion ni ns
indexConversion :: (Ord b, Ord k) => Map k b -> Map (k,k) a -> Map (b, b) a
indexConversion index ms = M.fromList $ map (\((k1,k2),c) -> ( ((M.!) index k1, (M.!) index k2), c)) (M.toList ms)
indexConversion index ms = M.fromList
$ map (\((k1,k2),c) -> ( ((M.!) index k1, (M.!) index k2), c)) (M.toList ms)
---------------------------------------------------------------------------------
-------------------------------------------------------------------------------
......
......@@ -9,13 +9,11 @@ Portability : POSIX
-}
module Gargantext.Core.Viz.Graph.Tools
where
-- import Data.Graph.Clustering.Louvain (hLouvain, {-iLouvainMap-})
import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
import Data.Graph.Clustering.Louvain.Utils (LouvainNode(..))
import Data.Map (Map)
import Data.HashMap.Strict (HashMap)
import Data.Text (Text)
......@@ -26,8 +24,8 @@ import Gargantext.Core.Methods.Distances (Distance(..), measure)
import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
import Gargantext.Core.Statistics
import Gargantext.Core.Viz.Graph
import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness)
import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges)
import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index)
import Gargantext.Prelude
import IGraph.Random -- (Gen(..))
......@@ -54,12 +52,24 @@ cooc2graph' distance threshold myCooc = distanceMap
distanceMat = measure distance matCooc
distanceMap = Map.filter (> threshold) $ mat2map distanceMat
data PartitionMethod = Louvain | Spinglass
cooc2graphWith :: PartitionMethod
-> Distance
-> Threshold
-> HashMap (NgramsTerm, NgramsTerm) Int
-> IO Graph
cooc2graphWith Louvain = cooc2graphWith' (cLouvain "1")
cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
cooc2graph :: Distance
-> Threshold
-> HashMap (NgramsTerm, NgramsTerm) Int
-> IO Graph
cooc2graph distance threshold myCooc = do
cooc2graphWith' :: ToComId a
=> Partitions a
-> Distance
-> Threshold
-> HashMap (NgramsTerm, NgramsTerm) Int
-> IO Graph
cooc2graphWith' doPartitions distance threshold myCooc = do
printDebug "cooc2graph" distance
let
-- TODO remove below
......@@ -79,12 +89,13 @@ cooc2graph distance threshold myCooc = do
n' = Set.size $ Set.fromList $ as <> bs
ClustersParams rivers _level = clustersParams nodesApprox
printDebug "Start" ("partitions" :: Text)
partitions <- if (Map.size distanceMap > 0)
-- then iLouvainMap 100 10 distanceMap
-- then hLouvain distanceMap
then cLouvain "1" distanceMap
then doPartitions distanceMap
else panic "Text.Flow: DistanceMap is empty"
printDebug "End" ("partitions" :: Text)
let
-- bridgeness' = distanceMap
......@@ -92,10 +103,11 @@ cooc2graph distance threshold myCooc = do
$ bridgeness rivers partitions distanceMap
confluence' = confluence (Map.keys bridgeness') 3 True False
pure $ data2graph (Map.toList $ Map.mapKeys unNgramsTerm ti) myCooc' bridgeness' confluence' partitions
pure $ data2graph (Map.toList $ Map.mapKeys unNgramsTerm ti)
myCooc' bridgeness' confluence' partitions
------------------------------------------------------------------------
------------------------------------------------------------------------
data ClustersParams = ClustersParams { bridgness :: Double
, louvain :: Text
} deriving (Show)
......@@ -112,16 +124,17 @@ clustersParams x = ClustersParams (fromIntegral x) "0.00000001" -- y
----------------------------------------------------------
-- | From data to Graph
data2graph :: [(Text, Int)]
data2graph :: ToComId a
=> [(Text, Int)]
-> Map (Int, Int) Int
-> Map (Int, Int) Double
-> Map (Int, Int) Double
-> [LouvainNode]
-> [a]
-> Graph
data2graph labels coocs bridge conf partitions = Graph nodes edges Nothing
where
community_id_by_node_id = Map.fromList [ (n, c) | LouvainNode n c <- partitions ]
community_id_by_node_id = Map.fromList $ map nodeId2comId partitions
nodes = map (setCoord ForceAtlas labels bridge)
[ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) coocs)
......@@ -146,8 +159,11 @@ data2graph labels coocs bridge conf partitions = Graph nodes edges Nothing
, edge_weight = d
, edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
-- , edge_confluence = maybe (panic "E: data2graph edges") identity $ Map.lookup (s,t) conf
, edge_id = cs (show i) }
| (i, ((s,t), d)) <- zip ([0..]::[Integer]) (Map.toList bridge), s /= t, d > 0
, edge_id = cs (show i)
}
| (i, ((s,t), d)) <- zip ([0..]::[Integer] )
(Map.toList bridge)
, s /= t, d > 0
]
......
......@@ -19,11 +19,13 @@ import Data.Serialize
import Data.Singletons (SingI)
import IGraph hiding (mkGraph, neighbors, edges, nodes, Node, Graph)
import Protolude
import Gargantext.Core.Viz.Graph.Index
import qualified Data.List as List
import qualified IGraph as IG
import qualified IGraph.Algorithms.Clique as IG
import qualified IGraph.Algorithms.Community as IG
import qualified IGraph.Random as IG
import qualified Data.Map as Map
------------------------------------------------------------------
-- | Main Types
......@@ -55,21 +57,39 @@ maximalCliques g = IG.maximalCliques g (min',max')
------------------------------------------------------------------
type Seed = Int
spinglass :: Seed -> Map (Int, Int) Double -> IO [ClusterNode]
spinglass s g = toClusterNode
<$> map catMaybes
<$> map (map (\n -> Map.lookup n fromI))
<$> partitions_spinglass' s g''
where
g'' = mkGraphUfromEdges (Map.keys g')
(toI, fromI) = createIndices g
g' = toIndex toI g
-- | Tools to analyze graphs
partitions_spinglass :: (Serialize v, Serialize e)
partitions_spinglass' :: (Serialize v, Serialize e)
=> Seed -> IG.Graph 'U v e -> IO [[Int]]
partitions_spinglass s g = do
partitions_spinglass' s g = do
gen <- IG.withSeed s pure
pure $ IG.findCommunity g Nothing Nothing IG.spinglass gen
------------------------------------------------------------------
data ClusterNode = ClusterNode { cl_node_id :: Int
, cl_community_id :: Int
}
toClusterNode :: [[Int]] -> [ClusterNode]
toClusterNode ns = List.concat
$ map (\(cId, ns') -> map (\n -> ClusterNode n cId) ns')
$ List.zip [1..] ns
------------------------------------------------------------------
mkGraph :: (SingI d, Ord v,
Serialize v, Serialize e) =>
[v] -> [LEdge e] -> IG.Graph d v e
mkGraph = IG.mkGraph
------------------------------------------------------------------
mkGraphUfromEdges :: [(Int, Int)] -> Graph_Undirected
mkGraphUfromEdges es = mkGraph (List.replicate n ()) $ zip es $ repeat ()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment