Commit 03cac30c authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FEAT] Clustering with more granularity and structure

parent 3ccd26ab
...@@ -31,6 +31,7 @@ import Data.Tuple.Extra (swap) ...@@ -31,6 +31,7 @@ import Data.Tuple.Extra (swap)
import Debug.Trace (trace) import Debug.Trace (trace)
import Gargantext.Core.Methods.Similarities (Similarity(..)) import Gargantext.Core.Methods.Similarities (Similarity(..))
import Gargantext.Prelude import Gargantext.Prelude
import Prelude (pi)
import Graph.Types (ClusterNode(..)) import Graph.Types (ClusterNode(..))
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Map.Strict as Map import qualified Data.Map.Strict as Map
...@@ -40,6 +41,7 @@ import qualified Data.IntMap as Dico ...@@ -40,6 +41,7 @@ import qualified Data.IntMap as Dico
---------------------------------------------------------------------- ----------------------------------------------------------------------
type Partitions = Map (Int, Int) Double -> IO [ClusterNode] type Partitions = Map (Int, Int) Double -> IO [ClusterNode]
type Partitions' = Map (Int, Int) Double -> IO [Set NodeId]
---------------------------------------------------------------------- ----------------------------------------------------------------------
nodeId2comId :: ClusterNode -> (NodeId, CommunityId) nodeId2comId :: ClusterNode -> (NodeId, CommunityId)
nodeId2comId (ClusterNode i1 i2) = (i1, i2) nodeId2comId (ClusterNode i1 i2) = (i1, i2)
...@@ -48,9 +50,24 @@ type NodeId = Int ...@@ -48,9 +50,24 @@ type NodeId = Int
type CommunityId = Int type CommunityId = Int
---------------------------------------------------------------------- ----------------------------------------------------------------------
---------------------------------------------------------------------- -- recursiveClustering : to get more granularity of a given clustering
-- recursiveClustering : get get more granularity of a given clustering
-- tested with spinglass clustering only (WIP) -- tested with spinglass clustering only (WIP)
recursiveClustering' :: Partitions' -> Map (Int, Int) Double -> IO [[Set NodeId]]
recursiveClustering' f mp = do
let
n :: Double
n = fromIntegral $ Set.size
$ Set.unions $ List.concat
$ map (\(k1,k2) -> map Set.singleton [k1, k2])
$ Map.keys mp
t :: Int
t = round $ 2 * n / sqrt n
ss <- f mp
mapM (\s -> if Set.size s > t then f (removeNodes s mp) else pure [s]) ss
----------------------------------------------------------------------
recursiveClustering :: Partitions -> Map (Int, Int) Double -> IO [ClusterNode] recursiveClustering :: Partitions -> Map (Int, Int) Double -> IO [ClusterNode]
recursiveClustering f mp = do recursiveClustering f mp = do
let let
...@@ -61,18 +78,23 @@ recursiveClustering f mp = do ...@@ -61,18 +78,23 @@ recursiveClustering f mp = do
$ Map.keys mp $ Map.keys mp
t :: Int t :: Int
t = round $ (n / 2) * (sqrt n) / 100 t = round $ 2 * n / sqrt n
(toSplit,others) <- List.span (\a -> Set.size a > t) <$> clusterNodes2sets <$> f mp (toSplit,others) <- List.span (\a -> Set.size a > t) <$> clusterNodes2sets <$> f mp
cls' <- mapM f $ map (\s -> removeNodes s mp) toSplit cls' <- mapM f $ map (\s -> removeNodes s mp) toSplit
pure $ setNodes2clusterNodes $ others <> (List.concat $ map clusterNodes2sets cls') pure $ setNodes2clusterNodes $ others <> (List.concat $ map clusterNodes2sets cls')
----------------------------------------------------------------------
setNodes2clusterNodes :: [Set NodeId] -> [ClusterNode] setNodes2clusterNodes :: [Set NodeId] -> [ClusterNode]
setNodes2clusterNodes ns = List.concat $ map (\(n,ns') -> toCluster n ns') $ zip [1..] ns setNodes2clusterNodes ns = List.concat $ map (\(n,ns') -> toCluster n ns') $ zip [1..] ns
where where
toCluster :: CommunityId -> Set NodeId -> [ClusterNode] toCluster :: CommunityId -> Set NodeId -> [ClusterNode]
toCluster cId setNodeId = map (\n -> ClusterNode n cId) (Set.toList setNodeId) toCluster cId setNodeId = map (\n -> ClusterNode n cId) (Set.toList setNodeId)
clusterNodes2map :: [ClusterNode] -> Map NodeId Int
clusterNodes2map = Map.fromList . map (\(ClusterNode nId cId) -> (nId, cId))
removeNodes :: Set NodeId removeNodes :: Set NodeId
-> Map (NodeId, NodeId) Double -> Map (NodeId, NodeId) Double
-> Map (NodeId, NodeId) Double -> Map (NodeId, NodeId) Double
...@@ -84,7 +106,6 @@ clusterNodes2sets = Dico.elems ...@@ -84,7 +106,6 @@ clusterNodes2sets = Dico.elems
. Dico.fromListWith (<>) . Dico.fromListWith (<>)
. (map ((Tuple.second Set.singleton) . swap . nodeId2comId)) . (map ((Tuple.second Set.singleton) . swap . nodeId2comId))
----------------------------------------------------------------------
---------------------------------------------------------------------- ----------------------------------------------------------------------
data Bridgeness = Bridgeness_Basic { bridgeness_partitions :: [ClusterNode] data Bridgeness = Bridgeness_Basic { bridgeness_partitions :: [ClusterNode]
, bridgeness_filter :: Double , bridgeness_filter :: Double
...@@ -92,48 +113,51 @@ data Bridgeness = Bridgeness_Basic { bridgeness_partitions :: [ClusterNode] ...@@ -92,48 +113,51 @@ data Bridgeness = Bridgeness_Basic { bridgeness_partitions :: [ClusterNode]
| Bridgeness_Advanced { bridgeness_similarity :: Similarity | Bridgeness_Advanced { bridgeness_similarity :: Similarity
, bridgness_confluence :: Confluence , bridgness_confluence :: Confluence
} }
| Bridgeness_Recursive { br_partitions :: [[Set NodeId]]
, br_filter :: Double
}
type Confluence = Map (NodeId, NodeId) Double
type Confluence = Map (NodeId, NodeId) Double
-- Filter Links between the Clusters
-- Links: Map (NodeId, NodeId) Double
-- List of Clusters: [Set NodeId]
bridgeness :: Bridgeness bridgeness :: Bridgeness
-> Map (NodeId, NodeId) Double -> Map (NodeId, NodeId) Double
-> Map (NodeId, NodeId) Double -> Map (NodeId, NodeId) Double
bridgeness (Bridgeness_Recursive sn f) m =
Map.unions $ [linksBetween] <> map (\s -> bridgeness (Bridgeness_Basic (setNodes2clusterNodes s) f) m') sn
where
(linksBetween, m') = Map.partitionWithKey (\(n1,n2) _v -> Map.lookup n1 mapNodeIdClusterId
/= Map.lookup n2 mapNodeIdClusterId
) $ bridgeness (Bridgeness_Basic clusters (pi*f)) m
clusters = setNodes2clusterNodes (map Set.unions sn)
mapNodeIdClusterId = clusterNodes2map clusters
bridgeness (Bridgeness_Advanced sim c) m = Map.fromList bridgeness (Bridgeness_Advanced sim c) m = Map.fromList
$ List.filter (\x -> if sim == Conditional then snd x > 0.2 else snd x > 0.02) $ List.filter (\x -> if sim == Conditional then snd x > 0.2 else snd x > 0.02)
$ map (\(ks, (v1,_v2)) -> (ks,v1)) $ map (\(ks, (v1,_v2)) -> (ks,v1))
-- $ List.take (if sim == Conditional then 2*n else 3*n) -- $ List.take (if sim == Conditional then 2*n else 3*n)
-- $ List.sortOn (Down . (snd . snd)) -- $ List.sortOn (Down . (snd . snd))
$ Map.toList $ Map.toList
$ trace ("bridgeness3 m c" <> show (m,c)) -- $ trace ("bridgeness3 m c" <> show (m,c))
$ Map.intersectionWithKey $ Map.intersectionWithKey
(\k v1 v2 -> trace ("intersectionWithKey " <> (show (k, v1, v2))) (v1, v2)) m c (\k v1 v2 -> trace ("intersectionWithKey " <> (show (k, v1, v2))) (v1, v2)) m c
{-
where
!m' = Map.toList m
n :: Int
!n = trace ("bridgeness m size: " <> (show $ List.length m'))
$ round
$ (fromIntegral $ List.length m') / (log $ fromIntegral nodesNumber :: Double)
nodesNumber :: Int
nodesNumber = Set.size $ Set.fromList $ as <> bs
where
(as, bs) = List.unzip $ Map.keys m
-}
bridgeness (Bridgeness_Basic ns b) m = Map.fromList bridgeness (Bridgeness_Basic ns b) m = Map.fromList
$ List.concat $ List.concat
$ Map.elems $ Map.elems
$ filterComs b $ filterComs (round b)
$ groupEdges (Map.fromList $ map nodeId2comId ns) m $ groupEdges (Map.fromList $ map nodeId2comId ns) m
groupEdges :: (Ord a, Ord b1)
=> Map b1 a groupEdges :: (Ord comId, Ord nodeId)
-> Map (b1, b1) b2 => Map nodeId comId
-> Map (a, a) [((b1, b1), b2)] -> Map (nodeId, nodeId) value
-> Map (comId, comId) [((nodeId, nodeId), value)]
groupEdges m = fromListWith (<>) groupEdges m = fromListWith (<>)
. catMaybes . catMaybes
. map (\((n1,n2), d) . map (\((n1,n2), d)
...@@ -144,17 +168,16 @@ groupEdges m = fromListWith (<>) ...@@ -144,17 +168,16 @@ groupEdges m = fromListWith (<>)
) )
. toList . toList
-- | TODO : sortOn Confluence
filterComs :: (Ord n1, Eq n2) filterComs :: (Ord n1, Eq n2)
=> p => Int
-> Map (n2, n2) [(a3, n1)] -> Map (n2, n2) [(a3, n1)]
-> Map (n2, n2) [(a3, n1)] -> Map (n2, n2) [(a3, n1)]
filterComs _b m = Map.filter (\n -> length n > 0) $ mapWithKey filter' m filterComs b m = Map.filter (\n -> length n > 0) $ mapWithKey filter' m
where where
filter' (c1,c2) a filter' (c1,c2) a
| c1 == c2 = a | c1 == c2 = a
-- TODO use n here -- TODO use n here
| otherwise = take (2*n) $ List.sortOn (Down . snd) a | otherwise = take (b * 2*n) $ List.sortOn (Down . snd) a
where where
n :: Int n :: Int
n = round $ 100 * a' / t n = round $ 100 * a' / t
......
...@@ -25,9 +25,9 @@ import Gargantext.API.Ngrams.Types (NgramsTerm(..)) ...@@ -25,9 +25,9 @@ import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core.Methods.Similarities (Similarity(..), measure) import Gargantext.Core.Methods.Similarities (Similarity(..), measure)
-- import Gargantext.Core.Methods.Similarities.Conditional (conditional) -- import Gargantext.Core.Methods.Similarities.Conditional (conditional)
import Gargantext.Core.Statistics import Gargantext.Core.Statistics
import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Bridgeness(..), Partitions, nodeId2comId, recursiveClustering) import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Bridgeness(..), Partitions, nodeId2comId, recursiveClustering, recursiveClustering', setNodes2clusterNodes)
import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..)) import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass) import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass, spinglass')
import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap) import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
import Gargantext.Core.Viz.Graph.Types (Attributes(..), Edge(..), Graph(..), MultiPartite(..), Node(..), Partite(..), Strength(..)) import Gargantext.Core.Viz.Graph.Types (Attributes(..), Edge(..), Graph(..), MultiPartite(..), Node(..), Partite(..), Strength(..))
import Gargantext.Database.Schema.Ngrams (NgramsType(..)) import Gargantext.Database.Schema.Ngrams (NgramsType(..))
...@@ -109,7 +109,6 @@ cooc2graphWith Infomap = cooc2graphWith' (infomap "-v -N2") ...@@ -109,7 +109,6 @@ cooc2graphWith Infomap = cooc2graphWith' (infomap "-v -N2")
--cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2") --cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
-- TODO: change these options, or make them configurable in UI? -- TODO: change these options, or make them configurable in UI?
cooc2graphWith' :: Partitions cooc2graphWith' :: Partitions
-> BridgenessMethod -> BridgenessMethod
-> MultiPartite -> MultiPartite
...@@ -118,29 +117,13 @@ cooc2graphWith' :: Partitions ...@@ -118,29 +117,13 @@ cooc2graphWith' :: Partitions
-> Strength -> Strength
-> HashMap (NgramsTerm, NgramsTerm) Int -> HashMap (NgramsTerm, NgramsTerm) Int
-> IO Graph -> IO Graph
cooc2graphWith' doPartitions bridgenessMethod multi similarity threshold strength myCooc = do cooc2graphWith' _doPartitions _bridgenessMethod multi similarity@Conditional threshold strength myCooc = do
let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
distanceMap `seq` diag `seq` ti `seq` return () distanceMap `seq` diag `seq` ti `seq` return ()
--{- -- Debug
-- To Work with Igraph
saveAsFileDebug "/tmp/distanceMap" ( List.intercalate ";"
$ Set.toList
$ Set.fromList
$ map (\(k1,k2) -> if k1 < k2
then show (k1+1) <> " " <> show (k2+1)
else show (k2+1) <> " " <> show (k1+1)
)
$ Map.keys
$ Map.filter (>0.005) distanceMap
)
saveAsFileDebug "/tmp/distanceMap.data" distanceMap
saveAsFileDebug "/tmp/distanceMap.cooc" myCooc
-- printDebug "similarities" similarities
--}
partitions <- if (Map.size distanceMap > 0) partitions <- if (Map.size distanceMap > 0)
then recursiveClustering doPartitions distanceMap -- then recursiveClustering doPartitions distanceMap
then recursiveClustering' (spinglass' 1) distanceMap
else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty" else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
, "Maybe you should add more Map Terms in your list" , "Maybe you should add more Map Terms in your list"
, "Tutorial: TODO" , "Tutorial: TODO"
...@@ -149,12 +132,39 @@ cooc2graphWith' doPartitions bridgenessMethod multi similarity threshold strengt ...@@ -149,12 +132,39 @@ cooc2graphWith' doPartitions bridgenessMethod multi similarity threshold strengt
let let
!confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True !confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True
!bridgeness' = bridgeness (Bridgeness_Recursive partitions 1.0) distanceMap
{-
!bridgeness' = if bridgenessMethod == BridgenessMethod_Basic !bridgeness' = if bridgenessMethod == BridgenessMethod_Basic
then bridgeness (Bridgeness_Basic partitions 1.0) distanceMap then bridgeness (Bridgeness_Basic partitions 1.0) distanceMap
else bridgeness (Bridgeness_Advanced similarity confluence') distanceMap else bridgeness (Bridgeness_Advanced similarity confluence') distanceMap
-}
pure $ data2graph multi ti diag bridgeness' confluence' (setNodes2clusterNodes $ List.concat partitions)
cooc2graphWith' doPartitions bridgenessMethod multi Distributional threshold strength myCooc = do
let (distanceMap, diag, ti) = doSimilarityMap Distributional threshold strength myCooc
distanceMap `seq` diag `seq` ti `seq` return ()
partitions <- if (Map.size distanceMap > 0)
then recursiveClustering doPartitions distanceMap
else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
, "Maybe you should add more Map Terms in your list"
, "Tutorial: TODO"
]
length partitions `seq` return ()
let
!confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True
!bridgeness' = if bridgenessMethod == BridgenessMethod_Basic
then bridgeness (Bridgeness_Basic partitions 10.0) distanceMap
else bridgeness (Bridgeness_Advanced Distributional confluence') distanceMap
pure $ data2graph multi ti diag bridgeness' confluence' partitions pure $ data2graph multi ti diag bridgeness' confluence' partitions
type Reverse = Bool type Reverse = Bool
doSimilarityMap :: Similarity doSimilarityMap :: Similarity
......
...@@ -29,6 +29,7 @@ import qualified IGraph.Algorithms.Clique as IG ...@@ -29,6 +29,7 @@ import qualified IGraph.Algorithms.Clique as IG
import qualified IGraph.Algorithms.Community as IG import qualified IGraph.Algorithms.Community as IG
import qualified IGraph.Algorithms.Structure as IG import qualified IGraph.Algorithms.Structure as IG
import qualified IGraph.Random as IG import qualified IGraph.Random as IG
import qualified Data.Set as Set
------------------------------------------------------------------ ------------------------------------------------------------------
-- | Main Types -- | Main Types
...@@ -74,6 +75,23 @@ spinglass s g = toClusterNode ...@@ -74,6 +75,23 @@ spinglass s g = toClusterNode
(toI, fromI) = createIndices g (toI, fromI) = createIndices g
spinglass' :: Seed -> Map (Int, Int) Double -> IO [Set Int]
spinglass' s g = map Set.fromList
<$> map catMaybes
<$> map (map (\n -> Map.lookup n fromI))
<$> List.concat
<$> mapM (partitions_spinglass' s) g'
where
-- Not connected components of the graph make crash spinglass
g' = IG.decompose $ mkGraphUfromEdges
$ Map.keys
$ toIndex toI g
(toI, fromI) = createIndices g
-- | Tools to analyze graphs -- | Tools to analyze graphs
partitions_spinglass' :: (Serialize v, Serialize e) partitions_spinglass' :: (Serialize v, Serialize e)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment