Commit 03cac30c authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FEAT] Clustering with more granularity and structure

parent 3ccd26ab
......@@ -31,6 +31,7 @@ import Data.Tuple.Extra (swap)
import Debug.Trace (trace)
import Gargantext.Core.Methods.Similarities (Similarity(..))
import Gargantext.Prelude
import Prelude (pi)
import Graph.Types (ClusterNode(..))
import qualified Data.List as List
import qualified Data.Map.Strict as Map
......@@ -40,6 +41,7 @@ import qualified Data.IntMap as Dico
----------------------------------------------------------------------
type Partitions = Map (Int, Int) Double -> IO [ClusterNode]
type Partitions' = Map (Int, Int) Double -> IO [Set NodeId]
----------------------------------------------------------------------
nodeId2comId :: ClusterNode -> (NodeId, CommunityId)
nodeId2comId (ClusterNode i1 i2) = (i1, i2)
......@@ -48,9 +50,24 @@ type NodeId = Int
type CommunityId = Int
----------------------------------------------------------------------
----------------------------------------------------------------------
-- recursiveClustering : get get more granularity of a given clustering
-- recursiveClustering : to get more granularity of a given clustering
-- tested with spinglass clustering only (WIP)
recursiveClustering' :: Partitions' -> Map (Int, Int) Double -> IO [[Set NodeId]]
recursiveClustering' f mp = do
let
n :: Double
n = fromIntegral $ Set.size
$ Set.unions $ List.concat
$ map (\(k1,k2) -> map Set.singleton [k1, k2])
$ Map.keys mp
t :: Int
t = round $ 2 * n / sqrt n
ss <- f mp
mapM (\s -> if Set.size s > t then f (removeNodes s mp) else pure [s]) ss
----------------------------------------------------------------------
recursiveClustering :: Partitions -> Map (Int, Int) Double -> IO [ClusterNode]
recursiveClustering f mp = do
let
......@@ -61,18 +78,23 @@ recursiveClustering f mp = do
$ Map.keys mp
t :: Int
t = round $ (n / 2) * (sqrt n) / 100
t = round $ 2 * n / sqrt n
(toSplit,others) <- List.span (\a -> Set.size a > t) <$> clusterNodes2sets <$> f mp
cls' <- mapM f $ map (\s -> removeNodes s mp) toSplit
pure $ setNodes2clusterNodes $ others <> (List.concat $ map clusterNodes2sets cls')
----------------------------------------------------------------------
setNodes2clusterNodes :: [Set NodeId] -> [ClusterNode]
setNodes2clusterNodes ns = List.concat $ map (\(n,ns') -> toCluster n ns') $ zip [1..] ns
where
toCluster :: CommunityId -> Set NodeId -> [ClusterNode]
toCluster cId setNodeId = map (\n -> ClusterNode n cId) (Set.toList setNodeId)
clusterNodes2map :: [ClusterNode] -> Map NodeId Int
clusterNodes2map = Map.fromList . map (\(ClusterNode nId cId) -> (nId, cId))
removeNodes :: Set NodeId
-> Map (NodeId, NodeId) Double
-> Map (NodeId, NodeId) Double
......@@ -84,7 +106,6 @@ clusterNodes2sets = Dico.elems
. Dico.fromListWith (<>)
. (map ((Tuple.second Set.singleton) . swap . nodeId2comId))
----------------------------------------------------------------------
----------------------------------------------------------------------
data Bridgeness = Bridgeness_Basic { bridgeness_partitions :: [ClusterNode]
, bridgeness_filter :: Double
......@@ -92,48 +113,51 @@ data Bridgeness = Bridgeness_Basic { bridgeness_partitions :: [ClusterNode]
| Bridgeness_Advanced { bridgeness_similarity :: Similarity
, bridgness_confluence :: Confluence
}
| Bridgeness_Recursive { br_partitions :: [[Set NodeId]]
, br_filter :: Double
}
type Confluence = Map (NodeId, NodeId) Double
type Confluence = Map (NodeId, NodeId) Double
-- Filter Links between the Clusters
-- Links: Map (NodeId, NodeId) Double
-- List of Clusters: [Set NodeId]
bridgeness :: Bridgeness
-> Map (NodeId, NodeId) Double
-> Map (NodeId, NodeId) Double
bridgeness (Bridgeness_Recursive sn f) m =
Map.unions $ [linksBetween] <> map (\s -> bridgeness (Bridgeness_Basic (setNodes2clusterNodes s) f) m') sn
where
(linksBetween, m') = Map.partitionWithKey (\(n1,n2) _v -> Map.lookup n1 mapNodeIdClusterId
/= Map.lookup n2 mapNodeIdClusterId
) $ bridgeness (Bridgeness_Basic clusters (pi*f)) m
clusters = setNodes2clusterNodes (map Set.unions sn)
mapNodeIdClusterId = clusterNodes2map clusters
bridgeness (Bridgeness_Advanced sim c) m = Map.fromList
$ List.filter (\x -> if sim == Conditional then snd x > 0.2 else snd x > 0.02)
$ map (\(ks, (v1,_v2)) -> (ks,v1))
-- $ List.take (if sim == Conditional then 2*n else 3*n)
-- $ List.sortOn (Down . (snd . snd))
$ Map.toList
$ trace ("bridgeness3 m c" <> show (m,c))
-- $ trace ("bridgeness3 m c" <> show (m,c))
$ Map.intersectionWithKey
(\k v1 v2 -> trace ("intersectionWithKey " <> (show (k, v1, v2))) (v1, v2)) m c
{-
where
!m' = Map.toList m
n :: Int
!n = trace ("bridgeness m size: " <> (show $ List.length m'))
$ round
$ (fromIntegral $ List.length m') / (log $ fromIntegral nodesNumber :: Double)
nodesNumber :: Int
nodesNumber = Set.size $ Set.fromList $ as <> bs
where
(as, bs) = List.unzip $ Map.keys m
-}
bridgeness (Bridgeness_Basic ns b) m = Map.fromList
$ List.concat
$ Map.elems
$ filterComs b
$ filterComs (round b)
$ groupEdges (Map.fromList $ map nodeId2comId ns) m
groupEdges :: (Ord a, Ord b1)
=> Map b1 a
-> Map (b1, b1) b2
-> Map (a, a) [((b1, b1), b2)]
groupEdges :: (Ord comId, Ord nodeId)
=> Map nodeId comId
-> Map (nodeId, nodeId) value
-> Map (comId, comId) [((nodeId, nodeId), value)]
groupEdges m = fromListWith (<>)
. catMaybes
. map (\((n1,n2), d)
......@@ -144,17 +168,16 @@ groupEdges m = fromListWith (<>)
)
. toList
-- | TODO : sortOn Confluence
filterComs :: (Ord n1, Eq n2)
=> p
=> Int
-> Map (n2, n2) [(a3, n1)]
-> Map (n2, n2) [(a3, n1)]
filterComs _b m = Map.filter (\n -> length n > 0) $ mapWithKey filter' m
filterComs b m = Map.filter (\n -> length n > 0) $ mapWithKey filter' m
where
filter' (c1,c2) a
| c1 == c2 = a
-- TODO use n here
| otherwise = take (2*n) $ List.sortOn (Down . snd) a
| otherwise = take (b * 2*n) $ List.sortOn (Down . snd) a
where
n :: Int
n = round $ 100 * a' / t
......
......@@ -25,9 +25,9 @@ import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core.Methods.Similarities (Similarity(..), measure)
-- import Gargantext.Core.Methods.Similarities.Conditional (conditional)
import Gargantext.Core.Statistics
import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Bridgeness(..), Partitions, nodeId2comId, recursiveClustering)
import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Bridgeness(..), Partitions, nodeId2comId, recursiveClustering, recursiveClustering', setNodes2clusterNodes)
import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass, spinglass')
import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
import Gargantext.Core.Viz.Graph.Types (Attributes(..), Edge(..), Graph(..), MultiPartite(..), Node(..), Partite(..), Strength(..))
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
......@@ -109,7 +109,6 @@ cooc2graphWith Infomap = cooc2graphWith' (infomap "-v -N2")
--cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
-- TODO: change these options, or make them configurable in UI?
cooc2graphWith' :: Partitions
-> BridgenessMethod
-> MultiPartite
......@@ -118,29 +117,13 @@ cooc2graphWith' :: Partitions
-> Strength
-> HashMap (NgramsTerm, NgramsTerm) Int
-> IO Graph
cooc2graphWith' doPartitions bridgenessMethod multi similarity threshold strength myCooc = do
cooc2graphWith' _doPartitions _bridgenessMethod multi similarity@Conditional threshold strength myCooc = do
let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
distanceMap `seq` diag `seq` ti `seq` return ()
--{- -- Debug
-- To Work with Igraph
saveAsFileDebug "/tmp/distanceMap" ( List.intercalate ";"
$ Set.toList
$ Set.fromList
$ map (\(k1,k2) -> if k1 < k2
then show (k1+1) <> " " <> show (k2+1)
else show (k2+1) <> " " <> show (k1+1)
)
$ Map.keys
$ Map.filter (>0.005) distanceMap
)
saveAsFileDebug "/tmp/distanceMap.data" distanceMap
saveAsFileDebug "/tmp/distanceMap.cooc" myCooc
-- printDebug "similarities" similarities
--}
partitions <- if (Map.size distanceMap > 0)
then recursiveClustering doPartitions distanceMap
-- then recursiveClustering doPartitions distanceMap
then recursiveClustering' (spinglass' 1) distanceMap
else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
, "Maybe you should add more Map Terms in your list"
, "Tutorial: TODO"
......@@ -149,12 +132,39 @@ cooc2graphWith' doPartitions bridgenessMethod multi similarity threshold strengt
let
!confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True
!bridgeness' = bridgeness (Bridgeness_Recursive partitions 1.0) distanceMap
{-
!bridgeness' = if bridgenessMethod == BridgenessMethod_Basic
then bridgeness (Bridgeness_Basic partitions 1.0) distanceMap
else bridgeness (Bridgeness_Advanced similarity confluence') distanceMap
-}
pure $ data2graph multi ti diag bridgeness' confluence' (setNodes2clusterNodes $ List.concat partitions)
cooc2graphWith' doPartitions bridgenessMethod multi Distributional threshold strength myCooc = do
let (distanceMap, diag, ti) = doSimilarityMap Distributional threshold strength myCooc
distanceMap `seq` diag `seq` ti `seq` return ()
partitions <- if (Map.size distanceMap > 0)
then recursiveClustering doPartitions distanceMap
else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty"
, "Maybe you should add more Map Terms in your list"
, "Tutorial: TODO"
]
length partitions `seq` return ()
let
!confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True
!bridgeness' = if bridgenessMethod == BridgenessMethod_Basic
then bridgeness (Bridgeness_Basic partitions 10.0) distanceMap
else bridgeness (Bridgeness_Advanced Distributional confluence') distanceMap
pure $ data2graph multi ti diag bridgeness' confluence' partitions
type Reverse = Bool
doSimilarityMap :: Similarity
......
......@@ -29,6 +29,7 @@ import qualified IGraph.Algorithms.Clique as IG
import qualified IGraph.Algorithms.Community as IG
import qualified IGraph.Algorithms.Structure as IG
import qualified IGraph.Random as IG
import qualified Data.Set as Set
------------------------------------------------------------------
-- | Main Types
......@@ -74,6 +75,23 @@ spinglass s g = toClusterNode
(toI, fromI) = createIndices g
spinglass' :: Seed -> Map (Int, Int) Double -> IO [Set Int]
spinglass' s g = map Set.fromList
<$> map catMaybes
<$> map (map (\n -> Map.lookup n fromI))
<$> List.concat
<$> mapM (partitions_spinglass' s) g'
where
-- Not connected components of the graph make crash spinglass
g' = IG.decompose $ mkGraphUfromEdges
$ Map.keys
$ toIndex toI g
(toI, fromI) = createIndices g
-- | Tools to analyze graphs
partitions_spinglass' :: (Serialize v, Serialize e)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment