Commit d7b909c3 authored by Grégoire Locqueville's avatar Grégoire Locqueville

Add normalization function for dictionaries of clusters in tests

This will allow for computing whether two lists of `ClusterNode`s are
"equivalent" in the sense that they correspond to the same partition
parent 5c82b490
Pipeline #7303 passed with stages
in 69 minutes and 46 seconds
......@@ -709,6 +709,7 @@ common testDependencies
, filepath ^>= 1.4.2.2
, fmt
, gargantext
, gargantext-graph
, gargantext-prelude
, generic-arbitrary >= 1.0.1 && < 2
, haskell-bee
......
......@@ -14,14 +14,16 @@ module Test.Graph.Clustering where
import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HashMap
import Data.IntMap qualified as IM
import Data.List qualified as List
import Data.Set qualified as Set
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core.Methods.Similarities (Similarity(..))
-- import Gargantext.Core.Viz.Graph
import Gargantext.Core.Viz.Graph.Tools (doSimilarityMap)
import Gargantext.Core.Viz.Graph.Tools.IGraph (spinglass)
import Gargantext.Core.Viz.Graph.Types
import Gargantext.Prelude
import Graph.Types (ClusterNode(..))
import Test.Hspec
myCooc :: HashMap (NgramsTerm, NgramsTerm) Int
......@@ -39,3 +41,45 @@ test = do
let
result = List.length partitions > 1
shouldBe True result
-- | "Get rid" of the keys of a dictionary representing a partition.
-- If `partition1` and `partition2` are the same up to changing the dictionary
-- keys, then `normalizePartition partition1 == normalizePartition partition2`.
-- May return `Nothing` if the input is not a true partition, i.e. a node
-- belongs to two clusters at once. (This is not guaranteed, so don't use this to
-- test whether the partition is a true one; however, the function still correctly
-- normalizes all values that result in `Just`, even those that are not true
-- partitions)
-- This works by relabeling each cluster by its smallest element.
normalizePartition :: IM.IntMap (Set.Set Int) -- ^ A dictionary whose values are the sets of the partition, and whose keys we want to get rid of.
-> Maybe (IM.IntMap (Set.Set Int))
normalizePartition = foldM addCluster IM.empty
where
-- | Add a cluster to a dictionary of clusters labeled by their smallest elements.
addCluster :: IM.IntMap (Set.Set Int)
-> Set.Set Int
-> Maybe (IM.IntMap (Set.Set Int))
addCluster imap cluster = let clusterMin = minimum cluster in
if IM.member clusterMin imap
-- There's already a cluster whose label/smallest element is the same as the one we want to add!
then Nothing
-- All good, we can add the cluster:
else Just $ IM.insert clusterMin cluster imap
-- | Partition a set of nodes, each labeled with a cluster number, into a
-- dictionary whose keys are cluster IDs and whose values are sets of nodes
-- belonging to the corresponding cluster
splitClusterSet :: Set.Set ClusterNode -- ^ A set of node IDs and their associated cluster ID
-> IM.IntMap (Set.Set Int)
splitClusterSet = foldl' (flip insertClusterNode) IM.empty
-- | Given a collection of sets of nodes each labeled with a cluster number,
-- insert a new node in a given cluster
insertClusterNode :: ClusterNode -- ^ The nodeID to add, and the cluster ID it should be added to
-> IM.IntMap (Set.Set Int) -- ^ The collection of clusters to which one should add a new node
-> IM.IntMap (Set.Set Int)
insertClusterNode (ClusterNode nodeId clusterId) =
IM.alter addToCluster clusterId
where addToCluster :: Maybe (Set.Set Int) -> Maybe (Set.Set Int)
addToCluster Nothing = Just $ Set.singleton nodeId
addToCluster (Just s) = Just $ Set.insert nodeId s
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment