Commit 62f59938 authored by Grégoire Locqueville's avatar Grégoire Locqueville

Some comments, and clearer layout of some functions

parent 96566444
...@@ -133,9 +133,13 @@ coocOnSingleContext fun ts = xs ...@@ -133,9 +133,13 @@ coocOnSingleContext fun ts = xs
occurrences :: [Terms] -> Map Grouped (Map Terms Int) occurrences :: [Terms] -> Map Grouped (Map Terms Int)
occurrences = occurrencesOn _terms_stem occurrences = occurrencesOn _terms_stem
-- | Constructs the occurence map corresponding to a given collection -- | Constructs the occurence map corresponding to a given collection:
-- the value at key `key` is the number of times `key` appears in the collection
-- Note: Compared to `occurences`, this is the more elementary function, maybe
-- it would make more sense to rename this one into `occurences` and the other
-- into something more descriptive
occurrencesSimple :: (Foldable f, Ord a, Num n) occurrencesSimple :: (Foldable f, Ord a, Num n)
=> f a -- ^ The collection whose items will be counted => f a -- ^ The collection whose items will be counted
-> Map a n -- ^ A map whose keys are items of the input -> Map a n -- ^ A map whose keys are items of the input
-- collection, and whose values are the number of -- collection, and whose values are the number of
-- times those items appear in the input collection -- times those items appear in the input collection
......
...@@ -100,21 +100,12 @@ cooc2graphWith bridgenessMethod multi similarity threshold strength myCooc = do ...@@ -100,21 +100,12 @@ cooc2graphWith bridgenessMethod multi similarity threshold strength myCooc = do
-- | A converter from the partition type returned by `spinglass` -- | A converter from the partition type returned by `spinglass`
-- to the partition type required by `bridgeness` -- to the partition type required by `bridgeness`
partitionsToClusterNodes :: [Set Int] -> [ClusterNode] partitionsToClusterNodes :: [Set Int] -> [ClusterNode]
partitionsToClusterNodes setlist = partitionsToClusterNodes setlist = setlist
setlist & & fmap toList -- Convert sets to lists
-- Convert sets to lists: & zip [1 ..] -- Assign an integer index to each cluster
fmap toList & & fmap (\(id, clusterIds) -> zip (repeat id) clusterIds) -- Attach cluster IDs to individual nodes rather than whole clusters
-- Assign an integer index to each cluster: & join -- Flatten list of clusters of nodes labeled by cluster indices into a list of labeled nodes
zip [1 ..] & & fmap (\(clusterId, nodeId) -> ClusterNode nodeId clusterId) -- Turn pairs into `ClusterNode`s
-- Attach cluster IDs to individual nodes instead to whole clusters
fmap (\(id, clusterIds) -> zip (repeat id) clusterIds) &
-- Flatten list of clusters of nodes labeled by cluster indices
-- into a list of labeled nodes:
join &
-- Turn pairs into `ClusterNode`s
fmap (\(clusterId, nodeId) -> ClusterNode nodeId clusterId)
type Reverse = Bool
doSimilarityMap :: Similarity doSimilarityMap :: Similarity
-> Threshold -> Threshold
...@@ -218,8 +209,9 @@ data2graph multi labels' occurences bridge conf partitions = ...@@ -218,8 +209,9 @@ data2graph multi labels' occurences bridge conf partitions =
| (label, n) <- labels | (label, n) <- labels
, Set.member n toKeep , Set.member n toKeep
] ]
-- Filter out nodes not connected to any other node -- Remove vertices not connected to any other node, i.e. vertices that have
(bridge', toKeep) = nodesFilter (\v -> v >= 1) bridge -- zero edge joining them to other vertices
(bridge', toKeep) = nodesFilter (> 0) bridge
edges = [ Edge { edge_source = show s edges = [ Edge { edge_source = show s
, edge_hidden = Nothing , edge_hidden = Nothing
......
...@@ -29,30 +29,41 @@ import qualified IGraph.Algorithms.Structure as IG ...@@ -29,30 +29,41 @@ import qualified IGraph.Algorithms.Structure as IG
import qualified IGraph.Random as IG import qualified IGraph.Random as IG
import qualified Data.Set as Set import qualified Data.Set as Set
------------------------------------------------------------------
-- | Partitions
spinglass :: Int -> Map (Int, Int) Double -> IO [Set Int] -- | Cluster a graph using the Spinglass algorithm
spinglass seed graph = map Set.fromList -- Warning: Currently, this does not take the weights into account, all vertices
<$> List.concat -- and edges are treated equally.
<$> mapM (spinglassAux seed) connectedComponents -- TODO Take the weights into account
where spinglass :: Int -- ^ Random seed
-- Non-connected graphs make spinglass crash -> Map (Int, Int) Double -- ^ Weight map of the graph
connectedComponents = IG.decompose -- decompose graph into connected components -> IO [Set Int] -- ^ A list of clusters, in the form of sets of vertex IDs
$ edgeList2UGraph -- convert into IGraph type spinglass seed graph = graph
$ Map.keys graph -- retrieve edges in the form of `(Int, Int)` -- Non-connected graphs make Spinglass crash, so we
-- decompose the graph into connected components perform
-- the algorithm on each component, and then put the
-- clusterings together.
& Map.keys -- get all edges in the form of pairs of vertex IDs
& edgeList2UGraph -- turn that into an IGraph graph
& IG.decompose -- split the graph into connected components
& mapM (spinglassAux seed) -- perform Spinglass on each subgraph
<&> List.concat -- put all clusterings together
<&> map Set.fromList -- convert clusters from list to set
-- | Helper function for `spinglass` -- | Helper function for `spinglass`. Same as `spinglass`, except the input and
-- output are represented using different types
spinglassAux :: (Serialize v, Serialize e, Show v) spinglassAux :: (Serialize v, Serialize e, Show v)
=> Int -> IG.Graph 'U v e -> IO [[v]] => Int -- ^ Random seed
-> IG.Graph 'U v e -- ^ Input graph
-> IO [[v]] -- ^ List of clusters, in the form of lists of vertex labels
spinglassAux seed graph = do spinglassAux seed graph = do
gen <- IG.withSeed seed pure gen <- IG.withSeed seed pure -- initialize random generator
rawClusters <- IG.findCommunity graph Nothing Nothing IG.spinglass gen rawClusters <- IG.findCommunity graph Nothing Nothing IG.spinglass gen -- perform clustering
-- Retrieve node labels from internal node IDs: -- The clusters we get are composed of vertex IDs corresponding to the internal
-- representation of IGraph graphs, so we need to retrieve the vertex labels:
let clusterLabels = (fmap . fmap) (IG.nodeLab graph) rawClusters let clusterLabels = (fmap . fmap) (IG.nodeLab graph) rawClusters
saveAsFileDebug "/tmp/res" clusterLabels saveAsFileDebug "/tmp/res" clusterLabels -- log the result
pure clusterLabels pure clusterLabels -- return the result
-- | Make an undirected IGraph graph from a list of edges between `Int`s. -- | Make an undirected IGraph graph from a list of edges between `Int`s.
...@@ -60,7 +71,7 @@ spinglassAux seed graph = do ...@@ -60,7 +71,7 @@ spinglassAux seed graph = do
-- edges are not labeled. -- edges are not labeled.
edgeList2UGraph :: [(Int, Int)] -> IG.Graph 'U Int () edgeList2UGraph :: [(Int, Int)] -> IG.Graph 'U Int ()
edgeList2UGraph edgeList = edgeList2UGraph edgeList =
-- We're not using `IG.mkGraph` because of the following ticket: -- We're not using `IG.mkGraph` because of the issue raised in the following ticket:
-- https://gitlab.iscpif.fr/gargantext/haskell-igraph/issues/4 -- https://gitlab.iscpif.fr/gargantext/haskell-igraph/issues/4
IG.fromLabeledEdges $ fmap (\edge -> (edge, ())) $ edgeList IG.fromLabeledEdges $ fmap (\edge -> (edge, ())) $ edgeList
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment