Commit 62f59938 authored by Grégoire Locqueville's avatar Grégoire Locqueville

Some comments, and clearer layout of some functions

parent 96566444
......@@ -133,9 +133,13 @@ coocOnSingleContext fun ts = xs
occurrences :: [Terms] -> Map Grouped (Map Terms Int)
occurrences = occurrencesOn _terms_stem
-- | Constructs the occurrence map corresponding to a given collection
-- | Constructs the occurrence map corresponding to a given collection:
-- the value at key `key` is the number of times `key` appears in the collection.
-- Note: Compared to `occurrences`, this is the more elementary function; maybe
-- it would make more sense to rename this one to `occurrences` and the other
-- to something more descriptive.
occurrencesSimple :: (Foldable f, Ord a, Num n)
=> f a -- ^ The collection whose items will be counted
=> f a -- ^ The collection whose items will be counted
-> Map a n -- ^ A map whose keys are items of the input
-- collection, and whose values are the number of
-- times those items appear in the input collection
......
......@@ -100,21 +100,12 @@ cooc2graphWith bridgenessMethod multi similarity threshold strength myCooc = do
-- | Convert the partition type returned by `spinglass` (one set of node IDs
-- per cluster) into the flat list of `ClusterNode`s required by `bridgeness`.
-- Clusters are numbered from 1, following their order in the input list.
partitionsToClusterNodes :: [Set Int] -> [ClusterNode]
partitionsToClusterNodes setlist =
  [ ClusterNode nodeId clusterId
  | (clusterId, cluster) <- zip [1 ..] setlist  -- assign an integer index to each cluster
  , nodeId <- toList cluster                    -- visit every node of that cluster
  ]
type Reverse = Bool
-- Flatten the clusters into a single list of `ClusterNode`s, labeling every
-- node with the 1-based index of the cluster it belongs to.
partitionsToClusterNodes setlist = do
  -- Convert sets to lists and assign an integer index to each cluster:
  (clusterId, nodeIds) <- zip [1 ..] (fmap toList setlist)
  -- Walk through the individual nodes of that cluster:
  nodeId <- nodeIds
  -- Turn the (cluster index, node) pair into a `ClusterNode`:
  [ClusterNode nodeId clusterId]
doSimilarityMap :: Similarity
-> Threshold
......@@ -218,8 +209,9 @@ data2graph multi labels' occurences bridge conf partitions =
| (label, n) <- labels
, Set.member n toKeep
]
-- Filter out nodes not connected to any other node
(bridge', toKeep) = nodesFilter (\v -> v >= 1) bridge
-- Remove vertices not connected to any other node, i.e. vertices that have
-- no edge joining them to other vertices
(bridge', toKeep) = nodesFilter (> 0) bridge
edges = [ Edge { edge_source = show s
, edge_hidden = Nothing
......
......@@ -29,30 +29,41 @@ import qualified IGraph.Algorithms.Structure as IG
import qualified IGraph.Random as IG
import qualified Data.Set as Set
------------------------------------------------------------------
-- | Partition a graph into clusters with the Spinglass algorithm.
-- Only the keys of the input map (the edges) are used here.
spinglass :: Int -> Map (Int, Int) Double -> IO [Set Int]
spinglass seed graph = do
  -- Non-connected graphs make spinglass crash, so every connected component
  -- is clustered on its own and the results are concatenated afterwards.
  perComponent <- mapM (spinglassAux seed) components
  pure [ Set.fromList cluster | cluster <- List.concat perComponent ]
  where
    -- retrieve edges in the form of `(Int, Int)`
    edges = Map.keys graph
    -- convert into IGraph type, then decompose into connected components
    components = IG.decompose (edgeList2UGraph edges)
-- | Cluster a graph using the Spinglass algorithm.
--
-- Warning: Currently, the weights are not taken into account; all vertices
-- and edges are treated equally.
-- TODO Take the weights into account
spinglass :: Int                    -- ^ Random seed
          -> Map (Int, Int) Double  -- ^ Weight map of the graph
          -> IO [Set Int]           -- ^ A list of clusters, in the form of sets of vertex IDs
spinglass seed graph =
  -- Perform Spinglass on each subgraph, then put all clusterings together and
  -- convert every cluster from a list to a set.
  fmap (map Set.fromList . List.concat) (mapM (spinglassAux seed) components)
  where
    -- Non-connected graphs make Spinglass crash, so the graph is first
    -- decomposed into its connected components.
    components =
      IG.decompose         -- split the graph into connected components
        $ edgeList2UGraph  -- turn the edge list into an IGraph graph
        $ Map.keys graph   -- get all edges in the form of pairs of vertex IDs
-- | Helper function for `spinglass`. Same as `spinglass`, except the input and
-- output are represented using different types.
spinglassAux :: (Serialize v, Serialize e, Show v)
             => Int              -- ^ Random seed
             -> IG.Graph 'U v e  -- ^ Input graph
             -> IO [[v]]         -- ^ List of clusters, in the form of lists of vertex labels
spinglassAux seed graph = do
  gen <- IG.withSeed seed pure  -- initialize random generator
  rawClusters <- IG.findCommunity graph Nothing Nothing IG.spinglass gen  -- perform clustering
  -- The clusters we get are composed of vertex IDs corresponding to the internal
  -- representation of IGraph graphs, so we need to retrieve the vertex labels:
  let clusterLabels = (fmap . fmap) (IG.nodeLab graph) rawClusters
  saveAsFileDebug "/tmp/res" clusterLabels  -- log the result
  pure clusterLabels                        -- return the result
-- | Make an undirected IGraph graph from a list of edges between `Int`s.
......@@ -60,7 +71,7 @@ spinglassAux seed graph = do
-- edges are not labeled.
edgeList2UGraph :: [(Int, Int)] -> IG.Graph 'U Int ()
edgeList2UGraph edgeList =
  -- `IG.mkGraph` is avoided because of the issue raised in the following ticket:
  -- https://gitlab.iscpif.fr/gargantext/haskell-igraph/issues/4
  IG.fromLabeledEdges [ (edge, ()) | edge <- edgeList ]  -- pair every edge with the unit label `()`
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment