Merge branch 'glocqueville/update-igraph' into 'dev'

Update IGraph See merge request !411

Merge branch 'glocqueville/update-igraph' into 'dev'
Update IGraph See merge request !411
f5f0ae1b · Alfredo Di Napoli · 45371e41 · 93d91e0c · f5f0ae1b · f5f0ae1b
Commit f5f0ae1b authored May 26, 2025 by Alfredo Di Napoli
14 changed files
--- a/bin/update-project-dependencies
+++ b/bin/update-project-dependencies
@@ -16,8 +16,8 @@ fi
 # with the `sha256sum` result calculated on the `cabal.project` and
 # `cabal.project.freeze`. This ensures the files stay deterministic so that CI
 # cache can kick in.
-expected_cabal_project_hash="963418e37a17d4bb67d4b885613144b36d290f612eea80355e82abc7e76b450c"
-expected_cabal_project_freeze_hash="cd52143d3a9d285360b59c6371d3e258552c1bc115bd612024db3de1f7593ff7" 
+expected_cabal_project_hash="7d021a8e3d0b68421e26bdfe4e1da82f6ea26b6c420fc984b3c30c14bc5fea98"
+expected_cabal_project_freeze_hash="553b98aadb35506a305bd740cdd71f5fadc1e6d55d10f91cf39daa6735a63d78"


 cabal --store-dir=$STORE_DIR v2-build --dry-run

--- a/cabal.project
+++ b/cabal.project
@@ -87,7 +87,7 @@ source-repository-package
 source-repository-package
    type: git
    location: https://gitlab.iscpif.fr/gargantext/gargantext-graph.git
-    tag: a08ceed71b297a811f90cb86c3c61dc0b153036b
+    tag: 316d48b6a89593faaf1f2102e9714cea7e416e56
    subdir: gargantext-graph-core

 -- Support for GHC 9.6.x
@@ -99,7 +99,7 @@ source-repository-package
 source-repository-package
    type: git
    location: https://gitlab.iscpif.fr/gargantext/haskell-igraph.git
-    tag: 9f8a2f4a014539826a4eab3215cc70c0813f20cb
+    tag: 05e62da3aa466b7d0608d4918b030dc024119b32

 source-repository-package
    type: git

--- a/cabal.project.freeze
+++ b/cabal.project.freeze
@@ -25,8 +25,6 @@ constraints: any.Boolean ==0.2.4,
             any.StateVar ==1.2.2,
             any.accelerate ==1.3.0.0,
             accelerate +bounds-checks -debug -internal-checks -nofib -unsafe-checks,
-             any.accelerate-arithmetic ==1.0.0.1,
-             any.accelerate-utility ==1.0.0.1,
             any.adjunctions ==4.4.2,
             any.aeson ==2.1.2.1,
             aeson -cffi +ordered-keymap,
@@ -79,6 +77,8 @@ constraints: any.Boolean ==0.2.4,
             any.blaze-builder ==0.4.2.3,
             any.blaze-html ==0.9.2.0,
             any.blaze-markup ==0.8.3.0,
+             any.blaze-textual ==0.2.3.1,
+             blaze-textual -developer -integer-simple +native,
             any.boolexpr ==0.3,
             any.boring ==0.2.2,
             boring +tagged,
@@ -103,6 +103,7 @@ constraints: any.Boolean ==0.2.4,
             cassava-conduit +small_base,
             any.cborg ==0.2.10.0,
             cborg +optimize-gmp,
+             any.cborg-json ==0.2.6.0,
             any.cereal ==0.5.8.3,
             cereal -bytestring-builder,
             any.charset ==0.3.11,
@@ -161,6 +162,8 @@ constraints: any.Boolean ==0.2.4,
             any.dense-linear-algebra ==0.1.0.0,
             any.digest ==0.0.2.1,
             digest -have_arm64_crc32c -have_builtin_prefetch -have_mm_prefetch -have_sse42 -have_strong_getauxval -have_weak_getauxval +pkg-config,
+             any.direct-sqlite ==2.3.29,
+             direct-sqlite +dbstat +fulltextsearch +haveusleep +json1 -mathfunctions -systemlib +urifilenames,
             any.directory ==1.3.8.5,
             any.discrimination ==0.5,
             any.distributive ==0.6.2.1,
@@ -170,7 +173,6 @@ constraints: any.Boolean ==0.2.4,
             any.double-conversion ==2.0.5.0,
             double-conversion -developer +embedded_double_conversion,
             any.easy-file ==0.2.5,
-             any.eigen ==3.3.7.0,
             any.either ==5.0.2,
             any.ekg-core ==0.1.1.8,
             any.ekg-json ==0.1.1.1,
@@ -223,7 +225,7 @@ constraints: any.Boolean ==0.2.4,
             any.haskell-bee ==0.1.0.0,
             any.haskell-bee-pgmq ==0.1.0.0,
             any.haskell-bee-tests ==0.1.0.0,
-             any.haskell-igraph ==0.10.4,
+             any.haskell-igraph ==0.10.4.1,
             any.haskell-lexer ==1.1.2,
             any.haskell-pgmq ==0.1.0.0,
             any.haskell-src-exts ==1.23.1,
@@ -300,7 +302,6 @@ constraints: any.Boolean ==0.2.4,
             any.linear ==1.23,
             linear -herbie +template-haskell,
             any.list-t ==1.0.5.7,
-             any.lockfree-queue ==0.2.4,
             any.logict ==0.8.2.0,
             any.loop ==0.3.0,
             any.lzma ==0.0.1.1,
@@ -460,6 +461,7 @@ constraints: any.Boolean ==0.2.4,
             any.servant-blaze ==0.9.1,
             any.servant-client ==0.20.2,
             any.servant-client-core ==0.20.2,
+             any.servant-conduit ==0.16.1,
             any.servant-ekg ==0.3.1,
             any.servant-openapi3 ==2.0.1.6,
             any.servant-routes ==0.1.0.0,
@@ -487,6 +489,7 @@ constraints: any.Boolean ==0.2.4,
             any.split ==0.2.5,
             any.splitmix ==0.1.1,
             splitmix -optimised-mixer,
+             any.sqlite-simple ==0.4.19.0,
             any.statistics ==0.16.3.0,
             statistics -benchpapi,
             any.stemmer ==0.5.2,
@@ -571,7 +574,6 @@ constraints: any.Boolean ==0.2.4,
             any.uri-encode ==1.5.0.7,
             uri-encode +network-uri -tools,
             any.utf8-string ==1.0.2,
-             any.utility-ht ==0.0.17.2,
             any.uuid ==1.3.16,
             any.uuid-types ==1.0.6,
             any.validity ==0.12.1.0,

--- a/gargantext.cabal
+++ b/gargantext.cabal
@@ -552,7 +552,7 @@ library
    , graphviz ^>= 2999.20.1.0
    , haskell-bee
    , haskell-bee-pgmq
-    , haskell-igraph ^>= 0.10.4
+    , haskell-igraph ^>= 0.10.4.1
    , haskell-pgmq >= 0.1.0.0 && < 0.2
    , haskell-throttle
    , hlcm ^>= 0.2.2
@@ -627,7 +627,6 @@ library
    , servant-websockets >= 2.0.0 && < 2.1
    , servant-xml-conduit ^>= 0.1.0.4
    , shelly >= 1.9 && < 2
-    , singletons ^>= 3.0.2
    , singletons-th >= 3.1 && < 3.3
    , smtp-mail >= 0.3.0.0
    , split >= 0.2.3.4

--- a/src/Gargantext/Core/Text/Learn.hs
+++ b/src/Gargantext/Core/Text/Learn.hs
@@ -31,7 +31,7 @@ import Data.Text (pack, unpack, toLower)
 import Data.Tuple.Extra (both)
 import GHC.Generics
 import Gargantext.Core (Lang(..), allLangs)
-import Gargantext.Core.Text.Metrics.Count (occurrencesWith)
+import Gargantext.Core.Text.Metrics.Count (countOccurrences)
 import Gargantext.Core.Text.Samples.DE qualified as DE
 import Gargantext.Core.Text.Samples.EN qualified as EN
 import Gargantext.Core.Text.Samples.ES qualified as ES
@@ -197,7 +197,7 @@ wordToBook ns n txt = EventBook ef en
  where
    chks = allChunks ns n txt
    en = DM.fromList $ map (\(n',ns') -> (n', length ns')) $ zip ns chks
-    ef = foldl' DM.union DM.empty $ map (occurrencesWith identity) chks
+    ef = foldl' DM.union DM.empty $ map countOccurrences chks

 op :: (Freq -> Freq -> Freq) -> EventBook -> EventBook -> EventBook
 op f (EventBook ef1 en1)

--- a/src/Gargantext/Core/Text/Metrics/Count.hs
+++ b/src/Gargantext/Core/Text/Metrics/Count.hs
@@ -133,14 +133,24 @@ coocOnSingleContext fun ts = xs
 occurrences :: [Terms] -> Map Grouped (Map Terms Int)
 occurrences = occurrencesOn _terms_stem

+-- | Constructs the occurence map corresponding to a given collection:
+-- the value at key `key` is the number of times `key` appears in the collection
+-- Note: Compared to `occurences`, this is the more elementary function, maybe
+-- it would make more sense to rename this one into `occurences` and the other
+-- into something more descriptive
+countOccurrences :: (Foldable f, Ord a)
+                 => f a       -- ^ The collection whose items will be counted
+                 -> Map a Int -- ^ A map whose keys are items of the input
+                              -- collection, and whose values are the number of
+                              -- times those items appear in the input collection
+countOccurrences collection =
+  foldl' (\occurenceMap item -> insertWith (+) item 1 occurenceMap)
+         empty
+         collection
+
 occurrencesOn :: (Ord a, Ord b) => (a -> b) -> [a] -> Map b (Map a Int)
 occurrencesOn f = foldl' (\m a -> insertWith (unionWith (+)) (f a) (singleton a 1) m) empty

-occurrencesWith :: (Foldable list, Ord k, Num a, Show k, Show a, Show (list b)) => (b -> k) -> list b -> Map k a
-occurrencesWith f xs = trace (show (xs,m) :: Text)  m
-  where
-    m = foldl' (\x y -> insertWith (+) (f y) 1 x) empty xs
-
 -- TODO add groups and filter stops

 sumOcc :: Ord a => [Occ a] -> Occ a

--- a/src/Gargantext/Core/Viz/Chart.hs
+++ b/src/Gargantext/Core/Viz/Chart.hs
@@ -21,7 +21,7 @@ import Gargantext.API.Ngrams.NgramsTree ( toTree, NgramsTree )
 import Gargantext.API.Ngrams.Tools ( filterListWithRoot, getListNgrams, getRepo, mapTermListRoot )
 import Gargantext.API.Ngrams.Types ( NgramsTerm(NgramsTerm) )
 import Gargantext.Core.NodeStory.Types ( NodeStoryEnv )
-import Gargantext.Core.Text.Metrics.Count (occurrencesWith)
+import Gargantext.Core.Text.Metrics.Count (countOccurrences)
 import Gargantext.Core.Text.Ngrams (NgramsType)
 import Gargantext.Core.Types.Main ( ListType )
 import Gargantext.Database.Admin.Types.Node ( NodeType(NodeList), CorpusId, contextId2NodeId )
@@ -43,7 +43,7 @@ histoData cId = do
                $ V.fromList
                $ sortOn fst -- TODO Vector.sortOn
                $ toList
-                $ occurrencesWith identity dates
+                $ countOccurrences dates
  pure (Histo ls css)



--- a/src/Gargantext/Core/Viz/Graph/Tools.hs
+++ b/src/Gargantext/Core/Viz/Graph/Tools.hs
@@ -31,9 +31,9 @@ import Gargantext.Core.Statistics ( pcaReduceTo, Dimension(Dimension) )
 import Gargantext.Core.Text.Ngrams (NgramsType(..))
 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, nodeId2comId, setNodes2clusterNodes)
 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
-import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass, spinglass')
+import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
 import Gargantext.Core.Viz.Graph.Types (Attributes(..), BridgenessMethod, Edge(..), Graph(..), MultiPartite(..), Node(..), Partite(..), Strength(..), LegendField(..))
-import Gargantext.Core.Viz.Graph.Utils (edgesFilter, nodesFilter)
+import Gargantext.Core.Viz.Graph.Utils (edgesFilter, filterNodesByCount)
 import Gargantext.Prelude
 import Graph.BAC.ProxemyOptim qualified as BAC
 import Graph.Types (ClusterNode(..))
@@ -42,11 +42,6 @@ import IGraph.Algorithms.Layout qualified as Layout
 import IGraph.Random ( Gen ) -- (Gen(..))


-------------------------------------------------------------
-defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
-- defaultClustering x = pure $ BAC.defaultClustering x
-defaultClustering x = spinglass 1 x
-
 -------------------------------------------------------------
 type Threshold = Double

@@ -82,7 +77,7 @@ cooc2graphWith bridgenessMethod multi similarity threshold strength myCooc = do
  distanceMap `seq` diag `seq` ti `seq` pure ()

  partitions <- if Map.size distanceMap > 0
-      then spinglass' 1 distanceMap
+      then spinglass 1 distanceMap
      else panic $ Text.unwords [ "I can not compute the graph you request"
                                , "because either the quantity of documents"
                                , "or the quantity of terms"
@@ -102,84 +97,53 @@ cooc2graphWith bridgenessMethod multi similarity threshold strength myCooc = do

  pure $ data2graph multi ti diag bridgeness' confluence' (setNodes2clusterNodes partitions)

-- | A converter from the partition type returned by `spinglass'`
-- to the partition type required by `bridgeness`
+-- | Given a list of sets, each identifying the nodes in a cluster, returns
+-- a list of 'ClusterNode' where each node has been uniquely labeled
+-- by its community ID. This allows flattening the input sets without conflicts
+-- on nodes with the same ID (as they would belong to a different community).
 partitionsToClusterNodes :: [Set Int] -> [ClusterNode]
-partitionsToClusterNodes setlist =
-  setlist                                                &
-  -- Convert sets to lists:
-  fmap toList                                            &
-  -- Assign an integer index to each cluster:
-  zip [1 ..]                                             &
-  -- Attach cluster IDs to individual nodes instead to whole clusters
-  fmap (\(id, clusterIds) -> zip (repeat id) clusterIds) &
-  -- Flatten list of clusters of nodes labeled by cluster indices
-  -- into a list of labeled nodes:
-  join                                                   &
-  -- Turn pairs into `ClusterNode`s
-  fmap (\(clusterId, nodeId) -> ClusterNode nodeId clusterId)
-
-type Reverse = Bool
+partitionsToClusterNodes setlist = setlist
+  & fmap toList -- Convert sets to lists
+  & zip [1 ..] -- Assign an integer index to each cluster
+  & fmap (\(id, clusterIds) -> zip (repeat id) clusterIds) -- Attach cluster IDs to individual nodes rather than whole clusters
+  & join -- Flatten list of clusters of nodes labeled by cluster indices into a list of labeled nodes
+  & fmap (\(clusterId, nodeId) -> ClusterNode nodeId clusterId) -- Turn pairs into `ClusterNode`s

 doSimilarityMap :: Similarity
-              -> Threshold
-              -> Strength
-              -> HashMap (NgramsTerm, NgramsTerm) Int
-              -> ( Map (Int,Int) Double
-                 , Map (Index, Index) Int
-                 , Map NgramsTerm Index
-                 )
-
-doSimilarityMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
-  where
-    myCooc' = Map.fromList $ HashMap.toList myCooc
-
-    (_diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
-                       $ Map.fromList
-                       $ HashMap.toList myCooc
-
-    (ti, _it) = createIndices theMatrix
-    tiSize  = Map.size ti
-
-    similarities = (\m -> m `seq` m)
-                 $ (\m -> m `seq` measure Conditional m)
-                 $ (\m -> m `seq` map2mat Square 0 tiSize m)
-                 $ theMatrix `seq` toIndex ti theMatrix
-
-    links = round (let n :: Double = fromIntegral (Map.size ti) in 10 * n * (log n)^(2::Int))
-    distanceMap = Map.fromList
-                $ List.take links
-                $ (if strength == Weak then List.reverse else identity)
-                $ List.sortOn snd
-                $ Map.toList
-                $ Map.filter (> threshold)
-                $ similarities `seq` mat2map similarities
-
-doSimilarityMap distriType threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
+                -> Threshold
+                -> Strength
+                -> HashMap (NgramsTerm, NgramsTerm) Int -- cooccurrence map
+                -> ( Map (Int,Int) Double   -- weight map
+                   , Map (Index, Index) Int -- cooccurrence map
+                   , Map NgramsTerm Index   -- ???
+                   )
+doSimilarityMap similarityType threshold strength coocMap =
+  (weightMap, toIndex ti coocMap', ti)
  where
-    -- TODO remove below
-    (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
+    coocMap' = case similarityType of
+      Conditional -> Map.fromList $ HashMap.toList coocMap
+      Distributional -> diag
+    (diag, theMatrix) = Map.partitionWithKey (\(x, y) _ -> x == y)
                      $ Map.fromList
-                      $ HashMap.toList myCooc
-
+                      $ HashMap.toList coocMap
    (ti, _it) = createIndices theMatrix
-    tiSize  = Map.size ti
-
-    similarities = (\m -> m `seq` m)
-                 $ (\m -> m `seq` measure distriType m)
+    similarities = (\m -> m `seq` measure similarityType m)
                 $ (\m -> m `seq` map2mat Square 0 tiSize m)
                 $ theMatrix `seq` toIndex ti theMatrix
-
-    links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
-
-    distanceMap = Map.fromList
-                $ List.take links
-                $ (if strength == Weak then List.reverse else identity)
-                $ List.sortOn snd
-                $ Map.toList
-                $ edgesFilter
-                $ (\m -> m `seq` Map.filter (> threshold) m)
-                $ similarities `seq` mat2map similarities
+    tiSize = Map.size ti
+    links = let n = fromIntegral $ Map.size ti :: Double
+                factor = if similarityType == Conditional then 10 else 1
+            in round $ factor * n * log n^(2::Int)
+    filterMap = case similarityType of
+      Conditional -> Map.filter (> threshold)
+      Distributional -> edgesFilter . (\m -> m `seq` Map.filter (> threshold) m)
+    weightMap = Map.fromList
+              $ List.take links
+              $ (if strength == Weak then List.reverse else identity)
+              $ List.sortOn snd
+              $ Map.toList
+              $ filterMap
+              $ similarities `seq` mat2map similarities

 ----------------------------------------------------------
 -- | From data to Graph
@@ -223,8 +187,9 @@ data2graph multi labels' occurences bridge conf partitions =
            | (label, n) <- labels
            , Set.member n toKeep
            ]
-
-    (bridge', toKeep) = nodesFilter (\v -> v > 1) bridge
+    -- Remove vertices not connected to any other node, i.e. vertices that have
+    -- zero edge joining them to other vertices
+    (bridge', toKeep) = filterNodesByCount (> 0) bridge

    edges = [ Edge { edge_source = show s
                   , edge_hidden = Nothing
@@ -299,7 +264,7 @@ getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") ident

 -- | KamadaKawai Layout
 -- TODO TEST: check labels, nodeId and coordinates
-layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
+layout :: Map (Int, Int) Double -> Int -> Gen s -> (Double, Double)
 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
  where
    coord :: (Map Int (Double,Double))

--- a/src/Gargantext/Core/Viz/Graph/Tools/IGraph.hs
+++ b/src/Gargantext/Core/Viz/Graph/Tools/IGraph.hs
@@ -13,117 +13,72 @@ Reference:
 -}

 module Gargantext.Core.Viz.Graph.Tools.IGraph
-  where
+  ( spinglass
+  , mkGraphUfromEdges
+  ) where

-import Data.Serialize
-import Data.Singletons (SingI)
-import Gargantext.Core.Viz.Graph.Index
-import Graph.Types (ClusterNode(..))
+import Data.Serialize (Serialize)
 import IGraph hiding (mkGraph, neighbors, edges, nodes, Node, Graph)
 import Protolude
-import Gargantext.Prelude (saveAsFileDebug)
 import qualified Data.List                   as List
 import qualified Data.Map.Strict             as Map
 import qualified IGraph                      as IG
-import qualified IGraph.Algorithms.Clique    as IG
 import qualified IGraph.Algorithms.Community as IG
 import qualified IGraph.Algorithms.Structure as IG
 import qualified IGraph.Random               as IG
 import qualified Data.Set                    as Set

------------------------------------------------------------------
-- | Main Types
-type Graph_Undirected = IG.Graph 'U () ()
-type Graph_Directed   = IG.Graph 'D () ()
-
-type Node  = IG.Node
-type Graph = IG.Graph
-
------------------------------------------------------------------
-- | Main Graph management Functions
-neighbors :: IG.Graph d v e -> IG.Node -> [IG.Node]
-neighbors = IG.neighbors
-
-edges :: IG.Graph d v e -> [Edge]
-edges = IG.edges
-
-nodes :: IG.Graph d v e -> [IG.Node]
-nodes = IG.nodes
-
------------------------------------------------------------------
-- | Partitions
-maximalCliques :: IG.Graph d v e -> [[Int]]
-maximalCliques g = IG.maximalCliques g (min',max')
-  where
-    min' = 0
-    max' = 0
-
------------------------------------------------------------------
-type Seed = Int
-
-spinglass :: Seed -> Map (Int, Int) Double -> IO [ClusterNode]
-spinglass s g = toClusterNode
-             <$> map catMaybes
-             <$> map (map (\n -> Map.lookup n fromI))
-             <$> List.concat
-             <$> mapM (partitions_spinglass' s) g'
-  where
-    -- Not connected components of the graph make crash spinglass
-    g' = IG.decompose $ mkGraphUfromEdges
-                      $ Map.keys
-                      $ toIndex toI g
-
-    (toI, fromI) = createIndices g
-
-spinglass' :: Seed -> Map (Int, Int) Double -> IO [Set Int]
-spinglass' s g = map Set.fromList
-             <$> map catMaybes
-             <$> map (map (\n -> Map.lookup n fromI))
-             <$> List.concat
-             <$> mapM (partitions_spinglass' s) g'
-  where
-    -- Not connected components of the graph make crash spinglass
-    g' = IG.decompose $ mkGraphUfromEdges
-                      $ Map.keys
-                      $ toIndex toI g
-
-    (toI, fromI) = createIndices g
-
-
-
-

-- | Tools to analyze graphs
-partitions_spinglass' :: (Serialize v, Serialize e)
-                         => Seed -> IG.Graph 'U v e -> IO [[Int]]
-partitions_spinglass' s g = do
-  gen <- IG.withSeed s pure
-  res <- IG.findCommunity g Nothing Nothing IG.spinglass gen
-  -- res <- IG.findCommunity g Nothing Nothing IG.leiden gen
-  -- res <- IG.findCommunity g Nothing Nothing IG.infomap  gen
-  saveAsFileDebug "/tmp/res" res
-  pure res
-
-
-toClusterNode :: [[Int]] -> [ClusterNode]
-toClusterNode ns = List.concat
-                 $ map (\(cId, ns') -> map (\n -> ClusterNode n cId) ns')
-                 $ List.zip [1..] ns
-
------------------------------------------------------------------
-mkGraph :: (SingI d, Ord v,
-             Serialize v, Serialize e) =>
-     [v] -> [LEdge e] -> IG.Graph d v e
-mkGraph = IG.mkGraph
-
------------------------------------------------------------------
-mkGraphUfromEdges :: [(Int, Int)] -> Graph_Undirected
-mkGraphUfromEdges es = mkGraph (List.replicate n ()) $ zip es $ repeat ()
+-- | Cluster a graph using the Spinglass algorithm
+-- Warning: Currently, this does not take the weights into account, all vertices
+-- and edges are treated equally.
+-- TODO Take the weights into account
+spinglass :: Int                   -- ^ Random seed
+          -> Map (Int, Int) Double -- ^ Weight map of the graph
+          -> IO [Set Int]          -- ^ A list of clusters, in the form of sets of vertex IDs
+spinglass seed graph = graph
+   -- Non-connected graphs make Spinglass crash, so we
+   -- decompose the graph into connected components perform
+   -- the algorithm on each component, and then put the
+   -- clusterings together.
+   &  Map.keys                 -- get all edges in the form of pairs of vertex IDs
+   &  edgeList2UGraph          -- turn that into an IGraph graph
+   &  IG.decompose             -- split the graph into connected components
+   &  mapM (spinglassAux seed) -- perform Spinglass on each subgraph
+  <&> List.concat              -- put all clusterings together
+  <&> map Set.fromList         -- convert clusters from list to set
+
+
+-- | Helper function for `spinglass`. Same as `spinglass`, except the input and
+-- output are represented using different types
+spinglassAux :: (Serialize v, Serialize e, Show v)
+             => Int             -- ^ Random seed
+             -> IG.Graph 'U v e -- ^ Input graph
+             -> IO [[v]]        -- ^ List of clusters, in the form of lists of vertex labels
+spinglassAux seed graph = IG.withSeed seed $ \gen -> do -- initialize random generator
+  rawClusters <- IG.findCommunity graph Nothing Nothing IG.spinglass gen -- perform clustering
+  -- The clusters we get are composed of vertex IDs corresponding to the internal
+  -- representation of IGraph graphs, so we need to retrieve the vertex labels:
+  let clusterLabels = (fmap . fmap) (IG.nodeLab graph) rawClusters
+  pure clusterLabels -- return the result
+
+
+-- | Make an undirected IGraph graph from a list of edges between `Int`s.
+-- The output graph's vertices are labeled with the original `Int`s, and the
+-- edges are not labeled.
+edgeList2UGraph :: [(Int, Int)] -> IG.Graph 'U Int ()
+edgeList2UGraph edgeList =
+ -- We're not using `IG.mkGraph` because of the issue raised in the following ticket:
+ -- https://gitlab.iscpif.fr/gargantext/haskell-igraph/issues/4
+  IG.fromLabeledEdges $ fmap (\edge -> (edge, ())) $ edgeList
+
+
+-- | Make an "anonymous" (i.e. without labels) graph out of a list of edges.
+-- Warning: there is no guarantee, as far as I know, that the underlying
+-- representation of the nodes corresponds to the original `Int`.
+mkGraphUfromEdges :: [(Int, Int)] -> IG.Graph 'U () ()
+mkGraphUfromEdges es = IG.mkGraph (List.replicate n ()) $ fmap makeLEdge es
  where
-    (a,b) = List.unzip es
-    n = List.length (List.nub $ a <> b)
-
-{-
-mkGraphDfromEdges :: [(Int, Int)] -> Graph_Directed
-mkGraphDfromEdges = undefined
-}
+    makeLEdge e = (e, ())
+    n         = Set.size nodes
+    nodes     = Set.fromList $ map fst es
--- a/src/Gargantext/Core/Viz/Graph/Utils.hs
+++ b/src/Gargantext/Core/Viz/Graph/Utils.hs
@@ -23,7 +23,7 @@ import Data.Matrix hiding (identity)
 import Data.Set qualified as Set
 import Data.Vector (Vector)
 import Data.Vector qualified as Vector
-import Gargantext.Core.Text.Metrics.Count (occurrencesWith)
+import Gargantext.Core.Text.Metrics.Count (countOccurrences)
 import Gargantext.Prelude

 ------------------------------------------------------------------------
@@ -81,14 +81,23 @@ edgesFilter m = Map.fromList $ catMaybes results
    keys    = Set.toList $ Set.fromList (x <> y)
    (x,y)   = unzip $ Map.keys m

-nodesFilter :: (Show a, Show b, Ord a, Ord b, Num b) => (b -> Bool) -> Map (a,a) b -> (Map (a,a) b, Set a)
-nodesFilter f m = (m', toKeep)
+-- | Filter nodes depending on how many times they (co)occur.
+-- More specifically, for a given value `x :: a`, this sums all entries in the
+-- map that have `x` as a value, and then it filters by applying `f` to the sum.
+-- Warning: This counts the value at `(x, x)` twice.
+filterNodesByCount :: (Show node, Ord node)
+                   => (Int -> Bool) -- ^ Filtering function
+                   -> Map (node, node) b   -- ^ Input map
+                   -> (Map (node, node) b, Set node)
+                   -- ^ The map without the filtered out nodes, and the set of
+                   -- remaining nodes
+filterNodesByCount f m = (m', toKeep)
  where
    m' = Map.filterWithKey (\(a,b) _ -> Set.member a toKeep && Set.member b toKeep) m
    toKeep = Set.fromList
           $ Map.keys
           $ Map.filter f
-           $ occurrencesWith identity
+           $ countOccurrences
           $ tupleConcat
           $ List.unzip
           $ Map.keys m

--- a/stack.yaml
+++ b/stack.yaml
@@ -170,7 +170,7 @@
  git: "https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git"
  subdirs:
  - .
- commit: a08ceed71b297a811f90cb86c3c61dc0b153036b
+- commit: 316d48b6a89593faaf1f2102e9714cea7e416e56
  git: "https://gitlab.iscpif.fr/gargantext/gargantext-graph.git"
  subdirs:
  - "gargantext-graph-core"
@@ -190,7 +190,7 @@
  git: "https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude"
  subdirs:
  - .
- commit: 9f8a2f4a014539826a4eab3215cc70c0813f20cb
+- commit: 05e62da3aa466b7d0608d4918b030dc024119b32
  git: "https://gitlab.iscpif.fr/gargantext/haskell-igraph.git"
  subdirs:
  - .

--- a/test/Test/Graph/Clustering.hs
+++ b/test/Test/Graph/Clustering.hs
--- a/test/Test/Graph/Distance.hs
+++ b/test/Test/Graph/Distance.hs
@@ -15,14 +15,61 @@ commentary with @some markup@.

 module Test.Graph.Distance where

-{-
-import Gargantext.Core.Methods.Matrix.Accelerate.Utils (cross', matrix)
+import Data.HashMap.Strict qualified as HM
+import Data.Map qualified as Map
+import Gargantext.API.Ngrams.Types (NgramsTerm(..))
+import Gargantext.Core.Methods.Similarities (Similarity(..))
+import Gargantext.Core.Viz.Graph.Tools (doSimilarityMap)
+import Gargantext.Core.Viz.Graph.Types (Strength(..))
 import Gargantext.Prelude
+import Test.Hspec (Spec, describe, it, shouldBe)

-test :: IO ()
-test = hspec $ do
-  describe "Cross" $ do
-    let result = cross' $ matrix 3 ([1,1..] :: [Double])
-    it "compare" $ do
-      shouldBe result (matrix 3 ([2,2..] :: [Double]))
-}
+-- | The terms are composed of 4 food items: 2 "bases" ("feuille" and "pate")
+-- and 2 "fillings" ("tomate" and "boeuf").
+-- The corpus is composed of 4 recipes (one context = one recipe),
+-- one for each possible pairing of a base and a filling.
+recipeCooc :: HM.HashMap (NgramsTerm, NgramsTerm) Int
+recipeCooc = HM.fromList
+  [ ((NgramsTerm "feuille", NgramsTerm "pate"   ), 0)
+  , ((NgramsTerm "boeuf"  , NgramsTerm "tomate" ), 0)
+  , ((NgramsTerm "boeuf"  , NgramsTerm "pate"   ), 1)
+  , ((NgramsTerm "tomate" , NgramsTerm "tomate" ), 2)
+  , ((NgramsTerm "pate"   , NgramsTerm "tomate" ), 1)
+  , ((NgramsTerm "boeuf"  , NgramsTerm "feuille"), 1)
+  , ((NgramsTerm "pate"   , NgramsTerm "pate"   ), 2)
+  , ((NgramsTerm "boeuf"  , NgramsTerm "boeuf"  ), 2)
+  , ((NgramsTerm "feuille", NgramsTerm "feuille"), 2)
+  , ((NgramsTerm "feuille", NgramsTerm "tomate" ), 1)
+  ]
+
+test :: Spec
+test = do
+  describe "Recipe O2 Weight Map" $ do
+    let (recipeWeights, _coocs, dictionary) =
+          doSimilarityMap Distributional 0 Strong recipeCooc
+    it "has the right edges" $ do
+      -- Distributional weight map of the `recipeCooc` virtual corpus
+      let weights = do -- Maybe monad
+            -- Lookup nodes
+            feuilleID <- Map.lookup (NgramsTerm "feuille") dictionary
+            pateID    <- Map.lookup (NgramsTerm "pate"   ) dictionary
+            boeufID   <- Map.lookup (NgramsTerm "boeuf"  ) dictionary
+            tomateID  <- Map.lookup (NgramsTerm "tomate" ) dictionary
+            -- Lookup edge weights, if any
+            let feuillePate   = Map.lookup (feuilleID, pateID) recipeWeights
+            let boeufTomate   = Map.lookup (boeufID, tomateID) recipeWeights
+            let feuilleTomate = Map.lookup (feuilleID, tomateID) recipeWeights
+            let feuilleBoeuf  = Map.lookup (feuilleID, boeufID) recipeWeights
+            let pateTomate    = Map.lookup (pateID, tomateID) recipeWeights
+            let pateBoeuf     = Map.lookup (pateID, boeufID) recipeWeights
+            return [ feuillePate, boeufTomate, feuilleTomate
+                   , feuilleBoeuf, pateTomate, pateBoeuf ]
+      -- We check that there are edges exactly where we expect there to be:
+      (fmap . fmap) positiveWeight weights `shouldBe`
+        Just [True, True, False, False, False, False]
+
+-- | Checks whether `Map.lookup` outputs something, and that that something is
+-- strictly positive, denoting the presence of an edge.
+positiveWeight :: (Num a, Ord a) => Maybe a -> Bool
+positiveWeight Nothing  = False
+positiveWeight (Just w) = w > 0
--- a/test/drivers/tasty/Main.hs
+++ b/test/drivers/tasty/Main.hs
@@ -20,7 +20,8 @@ import qualified Test.Core.Text.Corpus.Query     as CorpusQuery
 import qualified Test.Core.Text.Corpus.TSV       as TSVParser
 import qualified Test.Core.Utils                 as Utils
 import qualified Test.Core.Worker                as Worker
-import qualified Test.Graph.Clustering           as Graph
+import qualified Test.Graph.Clustering           as Clustering
+import qualified Test.Graph.Distance             as Distance
 import qualified Test.Ngrams.Lang.Occurrences    as Occurrences
 import qualified Test.Ngrams.NLP                 as NLP
 import qualified Test.Ngrams.Query               as NgramsQuery
@@ -48,7 +49,8 @@ protectStdoutBuffering action =
 main :: IO ()
 main = do
  utilSpec         <- testSpec "Utils" Utils.test
-  clusteringSpec   <- testSpec "Graph Clustering" Graph.test
+  clusteringSpec   <- testSpec "Graph Clustering" Clustering.test
+  distanceSpec   <- testSpec "Graph Distance" Distance.test
  dateSplitSpec    <- testSpec "Date split" PD.testDateSplit
  cryptoSpec       <- testSpec "Crypto" Crypto.test
  nlpSpec          <- testSpec "NLP" NLP.test
@@ -60,6 +62,7 @@ main = do
  protectStdoutBuffering $ defaultMain $ testGroup "Gargantext"
    [ utilSpec
    , clusteringSpec
+    , distanceSpec
    , dateSplitSpec
    , cryptoSpec
    , nlpSpec