Introduce massiv for small tasks

This commit starts introducing `massiv` in the codebase, initially for simple functions like `termDivNan`. The main goal is to extend the linear algebra toolkit up to the point where we can implement `distributional` in terms of `massive` and measure its performance.

Introduce massiv for small tasks
This commit starts introducing `massiv` in the codebase, initially for simple functions like `termDivNan`. The main goal is to extend the linear algebra toolkit up to the point where we can implement `distributional` in terms of `massive` and measure its performance.
e124af3e · Alfredo Di Napoli · Alfredo Di Napoli · 4371781e · e124af3e · e124af3e
Commit e124af3e authored Jan 21, 2025 by Alfredo Di Napoli Committed by Alfredo Di Napoli Feb 27, 2025
4 changed files
--- a/gargantext.cabal
+++ b/gargantext.cabal
@@ -556,6 +556,7 @@ library
    , json-stream ^>= 0.4.2.4
    , lens >= 5.2.2 && < 5.3
    , lens-aeson < 1.3
+    , massiv < 1.1
    , matrix ^>= 0.3.6.1
    , mime-mail >= 0.5.1
    , monad-control ^>= 1.0.3.1
@@ -611,7 +612,7 @@ library
    , singletons ^>= 3.0.2
    , singletons-th >= 3.1 && < 3.2
    , smtp-mail >= 0.3.0.0
-    , split >= 0.2.0
+    , split >= 0.2.3.4
    , stemmer == 0.5.2
    , stm >= 2.5.1.0 && < 2.6
    , stm-containers >= 1.2.0.3 && < 1.3
@@ -696,7 +697,7 @@ executable gargantext
    , servant-routes < 0.2
    , servant-websockets >= 2.0.0 && < 2.1
    , shelly
-    , split ^>= 0.2.3.4
+    , split >= 0.2.3.4
    , text ^>= 2.0.2
    , toml-parser >= 2.0.1.0 && < 3
    , tree-diff
@@ -734,6 +735,8 @@ common testDependencies
    , http-client-tls == 0.3.6.1
    , http-types
    , lens >= 5.2.2 && < 5.3
+    , massiv < 1.1
+    , massiv-test < 1.2
    , monad-control >= 1.0.3 && < 1.1
    , mtl ^>= 2.2.2
    , network-uri
@@ -753,6 +756,7 @@ common testDependencies
    , shelly >= 1.9 && < 2
    , stm >= 2.5.1.0 && < 2.6
    , streaming-commons
+    , split
    , tasty-hunit
    , tasty-quickcheck
    , text ^>= 2.0.2

--- a/src/Gargantext/Core/LinearAlgebra.hs
+++ b/src/Gargantext/Core/LinearAlgebra.hs
@@ -18,6 +18,9 @@ module Gargantext.Core.LinearAlgebra (
  -- * Functions
  , createIndices
+  -- * Operations on matrixes
+  , termDivNan
  ) where
 import Data.Bimap (Bimap)
@@ -27,6 +30,8 @@ import Data.Map.Strict qualified as M
 import Data.Set qualified as S
 import Data.Set (Set)
 import Prelude
+import Data.Massiv.Array qualified as A
+import Data.Massiv.Array (D, Matrix)
 newtype Index = Index { _Index :: Int }
  deriving newtype (Eq, Show, Ord, Num, Enum)
@@ -39,3 +44,75 @@ createIndices = set2indices . map2set
    set2indices :: Ord t => Set t -> Bimap Index t
    set2indices s = foldr (uncurry Bimap.insert) Bimap.empty (zip [0..] $ S.toList s)
+-- | `distributional m` returns the distributional distance between terms each
+-- pair of terms as a matrix.  The argument m is the matrix $[n_{ij}]_{i,j}$
+-- where $n_{ij}$ is the coocccurrence between term $i$ and term $j$.
+--
+-- ## Basic example with Matrix of size 3: 
+--
+-- >>> theMatrixInt 3
+-- Matrix (Z :. 3 :. 3)
+--   [ 7, 4, 0,
+--     4, 5, 3,
+--     0, 3, 4]
+--
+-- >>> distributional $ theMatrixInt 3
+-- Matrix (Z :. 3 :. 3)
+--   [ 1.0, 0.0, 0.9843749999999999,
+--     0.0, 1.0,                0.0,
+--     1.0, 0.0,                1.0]
+--
+-- ## Basic example with Matrix of size 4: 
+--
+-- >>> theMatrixInt 4
+-- Matrix (Z :. 4 :. 4)
+--   [ 4, 1, 2, 1,
+--     1, 4, 0, 0,
+--     2, 0, 3, 3,
+--     1, 0, 3, 3]
+--
+-- >>> distributional $ theMatrixInt 4
+-- Matrix (Z :. 4 :. 4)
+--   [                  1.0,                   0.0, 0.5714285714285715, 0.8421052631578947,
+--                      0.0,                   1.0,                1.0,                1.0,
+--     8.333333333333333e-2,             4.6875e-2,                1.0,               0.25,
+--       0.3333333333333333, 5.7692307692307696e-2,                1.0,                1.0]
+--
+-- /IMPORTANT/: As this function computes the diagonal matrix in order to carry on the computation
+-- the input has to be a square matrix, or this function will fail at runtime.
+{-
+distributional :: A.Matrix r Int -> A.Matrix r Double
+distributional m' = interpret $ result
+ where
+    m = map A.fromIntegral $ use m'
+    n = dim m'
+    diag_m = diag m
+    d_1 = replicate (constant (Z :. n :. All)) diag_m
+    d_2 = replicate (constant (Z :. All :. n)) diag_m
+    mi    = (.*) (termDivNan m d_1) (termDivNan m d_2)
+    -- w = (.-) mi d_mi
+    -- The matrix permutations is taken care of below by directly replicating
+    -- the matrix mi, making the matrix w unneccessary and saving one step.
+    w_1 = replicate (constant (Z :. All :. n :. All)) mi
+    w_2 = replicate (constant (Z :. n :. All :. All)) mi
+    w' = zipWith min w_1 w_2
+    -- The matrix ii = [r_{i,j,k}]_{i,j,k} has r_(i,j,k) = 0 if k = i OR k = j 
+    -- and r_(i,j,k) = 1 otherwise (i.e. k /= i AND k /= j). 
+    ii = generate (constant (Z :. n :. n :. n)) 
+        (lift1 (\( i A.:. j A.:. k) -> cond ((&&) ((/=) k i) ((/=) k j)) 1 0))
+    z_1 = sum ((A..*) w' ii)
+    z_2 = sum ((A..*) w_1 ii)
+    result = termDivNan z_1 z_2
+-}
+-- | Term by term division where divisions by 0 produce 0 rather than NaN.
+termDivNan :: (Eq a, Fractional a) => Matrix D a -> Matrix D a -> Matrix D a
+termDivNan = A.zipWith (\i j -> if j == 0 then 0 else i / j)
--- a/src/Gargantext/Orphans/Accelerate.hs
+++ b/src/Gargantext/Orphans/Accelerate.hs
@@ -6,47 +6,34 @@ module Gargantext.Orphans.Accelerate where
 import Prelude
 import Test.QuickCheck
-import Data.Array.Accelerate (DIM0, DIM1, DIM2, DIM3, Z (..), (:.) (..), Array, Elt, fromList, use, arrayShape)
+import Data.Array.Accelerate (DIM2, Z (..), (:.) (..), Array, Elt, fromList, arrayShape)
 import Data.Array.Accelerate qualified as A
-import qualified Data.Array.Accelerate.Sugar.Shape as AS
+import qualified Data.List.Split as Split
-import qualified Data.Array.Accelerate.Interpreter as Naive
-instance Arbitrary DIM0 where
+instance (Show e, Elt e, Arbitrary e) => Arbitrary (Array DIM2 e) where
-  arbitrary = return Z
-instance Arbitrary DIM1 where
-  arbitrary = (Z :.) <$> choose (0,1024)
-  shrink = \(Z :. i) -> if i <= 0 then [] else [Z :. i - 1 ]
-instance Arbitrary DIM2 where
-  arbitrary = do
-    x <- choose (0,128)
-    y <- choose (0,48)
-    return (Z :. y :. x)
-  shrink = \(Z :. r :. c) ->
-    if | r <= 0 -> []
-       | c <= 0 -> []
-       | otherwise -> [Z :. (r - 1)
-                         :. (c - 1)
-                      ]
-instance Arbitrary DIM3 where
-  arbitrary = do
-    x <- choose (0,64)
-    y <- choose (0,32)
-    z <- choose (0,16)
-    return (Z :. z :. y :. x)
-instance (Elt e, Arbitrary e) => Arbitrary (Array DIM2 e) where
  arbitrary = do
-    sh <- arbitrary
+    x <- choose (1,128)
-    fromList sh <$> vectorOf (AS.size sh) arbitrary
+    y <- choose (1,48)
+    let sh = Z :. x :. y
+    fromList sh <$> vectorOf (x * y) arbitrary
  shrink arr = sliceArray arr
 -- Slice the array to the new shape, keeping the square dimensions.
-sliceArray :: Elt e => Array DIM2 e -> [Array DIM2 e]
+sliceArray :: (Elt e, Show e) => Array DIM2 e -> [Array DIM2 e]
 sliceArray arr =
  case arrayShape arr of
    (Z :. x :. y) -> case (x, y) of
-      (0,0) -> []
+      (_,1) -> [ resizeArray arr (max 1 (x - 1)) y  ]
-      _     -> [ Naive.run $ A.init $ A.transpose $ A.init $ use arr ]
+      (1,_) -> [ resizeArray arr x (max 1 ( y - 1)) ]
+      _     -> [ resizeArray arr (max 1 (x - 1)) (max 1 y)
+               , resizeArray arr (max 1 x) (max 1 (y - 1))
+               , resizeArray arr (max 1 (x - 1)) (max 1 (y - 1))
+               ]
+resizeArray :: (Show e, Elt e) => Array DIM2 e -> Int -> Int -> Array DIM2 e
+resizeArray arr rows cols =
+  let (Z :. _originRows :. originCols) = arrayShape arr
+      vals   = A.toList arr
+      chunks = map (take cols) $ Split.chunksOf originCols vals
+      m'     = mconcat $ take rows chunks
+  in A.fromList (Z :. rows :. cols) m'
--- a/test/Test/Core/LinearAlgebra.hs
+++ b/test/Test/Core/LinearAlgebra.hs
@@ -20,6 +20,14 @@ import Gargantext.Orphans.Accelerate (sliceArray)
 import Prelude hiding ((^))
 import Test.Tasty
 import Test.Tasty.QuickCheck
+import Data.Massiv.Array qualified as Massiv
+import qualified Data.Array.Accelerate as A
+import qualified Data.List.Split.Internals as Split
+import Gargantext.Prelude (headMay)
+--
+-- Utility types and functions
+--
 newtype SquareMatrix a = SquareMatrix { _SquareMatrix :: Matrix a }
  deriving newtype (Show, Eq)
@@ -49,17 +57,6 @@ compareImplementations' :: (Arbitrary a, Eq c, Show c)
 compareImplementations' implementation1 implementation2 mapResults inputData
  = mapResults (implementation1 inputData) === mapResults (implementation2 inputData)
-compareTermDivNan :: (Array TermDivNanShape Double)
-                  -> (Array TermDivNanShape Double)
-                  -> Property
-compareTermDivNan i1 i2
-  = Naive.run (Legacy.termDivNan (use i1) (use i2)) === Naive.run (Legacy.termDivNan (use i1) (use i2))
-compareDistributional :: Matrix Int
-                      -> Property
-compareDistributional i1
-  = Legacy.distributionalWith Naive.run i1 === Legacy.distributionalWith Naive.run i1
 mapCreateIndices :: Ord t => (Map t Legacy.Index, Map Legacy.Index t) -> Bimap LA.Index t
 mapCreateIndices (_m1, m2) = Bimap.fromList $ map (first LA.Index) $ M.toList m2
@@ -68,6 +65,22 @@ type TermDivNanShape = Z :. Int :. Int
 twoByTwo :: Matrix Int
 twoByTwo = fromList (Z :. 2 :. 2) (Prelude.replicate 4 0)
+accelerate2MassivMatrix :: (Massiv.Unbox a, Elt a) => Matrix a -> Massiv.Matrix Massiv.D a
+accelerate2MassivMatrix m =
+  let (Z :. _r :. c) = A.arrayShape m
+  in Massiv.delay $ Massiv.fromLists' @Massiv.U Massiv.Par $ Split.chunksOf c (A.toList m)
+massiv2AccelerateMatrix :: Elt a => Massiv.Matrix Massiv.D a -> Matrix a
+massiv2AccelerateMatrix m =
+  let m' = Massiv.toLists2 m
+      r  = Prelude.length m'
+      c  = maybe 0 Prelude.length (headMay m')
+  in A.fromList (Z :. r :. c) (mconcat m')
+--
+-- Main test runner
+--
 -- | Needed as the LLVM and Naive backend generates some double with a long exponent which
 -- won't compare verbatim.
 tests :: TestTree
@@ -81,3 +94,21 @@ tests = testGroup "LinearAlgebra" [
                                                                               id)
      ]
    ]
+--
+-- Tests
+--
+compareTermDivNan :: (Array TermDivNanShape Double)
+                  -> (Array TermDivNanShape Double)
+                  -> Property
+compareTermDivNan i1 i2
+  = let massiv     = LA.termDivNan (accelerate2MassivMatrix i1) (accelerate2MassivMatrix i2)
+        accelerate = Naive.run (Legacy.termDivNan (use i1) (use i2))
+    in accelerate === massiv2AccelerateMatrix massiv
+compareDistributional :: Matrix Int
+                      -> Property
+compareDistributional i1
+  = Legacy.distributionalWith Naive.run i1 === Legacy.distributionalWith Naive.run i1