Commit e124af3e authored by Alfredo Di Napoli's avatar Alfredo Di Napoli Committed by Alfredo Di Napoli

Introduce massiv for small tasks

This commit starts introducing `massiv` in the codebase,
initially for simple functions like `termDivNan`. The main
goal is to extend the linear algebra toolkit up to the
point where we can implement `distributional` in terms of
`massive` and measure its performance.
parent 4371781e
......@@ -556,6 +556,7 @@ library
, json-stream ^>= 0.4.2.4
, lens >= 5.2.2 && < 5.3
, lens-aeson < 1.3
, massiv < 1.1
, matrix ^>= 0.3.6.1
, mime-mail >= 0.5.1
, monad-control ^>= 1.0.3.1
......@@ -611,7 +612,7 @@ library
, singletons ^>= 3.0.2
, singletons-th >= 3.1 && < 3.2
, smtp-mail >= 0.3.0.0
, split >= 0.2.0
, split >= 0.2.3.4
, stemmer == 0.5.2
, stm >= 2.5.1.0 && < 2.6
, stm-containers >= 1.2.0.3 && < 1.3
......@@ -696,7 +697,7 @@ executable gargantext
, servant-routes < 0.2
, servant-websockets >= 2.0.0 && < 2.1
, shelly
, split ^>= 0.2.3.4
, split >= 0.2.3.4
, text ^>= 2.0.2
, toml-parser >= 2.0.1.0 && < 3
, tree-diff
......@@ -734,6 +735,8 @@ common testDependencies
, http-client-tls == 0.3.6.1
, http-types
, lens >= 5.2.2 && < 5.3
, massiv < 1.1
, massiv-test < 1.2
, monad-control >= 1.0.3 && < 1.1
, mtl ^>= 2.2.2
, network-uri
......@@ -753,6 +756,7 @@ common testDependencies
, shelly >= 1.9 && < 2
, stm >= 2.5.1.0 && < 2.6
, streaming-commons
, split
, tasty-hunit
, tasty-quickcheck
, text ^>= 2.0.2
......
......@@ -18,6 +18,9 @@ module Gargantext.Core.LinearAlgebra (
-- * Functions
, createIndices
-- * Operations on matrixes
, termDivNan
) where
import Data.Bimap (Bimap)
......@@ -27,6 +30,8 @@ import Data.Map.Strict qualified as M
import Data.Set qualified as S
import Data.Set (Set)
import Prelude
import Data.Massiv.Array qualified as A
import Data.Massiv.Array (D, Matrix)
newtype Index = Index { _Index :: Int }
deriving newtype (Eq, Show, Ord, Num, Enum)
......@@ -39,3 +44,75 @@ createIndices = set2indices . map2set
set2indices :: Ord t => Set t -> Bimap Index t
set2indices s = foldr (uncurry Bimap.insert) Bimap.empty (zip [0..] $ S.toList s)
-- | `distributional m` returns the distributional distance between terms each
-- pair of terms as a matrix. The argument m is the matrix $[n_{ij}]_{i,j}$
-- where $n_{ij}$ is the coocccurrence between term $i$ and term $j$.
--
-- ## Basic example with Matrix of size 3:
--
-- >>> theMatrixInt 3
-- Matrix (Z :. 3 :. 3)
-- [ 7, 4, 0,
-- 4, 5, 3,
-- 0, 3, 4]
--
-- >>> distributional $ theMatrixInt 3
-- Matrix (Z :. 3 :. 3)
-- [ 1.0, 0.0, 0.9843749999999999,
-- 0.0, 1.0, 0.0,
-- 1.0, 0.0, 1.0]
--
-- ## Basic example with Matrix of size 4:
--
-- >>> theMatrixInt 4
-- Matrix (Z :. 4 :. 4)
-- [ 4, 1, 2, 1,
-- 1, 4, 0, 0,
-- 2, 0, 3, 3,
-- 1, 0, 3, 3]
--
-- >>> distributional $ theMatrixInt 4
-- Matrix (Z :. 4 :. 4)
-- [ 1.0, 0.0, 0.5714285714285715, 0.8421052631578947,
-- 0.0, 1.0, 1.0, 1.0,
-- 8.333333333333333e-2, 4.6875e-2, 1.0, 0.25,
-- 0.3333333333333333, 5.7692307692307696e-2, 1.0, 1.0]
--
-- /IMPORTANT/: As this function computes the diagonal matrix in order to carry on the computation
-- the input has to be a square matrix, or this function will fail at runtime.
{-
distributional :: A.Matrix r Int -> A.Matrix r Double
distributional m' = interpret $ result
where
m = map A.fromIntegral $ use m'
n = dim m'
diag_m = diag m
d_1 = replicate (constant (Z :. n :. All)) diag_m
d_2 = replicate (constant (Z :. All :. n)) diag_m
mi = (.*) (termDivNan m d_1) (termDivNan m d_2)
-- w = (.-) mi d_mi
-- The matrix permutations is taken care of below by directly replicating
-- the matrix mi, making the matrix w unneccessary and saving one step.
w_1 = replicate (constant (Z :. All :. n :. All)) mi
w_2 = replicate (constant (Z :. n :. All :. All)) mi
w' = zipWith min w_1 w_2
-- The matrix ii = [r_{i,j,k}]_{i,j,k} has r_(i,j,k) = 0 if k = i OR k = j
-- and r_(i,j,k) = 1 otherwise (i.e. k /= i AND k /= j).
ii = generate (constant (Z :. n :. n :. n))
(lift1 (\( i A.:. j A.:. k) -> cond ((&&) ((/=) k i) ((/=) k j)) 1 0))
z_1 = sum ((A..*) w' ii)
z_2 = sum ((A..*) w_1 ii)
result = termDivNan z_1 z_2
-}
-- | Term by term division where divisions by 0 produce 0 rather than NaN.
termDivNan :: (Eq a, Fractional a) => Matrix D a -> Matrix D a -> Matrix D a
termDivNan = A.zipWith (\i j -> if j == 0 then 0 else i / j)
......@@ -6,47 +6,34 @@ module Gargantext.Orphans.Accelerate where
import Prelude
import Test.QuickCheck
import Data.Array.Accelerate (DIM0, DIM1, DIM2, DIM3, Z (..), (:.) (..), Array, Elt, fromList, use, arrayShape)
import Data.Array.Accelerate (DIM2, Z (..), (:.) (..), Array, Elt, fromList, arrayShape)
import Data.Array.Accelerate qualified as A
import qualified Data.Array.Accelerate.Sugar.Shape as AS
import qualified Data.Array.Accelerate.Interpreter as Naive
import qualified Data.List.Split as Split
instance Arbitrary DIM0 where
arbitrary = return Z
instance Arbitrary DIM1 where
arbitrary = (Z :.) <$> choose (0,1024)
shrink = \(Z :. i) -> if i <= 0 then [] else [Z :. i - 1 ]
instance Arbitrary DIM2 where
arbitrary = do
x <- choose (0,128)
y <- choose (0,48)
return (Z :. y :. x)
shrink = \(Z :. r :. c) ->
if | r <= 0 -> []
| c <= 0 -> []
| otherwise -> [Z :. (r - 1)
:. (c - 1)
]
instance Arbitrary DIM3 where
arbitrary = do
x <- choose (0,64)
y <- choose (0,32)
z <- choose (0,16)
return (Z :. z :. y :. x)
instance (Elt e, Arbitrary e) => Arbitrary (Array DIM2 e) where
instance (Show e, Elt e, Arbitrary e) => Arbitrary (Array DIM2 e) where
arbitrary = do
sh <- arbitrary
fromList sh <$> vectorOf (AS.size sh) arbitrary
x <- choose (1,128)
y <- choose (1,48)
let sh = Z :. x :. y
fromList sh <$> vectorOf (x * y) arbitrary
shrink arr = sliceArray arr
-- Slice the array to the new shape, keeping the square dimensions.
sliceArray :: Elt e => Array DIM2 e -> [Array DIM2 e]
sliceArray :: (Elt e, Show e) => Array DIM2 e -> [Array DIM2 e]
sliceArray arr =
case arrayShape arr of
(Z :. x :. y) -> case (x, y) of
(0,0) -> []
_ -> [ Naive.run $ A.init $ A.transpose $ A.init $ use arr ]
(_,1) -> [ resizeArray arr (max 1 (x - 1)) y ]
(1,_) -> [ resizeArray arr x (max 1 ( y - 1)) ]
_ -> [ resizeArray arr (max 1 (x - 1)) (max 1 y)
, resizeArray arr (max 1 x) (max 1 (y - 1))
, resizeArray arr (max 1 (x - 1)) (max 1 (y - 1))
]
resizeArray :: (Show e, Elt e) => Array DIM2 e -> Int -> Int -> Array DIM2 e
resizeArray arr rows cols =
let (Z :. _originRows :. originCols) = arrayShape arr
vals = A.toList arr
chunks = map (take cols) $ Split.chunksOf originCols vals
m' = mconcat $ take rows chunks
in A.fromList (Z :. rows :. cols) m'
......@@ -20,6 +20,14 @@ import Gargantext.Orphans.Accelerate (sliceArray)
import Prelude hiding ((^))
import Test.Tasty
import Test.Tasty.QuickCheck
import Data.Massiv.Array qualified as Massiv
import qualified Data.Array.Accelerate as A
import qualified Data.List.Split.Internals as Split
import Gargantext.Prelude (headMay)
--
-- Utility types and functions
--
newtype SquareMatrix a = SquareMatrix { _SquareMatrix :: Matrix a }
deriving newtype (Show, Eq)
......@@ -49,17 +57,6 @@ compareImplementations' :: (Arbitrary a, Eq c, Show c)
compareImplementations' implementation1 implementation2 mapResults inputData
= mapResults (implementation1 inputData) === mapResults (implementation2 inputData)
compareTermDivNan :: (Array TermDivNanShape Double)
-> (Array TermDivNanShape Double)
-> Property
compareTermDivNan i1 i2
= Naive.run (Legacy.termDivNan (use i1) (use i2)) === Naive.run (Legacy.termDivNan (use i1) (use i2))
compareDistributional :: Matrix Int
-> Property
compareDistributional i1
= Legacy.distributionalWith Naive.run i1 === Legacy.distributionalWith Naive.run i1
mapCreateIndices :: Ord t => (Map t Legacy.Index, Map Legacy.Index t) -> Bimap LA.Index t
mapCreateIndices (_m1, m2) = Bimap.fromList $ map (first LA.Index) $ M.toList m2
......@@ -68,6 +65,22 @@ type TermDivNanShape = Z :. Int :. Int
twoByTwo :: Matrix Int
twoByTwo = fromList (Z :. 2 :. 2) (Prelude.replicate 4 0)
accelerate2MassivMatrix :: (Massiv.Unbox a, Elt a) => Matrix a -> Massiv.Matrix Massiv.D a
accelerate2MassivMatrix m =
let (Z :. _r :. c) = A.arrayShape m
in Massiv.delay $ Massiv.fromLists' @Massiv.U Massiv.Par $ Split.chunksOf c (A.toList m)
massiv2AccelerateMatrix :: Elt a => Massiv.Matrix Massiv.D a -> Matrix a
massiv2AccelerateMatrix m =
let m' = Massiv.toLists2 m
r = Prelude.length m'
c = maybe 0 Prelude.length (headMay m')
in A.fromList (Z :. r :. c) (mconcat m')
--
-- Main test runner
--
-- | Needed as the LLVM and Naive backend generates some double with a long exponent which
-- won't compare verbatim.
tests :: TestTree
......@@ -81,3 +94,21 @@ tests = testGroup "LinearAlgebra" [
id)
]
]
--
-- Tests
--
compareTermDivNan :: (Array TermDivNanShape Double)
-> (Array TermDivNanShape Double)
-> Property
compareTermDivNan i1 i2
= let massiv = LA.termDivNan (accelerate2MassivMatrix i1) (accelerate2MassivMatrix i2)
accelerate = Naive.run (Legacy.termDivNan (use i1) (use i2))
in accelerate === massiv2AccelerateMatrix massiv
compareDistributional :: Matrix Int
-> Property
compareDistributional i1
= Legacy.distributionalWith Naive.run i1 === Legacy.distributionalWith Naive.run i1
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment