[FIX] merge

dab6f378 · Alexandre Delanoë · d3d2b646 · 804f9027 · dab6f378 · dab6f378
Commit dab6f378 authored Dec 30, 2020 by Alexandre Delanoë
6 changed files
--- a/package.yaml
+++ b/package.yaml
@@ -108,6 +108,8 @@ library:
  - SHA
  - Unique
  - accelerate
+  - accelerate-utility
+  - accelerate-arithmetic
  - aeson
  - aeson-lens
  - aeson-pretty

--- a/pinned-20.09.nix
+++ b/pinned-20.09.nix
@@ -4,7 +4,6 @@ import (builtins.fetchGit {
  # Descriptive name to make the store path easier to identify
  name = "nixos-20.09";
  url = "https://github.com/nixos/nixpkgs/";
-  # Last commit hash for nixos-unstable
  # `git ls-remote https://github.com/nixos/nixpkgs-channels nixos-20.09`
  ref = "refs/heads/nixos-20.09";
  rev = "19db3e5ea2777daa874563b5986288151f502e27";

--- a/src/Gargantext/Core/Methods/Distances/Accelerate/Distributional.hs
+++ b/src/Gargantext/Core/Methods/Distances/Accelerate/Distributional.hs
 {-|
 Module      : Gargantext.Core.Methods.Distances.Accelerate.Distributional
-Description : 
+Description :
 Copyright   : (c) CNRS, 2017-Present
 License     : AGPL + CECILL v3
 Maintainer  : team@gargantext.org
 Stability   : experimental
 Portability : POSIX
-This module aims at implementig distances of terms context by context is
-the same referential of corpus.
-Implementation use Accelerate library which enables GPU and CPU computation
+* Distributional Distance metric
-See Gargantext.Core.Methods.Graph.Accelerate)
+__Definition :__ Distributional metric is a relative metric which depends on the
+selected list, it represents structural equivalence of mutual information.
+__Objective :__ We want to compute with matrices processing the similarity between term $i$ and term $j$ :
+ distr(i,j)=$\frac{\Sigma_{k \neq i,j} min(\frac{n_{ik}^2}{n_{ii}n_{kk}},\frac{n_{jk}^2}{n_{jj}n_{kk}})}{\Sigma_{k \neq i}\frac{n_{ik}^2}{ n_{ii}n_{kk}}}$
+where $n_{ij}$ is the cooccurrence between term $i$ and term $j$
+* For a vector V=[$x_1$ ... $x_n$], we note $|V|_1=\Sigma_ix_i$
+* operator : .* and ./ cell by cell multiplication and division of the matrix
+* operator * is the matrix multiplication
+* Matrice M=[$n_{ij}$]$_{i,j}$
+* opérateur : Diag(M)=[$n_{ii}$]$_i$ (vecteur)
+* Id= identity matrix
+* O=[1]$_{i,j}$ (matrice one)
+* D(M)=Id .* M
+* O * D(M) =[$n_{jj}$]$_{i,j}$
+* D(M) * O =[$n_{ii}$]$_{i,j}$
+* $V_i=[0~0~0~1~0~0~0]'$ en i
+* MI=(M ./ O * D(M)) .* (M / D(M) * O )
+* distr(i,j)=$\frac{|min(V'_i * (MI-D(MI)),V'_j * (MI-D(MI)))|_1}{|V'_i.(MI-D(MI))|_1}$
+[Specifications written by David Chavalarias on Garg v4 shared NodeWrite, team Pyremiel 2020]
 -}
@@ -30,15 +50,72 @@ import Data.Array.Accelerate.Interpreter (run)
 import Gargantext.Core.Methods.Matrix.Accelerate.Utils
 import qualified Gargantext.Prelude as P
+-- | `distributional m` returns the distributional distance between terms each
+-- pair of terms as a matrix.  The argument m is the matrix $[n_{ij}]_{i,j}$
+-- where $n_{ij}$ is the coocccurrence between term $i$ and term $j$.
+--
+-- ## Basic example with Matrix of size 3: 
+--
+-- >>> theMatrixInt 3
+-- Matrix (Z :. 3 :. 3)
+--   [ 7, 4, 0,
+--     4, 5, 3,
+--     0, 3, 4]
+--
+-- >>> distributional $ theMatrixInt 3
+-- Matrix (Z :. 3 :. 3)
+--   [ 1.0, 0.0, 0.9843749999999999,
+--     0.0, 1.0,                0.0,
+--     1.0, 0.0,                1.0]
+--
+-- ## Basic example with Matrix of size 4: 
+--
+-- >>> theMatrixInt 4
+-- Matrix (Z :. 4 :. 4)
+--   [ 4, 1, 2, 1,
+--     1, 4, 0, 0,
+--     2, 0, 3, 3,
+--     1, 0, 3, 3]
+--
+-- >>> distributional $ theMatrixInt 4
+-- Matrix (Z :. 4 :. 4)
+--   [                  1.0,                   0.0, 0.5714285714285715, 0.8421052631578947,
+--                      0.0,                   1.0,                1.0,                1.0,
+--     8.333333333333333e-2,             4.6875e-2,                1.0,               0.25,
+--       0.3333333333333333, 5.7692307692307696e-2,                1.0,                1.0]
+--
+distributional :: Matrix Int -> Matrix Double
+distributional m' = run result
+ where
+    m = map fromIntegral $ use m'
+    n = dim m'
+    diag_m = diag m
+    d_1 = replicate (constant (Z :. n :. All)) diag_m
+    d_2 = replicate (constant (Z :. All :. n)) diag_m
+    mi    = (.*) ((./) m d_1) ((./) m d_2)
+    -- w = (.-) mi d_mi
+    -- The matrix permutations is taken care of below by directly replicating
+    -- the matrix mi, making the matrix w unneccessary and saving one step.
+    w_1 = replicate (constant (Z :. All :. n :. All)) mi
+    w_2 = replicate (constant (Z :. n :. All :. All)) mi
+    w' = zipWith min w_1 w_2
+    -- The matrix ii = [r_{i,j,k}]_{i,j,k} has r_(i,j,k) = 0 if k = i OR k = j 
+    -- and r_(i,j,k) = 1 otherwise (i.e. k /= i AND k /= j). 
+    ii = generate (constant (Z :. n :. n :. n)) 
+        (lift1 (\(Z :. i :. j :. k) -> cond ((&&) ((/=) k i) ((/=) k j)) 1 0))
+    z_1 = sum ((.*) w' ii)
+    z_2 = sum ((.*) w_1 ii)
+    result = termDivNan z_1 z_2
-- * Metrics of proximity
-----------------------------------------------------------------------
-- ** Distributional Distance
-- | Distributional Distance metric
--
-- Distributional metric is a relative metric which depends on the
-- selected list, it represents structural equivalence of mutual information.
 --
 -- The distributional metric P(c) of @i@ and @j@ terms is: \[
 -- S_{MI} = \frac {\sum_{k \neq i,j ; MI_{ik} >0}^{} \min(MI_{ik},
@@ -59,8 +136,9 @@ import qualified Gargantext.Prelude as P
 -- Total cooccurrences of terms given a map list of size @m@
 --            \[N_{m} = \sum_{i,i \neq i}^{m} \sum_{j, j \neq j}^{m} S_{ij}\]
 --
-distributional :: Matrix Int -> Matrix Double
-distributional m = -- run {- $ matMiniMax -}
+distributional'' :: Matrix Int -> Matrix Double
+distributional'' m = -- run {- $ matMiniMax -}
                   run  $ diagNull n
                       $ rIJ n
                       $ filterWith 0 100
@@ -107,6 +185,6 @@ rIJ n m = matMiniMax $ divide a b
 -- | Test perfermance with this matrix
 -- TODO : add this in a benchmark folder
 distriTest :: Int -> Matrix Double
-distriTest n = distributional (theMatrix n)
+distriTest n = distributional (theMatrixInt n)
--- a/src/Gargantext/Core/Methods/Matrix/Accelerate/Utils.hs
+++ b/src/Gargantext/Core/Methods/Matrix/Accelerate/Utils.hs
@@ -36,9 +36,96 @@ import Data.Array.Accelerate
 import Data.Array.Accelerate.Interpreter (run)
 import qualified Gargantext.Prelude as P
+-- | Matrix cell by cell multiplication
+(.*) :: ( Shape ix
+        , Slice ix
+        , Elt a
+        , P.Num (Exp a)
+        )
+     => Acc (Array ((ix :. Int) :. Int) a)
+     -> Acc (Array ((ix :. Int) :. Int) a)
+     -> Acc (Array ((ix :. Int) :. Int) a)
+(.*) = zipWith (*)
+(./) :: ( Shape ix
+        , Slice ix
+        , Elt a
+        , P.Num (Exp a)
+        , P.Fractional (Exp a)
+        )
+     => Acc (Array ((ix :. Int) :. Int) a)
+     -> Acc (Array ((ix :. Int) :. Int) a)
+     -> Acc (Array ((ix :. Int) :. Int) a)
+(./) = zipWith (/)
+-- | Term by term division where divisions by 0 produce 0 rather than NaN. 
+termDivNan :: ( Shape ix
+        , Slice ix
+        , Elt a
+        , Eq a
+        , P.Num (Exp a)
+        , P.Fractional (Exp a)
+        )
+     => Acc (Array ((ix :. Int) :. Int) a)
+     -> Acc (Array ((ix :. Int) :. Int) a)
+     -> Acc (Array ((ix :. Int) :. Int) a)
+termDivNan = zipWith (\i j -> cond ((==) j 0) 0 ((/) i j))
+(.-) :: ( Shape ix
+        , Slice ix
+        , Elt a
+        , P.Num (Exp a)
+        , P.Fractional (Exp a)
+        )
+     => Acc (Array ((ix :. Int) :. Int) a)
+     -> Acc (Array ((ix :. Int) :. Int) a)
+     -> Acc (Array ((ix :. Int) :. Int) a)
+(.-) = zipWith (-)
+(.+) :: ( Shape ix
+        , Slice ix
+        , Elt a
+        , P.Num (Exp a)
+        , P.Fractional (Exp a)
+        )
+     => Acc (Array ((ix :. Int) :. Int) a)
+     -> Acc (Array ((ix :. Int) :. Int) a)
+     -> Acc (Array ((ix :. Int) :. Int) a)
+(.+) = zipWith (+)
 -----------------------------------------------------------------------
-runExp :: Elt e => Exp e -> e
+matrixOne :: Num a => Dim -> Acc (Matrix a)
-runExp e = indexArray (run (unit e)) Z
+matrixOne n' = ones
+  where
+    ones  = fill (index2 n n) 1
+    n     = constant n'
+matrixIdentity :: Num a => Dim -> Acc (Matrix a)
+matrixIdentity n' =
+        let zeros = fill (index2 n n) 0
+            ones  = fill (index1 n)   1
+            n = constant n'
+        in
+        permute const zeros (\(unindex1 -> i) -> index2 i i) ones
+matrixEye :: Num a => Dim -> Acc (Matrix a)
+matrixEye n' =
+        let ones   = fill (index2 n n) 1
+            zeros  = fill (index1 n)   0
+            n = constant n'
+        in
+        permute const ones (\(unindex1 -> i) -> index2 i i) zeros
+diagNull :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
+diagNull n m = zipWith (*) m (matrixEye n)
+-----------------------------------------------------------------------
+_runExp :: Elt e => Exp e -> e
+_runExp e = indexArray (run (unit e)) Z
 -----------------------------------------------------------------------
 -- | Define a vector
@@ -89,10 +176,10 @@ dim m = n
 --   [ 12.0, 15.0, 18.0,
 --     12.0, 15.0, 18.0,
 --     12.0, 15.0, 18.0]
-matSumCol :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
+matSumCol :: (Elt a, P.Num (Exp a)) => Dim -> Acc (Matrix a) -> Acc (Matrix a)
 matSumCol r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum $ transpose mat
-matSumCol' :: Matrix Double -> Matrix Double
+matSumCol' :: (Elt a, P.Num (Exp a)) => Matrix a -> Matrix a
 matSumCol' m = run $ matSumCol n m'
  where
    n  = dim m
@@ -164,23 +251,10 @@ filterWith' :: (Elt a, Ord a) => Exp a -> Exp a -> Acc (Matrix a) -> Acc (Matrix
 filterWith' t v m = map (\x -> ifThenElse (x > t) x v) m
+------------------------------------------------------------------------
-- run $ (identityMatrix (DAA.constant (10::Int)) :: DAA.Acc (DAA.Matrix Int)) Matrix (Z :. 10 :. 10)
+------------------------------------------------------------------------
-identityMatrix :: Num a => Exp Int -> Acc (Matrix a)
-identityMatrix n =
-        let zeros = fill (index2 n n) 0
-            ones  = fill (index1 n)   1
-        in
-        permute const zeros (\(unindex1 -> i) -> index2 i i) ones
-eyeMatrix :: Num a => Dim -> Acc (Matrix a)
-eyeMatrix n' =
-        let ones   = fill (index2 n n) 1
-            zeros  = fill (index1 n)   0
-            n = constant n'
-        in
-        permute const ones (\(unindex1 -> i) -> index2 i i) zeros
 -- | TODO use Lenses
 data Direction = MatCol (Exp Int) | MatRow (Exp Int) | Diag
@@ -259,11 +333,6 @@ selfMatrix' m' = run $ selfMatrix n
    m = use m'
 -}
 -------------------------------------------------
-diagNull :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
-diagNull n m = zipWith (*) m eye
-  where
-    eye = eyeMatrix n
 -------------------------------------------------
 crossProduct :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
 crossProduct n m = {-trace (P.show (run m',run m'')) $-} zipWith (*) m' m''
@@ -296,7 +365,7 @@ cross' :: Matrix Double -> Matrix Double
 cross' mat = run $ cross n mat'
  where
    mat' = use mat
-    n = dim mat
+    n    = dim mat
 {-
@@ -313,23 +382,28 @@ p_ m = zipWith (/) m (n_ m)
                         ) m
 -}
-theMatrix :: Int -> Matrix Int
+theMatrixDouble :: Int -> Matrix Double
-theMatrix n = matrix n (dataMatrix n)
+theMatrixDouble n = run $ map fromIntegral (use $ theMatrixInt n)
+theMatrixInt :: Int -> Matrix Int
+theMatrixInt n = matrix n (dataMatrix n)
  where
    dataMatrix :: Int -> [Int]
    dataMatrix x | (P.==) x 2 = [ 1, 1
                                , 1, 2
                                ]
-                 | (P.==) x 3 =  [ 1, 1, 2
+                 | (P.==) x 3 =  [ 7, 4, 0
-                                 , 1, 2, 3
+                                 , 4, 5, 3
-                                 , 2, 3, 4
+                                 , 0, 3, 4
                                 ]
-                 | (P.==) x 4 =  [ 1, 1, 2, 3
+                 | (P.==) x 4 =  [ 4, 1, 2, 1
-                                 , 1, 2, 3, 4
+                                 , 1, 4, 0, 0
-                                 , 2, 3, 4, 5
+                                 , 2, 0, 3, 3
-                                 , 3, 4, 5, 6
+                                 , 1, 0, 3, 3
                                 ]
                 | P.otherwise = P.undefined
 {-

--- a/src/Gargantext/Core/Viz/Graph/API.hs
+++ b/src/Gargantext/Core/Viz/Graph/API.hs
@@ -91,7 +91,8 @@ getGraph _uId nId = do
  -- TODO Distance in Graph params
  case graph of
    Nothing     -> do
-        graph' <- computeGraph cId Conditional NgramsTerms repo
+        graph' <- computeGraph cId Distributional NgramsTerms repo
+        -- graph' <- computeGraph cId Conditional NgramsTerms repo
        mt     <- defaultGraphMetadata cId "Title" repo
        let graph'' = set graph_metadata (Just mt) graph'
        let hg = HyperdataGraphAPI graph'' camera
@@ -204,7 +205,7 @@ graphRecompute u n logStatus = do
                   , _scst_remaining = Just 1
                   , _scst_events    = Just []
                   }
-  _g <- trace (show u) $ recomputeGraph u n Conditional
+  _g <- trace (show u) $ recomputeGraph u n Distributional -- Conditional
  pure  JobLog { _scst_succeeded = Just 1
               , _scst_failed    = Just 0
               , _scst_remaining = Just 0
@@ -239,7 +240,7 @@ graphVersions nId = do
                       , gv_repo = v }
 recomputeVersions :: UserId -> NodeId -> GargNoServer Graph
-recomputeVersions uId nId = recomputeGraph uId nId Conditional
+recomputeVersions uId nId = recomputeGraph uId nId Distributional -- Conditional
 ------------------------------------------------------------
 graphClone :: UserId

--- a/stack.yaml
+++ b/stack.yaml
@@ -7,6 +7,8 @@ packages:
 #- 'deps/patches-map'
 #- 'deps/servant-job'
 #- 'deps/clustering-louvain'
+#- 'deps/accelerate'
+#- 'deps/accelerate-utility'
 docker:
  enable: false
@@ -20,6 +22,7 @@ nix:
  shell-file: build-shell.nix
 allow-newer: true
 extra-deps:
  # Data Mining Libs
@@ -71,10 +74,18 @@ extra-deps:
 - git: https://gitlab.iscpif.fr/gargantext/clustering-louvain.git
  commit: 7d74f96dfea8e51fbab1793cc0429b2fe741f73d
+  # Accelerate Linear Algebra and specific instances
+  # (UndecidableInstances for newer GHC version)
+- git: https://gitlab.iscpif.fr/anoe/accelerate.git
+  commit: f5c0e0071ec7b6532f9a9cd3eb33d14f340fbcc9
+- git: https://gitlab.iscpif.fr/anoe/accelerate-utility.git
+  commit: 83ada76e78ac10d9559af8ed6bd4064ec81308e4
+- accelerate-arithmetic-1.0.0.1@sha256:555639232aa5cad411e89247b27871d09352b987a754230a288c690b6de6d888,2096
  # Others dependencies (with stack resolver)
 - KMP-0.2.0.0@sha256:6dfbac03ef00ebd9347234732cb86a40f62ab5a80c0cc6bedb8eb51766f7df28,2562
 - Unique-0.4.7.7@sha256:2269d3528271e25d34542e7c24a4e541e27ec33460e1ea00845da95b82eec6fa,2777
- accelerate-1.2.0.1@sha256:bb1928efe602545df4043692916ed427c959110cbd678d03c3f9c3be25d1ae88,20112
+- dependent-sum-0.4@sha256:40c705604f52374fb72616e10234635104a626ede737ddde899777b719df120b,1907
 - duckling-0.1.6.1@sha256:dab60953f405b45fe93e1e745f8cc83e5166e1788b1f4999cc06382e131153d8,47147
 - fclabels-2.0.4@sha256:efcc20c6c903d0a59e36eb1cb547a7bbbbba93b6e20b84b06e919c350891beb2,4492
 - full-text-search-0.2.1.4@sha256:81f6df3327e5b604f99b15e78635e5d6ca996e504c21d268a6d751d7d131aa36,6032
@@ -91,6 +102,4 @@ extra-deps:
 - smtp-mail-0.2.0.0@sha256:b91c81f6dbb41a9ceee8c443385118684ecec55006b77f7d3c0e49cffd2468cf,1211
 - stemmer-0.5.2@sha256:823aec56249ec2619f60a2c0d1384b732894dbbbe642856d337ebfe9629a0efd,4082
 - xmlbf-0.6.1@sha256:57867fcb39e0514d17b3328ff5de8d241a18482fc89bb742d9ed820a6a2a5187,1540
- dependent-sum-0.4@sha256:40c705604f52374fb72616e10234635104a626ede737ddde899777b719df120b,1907
 - xmlbf-xeno-0.2@sha256:39f70fced6052524c290cf595f114661c721452e65fc3e0953a44e7682a6a6b0,950