Commit dab6f378 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FIX] merge

parents d3d2b646 804f9027
Pipeline #1323 failed with stage
...@@ -108,6 +108,8 @@ library: ...@@ -108,6 +108,8 @@ library:
- SHA - SHA
- Unique - Unique
- accelerate - accelerate
- accelerate-utility
- accelerate-arithmetic
- aeson - aeson
- aeson-lens - aeson-lens
- aeson-pretty - aeson-pretty
......
...@@ -4,7 +4,6 @@ import (builtins.fetchGit { ...@@ -4,7 +4,6 @@ import (builtins.fetchGit {
# Descriptive name to make the store path easier to identify # Descriptive name to make the store path easier to identify
name = "nixos-20.09"; name = "nixos-20.09";
url = "https://github.com/nixos/nixpkgs/"; url = "https://github.com/nixos/nixpkgs/";
# Last commit hash for nixos-unstable
# `git ls-remote https://github.com/nixos/nixpkgs-channels nixos-20.09` # `git ls-remote https://github.com/nixos/nixpkgs-channels nixos-20.09`
ref = "refs/heads/nixos-20.09"; ref = "refs/heads/nixos-20.09";
rev = "19db3e5ea2777daa874563b5986288151f502e27"; rev = "19db3e5ea2777daa874563b5986288151f502e27";
......
{-| {-|
Module : Gargantext.Core.Methods.Distances.Accelerate.Distributional Module : Gargantext.Core.Methods.Distances.Accelerate.Distributional
Description : Description :
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
Maintainer : team@gargantext.org Maintainer : team@gargantext.org
Stability : experimental Stability : experimental
Portability : POSIX Portability : POSIX
This module aims at implementig distances of terms context by context is
the same referential of corpus.
Implementation use Accelerate library which enables GPU and CPU computation * Distributional Distance metric
See Gargantext.Core.Methods.Graph.Accelerate) __Definition :__ Distributional metric is a relative metric which depends on the
selected list, it represents structural equivalence of mutual information.
__Objective :__ We want to compute with matrices processing the similarity between term $i$ and term $j$ :
distr(i,j)=$\frac{\Sigma_{k \neq i,j} min(\frac{n_{ik}^2}{n_{ii}n_{kk}},\frac{n_{jk}^2}{n_{jj}n_{kk}})}{\Sigma_{k \neq i}\frac{n_{ik}^2}{ n_{ii}n_{kk}}}$
where $n_{ij}$ is the cooccurrence between term $i$ and term $j$
* For a vector V=[$x_1$ ... $x_n$], we note $|V|_1=\Sigma_ix_i$
* operator : .* and ./ cell by cell multiplication and division of the matrix
* operator * is the matrix multiplication
* Matrice M=[$n_{ij}$]$_{i,j}$
* opérateur : Diag(M)=[$n_{ii}$]$_i$ (vecteur)
* Id= identity matrix
* O=[1]$_{i,j}$ (matrice one)
* D(M)=Id .* M
* O * D(M) =[$n_{jj}$]$_{i,j}$
* D(M) * O =[$n_{ii}$]$_{i,j}$
* $V_i=[0~0~0~1~0~0~0]'$ en i
* MI=(M ./ O * D(M)) .* (M / D(M) * O )
* distr(i,j)=$\frac{|min(V'_i * (MI-D(MI)),V'_j * (MI-D(MI)))|_1}{|V'_i.(MI-D(MI))|_1}$
[Specifications written by David Chavalarias on Garg v4 shared NodeWrite, team Pyremiel 2020]
-} -}
...@@ -30,15 +50,72 @@ import Data.Array.Accelerate.Interpreter (run) ...@@ -30,15 +50,72 @@ import Data.Array.Accelerate.Interpreter (run)
import Gargantext.Core.Methods.Matrix.Accelerate.Utils import Gargantext.Core.Methods.Matrix.Accelerate.Utils
import qualified Gargantext.Prelude as P import qualified Gargantext.Prelude as P
-- | `distributional m` returns the distributional distance between terms each
-- pair of terms as a matrix. The argument m is the matrix $[n_{ij}]_{i,j}$
-- where $n_{ij}$ is the coocccurrence between term $i$ and term $j$.
--
-- ## Basic example with Matrix of size 3:
--
-- >>> theMatrixInt 3
-- Matrix (Z :. 3 :. 3)
-- [ 7, 4, 0,
-- 4, 5, 3,
-- 0, 3, 4]
--
-- >>> distributional $ theMatrixInt 3
-- Matrix (Z :. 3 :. 3)
-- [ 1.0, 0.0, 0.9843749999999999,
-- 0.0, 1.0, 0.0,
-- 1.0, 0.0, 1.0]
--
-- ## Basic example with Matrix of size 4:
--
-- >>> theMatrixInt 4
-- Matrix (Z :. 4 :. 4)
-- [ 4, 1, 2, 1,
-- 1, 4, 0, 0,
-- 2, 0, 3, 3,
-- 1, 0, 3, 3]
--
-- >>> distributional $ theMatrixInt 4
-- Matrix (Z :. 4 :. 4)
-- [ 1.0, 0.0, 0.5714285714285715, 0.8421052631578947,
-- 0.0, 1.0, 1.0, 1.0,
-- 8.333333333333333e-2, 4.6875e-2, 1.0, 0.25,
-- 0.3333333333333333, 5.7692307692307696e-2, 1.0, 1.0]
--
distributional :: Matrix Int -> Matrix Double
distributional m' = run result
where
m = map fromIntegral $ use m'
n = dim m'
diag_m = diag m
d_1 = replicate (constant (Z :. n :. All)) diag_m
d_2 = replicate (constant (Z :. All :. n)) diag_m
mi = (.*) ((./) m d_1) ((./) m d_2)
-- w = (.-) mi d_mi
-- The matrix permutations is taken care of below by directly replicating
-- the matrix mi, making the matrix w unneccessary and saving one step.
w_1 = replicate (constant (Z :. All :. n :. All)) mi
w_2 = replicate (constant (Z :. n :. All :. All)) mi
w' = zipWith min w_1 w_2
-- The matrix ii = [r_{i,j,k}]_{i,j,k} has r_(i,j,k) = 0 if k = i OR k = j
-- and r_(i,j,k) = 1 otherwise (i.e. k /= i AND k /= j).
ii = generate (constant (Z :. n :. n :. n))
(lift1 (\(Z :. i :. j :. k) -> cond ((&&) ((/=) k i) ((/=) k j)) 1 0))
z_1 = sum ((.*) w' ii)
z_2 = sum ((.*) w_1 ii)
result = termDivNan z_1 z_2
-- * Metrics of proximity
-----------------------------------------------------------------------
-- ** Distributional Distance
-- | Distributional Distance metric
--
-- Distributional metric is a relative metric which depends on the
-- selected list, it represents structural equivalence of mutual information.
-- --
-- The distributional metric P(c) of @i@ and @j@ terms is: \[ -- The distributional metric P(c) of @i@ and @j@ terms is: \[
-- S_{MI} = \frac {\sum_{k \neq i,j ; MI_{ik} >0}^{} \min(MI_{ik}, -- S_{MI} = \frac {\sum_{k \neq i,j ; MI_{ik} >0}^{} \min(MI_{ik},
...@@ -59,8 +136,9 @@ import qualified Gargantext.Prelude as P ...@@ -59,8 +136,9 @@ import qualified Gargantext.Prelude as P
-- Total cooccurrences of terms given a map list of size @m@ -- Total cooccurrences of terms given a map list of size @m@
-- \[N_{m} = \sum_{i,i \neq i}^{m} \sum_{j, j \neq j}^{m} S_{ij}\] -- \[N_{m} = \sum_{i,i \neq i}^{m} \sum_{j, j \neq j}^{m} S_{ij}\]
-- --
distributional :: Matrix Int -> Matrix Double
distributional m = -- run {- $ matMiniMax -} distributional'' :: Matrix Int -> Matrix Double
distributional'' m = -- run {- $ matMiniMax -}
run $ diagNull n run $ diagNull n
$ rIJ n $ rIJ n
$ filterWith 0 100 $ filterWith 0 100
...@@ -107,6 +185,6 @@ rIJ n m = matMiniMax $ divide a b ...@@ -107,6 +185,6 @@ rIJ n m = matMiniMax $ divide a b
-- | Test perfermance with this matrix -- | Test perfermance with this matrix
-- TODO : add this in a benchmark folder -- TODO : add this in a benchmark folder
distriTest :: Int -> Matrix Double distriTest :: Int -> Matrix Double
distriTest n = distributional (theMatrix n) distriTest n = distributional (theMatrixInt n)
...@@ -36,9 +36,96 @@ import Data.Array.Accelerate ...@@ -36,9 +36,96 @@ import Data.Array.Accelerate
import Data.Array.Accelerate.Interpreter (run) import Data.Array.Accelerate.Interpreter (run)
import qualified Gargantext.Prelude as P import qualified Gargantext.Prelude as P
-- | Matrix cell by cell multiplication
(.*) :: ( Shape ix
, Slice ix
, Elt a
, P.Num (Exp a)
)
=> Acc (Array ((ix :. Int) :. Int) a)
-> Acc (Array ((ix :. Int) :. Int) a)
-> Acc (Array ((ix :. Int) :. Int) a)
(.*) = zipWith (*)
(./) :: ( Shape ix
, Slice ix
, Elt a
, P.Num (Exp a)
, P.Fractional (Exp a)
)
=> Acc (Array ((ix :. Int) :. Int) a)
-> Acc (Array ((ix :. Int) :. Int) a)
-> Acc (Array ((ix :. Int) :. Int) a)
(./) = zipWith (/)
-- | Term by term division where divisions by 0 produce 0 rather than NaN.
termDivNan :: ( Shape ix
, Slice ix
, Elt a
, Eq a
, P.Num (Exp a)
, P.Fractional (Exp a)
)
=> Acc (Array ((ix :. Int) :. Int) a)
-> Acc (Array ((ix :. Int) :. Int) a)
-> Acc (Array ((ix :. Int) :. Int) a)
termDivNan = zipWith (\i j -> cond ((==) j 0) 0 ((/) i j))
(.-) :: ( Shape ix
, Slice ix
, Elt a
, P.Num (Exp a)
, P.Fractional (Exp a)
)
=> Acc (Array ((ix :. Int) :. Int) a)
-> Acc (Array ((ix :. Int) :. Int) a)
-> Acc (Array ((ix :. Int) :. Int) a)
(.-) = zipWith (-)
(.+) :: ( Shape ix
, Slice ix
, Elt a
, P.Num (Exp a)
, P.Fractional (Exp a)
)
=> Acc (Array ((ix :. Int) :. Int) a)
-> Acc (Array ((ix :. Int) :. Int) a)
-> Acc (Array ((ix :. Int) :. Int) a)
(.+) = zipWith (+)
----------------------------------------------------------------------- -----------------------------------------------------------------------
runExp :: Elt e => Exp e -> e matrixOne :: Num a => Dim -> Acc (Matrix a)
runExp e = indexArray (run (unit e)) Z matrixOne n' = ones
where
ones = fill (index2 n n) 1
n = constant n'
matrixIdentity :: Num a => Dim -> Acc (Matrix a)
matrixIdentity n' =
let zeros = fill (index2 n n) 0
ones = fill (index1 n) 1
n = constant n'
in
permute const zeros (\(unindex1 -> i) -> index2 i i) ones
matrixEye :: Num a => Dim -> Acc (Matrix a)
matrixEye n' =
let ones = fill (index2 n n) 1
zeros = fill (index1 n) 0
n = constant n'
in
permute const ones (\(unindex1 -> i) -> index2 i i) zeros
diagNull :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
diagNull n m = zipWith (*) m (matrixEye n)
-----------------------------------------------------------------------
_runExp :: Elt e => Exp e -> e
_runExp e = indexArray (run (unit e)) Z
----------------------------------------------------------------------- -----------------------------------------------------------------------
-- | Define a vector -- | Define a vector
...@@ -89,10 +176,10 @@ dim m = n ...@@ -89,10 +176,10 @@ dim m = n
-- [ 12.0, 15.0, 18.0, -- [ 12.0, 15.0, 18.0,
-- 12.0, 15.0, 18.0, -- 12.0, 15.0, 18.0,
-- 12.0, 15.0, 18.0] -- 12.0, 15.0, 18.0]
matSumCol :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double) matSumCol :: (Elt a, P.Num (Exp a)) => Dim -> Acc (Matrix a) -> Acc (Matrix a)
matSumCol r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum $ transpose mat matSumCol r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum $ transpose mat
matSumCol' :: Matrix Double -> Matrix Double matSumCol' :: (Elt a, P.Num (Exp a)) => Matrix a -> Matrix a
matSumCol' m = run $ matSumCol n m' matSumCol' m = run $ matSumCol n m'
where where
n = dim m n = dim m
...@@ -164,23 +251,10 @@ filterWith' :: (Elt a, Ord a) => Exp a -> Exp a -> Acc (Matrix a) -> Acc (Matrix ...@@ -164,23 +251,10 @@ filterWith' :: (Elt a, Ord a) => Exp a -> Exp a -> Acc (Matrix a) -> Acc (Matrix
filterWith' t v m = map (\x -> ifThenElse (x > t) x v) m filterWith' t v m = map (\x -> ifThenElse (x > t) x v) m
------------------------------------------------------------------------
-- run $ (identityMatrix (DAA.constant (10::Int)) :: DAA.Acc (DAA.Matrix Int)) Matrix (Z :. 10 :. 10) ------------------------------------------------------------------------
identityMatrix :: Num a => Exp Int -> Acc (Matrix a)
identityMatrix n =
let zeros = fill (index2 n n) 0
ones = fill (index1 n) 1
in
permute const zeros (\(unindex1 -> i) -> index2 i i) ones
eyeMatrix :: Num a => Dim -> Acc (Matrix a)
eyeMatrix n' =
let ones = fill (index2 n n) 1
zeros = fill (index1 n) 0
n = constant n'
in
permute const ones (\(unindex1 -> i) -> index2 i i) zeros
-- | TODO use Lenses -- | TODO use Lenses
data Direction = MatCol (Exp Int) | MatRow (Exp Int) | Diag data Direction = MatCol (Exp Int) | MatRow (Exp Int) | Diag
...@@ -259,11 +333,6 @@ selfMatrix' m' = run $ selfMatrix n ...@@ -259,11 +333,6 @@ selfMatrix' m' = run $ selfMatrix n
m = use m' m = use m'
-} -}
------------------------------------------------- -------------------------------------------------
diagNull :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
diagNull n m = zipWith (*) m eye
where
eye = eyeMatrix n
------------------------------------------------- -------------------------------------------------
crossProduct :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double) crossProduct :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
crossProduct n m = {-trace (P.show (run m',run m'')) $-} zipWith (*) m' m'' crossProduct n m = {-trace (P.show (run m',run m'')) $-} zipWith (*) m' m''
...@@ -296,7 +365,7 @@ cross' :: Matrix Double -> Matrix Double ...@@ -296,7 +365,7 @@ cross' :: Matrix Double -> Matrix Double
cross' mat = run $ cross n mat' cross' mat = run $ cross n mat'
where where
mat' = use mat mat' = use mat
n = dim mat n = dim mat
{- {-
...@@ -313,23 +382,28 @@ p_ m = zipWith (/) m (n_ m) ...@@ -313,23 +382,28 @@ p_ m = zipWith (/) m (n_ m)
) m ) m
-} -}
theMatrix :: Int -> Matrix Int theMatrixDouble :: Int -> Matrix Double
theMatrix n = matrix n (dataMatrix n) theMatrixDouble n = run $ map fromIntegral (use $ theMatrixInt n)
theMatrixInt :: Int -> Matrix Int
theMatrixInt n = matrix n (dataMatrix n)
where where
dataMatrix :: Int -> [Int] dataMatrix :: Int -> [Int]
dataMatrix x | (P.==) x 2 = [ 1, 1 dataMatrix x | (P.==) x 2 = [ 1, 1
, 1, 2 , 1, 2
] ]
| (P.==) x 3 = [ 1, 1, 2 | (P.==) x 3 = [ 7, 4, 0
, 1, 2, 3 , 4, 5, 3
, 2, 3, 4 , 0, 3, 4
] ]
| (P.==) x 4 = [ 1, 1, 2, 3 | (P.==) x 4 = [ 4, 1, 2, 1
, 1, 2, 3, 4 , 1, 4, 0, 0
, 2, 3, 4, 5 , 2, 0, 3, 3
, 3, 4, 5, 6 , 1, 0, 3, 3
] ]
| P.otherwise = P.undefined | P.otherwise = P.undefined
{- {-
......
...@@ -91,7 +91,8 @@ getGraph _uId nId = do ...@@ -91,7 +91,8 @@ getGraph _uId nId = do
-- TODO Distance in Graph params -- TODO Distance in Graph params
case graph of case graph of
Nothing -> do Nothing -> do
graph' <- computeGraph cId Conditional NgramsTerms repo graph' <- computeGraph cId Distributional NgramsTerms repo
-- graph' <- computeGraph cId Conditional NgramsTerms repo
mt <- defaultGraphMetadata cId "Title" repo mt <- defaultGraphMetadata cId "Title" repo
let graph'' = set graph_metadata (Just mt) graph' let graph'' = set graph_metadata (Just mt) graph'
let hg = HyperdataGraphAPI graph'' camera let hg = HyperdataGraphAPI graph'' camera
...@@ -204,7 +205,7 @@ graphRecompute u n logStatus = do ...@@ -204,7 +205,7 @@ graphRecompute u n logStatus = do
, _scst_remaining = Just 1 , _scst_remaining = Just 1
, _scst_events = Just [] , _scst_events = Just []
} }
_g <- trace (show u) $ recomputeGraph u n Conditional _g <- trace (show u) $ recomputeGraph u n Distributional -- Conditional
pure JobLog { _scst_succeeded = Just 1 pure JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0 , _scst_failed = Just 0
, _scst_remaining = Just 0 , _scst_remaining = Just 0
...@@ -239,7 +240,7 @@ graphVersions nId = do ...@@ -239,7 +240,7 @@ graphVersions nId = do
, gv_repo = v } , gv_repo = v }
recomputeVersions :: UserId -> NodeId -> GargNoServer Graph recomputeVersions :: UserId -> NodeId -> GargNoServer Graph
recomputeVersions uId nId = recomputeGraph uId nId Conditional recomputeVersions uId nId = recomputeGraph uId nId Distributional -- Conditional
------------------------------------------------------------ ------------------------------------------------------------
graphClone :: UserId graphClone :: UserId
......
...@@ -7,6 +7,8 @@ packages: ...@@ -7,6 +7,8 @@ packages:
#- 'deps/patches-map' #- 'deps/patches-map'
#- 'deps/servant-job' #- 'deps/servant-job'
#- 'deps/clustering-louvain' #- 'deps/clustering-louvain'
#- 'deps/accelerate'
#- 'deps/accelerate-utility'
docker: docker:
enable: false enable: false
...@@ -20,6 +22,7 @@ nix: ...@@ -20,6 +22,7 @@ nix:
shell-file: build-shell.nix shell-file: build-shell.nix
allow-newer: true allow-newer: true
extra-deps: extra-deps:
# Data Mining Libs # Data Mining Libs
...@@ -71,10 +74,18 @@ extra-deps: ...@@ -71,10 +74,18 @@ extra-deps:
- git: https://gitlab.iscpif.fr/gargantext/clustering-louvain.git - git: https://gitlab.iscpif.fr/gargantext/clustering-louvain.git
commit: 7d74f96dfea8e51fbab1793cc0429b2fe741f73d commit: 7d74f96dfea8e51fbab1793cc0429b2fe741f73d
# Accelerate Linear Algebra and specific instances
# (UndecidableInstances for newer GHC version)
- git: https://gitlab.iscpif.fr/anoe/accelerate.git
commit: f5c0e0071ec7b6532f9a9cd3eb33d14f340fbcc9
- git: https://gitlab.iscpif.fr/anoe/accelerate-utility.git
commit: 83ada76e78ac10d9559af8ed6bd4064ec81308e4
- accelerate-arithmetic-1.0.0.1@sha256:555639232aa5cad411e89247b27871d09352b987a754230a288c690b6de6d888,2096
# Others dependencies (with stack resolver) # Others dependencies (with stack resolver)
- KMP-0.2.0.0@sha256:6dfbac03ef00ebd9347234732cb86a40f62ab5a80c0cc6bedb8eb51766f7df28,2562 - KMP-0.2.0.0@sha256:6dfbac03ef00ebd9347234732cb86a40f62ab5a80c0cc6bedb8eb51766f7df28,2562
- Unique-0.4.7.7@sha256:2269d3528271e25d34542e7c24a4e541e27ec33460e1ea00845da95b82eec6fa,2777 - Unique-0.4.7.7@sha256:2269d3528271e25d34542e7c24a4e541e27ec33460e1ea00845da95b82eec6fa,2777
- accelerate-1.2.0.1@sha256:bb1928efe602545df4043692916ed427c959110cbd678d03c3f9c3be25d1ae88,20112 - dependent-sum-0.4@sha256:40c705604f52374fb72616e10234635104a626ede737ddde899777b719df120b,1907
- duckling-0.1.6.1@sha256:dab60953f405b45fe93e1e745f8cc83e5166e1788b1f4999cc06382e131153d8,47147 - duckling-0.1.6.1@sha256:dab60953f405b45fe93e1e745f8cc83e5166e1788b1f4999cc06382e131153d8,47147
- fclabels-2.0.4@sha256:efcc20c6c903d0a59e36eb1cb547a7bbbbba93b6e20b84b06e919c350891beb2,4492 - fclabels-2.0.4@sha256:efcc20c6c903d0a59e36eb1cb547a7bbbbba93b6e20b84b06e919c350891beb2,4492
- full-text-search-0.2.1.4@sha256:81f6df3327e5b604f99b15e78635e5d6ca996e504c21d268a6d751d7d131aa36,6032 - full-text-search-0.2.1.4@sha256:81f6df3327e5b604f99b15e78635e5d6ca996e504c21d268a6d751d7d131aa36,6032
...@@ -91,6 +102,4 @@ extra-deps: ...@@ -91,6 +102,4 @@ extra-deps:
- smtp-mail-0.2.0.0@sha256:b91c81f6dbb41a9ceee8c443385118684ecec55006b77f7d3c0e49cffd2468cf,1211 - smtp-mail-0.2.0.0@sha256:b91c81f6dbb41a9ceee8c443385118684ecec55006b77f7d3c0e49cffd2468cf,1211
- stemmer-0.5.2@sha256:823aec56249ec2619f60a2c0d1384b732894dbbbe642856d337ebfe9629a0efd,4082 - stemmer-0.5.2@sha256:823aec56249ec2619f60a2c0d1384b732894dbbbe642856d337ebfe9629a0efd,4082
- xmlbf-0.6.1@sha256:57867fcb39e0514d17b3328ff5de8d241a18482fc89bb742d9ed820a6a2a5187,1540 - xmlbf-0.6.1@sha256:57867fcb39e0514d17b3328ff5de8d241a18482fc89bb742d9ed820a6a2a5187,1540
- dependent-sum-0.4@sha256:40c705604f52374fb72616e10234635104a626ede737ddde899777b719df120b,1907
- xmlbf-xeno-0.2@sha256:39f70fced6052524c290cf595f114661c721452e65fc3e0953a44e7682a6a6b0,950 - xmlbf-xeno-0.2@sha256:39f70fced6052524c290cf595f114661c721452e65fc3e0953a44e7682a6a6b0,950
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment