Commit 87ed0042 authored by Grégoire Locqueville

Removed dead code in Core.Methods

parent 49e8f999
Pipeline #7132 canceled with stages
@@ -16,9 +16,7 @@ See Gargantext.Core.Methods.Graph.Accelerate)
-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Methods.Similarities.Accelerate.Conditional
where
@@ -29,7 +27,6 @@ import Data.Array.Accelerate
import Data.Array.Accelerate.Interpreter (run)
import Gargantext.Core.Methods.Matrix.Accelerate.Utils
import Gargantext.Core.Methods.Similarities.Accelerate.SpeGen
import qualified Gargantext.Prelude as P
-- * Metrics of proximity
@@ -78,40 +75,3 @@ measureConditional' m = run $ x $ map fromIntegral $ use m
r :: Dim
r = dim m
-- | To filter the nodes
-- The conditional metric P(i|j) of 2 terms @i@ and @j@, also called
-- "confidence", is the maximum of the two conditional probabilities of
-- seeing @i@ in the same context as @j@ knowing @j@, and conversely.
--
-- If \(n_i\) (resp. \(n_j\)) is the number of occurrences of @i@ (resp. @j@)
-- in the corpus and \(n_{ij}\) the number of their co-occurrences, we get:
--
-- \[P_c = \max\left(\frac{n_{ij}}{n_i}, \frac{n_{ij}}{n_j}\right)\]
conditional' :: Matrix Int -> (Matrix GenericityInclusion, Matrix SpecificityExclusion)
conditional' m = ( run $ ie $ map fromIntegral $ use m
, run $ sg $ map fromIntegral $ use m
)
where
x :: Acc (Matrix Double) -> Acc (Matrix Double)
x mat = (matProba r mat)
xs :: Acc (Matrix Double) -> Acc (Matrix Double)
xs mat = let mat' = x mat in zipWith (-) (matSumLin r mat') mat'
ys :: Acc (Matrix Double) -> Acc (Matrix Double)
ys mat = let mat' = x mat in zipWith (-) (matSumCol r mat') mat'
ie :: Acc (Matrix Double) -> Acc (Matrix Double)
ie mat = map (\x' -> x' / (2*(n-1))) $ zipWith (+) (xs mat) (ys mat)
sg :: Acc (Matrix Double) -> Acc (Matrix Double)
sg mat = map (\x' -> x' / (2*(n-1))) $ zipWith (-) (xs mat) (ys mat)
r :: Dim
r = dim m
n :: Exp Double
n = P.fromIntegral r
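-- A minimal plain-Haskell sketch (not part of this diff) of the confidence
-- formula documented above, for a single pair of terms; the function name
-- and the use of raw Double counts are illustrative assumptions.
--
-- >>> confidenceSketch 3 7 4
-- 0.75
confidenceSketch :: Double -> Double -> Double -> Double
confidenceSketch nij ni nj = max (nij / ni) (nij / nj)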
@@ -86,88 +86,19 @@ where $n_{ij}$ is the cooccurrence between term $i$ and term $j$
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Methods.Similarities.Accelerate.Distributional
where
-- import qualified Data.Foldable as P (foldl1)
-- import Debug.Trace (trace)
import Data.Array.Accelerate as A
-- import Data.Array.Accelerate.Interpreter (run)
import Data.Array.Accelerate.LLVM.Native (run) -- TODO: try runQ?
import Gargantext.Core.Methods.Matrix.Accelerate.Utils
import qualified Gargantext.Prelude as P
import Debug.Trace
import Prelude (show, mappend{- , String, (<>), fromIntegral, flip -})
import Prelude (show, mappend)
import qualified Prelude
-- | `distributional m` returns the distributional distance between each
-- pair of terms as a matrix. The argument @m@ is the matrix $[n_{ij}]_{i,j}$
-- where $n_{ij}$ is the cooccurrence between term $i$ and term $j$.
--
-- ## Basic example with Matrix of size 3:
--
-- >>> theMatrixInt 3
-- Matrix (Z :. 3 :. 3)
-- [ 7, 4, 0,
-- 4, 5, 3,
-- 0, 3, 4]
--
-- >>> distributional $ theMatrixInt 3
-- Matrix (Z :. 3 :. 3)
-- [ 1.0, 0.0, 0.9843749999999999,
-- 0.0, 1.0, 0.0,
-- 1.0, 0.0, 1.0]
--
-- ## Basic example with Matrix of size 4:
--
-- >>> theMatrixInt 4
-- Matrix (Z :. 4 :. 4)
-- [ 4, 1, 2, 1,
-- 1, 4, 0, 0,
-- 2, 0, 3, 3,
-- 1, 0, 3, 3]
--
-- >>> distributional $ theMatrixInt 4
-- Matrix (Z :. 4 :. 4)
-- [ 1.0, 0.0, 0.5714285714285715, 0.8421052631578947,
-- 0.0, 1.0, 1.0, 1.0,
-- 8.333333333333333e-2, 4.6875e-2, 1.0, 0.25,
-- 0.3333333333333333, 5.7692307692307696e-2, 1.0, 1.0]
--
distributional :: Matrix Int -> Matrix Double
distributional m' = run $ result
where
m = map A.fromIntegral $ use m'
n = dim m'
diag_m = diag m
d_1 = replicate (constant (Z :. n :. All)) diag_m
d_2 = replicate (constant (Z :. All :. n)) diag_m
mi = (.*) ((./) m d_1) ((./) m d_2)
-- w = (.-) mi d_mi
-- The matrix permutations are taken care of below by directly replicating
-- the matrix mi, making the matrix w unnecessary and saving one step.
w_1 = replicate (constant (Z :. All :. n :. All)) mi
w_2 = replicate (constant (Z :. n :. All :. All)) mi
w' = zipWith min w_1 w_2
-- The tensor ii = [r_{i,j,k}]_{i,j,k} has r_{i,j,k} = 0 if k = i OR k = j
-- and r_{i,j,k} = 1 otherwise (i.e. k /= i AND k /= j).
ii = generate (constant (Z :. n :. n :. n))
(lift1 (\(Z :. i :. j :. k) -> cond ((&&) ((/=) k i) ((/=) k j)) 1 0))
z_1 = sum ((.*) w' ii)
z_2 = sum ((.*) w_1 ii)
result = termDivNan z_1 z_2
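-- A rough plain-Haskell rendering (not part of this diff) of what the
-- Accelerate code above computes for one pair of terms, assuming @mi@ is the
-- n x n mutual-information matrix given as a list of rows and ignoring this
-- module's qualified-Prelude import setup; the name is illustrative only.
distributionalSketch :: [[Double]] -> Int -> Int -> Double
distributionalSketch mi i j
  | denom == 0 = 0      -- the Accelerate version handles this case via termDivNan
  | otherwise  = numer / denom
  where
    ks    = [ k | k <- [0 .. length mi - 1], k /= i, k /= j ]
    numer = sum [ min (mi !! i !! k) (mi !! j !! k) | k <- ks ]
    denom = sum [ mi !! i !! k | k <- ks ]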
logDistributional2 :: Matrix Int -> Matrix Double
logDistributional2 m = trace ("logDistributional2, dim=" `mappend` show n) . run
@@ -207,34 +138,6 @@ logDistributional' n m' = trace ("logDistributional'") result
mi = (.*) (matrixEye n)
-- (map (lift1 (\x -> let x' = x * to in cond (x' < 0.5) 0 (log x'))) ((./) m ss))
(map (lift1 (\x -> let x' = x * to in cond (x' < 1) 0 (log x'))) ((./) m ss))
-- mi_nnz :: Int
-- mi_nnz = flip indexArray Z . run $
-- foldAll (+) 0 $ map (\a -> ifThenElse (abs a < 10^(-6 :: Exp Int)) 0 1) mi
-- mi_total = n*n
-- reportMat :: String -> Int -> Int -> String
-- reportMat name nnz tot = name <> ": " <> show nnz <> "nnz / " <> show tot <>
-- " | " <> show pc <> "%"
-- where pc = 100 * Prelude.fromIntegral nnz / Prelude.fromIntegral tot :: Double
-- Tensor nxnxn. Matrix mi replicated along the 2nd axis.
-- w_1 = trace (reportMat "mi" mi_nnz mi_total) $ replicate (constant (Z :. All :. n :. All)) mi
-- w1_nnz :: Int
-- w1_nnz = flip indexArray Z . run $
-- foldAll (+) 0 $ map (\a -> ifThenElse (abs a < 10^(-6 :: Exp Int)) 0 1) w_1
-- w1_total = n*n*n
-- Tensor nxnxn. Matrix mi replicated along the 1st axis.
-- w_2 = trace (reportMat "w1" w1_nnz w1_total) $ replicate (constant (Z :. n :. All :. All)) mi
-- Tensor nxnxn.
-- w' = trace "w'" $ zipWith min w_1 w_2
-- A predicate that is true when the input (i, j, k) satisfy
-- k /= i AND k /= j
-- k_diff_i_and_j = lift1 (\(Z :. i :. j :. k) -> ((&&) ((/=) k i) ((/=) k j)))
-- Matrix nxn.
sumMin = trace "sumMin" $ sumMin_go n mi -- sum (condOrDefault k_diff_i_and_j 0 w')
@@ -264,112 +167,6 @@ logDistributional' n m' = trace ("logDistributional'") result
-- \[N_{m} = \sum_{i}^{m} \sum_{j, j \neq i}^{m} S_{ij}\]
--
logDistributional :: Matrix Int -> Matrix Double
logDistributional m' = run $ diagNull n $ result
where
m = map fromIntegral $ use m'
n = dim m'
-- Scalar. Sum of all elements of m.
to = the $ sum (flatten m)
-- Diagonal matrix with the diagonal of m.
d_m = (.*) m (matrixIdentity n)
-- Size n vector. s = [s_i]_i
s = sum ((.-) m d_m)
-- Matrix nxn. Vector s replicated as rows.
s_1 = replicate (constant (Z :. All :. n)) s
-- Matrix nxn. Vector s replicated as columns.
s_2 = replicate (constant (Z :. n :. All)) s
-- Matrix nxn. ss = [s_i * s_j]_{i,j}. Outer product of s with itself.
ss = (.*) s_1 s_2
-- Matrix nxn. mi = [m_{i,j}]_{i,j} where
-- m_{i,j} = 0 if n_{i,j} = 0 or i = j,
-- m_{i,j} = log(to * n_{i,j} / s_{i,j}) otherwise.
mi = (.*) (matrixEye n)
(map (lift1 (\x -> cond (x == 0) 0 (log (x * to)))) ((./) m ss))
-- Tensor nxnxn. Matrix mi replicated along the 2nd axis.
w_1 = replicate (constant (Z :. All :. n :. All)) mi
-- Tensor nxnxn. Matrix mi replicated along the 1st axis.
w_2 = replicate (constant (Z :. n :. All :. All)) mi
-- Tensor nxnxn.
w' = zipWith min w_1 w_2
-- A predicate that is true when the input (i, j, k) satisfy
-- k /= i AND k /= j
k_diff_i_and_j = lift1 (\(Z :. i :. j :. k) -> ((&&) ((/=) k i) ((/=) k j)))
-- Matrix nxn.
sumMin = sum (condOrDefault k_diff_i_and_j 0 w')
-- Matrix nxn. All columns are the same.
sumM = sum (condOrDefault k_diff_i_and_j 0 w_1)
result = termDivNan sumMin sumM
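-- A plain-Haskell sketch (not part of this diff) of the off-diagonal entry
-- mi_{ij} built above, from the grand total of the matrix, the marginal sums
-- s_i and s_j, and the cooccurrence n_{ij}; the diagonal is zeroed separately
-- (via matrixEye n above) and the function name is illustrative only.
logMiSketch :: Double -> Double -> Double -> Double -> Double
logMiSketch total si sj nij
  | nij == 0  = 0
  | otherwise = log (nij * total / (si * sj))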
distributional'' :: Matrix Int -> Matrix Double
distributional'' m = -- run {- $ matMaxMini -}
run $ diagNull n
$ rIJ n
$ filterWith 0 100
$ filter' 0
$ s_mi
$ map A.fromIntegral
{- from Int to Double -}
$ use m
{- push matrix in Accelerate type -}
where
_ri :: Acc (Matrix Double) -> Acc (Matrix Double)
_ri mat = mat1 -- zipWith (/) mat1 mat2
where
mat1 = matSumCol n $ zipWith min (_myMin mat) (_myMin $ filterWith 0 100 $ diagNull n $ transpose mat)
_mat2 = total mat
_myMin :: Acc (Matrix Double) -> Acc (Matrix Double)
_myMin = replicate (constant (Z :. n :. All)) . minimum
-- TODO fix NaN
-- Quali TEST: OK
s_mi :: Acc (Matrix Double) -> Acc (Matrix Double)
s_mi m' = zipWith (\x y -> log (x / y)) (diagNull n m')
$ zipWith (/) (crossProduct n m') (total m')
-- crossProduct n m'
total :: Acc (Matrix Double) -> Acc (Matrix Double)
total = replicate (constant (Z :. n :. n)) . sum . sum
n :: Dim
n = dim m
rIJ :: (Elt a, Ord a, P.Fractional (Exp a), P.Num a)
=> Dim -> Acc (Matrix a) -> Acc (Matrix a)
rIJ n m = matMaxMini $ divide a b
where
a = sumRowMin n m
b = sumColMin n m
-- * For Tests (to be removed)
-- | Test performance with this matrix
-- TODO : add this in a benchmark folder
{-
distriTest :: Int -> Bool
distriTest n = logDistributional m == distributional m
where
m = theMatrixInt n
-}
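-- A possible skeleton for the benchmark the TODO above asks for, assuming
-- the criterion library (not a dependency of this module); kept commented
-- out like the test above, and the matrix size 100 is an arbitrary choice.
{-
import Criterion.Main (defaultMain, bench, whnf)

benchDistributional :: IO ()
benchDistributional = defaultMain
  [ bench "distributional 100"    $ whnf distributional    (theMatrixInt 100)
  , bench "logDistributional 100" $ whnf logDistributional (theMatrixInt 100)
  ]
-}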
-- * sparse utils
@@ -382,12 +179,6 @@ data Ext where
Along1 :: Int -> Ext
Along2 :: Int -> Ext
along1 :: Int -> Ext
along1 = Along1
along2 :: Int -> Ext
along2 = Along2
type Delayed sh a = Exp sh -> Exp a
data ExtArr sh a = ExtArr
@@ -10,7 +10,6 @@ Portability : POSIX
Motivation and definition of the @Conditional@ distance.
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE Strict #-}
module Gargantext.Core.Methods.Similarities.Conditional