Commit f73244c4 authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge branch 'dev-refact-graph' into dev

parents df8144ca 0ce69ad8
#!/bin/bash
stack build --profile # --test # --haddock
stack build # --profile # --test # --haddock
#!/bin/bash
stack install --profile # --test --haddock
stack install #--profile # --test --haddock
......@@ -149,7 +149,7 @@ computeGraph cId d nt repo = do
-- TODO split diagonal
myCooc <- Map.filter (>1)
<$> getCoocByNgrams (Diagonal False)
<$> getCoocByNgrams (Diagonal True)
<$> groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) nt (Map.keys ngs)
......
......@@ -10,7 +10,6 @@ Portability : POSIX
This module aims at implementig distances of terms context by context is
the same referential of corpus.
Implementation use Accelerate library which enables GPU and CPU computation:
* Manuel M. T. Chakravarty, Gabriele Keller, Sean Lee, Trevor L. McDonell, and Vinod Grover.
......@@ -31,6 +30,7 @@ Implementation use Accelerate library which enables GPU and CPU computation:
module Gargantext.Viz.Graph.Distances.Matrice
where
import qualified Data.Foldable as P (foldl1)
import Debug.Trace (trace)
import Data.Array.Accelerate
import Data.Array.Accelerate.Interpreter (run)
......@@ -138,10 +138,11 @@ divByDiag d mat = zipWith (/) mat (replicate (constant (Z :. (d :: Int) :. All))
-- [ 0.0, 4.0, 7.0,
-- 0.0, 5.0, 8.0,
-- 0.0, 6.0, 9.0]
matMiniMax :: Acc (Matrix Double) -> Acc (Matrix Double)
matMiniMax m = map (\x -> ifThenElse (x > miniMax') x 0) (transpose m)
matMiniMax :: (Elt a, Ord a, P.Num a) => Acc (Matrix a) -> Acc (Matrix a)
matMiniMax m = filterWith' miniMax' (constant 0) m
where
miniMax' = (the $ minimum $ maximum m)
miniMax' = the $ minimum $ maximum m
-- | Filters the matrix with a constant
--
......@@ -151,10 +152,19 @@ matMiniMax m = map (\x -> ifThenElse (x > miniMax') x 0) (transpose m)
-- 0.0, 0.0, 8.0,
-- 0.0, 6.0, 9.0]
filter' :: Double -> Acc (Matrix Double) -> Acc (Matrix Double)
filter' t m = map (\x -> ifThenElse (x > (constant t)) x 0) (transpose m)
filter' t m = filterWith t 0 m
filterWith :: Double -> Double -> Acc (Matrix Double) -> Acc (Matrix Double)
filterWith t v m = map (\x -> ifThenElse (x > (constant t)) x (constant v)) (transpose m)
filterWith' :: (Elt a, Ord a) => Exp a -> Exp a -> Acc (Matrix a) -> Acc (Matrix a)
filterWith' t v m = map (\x -> ifThenElse (x > t) x v) m
-----------------------------------------------------------------------
-- * Measures of proximity
-- * Metrics of proximity
-----------------------------------------------------------------------
-- ** Conditional distance
......@@ -162,10 +172,10 @@ filter' t m = map (\x -> ifThenElse (x > (constant t)) x 0) (transpose m)
-- | Conditional distance (basic version)
--
-- 2 main measures are actually implemented in order to compute the
-- 2 main metrics are actually implemented in order to compute the
-- proximity of two terms: conditional and distributional
--
-- Conditional measure is an absolute measure which reflects
-- Conditional metric is an absolute metric which reflects
-- interactions of 2 terms in the corpus.
measureConditional :: Matrix Int -> Matrix Double
--measureConditional m = run (matMiniMax $ matProba (dim m) $ map fromIntegral $ use m)
......@@ -178,7 +188,7 @@ measureConditional m = run $ matProba (dim m)
-- | Conditional distance (advanced version)
--
-- The conditional measure P(i|j) of 2 terms @i@ and @j@, also called
-- The conditional metric P(i|j) of 2 terms @i@ and @j@, also called
-- "confidence" , is the maximum probability between @i@ and @j@ to see
-- @i@ in the same context of @j@ knowing @j@.
--
......@@ -210,12 +220,12 @@ conditional' m = ( run $ ie $ map fromIntegral $ use m
-----------------------------------------------------------------------
-- ** Distributional Distance
-- | Distributional Distance Measure
-- | Distributional Distance metric
--
-- Distributional measure is a relative measure which depends on the
-- Distributional metric is a relative metric which depends on the
-- selected list, it represents structural equivalence of mutual information.
--
-- The distributional measure P(c) of @i@ and @j@ terms is: \[
-- The distributional metric P(c) of @i@ and @j@ terms is: \[
-- S_{MI} = \frac {\sum_{k \neq i,j ; MI_{ik} >0}^{} \min(MI_{ik},
-- MI_{jk})}{\sum_{k \neq i,j ; MI_{ik}>0}^{}} \]
--
......@@ -235,10 +245,10 @@ conditional' m = ( run $ ie $ map fromIntegral $ use m
-- \[N_{m} = \sum_{i,i \neq i}^{m} \sum_{j, j \neq j}^{m} S_{ij}\]
--
distributional :: Matrix Int -> Matrix Double
distributional m = run {- -- $ matMiniMax
-- $ ri
-- $ myMin
-}
distributional m = run -- $ matMiniMax
$ diagNull n
$ rIJ n
$ filterWith 0 100
$ filter' 0
$ s_mi
$ map fromIntegral
......@@ -246,18 +256,16 @@ distributional m = run {- -- $ matMiniMax
$ use m
{- push matrix in Accelerate type -}
where
-- filter m = zipWith (\a b -> max a b) m (transpose m)
{-
ri :: Acc (Matrix Double) -> Acc (Matrix Double)
ri mat = mat1 -- zipWith (/) mat1 mat2
_ri :: Acc (Matrix Double) -> Acc (Matrix Double)
_ri mat = mat1 -- zipWith (/) mat1 mat2
where
mat1 = matSumCol n $ zipWith min' (myMin mat) (myMin $ transpose mat)
mat2 = total mat
myMin :: Acc (Matrix Double) -> Acc (Matrix Double)
myMin = replicate (constant (Z :. n :. All)) . minimum
mat1 = matSumCol n $ zipWith min (_myMin mat) (_myMin $ filterWith 0 100 $ diagNull n $ transpose mat)
_mat2 = total mat
_myMin :: Acc (Matrix Double) -> Acc (Matrix Double)
_myMin = replicate (constant (Z :. n :. All)) . minimum
-}
-- TODO fix NaN
-- Quali TEST: OK
......@@ -282,17 +290,87 @@ identityMatrix n =
permute const zeros (\(unindex1 -> i) -> index2 i i) ones
eyeMatrix :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
eyeMatrix n' _m =
eyeMatrix :: Num a => Dim -> Acc (Matrix a)
eyeMatrix n' =
let ones = fill (index2 n n) 1
zeros = fill (index1 n) 0
n = constant n'
in
permute const ones (\(unindex1 -> i) -> index2 i i) zeros
-- | TODO use Lenses
data Direction = MatCol (Exp Int) | MatRow (Exp Int) | Diag
nullOf :: Num a => Dim -> Direction -> Acc (Matrix a)
nullOf n' dir =
let ones = fill (index2 n n) 1
zeros = fill (index2 n n) 0
n = constant n'
in
permute const ones ( lift1 ( \(Z :. (i :: Exp Int) :. (_j:: Exp Int))
-> case dir of
MatCol m -> (Z :. i :. m)
MatRow m -> (Z :. m :. i)
Diag -> (Z :. i :. i)
)
)
zeros
nullOfWithDiag :: Num a => Dim -> Direction -> Acc (Matrix a)
nullOfWithDiag n dir = zipWith (*) (nullOf n dir) (nullOf n Diag)
rIJ' :: Matrix Int -> Matrix Double
rIJ' m = run $ sumRowMin (dim m) m'
where
m' = (map fromIntegral $ use m)
rIJ :: (Elt a, Ord a, P.Fractional (Exp a), P.Num a)
=> Dim -> Acc (Matrix a) -> Acc (Matrix a)
rIJ n m = matMiniMax $ divide a b
where
a = sumRowMin n m
b = sumColMin n m
divide :: (Elt a, Ord a, P.Fractional (Exp a), P.Num a)
=> Acc (Matrix a) -> Acc (Matrix a) -> Acc (Matrix a)
divide = zipWith divide'
where
divide' a b = ifThenElse (b > (constant 0))
(a / b)
(constant 0)
selfMatrix :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
selfMatrix n' _m =
-- | Nominator
sumRowMin :: (Num a, Ord a) => Dim -> Acc (Matrix a) -> Acc (Matrix a)
sumRowMin n m = {-trace (P.show $ run m') $-} m'
where
m' = reshape (shape m) vs
vs = P.foldl1 (++)
$ P.map (\z -> sumRowMin1 n (constant z) m) [0..n-1]
sumRowMin1 :: (Num a, Ord a) => Dim -> Exp Int -> Acc (Matrix a) -> Acc (Vector a)
sumRowMin1 n x m = trace (P.show (run m,run $ transpose m)) $ m''
where
m'' = sum $ zipWith min (transpose m) m
_m' = zipWith (*) (zipWith (*) (nullOf n (MatCol x)) $ nullOfWithDiag n (MatRow x)) m
-- | Denominator
sumColMin :: (Num a, Ord a) => Dim -> Acc (Matrix a) -> Acc (Matrix a)
sumColMin n m = reshape (shape m) vs
where
vs = P.foldl1 (++)
$ P.map (\z -> sumColMin1 n (constant z) m) [0..n-1]
sumColMin1 :: (Num a) => Dim -> Exp Int -> Acc (Matrix a) -> Acc (Matrix a)
sumColMin1 n x m = zipWith (*) (nullOfWithDiag n (MatCol x)) m
{- | WIP fun with indexes
selfMatrix :: Num a => Dim -> Acc (Matrix a)
selfMatrix n' =
let zeros = fill (index2 n n) 0
ones = fill (index2 n n) 1
n = constant n'
......@@ -304,24 +382,24 @@ selfMatrix n' _m =
)) zeros
selfMatrix' :: (Elt a, P.Num (Exp a)) => Array DIM2 a -> Matrix a
selfMatrix' m' = run $ selfMatrix n m
selfMatrix' m' = run $ selfMatrix n
where
n = dim m'
m = use m'
-}
-------------------------------------------------
diagNull :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
diagNull n m = zipWith (*) m eye
where
eye = eyeMatrix n m
eye = eyeMatrix n
-------------------------------------------------
crossProduct :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
crossProduct n m = trace (P.show (run m',run m'')) $ zipWith (*) m' m''
crossProduct n m = {-trace (P.show (run m',run m'')) $-} zipWith (*) m' m''
where
m' = cross n m
m'' = cross n (transpose m)
m'' = transpose $ cross n m
crossT :: Matrix Double -> Matrix Double
crossT = run . transpose . use
......@@ -448,7 +526,40 @@ p_ m = zipWith (/) m (n_ m)
-- * For Tests (to be removed)
-- | Test perfermance with this matrix
-- TODO : add this in a benchmark folder
distriTest :: Matrix Double
distriTest = distributional $ matrix 100 [1..]
distriTest :: Int -> Matrix Double
distriTest n = distributional (theMatrix n)
theMatrix :: Int -> Matrix Int
theMatrix n = matrix n (dataMatrix n)
where
dataMatrix :: Int -> [Int]
dataMatrix x | (P.==) x 2 = [ 1, 1
, 1, 2
]
| (P.==) x 3 = [ 1, 1, 2
, 1, 2, 3
, 2, 3, 4
]
| (P.==) x 4 = [ 1, 1, 2, 3
, 1, 2, 3, 4
, 2, 3, 4, 5
, 3, 4, 5, 6
]
| P.otherwise = P.undefined
{-
theResult :: Int -> Matrix Double
theResult n | (P.==) n 2 = let r = 1.6094379124341003 in [ 0, r, r, 0]
| P.otherwise = [ 1, 1 ]
-}
colMatrix :: Elt e
=> Int -> [e] -> Acc (Array ((Z :. Int) :. Int) e)
colMatrix n ns = replicate (constant (Z :. (n :: Int) :. All)) v
where
v = use $ vector (P.length ns) ns
-----------------------------------------------------------------------
......@@ -60,7 +60,9 @@ cooc2graph distance threshold myCooc = do
let
(ti, _) = createIndices myCooc
myCooc' = toIndex ti myCooc
matCooc = map2mat 0 (Map.size ti) $ Map.filter (> 1) myCooc'
matCooc = map2mat 0 (Map.size ti)
$ Map.filterWithKey (\(a,b) _ -> a /= b)
$ Map.filter (> 1) myCooc'
distanceMat = measure distance matCooc
distanceMap = Map.filter (> threshold) $ mat2map distanceMat
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment