[TestFlow] seems good, need to add tests on it and fix distributional distance.

72a1cfff · Alexandre Delanoë · bb989318 · 72a1cfff · 72a1cfff · 72a1cfff
Commit 72a1cfff authored Aug 27, 2018 by Alexandre Delanoë
9 changed files
--- a/package.yaml
+++ b/package.yaml
@@ -50,8 +50,12 @@ library:
  - Gargantext.Text.Parsers.WOS
  - Gargantext.Text.Search
  - Gargantext.Text.Terms
+  - Gargantext.Text.Terms.Mono
+  - Gargantext.Text.Terms.Multi.Lang.En
+  - Gargantext.Text.Terms.Multi.Lang.Fr
  - Gargantext.Text.Terms.WithList
  - Gargantext.TextFlow
+  - Gargantext.Viz.Graph
  - Gargantext.Viz.Graph.Distances.Matrice
  - Gargantext.Viz.Graph.Index
  dependencies:

--- a/src/Gargantext/API/Settings.hs
+++ b/src/Gargantext/API/Settings.hs
-{-|PI/Application.hs
-API/Count.hs
-API/FrontEnd.hs
-API/Node.hs
-API/Auth.hs
-API.hs
-Database/NodeNodeNgram.hs
-Database/User.hs
-Database/Queries.hs
-
+{-| 
 Module      : Gargantext.API.Settings
 Description : Settings of the API (Server and Client)
 Copyright   : (c) CNRS, 2017-Present

--- a/src/Gargantext/Database.hs
+++ b/src/Gargantext/Database.hs
 {-|
 Module      : Gargantext.Database
-Description : Main commands of BASHQL a Domain Specific Language to deal with Gargantext Database.
+Description : BASHQL to deal with Gargantext Database.
 Copyright   : (c) CNRS, 2017-Present
 License     : AGPL + CECILL v3
 Maintainer  : team@gargantext.org
 Stability   : experimental
 Portability : POSIX

+* BASHQL is a Domain Specific Language to deal with the Database
+
 * BASHQL = functional (Bash * SQL)

 * Which language to chose when working with a database ? To make it
@@ -42,6 +44,7 @@ write Haskell bash translations.
 - FileSystem is now a NodeSystem where each File is a Node in a Directed Graph (DG).

 * References
+
 [0] MIT Press has published "Category theory for the sciences". The book
 can also be purchased on Amazon. Here are reviews by the MAA, by the
 AMS, and by SIAM.
@@ -64,6 +67,7 @@ module Gargantext.Database ( module Gargantext.Database.Utils
                           , del , del'
                           , tree, tree'
                           , postCorpus, postAnnuaire
+                           , Connection
                           )
    where


--- a/src/Gargantext/Database/Node.hs
+++ b/src/Gargantext/Database/Node.hs
@@ -59,6 +59,9 @@ import qualified Data.Profunctor.Product as PP
 -- | Types for Node Database Management
 data PGTSVector

+------------------------------------------------------------------------
+type CorpusId = Int
+------------------------------------------------------------------------

 instance FromField HyperdataCorpus where
    fromField = fromField'

--- a/src/Gargantext/Text/Examples.hs
+++ b/src/Gargantext/Text/Examples.hs
@@ -11,9 +11,9 @@ This file is intended for these purposes:

 - documentation for teaching and research
 - learn basics of Haskell which is a scientific programming language
- behavioral tests (that should be completed with uni-tests and scale-tests
+- behavioral tests (that should be completed with uni-tests and scale-tests)

-This documents defines basic of Text definitions according to Gargantext..
+This document defines basic of Text definitions according to Gargantext..

 - What is a term ?
 - What is a sentence ?
@@ -104,7 +104,18 @@ ex_occ = occurrences <$> L.concat <$> ex_terms
 ex_cooc :: IO (Map (Label, Label) Int)
 ex_cooc = cooc <$> ex_terms

-- | Tests 
+-- | Tests the specificity and genericity
+--
+-- >>> ex_cooc_mat
+-- (fromList [(["glass"],0),(["spoon"],1),(["table"],2),(["wine"],3)],Matrix (Z :. 4 :. 4) 
+--   [ 4, 0, 0, 0,
+--     1, 2, 0, 0,
+--     3, 2, 4, 0,
+--     3, 1, 2, 3],Matrix (Z :. 4 :. 4) 
+--   [ 1.0, 0.25, 0.75, 0.75,
+--     0.0,  1.0,  1.0,  0.5,
+--     0.0,  0.0,  1.0,  0.5,
+--     0.0,  0.0,  0.0,  1.0],(Vector (Z :. 4) [0.5833333333333334,0.5833333333333334,0.75,0.5833333333333334],Vector (Z :. 4) [-0.5833333333333334,-0.4166666666666667,0.41666666666666674,0.5833333333333334]))
 ex_cooc_mat :: IO (Map Label Index, Matrix Int, Matrix Double, (DAA.Vector InclusionExclusion, DAA.Vector SpecificityGenericity))
 ex_cooc_mat = do
  m <- ex_cooc

--- a/src/Gargantext/Text/Metrics.hs
+++ b/src/Gargantext/Text/Metrics.hs
@@ -36,7 +36,7 @@ import qualified Data.Array.Accelerate as DAA

 import GHC.Real (round)

--import Debug.Trace
+import Debug.Trace (trace)

 data MapListSize   = MapListSize   Int
 data InclusionSize = InclusionSize Int
@@ -51,19 +51,19 @@ data FilterConfig = FilterConfig { fc_mapListSize   :: MapListSize
                                 , fc_defaultValue  :: DefaultValue
                             }

-filterCooc :: Ord t => FilterConfig -> Map (t, t) Int -> Map (t, t) Int
+filterCooc :: (Show t, Ord t) => FilterConfig -> Map (t, t) Int -> Map (t, t) Int
 filterCooc fc cc = (filterCooc' fc) ts cc
  where
    ts     = map _scored_terms $ takeSome fc $ coocScored cc

-filterCooc' :: Ord t => FilterConfig -> [t] -> Map (t, t) Int -> Map (t, t) Int
-filterCooc' (FilterConfig _ _ _ _ (DefaultValue dv)) ts m = -- trace ("coocScored " <> show (length ts)) $
+filterCooc' :: (Show t, Ord t) => FilterConfig -> [t] -> Map (t, t) Int -> Map (t, t) Int
+filterCooc' (FilterConfig _ _ _ _ (DefaultValue dv)) ts m = trace ("coocScored " <> show ts) $
  foldl' (\m' k -> M.insert k (maybe dv identity $ M.lookup k m) m')
    M.empty selection
  where
    selection  = [(x,y) | x <- ts
                        , y <- ts
-                       -- , x >= y
+                      --  , x >= y
                        ]


@@ -75,7 +75,7 @@ filterCooc' (FilterConfig _ _ _ _ (DefaultValue dv)) ts m = -- trace ("coocScore
 takeSome :: Ord t => FilterConfig -> [Scored t] -> [Scored t]
 takeSome (FilterConfig (MapListSize l) (InclusionSize l') (SampleBins s) (Clusters _) _) scores = L.take l
                    $ takeSample n m
-                    $ L.take l' $ sortWith (Down . _scored_incExc) scores
+                    $ L.take l' $ reverse $ sortWith (Down . _scored_incExc) scores
                    -- splitKmeans k scores
  where
    -- TODO: benchmark with accelerate-example kmeans version
@@ -95,10 +95,10 @@ takeSome (FilterConfig (MapListSize l) (InclusionSize l') (SampleBins s) (Cluste
                                 $ sortWith (Down . _scored_speGen) xs


-data Scored t = Scored { _scored_terms  :: !t
-                       , _scored_incExc :: !InclusionExclusion
-                       , _scored_speGen :: !SpecificityGenericity
-                     } deriving (Show)
+data Scored ts = Scored { _scored_terms :: !ts
+                        , _scored_incExc :: !InclusionExclusion
+                        , _scored_speGen :: !SpecificityGenericity
+                        } deriving (Show)

 -- TODO in the textflow we end up needing these indices, it might be better
 -- to compute them earlier and pass them around.

--- a/src/Gargantext/Text/Terms/Mono/Token/En.hs
+++ b/src/Gargantext/Text/Terms/Mono/Token/En.hs
@@ -17,7 +17,6 @@ module Gargantext.Text.Terms.Mono.Token.En
    ( EitherList(..)
    , Tokenizer
    , tokenize
-    , run
    , defaultTokenizer
    , whitespace
    , uris
@@ -64,8 +63,6 @@ import Gargantext.Prelude
 --    ,"Hyphen-words"
 --    ,"Yes/No questions"
 --    ]
---
-
 type Tokenizer =  Text -> EitherList Text Text

 -- | The EitherList is a newtype-wrapped list of Eithers.

--- a/src/Gargantext/TextFlow.hs
+++ b/src/Gargantext/TextFlow.hs
@@ -21,15 +21,22 @@ import GHC.IO (FilePath)
 import qualified Data.Text as T
 import Data.Text.IO (readFile)

+import Data.Maybe (catMaybes)
+import qualified Data.Set as DS

 import qualified Data.Array.Accelerate as A
 import qualified Data.Map.Strict as M
 ----------------------------------------------
+import Gargantext.Database (Connection)
+
+import Gargantext.Database.Node
+import Gargantext.Core.Types.Node
+
 import Gargantext.Core (Lang)
 import Gargantext.Prelude

 import Gargantext.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map)
-import Gargantext.Viz.Graph.Distances.Matrice (distributional)
+import Gargantext.Viz.Graph.Distances.Matrice (distributional, measureConditional)
 import Gargantext.Viz.Graph (Graph(..), data2graph)
 import Gargantext.Text.Metrics.Count (cooc)
 import Gargantext.Text.Metrics (filterCooc, FilterConfig(..), Clusters(..), SampleBins(..), DefaultValue(..), MapListSize(..), InclusionSize(..))
@@ -38,7 +45,7 @@ import Gargantext.Text.Context (splitBy, SplitContext(Sentences))

 import Gargantext.Text.Parsers.CSV

-import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
+import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain, l_community_id)

 {-
  ____                             _            _
@@ -50,11 +57,27 @@ import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
 -}


+contextText :: [T.Text]
+contextText = ["The dog is an animal."
+              ,"The bird is an animal."
+              ,"The bird is an animal."
+              ,"The bird and the dog are an animal."
+              ,"The table is an object."
+              ,"The pen is an object."
+              ,"This object is a pen or a table?"
+              ,"The girl has a human body."
+              ,"The girl has a human body."
+              ,"The boy has a human body."
+              ,"The boy has a human body."
+              ]
+
+
+
 data TextFlow = CSV FilePath
              | FullText FilePath
              | Contexts [T.Text]
-              | SQL Int
-              | Database T.Text
+              | DB Connection CorpusId
+              | Query T.Text
                -- ExtDatabase Query
                -- IntDatabase NodeId

@@ -64,6 +87,7 @@ textFlow termType workType = do
                FullText path -> splitBy (Sentences 5) <$> readFile path
                CSV      path -> readCsvOn [csv_title, csv_abstract] path
                Contexts ctxt -> pure ctxt
+                SQL con corpusId -> catMaybes <$> map (\n -> hyperdataDocumentV3_title (node_hyperdata n)  <> hyperdataDocumentV3_abstract (node_hyperdata n))<$> getDocumentsV3WithParentId con corpusId
                _             -> undefined

  textFlow' termType contexts
@@ -78,53 +102,61 @@ textFlow' termType contexts = do
  -- TermsType = Mono | Multi | MonoMulti
  -- myterms # filter (\t -> not . elem t stopList)
  --         # groupBy (Stem|GroupList|Ontology)
+  printDebug "terms" myterms
  printDebug "myterms" (sum $ map length myterms)

  -- Bulding the map list
  -- compute copresences of terms, i.e. cooccurrences of terms in same context of text
  -- Cooc = Map (Term, Term) Int
  let myCooc1 = cooc myterms
-  printDebug "myCooc1" (M.size myCooc1)
+  printDebug "myCooc1 size" (M.size myCooc1)

  -- Remove Apax: appears one time only => lighting the matrix
-  let myCooc2 = M.filter (>1) myCooc1
-  printDebug "myCooc2" (M.size myCooc2)
+  let myCooc2 = M.filter (>0) myCooc1
+  printDebug "myCooc2 size" (M.size myCooc2)
+  printDebug "myCooc2" myCooc2
+

  -- Filtering terms with inclusion/Exclusion and Specificity/Genericity scores
-  let myCooc3 = filterCooc ( FilterConfig (MapListSize    100 )
-                                          (InclusionSize  900 )
+  let myCooc3 = filterCooc ( FilterConfig (MapListSize    350 )
+                                          (InclusionSize  500 )
                                          (SampleBins      10 )
                                          (Clusters         3 )
                                          (DefaultValue     0 )
                           ) myCooc2
-  printDebug "myCooc3" $ M.size myCooc3
-  -- putStrLn $ show myCooc3
+  printDebug "myCooc3 size" $ M.size myCooc3
+  printDebug "myCooc3" myCooc3

  -- Cooc -> Matrix
  let (ti, _) = createIndices myCooc3
-  printDebug "ti" $ M.size ti
+  printDebug "ti size" $ M.size ti
+  printDebug "ti" ti

  let myCooc4 = toIndex ti myCooc3
-  printDebug "myCooc4" $ M.size myCooc4
+  printDebug "myCooc4 size" $ M.size myCooc4
+  printDebug "myCooc4" myCooc4

  let matCooc = map2mat (0) (M.size ti) myCooc4
+  printDebug "matCooc shape" $ A.arrayShape matCooc
  printDebug "matCooc" matCooc
-  
+
  -- Matrix -> Clustering
-  --let distanceMat = conditional' matCooc
-  let distanceMat = distributional matCooc
-  printDebug "distanceMat" $ A.arrayShape distanceMat
+  let distanceMat = measureConditional matCooc
+  --let distanceMat = distributional matCooc
+  printDebug "distanceMat shape" $ A.arrayShape distanceMat
  printDebug "distanceMat" distanceMat
 --
-  let distanceMap = mat2map distanceMat
-  printDebug "distanceMap" $ M.size distanceMap
+  --let distanceMap = M.filter (>0) $ mat2map distanceMat
+  let distanceMap = M.map (\n -> 1) $ M.filter (>0) $ mat2map distanceMat
+  printDebug "distanceMap size" $ M.size distanceMap
+  printDebug "distanceMap" distanceMap

 --  let distance = fromIndex fi distanceMap
 --  printDebug "distance" $ M.size distance

  partitions <- cLouvain distanceMap
 -- Building : -> Graph -> JSON
-  printDebug "partitions" $ length partitions
+  printDebug "partitions" $ DS.size $ DS.fromList $ map (l_community_id) partitions
  --printDebug "partitions" partitions
  pure $ data2graph (M.toList ti) myCooc4 distanceMap partitions


--- a/src/Gargantext/Viz/Graph/Distances/Matrice.hs
+++ b/src/Gargantext/Viz/Graph/Distances/Matrice.hs
@@ -7,15 +7,12 @@ Maintainer  : team@gargantext.org
 Stability   : experimental
 Portability : POSIX

+This module aims at implementig distances of terms context by context is
+the same referential of corpus.

-2 main measures are actually implemented in order to compute the proximity of two terms.

- Conditional measure is an absolute measure which reflects interactions of 2 terms in the corpus.
- Distributional measure is a relative measure which depends on the selected list, it represents structural equivalence.
+Implementation use Accelerate library which enables GPU and CPU computation:

-Motivation and definition of the @Conditional@ distance.
-
-Implementation use Accelerate library :
  * Manuel M. T. Chakravarty, Gabriele Keller, Sean Lee, Trevor L. McDonell, and Vinod Grover.
    [Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
    In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.
@@ -50,21 +47,27 @@ import qualified Gargantext.Prelude as P


 -----------------------------------------------------------------------
-- | Test perf.
-distriTest :: Matrix Double
-distriTest = distributional $ myMat 100
-----------------------------------------------------------------------
-
+-- | Define a vector
+--
+-- >>> vector 3
+-- Vector (Z :. 3) [0,1,2]
 vector :: Int -> (Array (Z :. Int) Int)
 vector n = fromList (Z :. n) [0..n]

+-- | Define a matrix
+--
+-- >>> matrix 3 ([1..] :: [Double])
+-- Matrix (Z :. 3 :. 3)
+--   [ 1.0, 2.0, 3.0,
+--     4.0, 5.0, 6.0,
+--     7.0, 8.0, 9.0]
 matrix :: Elt c => Int -> [c] -> Matrix c
 matrix n l = fromList (Z :. n :. n) l

-myMat :: Int -> Matrix Int
-myMat n = matrix n [1..]
-
 -- | Two ways to get the rank (as documentation)
+--
+-- >>> rank (matrix 3 ([1..] :: [Int]))
+-- 2
 rank :: (Matrix a) -> Int
 rank m = arrayRank $ arrayShape m

@@ -73,6 +76,10 @@ rank m = arrayRank $ arrayShape m
 -- How to force use with SquareMatrix ?
 type Dim = Int

+-- | Get Dimension of a square Matrix
+--
+-- >>> dim (matrix 3 ([1..] :: [Int]))
+-- 3
 dim :: Matrix a -> Dim
 dim m = n
  where
@@ -80,34 +87,100 @@ dim m = n
    -- indexTail (arrayShape m)

 -----------------------------------------------------------------------
-proba :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
-proba r mat = zipWith (/) mat (mkSum r mat)

-mkSum :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
-mkSum r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum mat
+-- | Sum of a Matrix by Column
+--
+-- >>> run $ matSum 3 (use $ matrix 3 [1..])
+-- Matrix (Z :. 3 :. 3)
+--   [ 12.0, 15.0, 18.0,
+--     12.0, 15.0, 18.0,
+--     12.0, 15.0, 18.0]
+matSum :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
+matSum r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum $ transpose mat

-- | divByDiag 
+
+-- | Proba computes de probability matrix: all cells divided by thee sum of its column
+-- if you need get the probability on the lines, just transpose it
+--
+-- >>> run $ matProba 3 (use $ matrix 3 [1..])
+-- Matrix (Z :. 3 :. 3)
+--   [ 8.333333333333333e-2, 0.13333333333333333, 0.16666666666666666,
+--       0.3333333333333333,  0.3333333333333333,  0.3333333333333333,
+--       0.5833333333333334,  0.5333333333333333,                 0.5]
+matProba :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
+matProba r mat = zipWith (/) mat (matSum r mat)
+
+-- | Diagonal of the matrix
+--
+-- >>> run $ diag (use $ matrix 3 ([1..] :: [Int]))
+-- Vector (Z :. 3) [1,5,9]
+diag :: Elt e => Acc (Matrix e) -> Acc (Vector e)
+diag m = backpermute (indexTail (shape m)) (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int)))) m
+
+
+-- | Divide by the Diagonal of the matrix
+--
+-- >>> run $ divByDiag 3 (use $ matrix 3 ([1..] :: [Double]))
+-- Matrix (Z :. 3 :. 3)
+--   [ 1.0, 0.4, 0.3333333333333333,
+--     4.0, 1.0, 0.6666666666666666,
+--     7.0, 1.6,                1.0]
 divByDiag :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
 divByDiag d mat = zipWith (/) mat (replicate (constant (Z :. (d :: Int) :. All)) $ diag mat)
-  where
-    diag :: Elt e => Acc (Matrix e) -> Acc (Vector e)
-    diag m = backpermute (indexTail (shape m)) (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int)))) m
-----------------------------------------------------------------------

-miniMax :: Acc (Matrix Double) -> Acc (Matrix Double)
-miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
+-----------------------------------------------------------------------
+-- | Filters the matrix with the minimum of maximums
+--
+-- >>> run $ matMiniMax $ use $ matrix 3 [1..]
+-- Matrix (Z :. 3 :. 3)
+--   [ 0.0, 4.0, 7.0,
+--     0.0, 5.0, 8.0,
+--     0.0, 6.0, 9.0]
+matMiniMax :: Acc (Matrix Double) -> Acc (Matrix Double)
+matMiniMax m = map (\x -> ifThenElse (x > miniMax') x 0) (transpose m)
  where
    miniMax' = (the $ minimum $ maximum m)

+-- | Filters the matrix with a constant
+--
+-- >>> run $ matFilter 5 $ use $ matrix 3 [1..]
+-- Matrix (Z :. 3 :. 3)
+--   [ 0.0, 0.0, 7.0,
+--     0.0, 0.0, 8.0,
+--     0.0, 6.0, 9.0]
+matFilter :: Double -> Acc (Matrix Double) -> Acc (Matrix Double)
+matFilter t m = map (\x -> ifThenElse (x > (constant t)) x 0) (transpose m)
+
+-----------------------------------------------------------------------
+-- * Measures of proximity
+-----------------------------------------------------------------------
+-- ** Conditional distance
+
+-- *** Conditional distance (basic)
+
 -- | Conditional distance (basic version)
-conditional :: Matrix Int -> Matrix Double
-conditional m = run (miniMax $ proba (dim m) $ map fromIntegral $ use m)
+--
+-- 2 main measures are actually implemented in order to compute the
+-- proximity of two terms: conditional and distributional
+--
+-- Conditional measure is an absolute measure which reflects
+-- interactions of 2 terms in the corpus.
+measureConditional :: Matrix Int -> Matrix Double
+--measureConditional m = run (matMiniMax $ matProba (dim m) $ map fromIntegral $ use m)
+measureConditional m = run (matProba (dim m) $ map fromIntegral $ use m)


+-- *** Conditional distance (advanced)
+
 -- | Conditional distance (advanced version)
-- The conditional measure \[P_c\] of 2 terms @i@ and @j@, also called "confidence"
-- , is the maximum probability between @i@ and @j@. If \[n_i\] (resp.
-- \[n_j\]) is the number of occurrences of @i@ (resp. @j@) in the corpus and _[n_{ij}\] the number of its occurrences we get:
+--
+-- The conditional measure P(i|j) of 2 terms @i@ and @j@, also called
+-- "confidence" , is the maximum probability between @i@ and @j@ to see
+-- @i@ in the same context of @j@ knowing @j@.
+--
+-- If N(i) (resp. N(j)) is the number of occurrences of @i@ (resp. @j@)
+-- in the corpus and _[n_{ij}\] the number of its occurrences we get:
+--
 -- \[P_c=max(\frac{n_i}{n_{ij}},\frac{n_j}{n_{ij}} )\]
 conditional' :: Matrix Int -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
 conditional' m = (run $ ie $ map fromIntegral $ use m, run $ sg $ map fromIntegral $ use m)
@@ -124,21 +197,26 @@ conditional' m = (run $ ie $ map fromIntegral $ use m, run $ sg $ map fromIntegr
    r = dim m

    xs :: Acc (Matrix Double) -> Acc (Matrix Double)
-    xs mat = zipWith (-) (proba r mat) (mkSum r $ proba r mat)
+    xs mat = zipWith (-) (matSum r $ matProba r mat) (matProba r mat)
    ys :: Acc (Matrix Double) -> Acc (Matrix Double)
-    ys mat = zipWith (-) (proba r mat) (mkSum r $ transpose $ proba r mat)
+    ys mat = zipWith (-) (matSum r $ transpose $ matProba r mat) (matProba r mat)

 -----------------------------------------------------------------------
+-- ** Distributional Distance

-- | Distributional Distance
-- The distributional measure \[P_c\] of @i@ and @j@ terms is:
-- \[ S_{MI} = \frac {\sum_{k \neq i,j ; MI_{ik} >0}^{} \min(MI_{ik}, MI_{jk})}{\sum_{k \neq i,j ; MI_{ik}}^{}}
-- \]
+-- | Distributional Distance Measure
+--
+-- Distributional measure is a relative measure which depends on the
+-- selected list, it represents structural equivalence.
+--
+-- The distributional measure \[P_c\] of @i@ and @j@ terms is: \[
+-- S_{MI} = \frac {\sum_{k \neq i,j ; MI_{ik} >0}^{} \min(MI_{ik},
+-- MI_{jk})}{\sum_{k \neq i,j ; MI_{ik}}^{}} \]
 --
 -- Mutual information
 -- \[S{MI}({i},{j}) = \log(\frac{C{ij}}{E{ij}})\]
 --
-- Number of cooccurrences of @i@ and @j@ in the same context of text 
+-- Number of cooccurrences of @i@ and @j@ in the same context of text
 --                        \[C{ij}\]
 --
 -- The expected value of the cooccurrences
@@ -147,7 +225,7 @@ conditional' m = (run $ ie $ map fromIntegral $ use m, run $ sg $ map fromIntegr
 -- Total cooccurrences of @i@ term
 --            \[N_{i} = \sum_{i}^{} S_{i}\]
 distributional :: Matrix Int -> Matrix Double
-distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
+distributional m = run $ matMiniMax $ ri (map fromIntegral $ use m)
  where
    n    = dim m
    
@@ -155,8 +233,8 @@ distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
    
    ri mat = zipWith (/) mat1 mat2
      where
-        mat1 = mkSum n $ zipWith min (mi mat) (mi $ transpose mat)
-        mat2 = mkSum n mat
+        mat1 = matSum n $ zipWith min (mi mat) (mi $ transpose mat)
+        mat2 = matSum n mat
    
    mi    m'  = zipWith (\a b -> max (log $ a/b) 0)  m'
              $ zipWith (/) (crossProduct m') (total m')
@@ -164,10 +242,12 @@ distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
    total m'' = replicate (constant (Z :. n :. n)) $ fold (+) 0 $ fold (+) 0 m''
    
    crossProduct m''' = zipWith (*) (cross m'''  ) (cross (transpose m'''))
-    cross mat      = zipWith (-) (mkSum n mat) (mat)
+    cross mat      = zipWith (-) (matSum n mat) (mat)

 -----------------------------------------------------------------------
 -----------------------------------------------------------------------
+-- * Specificity and Genericity
+
 {- | Metric Specificity and genericity: select terms

 - let N termes and occurrences of i \[N{i}\]
@@ -181,7 +261,10 @@ distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
 - \[Inclusion (i) = Gen(i) + Spec(i)\)
 - \[GenericityScore = Gen(i)- Spec(i)\]

- References: Science mapping with asymmetrical paradigmatic proximity Jean-Philippe Cointet (CREA, TSV), David Chavalarias (CREA) (Submitted on 15 Mar 2008), Networks and Heterogeneous Media 3, 2 (2008) 267 - 276, arXiv:0803.2315 [cs.OH]
+- References: Science mapping with asymmetrical paradigmatic proximity
+Jean-Philippe Cointet (CREA, TSV), David Chavalarias (CREA) (Submitted
+on 15 Mar 2008), Networks and Heterogeneous Media 3, 2 (2008) 267 - 276,
+arXiv:0803.2315 [cs.OH]
 -}
 type InclusionExclusion    = Double
 type SpecificityGenericity = Double
@@ -217,7 +300,8 @@ incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)


 -- | P(i|j) = Nij /N(jj) Probability to get i given j
-p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (SymetricMatrix e) -> Acc (Matrix e)
+--p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (SymetricMatrix e) -> Acc (Matrix e)
+p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (Matrix e) -> Acc (Matrix e)
 p_ij m = zipWith (/) m (n_jj m)
  where
    n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
@@ -255,4 +339,10 @@ p_ m = zipWith (/) m (n_ m)
                         ) m
 -}

+-- * For Tests (to be removed)
+-- | Test perfermance with this matrix
+-- TODO : add this in a benchmark folder
+distriTest :: Matrix Double
+distriTest = distributional $ matrix 100 [1..]
+-----------------------------------------------------------------------