Conditional.hs 4.17 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
{-|
Module      : Gargantext.Graph.Distances.Conditional
Description : 
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

Motivation and definition of the @Conditional@ distance.
-}

{-# LANGUAGE BangPatterns      #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts  #-}
{-# LANGUAGE Strict            #-}
17
module Gargantext.Viz.Graph.Distances.Conditional
18 19 20 21
  where

import Data.Matrix hiding (identity)

22
import Data.List (sortOn)
23 24 25 26 27 28 29 30 31

import Data.Map (Map)
import qualified Data.Map as M

import qualified Data.Set as S

import qualified Data.Vector as V

import Gargantext.Prelude
32
import Gargantext.Viz.Graph.Utils
33 34 35 36 37 38 39 40

------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Optimisation issue

toBeOptimized :: (Num a, Fractional a, Ord a) => Matrix a -> Matrix a
toBeOptimized m = proba Col m

41
------------------------------------------------------------------------
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
-- | Main Functions
-- Compute the probability from axis
-- x' = x / (sum Col x)
proba :: (Num a, Fractional a) => Axis -> Matrix a -> Matrix a
proba a m = mapOn a (\c x -> x / V.sum (axis a c m)) m


mapOn :: Axis -> (AxisId -> a -> a) -> Matrix a -> Matrix a
mapOn a f m = V.foldl' f'  m (V.enumFromTo 1 (nOf a m))
  where
    f' m' c = mapOnly a f c m'

mapOnly :: Axis -> (AxisId -> a -> a) -> AxisId -> Matrix a -> Matrix a
mapOnly Col = mapCol
mapOnly Row = mapRow

mapAll :: (a -> a) -> Matrix a -> Matrix a
mapAll f m = mapOn Col (\_ -> f) m


---------------------------------------------------------------
-- | Compute a distance from axis
-- xs = (sum Col x') - x'
distFromSum :: (Num a, Fractional a)
         => Axis -> Matrix a -> Matrix a
distFromSum  a m = mapOn a (\c x -> V.sum (axis a c m) - x) m
---------------------------------------------------------------
---------------------------------------------------------------
-- | To compute included/excluded or specific/generic scores
opWith  :: (Fractional a1, Num a1)
         => (Matrix a2 -> t -> Matrix a1) -> Matrix a2 -> t -> Matrix a1
opWith op xs ys = mapAll (\x -> x / (2*n -1)) (xs `op` ys)
  where
    n = fromIntegral $ nOf Col xs
---------------------------------------------------------------


79 80
-------------------------------------------------------
conditional :: (Num a, Fractional a, Ord a) => Matrix a -> Matrix a
81
conditional m = filterMat (threshold m') m'
82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109
  where
------------------------------------------------------------------------
    -- | Main Operations
    -- x' = x / (sum Col x)
    x' = proba Col m

------------------------------------------------------------------------
    -- xs = (sum Col x') - x'
    xs = distFromSum Col x'
    -- ys = (sum Row x') - x'
    ys = distFromSum Row x'

------------------------------------------------------------------------
--  | Top included or excluded
    ie = opWith (+) xs ys
--  ie = ( xs + ys) / (2 * (x.shape[0] - 1))

--  | Top specific or generic
    sg = opWith (-) xs ys
--  sg = ( xs - ys) / (2 * (x.shape[0] - 1))
    
    nodes_kept :: [Int]
    nodes_kept = take k' $ S.toList
                         $ foldl' (\s (n1,n2) -> insert [n1,n2] s) S.empty
                         $ map fst
                         $ nodes_included k <> nodes_specific k

    nodes_included n = take n $ sortOn snd $ toListsWithIndex ie
110
    nodes_specific n = take n $ sortOn snd $ toListsWithIndex sg
111 112 113 114 115
    insert as s = foldl' (\s' a -> S.insert a s') s as
    k' = 2*k
    k = 10
    
    dico_nodes :: Map Int Int
116 117
    dico_nodes     = M.fromList $ zip ([1..] :: [Int]) nodes_kept
    --dico_nodes_rev = M.fromList $ zip nodes_kept [1..]
118 119 120 121 122

    m' = matrix (length nodes_kept) 
                (length nodes_kept) 
                (\(i,j) -> getElem ((M.!) dico_nodes i) ((M.!) dico_nodes j) x')

123
    threshold m'' = V.minimum $ V.map (\cId -> V.maximum $ getCol cId m'') (V.enumFromTo 1 (nOf Col m''))
124
    
125
    filterMat t m''  = mapAll (\x -> filter' t x) m''
126
      where
127
        filter' t' x = case (x >= t') of
128 129 130 131
                        True  -> x
                        False -> 0

------------------------------------------------------------------------