Tools.hs 11.1 KB
Newer Older
1
{-|
2
Module      : Gargantext.Core.Viz.Graph.Tools
3 4 5 6 7 8 9 10 11
Description : Tools to build Graph
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

-}

12 13
{-# LANGUAGE ScopedTypeVariables #-}

14
module Gargantext.Core.Viz.Graph.Tools
15 16
  where

17
import Data.HashMap.Strict (HashMap)
18
import Data.Map (Map)
Alexandre Delanoë's avatar
Alexandre Delanoë committed
19
import Data.Maybe (fromMaybe)
20 21 22
import GHC.Float (sin, cos)
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core.Methods.Distances (Distance(..), measure)
Alexandre Delanoë's avatar
Alexandre Delanoë committed
23
import Gargantext.Core.Methods.Distances.Conditional (conditional)
24
import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
25
import Gargantext.Core.Statistics
26
import Gargantext.Core.Viz.Graph
27
import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
28
import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
29
import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
30
import Gargantext.Core.Viz.Graph.Types (ClusterNode)
Alexandre Delanoë's avatar
Alexandre Delanoë committed
31
import Gargantext.Core.Viz.Graph.Utils (edgesFilter)
32
import Gargantext.Prelude
33
import IGraph.Random -- (Gen(..))
34
import qualified Data.HashMap.Strict      as HashMap
35 36 37 38 39
import qualified Data.List                as List
import qualified Data.Map                 as Map
import qualified Data.Set                 as Set
import qualified Data.Vector.Storable     as Vec
import qualified IGraph                   as Igraph
40
import qualified IGraph.Algorithms.Layout as Layout
41

42

43 44
-------------------------------------------------------------
defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
45 46
-- defaultClustering x = pure $ BAC.defaultClustering x
defaultClustering x = spinglass 1 x
47 48 49

-------------------------------------------------------------
type Threshold = Double
50

51

52 53
cooc2graph' :: Ord t => Distance
                     -> Double
54 55
                     -> Map (t, t) Int
                     -> Map (Index, Index) Double
56 57 58 59 60
cooc2graph' distance threshold myCooc
    = Map.filter (> threshold)
    $ mat2map
    $ measure distance
    $ case distance of
61 62
        Conditional    -> map2mat Triangle 0 tiSize
        Distributional -> map2mat Square   0 tiSize
63 64 65 66
    $ Map.filter (> 1) myCooc'

     where
        (ti, _) = createIndices myCooc
67
        tiSize  = Map.size ti
68 69
        myCooc' = toIndex ti myCooc

70

71 72
data PartitionMethod = Louvain | Spinglass
-- TODO Bac
73

74
-- coocurrences graph computation
75 76 77 78 79
cooc2graphWith :: PartitionMethod
               -> Distance
               -> Threshold
               -> HashMap (NgramsTerm, NgramsTerm) Int
               -> IO Graph
80
cooc2graphWith Louvain   = undefined
81
cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
82
-- cooc2graphWith Bac       = cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
83

84

85 86 87 88 89 90 91 92
cooc2graphWith' :: ToComId a
               => Partitions a
               -> Distance
               -> Threshold
               -> HashMap (NgramsTerm, NgramsTerm) Int
               -> IO Graph
cooc2graphWith' doPartitions distance threshold myCooc = do
  let
93
    (distanceMap, diag, ti) = doDistanceMap distance threshold myCooc
94

Alexandre Delanoë's avatar
Alexandre Delanoë committed
95
{- -- Debug
96
  saveAsFileDebug "debug/distanceMap" distanceMap
97
  printDebug "similarities" similarities
98
-}
99

Alexandre Delanoë's avatar
Alexandre Delanoë committed
100 101 102
  partitions <- if (Map.size distanceMap > 0)
      then doPartitions distanceMap
      else panic "Text.Flow: DistanceMap is empty"
103

104
  let
Alexandre Delanoë's avatar
Alexandre Delanoë committed
105 106 107 108 109
    nodesApprox :: Int
    nodesApprox = n'
      where
        (as, bs) = List.unzip $ Map.keys distanceMap
        n' = Set.size $ Set.fromList $ as <> bs
110
    bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
111
    confluence' = confluence (Map.keys bridgeness') 3 True False
Alexandre Delanoë's avatar
Alexandre Delanoë committed
112

Alexandre Delanoë's avatar
Alexandre Delanoë committed
113
  pure $ data2graph ti diag bridgeness' confluence' partitions
114 115 116 117 118 119 120 121 122


doDistanceMap :: Distance
              -> Threshold
              -> HashMap (NgramsTerm, NgramsTerm) Int
              -> ( Map (Int,Int) Double
                 , Map (Index, Index) Int
                 , Map NgramsTerm Index
                 )
123
doDistanceMap Distributional threshold myCooc = (distanceMap, toIndex ti diag, ti)
124 125 126 127 128 129 130 131 132
  where
    -- TODO remove below
    (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
                      $ Map.fromList
                      $ HashMap.toList myCooc

    (ti, _it) = createIndices theMatrix
    tiSize  = Map.size ti

133
{-
134 135 136 137 138
    matCooc = case distance of  -- Shape of the Matrix
                Conditional    -> map2mat Triangle 0 tiSize
                Distributional -> map2mat Square   0 tiSize
            $ toIndex ti theMatrix
    similarities = measure distance matCooc
139 140 141 142 143 144
-}

    similarities = measure Distributional
                 $ map2mat Square 0 tiSize
                 $ toIndex ti theMatrix

Alexandre Delanoë's avatar
Alexandre Delanoë committed
145
    links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
146 147 148

    distanceMap = Map.fromList
                $ List.take links
Alexandre Delanoë's avatar
Alexandre Delanoë committed
149
                $ List.reverse
150 151
                $ List.sortOn snd
                $ Map.toList
152
                $ edgesFilter
153 154 155
                $ Map.filter (> threshold)
                $ mat2map similarities

Alexandre Delanoë's avatar
Alexandre Delanoë committed
156
doDistanceMap Conditional threshold myCooc = (distanceMap, toIndex ti myCooc', ti)
157 158 159
  where
    myCooc' = Map.fromList $ HashMap.toList myCooc
    (ti, _it) = createIndices myCooc'
Alexandre Delanoë's avatar
Alexandre Delanoë committed
160
    tiSize  = Map.size ti
161

Alexandre Delanoë's avatar
Alexandre Delanoë committed
162
    links = round (let n :: Double = fromIntegral tiSize in n * log n)
163 164 165

    distanceMap = toIndex ti
                $ Map.fromList
Alexandre Delanoë's avatar
Alexandre Delanoë committed
166 167
                $ List.take links
                $ List.sortOn snd
168
                $ HashMap.toList
Alexandre Delanoë's avatar
Alexandre Delanoë committed
169
                $ HashMap.filter (> threshold)
170
                $ conditional myCooc
171

172 173
----------------------------------------------------------
-- | From data to Graph
174

Alexandre Delanoë's avatar
Alexandre Delanoë committed
175
type Occurrences      = Int
176

177
data2graph :: ToComId a 
Alexandre Delanoë's avatar
Alexandre Delanoë committed
178 179
           => Map NgramsTerm Int
           -> Map (Int, Int) Occurrences
180 181
           -> Map (Int, Int) Double
           -> Map (Int, Int) Double
182
           -> [a]
183
           -> Graph
Alexandre Delanoë's avatar
Alexandre Delanoë committed
184 185 186 187
data2graph labels' occurences bridge conf partitions = Graph { _graph_nodes = nodes
                                                             , _graph_edges = edges
                                                             , _graph_metadata = Nothing
                                                             }
188
  where
189

190
    nodes = map (setCoord ForceAtlas labels bridge)
191 192 193
          [ (n, Node { node_size    = maybe 0 identity (Map.lookup (n,n) occurences)
                     , node_type    = Terms -- or Unknown
                     , node_id      = cs (show n)
Alexandre Delanoë's avatar
Alexandre Delanoë committed
194
                     , node_label   = unNgramsTerm l
195 196
                     , node_x_coord = 0
                     , node_y_coord = 0
Alexandre Delanoë's avatar
Alexandre Delanoë committed
197 198 199
                     , node_attributes = Attributes { clust_default = fromMaybe 0
                                                       (Map.lookup n community_id_by_node_id)
                                                    }
200
                     , node_children = [] }
201 202
               )
            | (l, n) <- labels
Alexandre Delanoë's avatar
Alexandre Delanoë committed
203
            , Set.member n nodesWithScores
204 205
            ]

206
    edges = [ Edge { edge_source = cs (show s)
207
                       , edge_target = cs (show t)
208
                       , edge_weight = weight
209
                       , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
210 211
                       , edge_id     = cs (show i)
                   }
Alexandre Delanoë's avatar
Alexandre Delanoë committed
212
            | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge
213 214 215
            , s /= t
            , weight > 0
            ]
216

Alexandre Delanoë's avatar
Alexandre Delanoë committed
217 218 219 220 221 222 223 224 225 226
    community_id_by_node_id = Map.fromList
                            $ map nodeId2comId partitions

    labels = Map.toList labels'

    nodesWithScores = Set.fromList
                     $ List.concat
                     $ map (\((s,t),d) -> if d > 0 && s/=t then [s,t] else [])
                     $ Map.toList bridge

227

228 229 230 231 232 233 234
------------------------------------------------------------------------

data Layout = KamadaKawai | ACP | ForceAtlas


setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
235
  where
236 237
    (x,y) = f i

238

239
-- | ACP
240 241 242 243 244 245
setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
setCoord l labels m (n,node) = node { node_x_coord = x
                                    , node_y_coord = y
                                    }
  where
    (x,y) = getCoord l labels m n
246

247

248 249 250 251 252 253
getCoord :: Ord a
         => Layout
         -> [(a, Int)]
         -> Map (Int, Int) Double
         -> Int
         -> (Double, Double)
254
getCoord KamadaKawai _ _m _n = undefined -- layout m n
255

256
getCoord ForceAtlas _ _ n = (sin d, cos d)
257
  where
258
    d = fromIntegral n
259

260
getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
261 262 263 264 265 266 267 268 269 270 271 272 273 274 275
             $ Map.lookup n
             $ pcaReduceTo (Dimension 2)
             $ mapArray labels m
  where
    to2d :: Vec.Vector Double -> (Double, Double)
    to2d v  = (x',y')
      where
        ds = take 2 $ Vec.toList v
        x'  = head' "to2d" ds
        y'  = last' "to2d" ds

    mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
    mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
      where
        ns = map snd items
276 277 278

    toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
    toVec n' ns' m' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m') ns')
279 280 281
------------------------------------------------------------------------

-- | KamadaKawai Layout
282
-- TODO TEST: check labels, nodeId and coordinates
283 284
layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
285
  where
286 287
    coord :: (Map Int (Double,Double))
    coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
288
    --p = Layout.defaultLGL
289
    p = Layout.kamadaKawai
290
    g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
291

292
-----------------------------------------------------------------------------
293 294 295 296 297 298 299 300 301 302
-- MISC Tools
cooc2graph'' :: Ord t => Distance
                      -> Double
                      -> Map (t, t) Int
                      -> Map (Index, Index) Double
cooc2graph'' distance threshold myCooc = neighbourMap
  where
    (ti, _) = createIndices myCooc
    myCooc' = toIndex ti myCooc
    matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
Alexandre Delanoë's avatar
Alexandre Delanoë committed
303
    distanceMat  = measure distance matCooc
304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327
    neighbourMap = filterByNeighbours threshold
                 $ mat2map distanceMat

-- Quentin
filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
filterByNeighbours threshold distanceMap = filteredMap
  where 
    indexes :: [Index]
    indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
    filteredMap :: Map (Index, Index) Double
    filteredMap = Map.fromList
                $ List.concat 
                $ map (\idx -> 
                          let selected = List.reverse
                                       $ List.sortOn snd
                                       $ Map.toList 
                                       $ Map.filter (> 0)
                                       $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
                           in List.take (round threshold) selected
                      ) indexes




328