{-| Module : Gargantext.Core.Viz.Graph.Tools Description : Tools to build Graph Copyright : (c) CNRS, 2017-Present License : AGPL + CECILL v3 Maintainer : team@gargantext.org Stability : experimental Portability : POSIX -} {-# LANGUAGE BangPatterns, ScopedTypeVariables #-} module Gargantext.Core.Viz.Graph.Tools where import Data.Aeson import Data.HashMap.Strict (HashMap) import Data.Map.Strict (Map) import Data.Maybe (fromMaybe) import Data.Swagger hiding (items) import GHC.Float (sin, cos) import GHC.Generics (Generic) import Gargantext.API.Ngrams.Types (NgramsTerm(..)) import Gargantext.Core.Methods.Similarities (Similarity(..), measure) -- import Gargantext.Core.Methods.Similarities.Conditional (conditional) import Gargantext.Core.Statistics import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Bridgeness(..), Partitions, nodeId2comId, {-recursiveClustering,-} recursiveClustering', setNodes2clusterNodes) import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..)) import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass, spinglass') import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap) import Gargantext.Core.Viz.Graph.Types (Attributes(..), Edge(..), Graph(..), MultiPartite(..), Node(..), Partite(..), Strength(..)) import Gargantext.Database.Schema.Ngrams (NgramsType(..)) import Gargantext.Core.Viz.Graph.Utils (edgesFilter, nodesFilter) import Gargantext.Prelude import Graph.Types (ClusterNode) import IGraph.Random -- (Gen(..)) import Test.QuickCheck (elements) import Test.QuickCheck.Arbitrary import qualified Data.HashMap.Strict as HashMap import qualified Data.List as List import qualified Data.Map.Strict as Map import qualified Data.Set as Set import qualified Data.HashSet as HashSet import qualified Data.Text as Text import qualified Data.Vector.Storable as Vec import qualified Graph.BAC.ProxemyOptim as BAC import qualified IGraph as Igraph import qualified IGraph.Algorithms.Layout as Layout data PartitionMethod = Spinglass | Confluence | Infomap deriving (Generic, Eq, Ord, Enum, Bounded, Show) instance FromJSON PartitionMethod instance ToJSON PartitionMethod instance ToSchema PartitionMethod instance Arbitrary PartitionMethod where arbitrary = elements [ minBound .. maxBound ] data BridgenessMethod = BridgenessMethod_Basic | BridgenessMethod_Advanced deriving (Generic, Eq, Ord, Enum, Bounded, Show) instance FromJSON BridgenessMethod instance ToJSON BridgenessMethod instance ToSchema BridgenessMethod instance Arbitrary BridgenessMethod where arbitrary = elements [ minBound .. maxBound ] ------------------------------------------------------------- defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode] -- defaultClustering x = pure $ BAC.defaultClustering x defaultClustering x = spinglass 1 x ------------------------------------------------------------- type Threshold = Double cooc2graph' :: Ord t => Similarity -> Double -> Map (t, t) Int -> Map (Index, Index) Double cooc2graph' distance threshold myCooc = Map.filter (> threshold) $ mat2map $ measure distance $ case distance of Conditional -> map2mat Triangle 0 tiSize Distributional -> map2mat Square 0 tiSize $ Map.filter (> 1) myCooc' where (ti, _) = createIndices myCooc tiSize = Map.size ti myCooc' = toIndex ti myCooc -- coocurrences graph computation cooc2graphWith :: PartitionMethod -> BridgenessMethod -> MultiPartite -> Similarity -> Threshold -> Strength -> HashMap (NgramsTerm, NgramsTerm) Int -> IO Graph cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1) cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x) cooc2graphWith Infomap = cooc2graphWith' (infomap "-v -N2") --cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2") -- TODO: change these options, or make them configurable in UI? cooc2graphWith' :: Partitions -> BridgenessMethod -> MultiPartite -> Similarity -> Threshold -> Strength -> HashMap (NgramsTerm, NgramsTerm) Int -> IO Graph cooc2graphWith' _doPartitions _bridgenessMethod multi similarity threshold strength myCooc = do let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc distanceMap `seq` diag `seq` ti `seq` return () partitions <- if (Map.size distanceMap > 0) then recursiveClustering' (spinglass' 1) distanceMap else panic $ Text.unlines [ "I can not compute the graph you request" , "because either the quantity of documents" , "or the quantity of terms" , "are lacking. " , "Solution: add more either Documents or Map Terms to your analysis. " , "Follow the available tutorials on the Training EcoSystems. " , "Ask your co-users of GarganText how to have access to it." ] length partitions `seq` return () let !confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True !bridgeness' = bridgeness (Bridgeness_Recursive partitions 1.0 similarity) distanceMap pure $ data2graph multi ti diag bridgeness' confluence' (setNodes2clusterNodes $ List.concat partitions) {- cooc2graphWith' _doPartitions _bridgenessMethod multi similarity@Distributional threshold strength myCooc = do let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc distanceMap `seq` diag `seq` ti `seq` return () partitions <- if (Map.size distanceMap > 0) then recursiveClustering (spinglass 1) distanceMap else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty" , "Maybe you should add more Map Terms in your list" , "Tutorial: TODO" ] length partitions `seq` return () let !confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True !bridgeness' = bridgeness (Bridgeness_Basic partitions 1.0) distanceMap pure $ data2graph multi ti diag bridgeness' confluence' partitions -} type Reverse = Bool doSimilarityMap :: Similarity -> Threshold -> Strength -> HashMap (NgramsTerm, NgramsTerm) Int -> ( Map (Int,Int) Double , Map (Index, Index) Int , Map NgramsTerm Index ) doSimilarityMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti) where myCooc' = Map.fromList $ HashMap.toList myCooc (_diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y) $ Map.fromList $ HashMap.toList myCooc (ti, _it) = createIndices theMatrix tiSize = Map.size ti similarities = (\m -> m `seq` m) $ (\m -> m `seq` measure Conditional m) $ (\m -> m `seq` map2mat Square 0 tiSize m) $ theMatrix `seq` toIndex ti theMatrix links = round (let n :: Double = fromIntegral (Map.size ti) in 10 * n * (log n)^(2::Int)) distanceMap = Map.fromList $ List.take links $ (if strength == Weak then List.reverse else identity) $ List.sortOn snd $ Map.toList $ Map.filter (> threshold) -- $ conditional myCooc $ similarities `seq` mat2map similarities doSimilarityMap Distributional threshold strength myCooc = (distanceMap, toIndex ti diag, ti) where -- TODO remove below (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y) $ Map.fromList $ HashMap.toList myCooc (ti, _it) = createIndices theMatrix tiSize = Map.size ti similarities = (\m -> m `seq` m) $ (\m -> m `seq` measure Distributional m) $ (\m -> m `seq` map2mat Square 0 tiSize m) $ theMatrix `seq` toIndex ti theMatrix links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int)) distanceMap = Map.fromList $ List.take links $ (if strength == Weak then List.reverse else identity) $ List.sortOn snd $ Map.toList $ edgesFilter $ (\m -> m `seq` Map.filter (> threshold) m) $ similarities `seq` mat2map similarities ---------------------------------------------------------- -- | From data to Graph type Occurrences = Int nodeTypeWith :: MultiPartite -> NgramsTerm -> NgramsType nodeTypeWith (MultiPartite (Partite s1 t1) (Partite _s2 t2)) t = if HashSet.member t s1 then t1 else t2 data2graph :: MultiPartite -> Map NgramsTerm Int -> Map (Int, Int) Occurrences -> Map (Int, Int) Double -> Map (Int, Int) Double -> [ClusterNode] -> Graph data2graph multi labels' occurences bridge conf partitions = Graph { _graph_nodes = nodes , _graph_edges = edges , _graph_metadata = Nothing } where nodes = map (setCoord ForceAtlas labels bridge) [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences) , node_type = nodeTypeWith multi label , node_id = (cs . show) n , node_label = unNgramsTerm label , node_x_coord = 0 , node_y_coord = 0 , node_attributes = Attributes { clust_default = fromMaybe 0 (Map.lookup n community_id_by_node_id) } , node_children = [] } ) | (label, n) <- labels , Set.member n toKeep ] (bridge', toKeep) = nodesFilter (\v -> v > 1) bridge edges = [ Edge { edge_source = cs (show s) , edge_hidden = Nothing , edge_target = cs (show t) , edge_weight = weight , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf , edge_id = cs (show i) } | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge' , s /= t , weight > 0 ] community_id_by_node_id = Map.fromList $ map nodeId2comId partitions labels = Map.toList labels' ------------------------------------------------------------------------ data Layout = KamadaKawai | ACP | ForceAtlas setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y } where (x,y) = f i -- | ACP setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node setCoord l labels m (n,node) = node { node_x_coord = x , node_y_coord = y } where (x,y) = getCoord l labels m n getCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> Int -> (Double, Double) getCoord KamadaKawai _ _m _n = undefined -- layout m n getCoord ForceAtlas _ _ n = (sin d, cos d) where d = fromIntegral n getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity $ Map.lookup n $ pcaReduceTo (Dimension 2) $ mapArray labels m where to2d :: Vec.Vector Double -> (Double, Double) to2d v = (x',y') where ds = take 2 $ Vec.toList v x' = head' "to2d" ds y' = last' "to2d" ds mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double) mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ] where ns = map snd items toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double) toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns') ------------------------------------------------------------------------ -- | KamadaKawai Layout -- TODO TEST: check labels, nodeId and coordinates layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double) layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord where coord :: (Map Int (Double,Double)) coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen) --p = Layout.defaultLGL p = Layout.kamadaKawai g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m ----------------------------------------------------------------------------- -- MISC Tools cooc2graph'' :: Ord t => Similarity -> Double -> Map (t, t) Int -> Map (Index, Index) Double cooc2graph'' distance threshold myCooc = neighbourMap where (ti, _) = createIndices myCooc myCooc' = toIndex ti myCooc matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc' distanceMat = measure distance matCooc neighbourMap = filterByNeighbours threshold $ mat2map distanceMat -- Quentin filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double filterByNeighbours threshold distanceMap = filteredMap where indexes :: [Index] indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap filteredMap :: Map (Index, Index) Double filteredMap = Map.fromList $ List.concat $ map (\idx -> let selected = List.reverse $ List.sortOn snd $ Map.toList $ Map.filter (> 0) $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap in List.take (round threshold) selected ) indexes