{-| Module : Gargantext.Core.Viz.Graph.Tools Description : Tools to build Graph Copyright : (c) CNRS, 2017-Present License : AGPL + CECILL v3 Maintainer : team@gargantext.org Stability : experimental Portability : POSIX -} {-# OPTIONS_GHC -fno-warn-deprecations #-} {-# LANGUAGE BangPatterns #-} {-# LANGUAGE ScopedTypeVariables #-} module Gargantext.Core.Viz.Graph.Tools where import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict qualified as HashMap import Data.HashSet qualified as HashSet import Data.List qualified as List import Data.Map.Strict qualified as Map import Data.Set qualified as Set import Data.Swagger ( ToSchema ) import Data.Text qualified as Text import Data.Vector.Storable qualified as Vec import Gargantext.API.Ngrams.Types (NgramsTerm(..)) import Gargantext.Core.Methods.Similarities (Similarity(..), measure) import Gargantext.Core.Statistics ( pcaReduceTo, Dimension(Dimension) ) import Gargantext.Core.Text.Ngrams (NgramsType(..)) import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Bridgeness(..), Partitions, nodeId2comId, {-recursiveClustering,-} recursiveClustering', setNodes2clusterNodes) import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..)) import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass, spinglass') import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap) import Gargantext.Core.Viz.Graph.Types (Attributes(..), Edge(..), Graph(..), MultiPartite(..), Node(..), Partite(..), Strength(..)) import Gargantext.Core.Viz.Graph.Utils (edgesFilter, nodesFilter) import Gargantext.Prelude import Graph.BAC.ProxemyOptim qualified as BAC import Graph.Types (ClusterNode) import IGraph qualified as Igraph import IGraph.Algorithms.Layout qualified as Layout import IGraph.Random ( Gen ) -- (Gen(..)) import Test.QuickCheck (elements) import Test.QuickCheck.Arbitrary ( Arbitrary(arbitrary) ) data PartitionMethod = Spinglass | Confluence | Infomap deriving (Generic, Eq, Ord, Enum, Bounded, Show) instance FromJSON PartitionMethod instance ToJSON PartitionMethod instance ToSchema PartitionMethod instance Arbitrary PartitionMethod where arbitrary = elements [ minBound .. maxBound ] data BridgenessMethod = BridgenessMethod_Basic | BridgenessMethod_Advanced deriving (Generic, Eq, Ord, Enum, Bounded, Show) instance FromJSON BridgenessMethod instance ToJSON BridgenessMethod instance ToSchema BridgenessMethod instance Arbitrary BridgenessMethod where arbitrary = elements [ minBound .. maxBound ] ------------------------------------------------------------- defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode] -- defaultClustering x = pure $ BAC.defaultClustering x defaultClustering x = spinglass 1 x ------------------------------------------------------------- type Threshold = Double cooc2graph' :: Ord t => Similarity -> Double -> Map (t, t) Int -> Map (Index, Index) Double cooc2graph' distance threshold myCooc = Map.filter (> threshold) $ mat2map $ measure distance $ case distance of Conditional -> map2mat Square 1 tiSize _ -> map2mat Square 0 tiSize $ Map.filter (> 1) myCooc' where (ti, _) = createIndices myCooc tiSize = Map.size ti myCooc' = toIndex ti myCooc -- coocurrences graph computation cooc2graphWith :: PartitionMethod -> BridgenessMethod -> MultiPartite -> Similarity -> Threshold -> Strength -> HashMap (NgramsTerm, NgramsTerm) Int -> IO Graph cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1) cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x) cooc2graphWith Infomap = cooc2graphWith' (infomap "-v -N2") --cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2") -- TODO: change these options, or make them configurable in UI? cooc2graphWith' :: Partitions -> BridgenessMethod -> MultiPartite -> Similarity -> Threshold -> Strength -> HashMap (NgramsTerm, NgramsTerm) Int -> IO Graph cooc2graphWith' _doPartitions _bridgenessMethod multi similarity threshold strength myCooc = do let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc distanceMap `seq` diag `seq` ti `seq` pure () partitions <- if (Map.size distanceMap > 0) then recursiveClustering' (spinglass' 1) distanceMap else panic $ Text.unwords [ "I can not compute the graph you request" , "because either the quantity of documents" , "or the quantity of terms" , "are lacking." , "Solution: add more either Documents or Map Terms to your analysis." , "Follow the available tutorials on the Training EcoSystems." , "Ask your co-users of GarganText how to have access to it." ] length partitions `seq` pure () let !confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True !bridgeness' = bridgeness (Bridgeness_Recursive partitions 1.0 similarity) distanceMap pure $ data2graph multi ti diag bridgeness' confluence' (setNodes2clusterNodes $ List.concat partitions) {- cooc2graphWith' _doPartitions _bridgenessMethod multi similarity@Distributional threshold strength myCooc = do let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc distanceMap `seq` diag `seq` ti `seq` pure () partitions <- if (Map.size distanceMap > 0) then recursiveClustering (spinglass 1) distanceMap else panic $ Text.unlines [ "[Gargantext.C.V.Graph.Tools] Similarity Matrix is empty" , "Maybe you should add more Map Terms in your list" , "Tutorial: TODO" ] length partitions `seq` pure () let !confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True !bridgeness' = bridgeness (Bridgeness_Basic partitions 1.0) distanceMap pure $ data2graph multi ti diag bridgeness' confluence' partitions -} type Reverse = Bool doSimilarityMap :: Similarity -> Threshold -> Strength -> HashMap (NgramsTerm, NgramsTerm) Int -> ( Map (Int,Int) Double , Map (Index, Index) Int , Map NgramsTerm Index ) doSimilarityMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti) where myCooc' = Map.fromList $ HashMap.toList myCooc (_diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y) $ Map.fromList $ HashMap.toList myCooc (ti, _it) = createIndices theMatrix tiSize = Map.size ti similarities = (\m -> m `seq` m) $ (\m -> m `seq` measure Conditional m) $ (\m -> m `seq` map2mat Square 0 tiSize m) $ theMatrix `seq` toIndex ti theMatrix links = round (let n :: Double = fromIntegral (Map.size ti) in 10 * n * (log n)^(2::Int)) distanceMap = Map.fromList $ List.take links $ (if strength == Weak then List.reverse else identity) $ List.sortOn snd $ Map.toList $ Map.filter (> threshold) $ similarities `seq` mat2map similarities doSimilarityMap distriType threshold strength myCooc = (distanceMap, toIndex ti diag, ti) where -- TODO remove below (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y) $ Map.fromList $ HashMap.toList myCooc (ti, _it) = createIndices theMatrix tiSize = Map.size ti similarities = (\m -> m `seq` m) $ (\m -> m `seq` measure distriType m) $ (\m -> m `seq` map2mat Square 0 tiSize m) $ theMatrix `seq` toIndex ti theMatrix links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int)) distanceMap = Map.fromList $ List.take links $ (if strength == Weak then List.reverse else identity) $ List.sortOn snd $ Map.toList $ edgesFilter $ (\m -> m `seq` Map.filter (> threshold) m) $ similarities `seq` mat2map similarities ---------------------------------------------------------- -- | From data to Graph type Occurrences = Int nodeTypeWith :: MultiPartite -> NgramsTerm -> NgramsType nodeTypeWith (MultiPartite (Partite s1 t1) (Partite _s2 t2)) t = if HashSet.member t s1 then t1 else t2 data2graph :: MultiPartite -> Map NgramsTerm Int -> Map (Int, Int) Occurrences -> Map (Int, Int) Double -> Map (Int, Int) Double -> [ClusterNode] -> Graph data2graph multi labels' occurences bridge conf partitions = Graph { _graph_nodes = nodes , _graph_edges = edges , _graph_metadata = Nothing } where nodes = map (setCoord ForceAtlas labels bridge) [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences) , node_type = nodeTypeWith multi label , node_id = show n , node_label = unNgramsTerm label , node_x_coord = 0 , node_y_coord = 0 , node_attributes = Attributes { clust_default = fromMaybe 0 (Map.lookup n community_id_by_node_id) } , node_children = [] } ) | (label, n) <- labels , Set.member n toKeep ] (bridge', toKeep) = nodesFilter (\v -> v > 1) bridge edges = [ Edge { edge_source = show s , edge_hidden = Nothing , edge_target = show t , edge_weight = weight , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf , edge_id = show i } | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge' , s /= t , weight > 0 ] community_id_by_node_id = Map.fromList $ map nodeId2comId partitions labels = Map.toList labels' ------------------------------------------------------------------------ data Layout = KamadaKawai | ACP | ForceAtlas setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y } where (x,y) = f i -- | ACP setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node setCoord l labels m (n,node) = node { node_x_coord = x , node_y_coord = y } where (x,y) = getCoord l labels m n getCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> Int -> (Double, Double) getCoord KamadaKawai _ _m _n = undefined -- layout m n getCoord ForceAtlas _ _ n = (sin d, cos d) where d = fromIntegral n getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity $ Map.lookup n $ pcaReduceTo (Dimension 2) $ mapArray labels m where to2d :: Vec.Vector Double -> (Double, Double) to2d v = (x',y') where ds = take 2 $ Vec.toList v x' = head' "to2d" ds y' = last' "to2d" ds mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double) mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ] where ns = map snd items toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double) toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns') ------------------------------------------------------------------------ -- | KamadaKawai Layout -- TODO TEST: check labels, nodeId and coordinates layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double) layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord where coord :: (Map Int (Double,Double)) coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen) --p = Layout.defaultLGL p = Layout.kamadaKawai g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m ----------------------------------------------------------------------------- -- MISC Tools cooc2graph'' :: Ord t => Similarity -> Double -> Map (t, t) Int -> Map (Index, Index) Double cooc2graph'' distance threshold myCooc = neighbourMap where (ti, _) = createIndices myCooc myCooc' = toIndex ti myCooc matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc' distanceMat = measure distance matCooc neighbourMap = filterByNeighbours threshold $ mat2map distanceMat -- Quentin filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double filterByNeighbours threshold distanceMap = filteredMap where indexes :: [Index] indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap filteredMap :: Map (Index, Index) Double filteredMap = Map.fromList $ List.concat $ map (\idx -> let selected = List.reverse $ List.sortOn snd $ Map.toList $ Map.filter (> 0) $ Map.filterWithKey (\(from', _) _ -> idx == from') distanceMap in List.take (round threshold) selected ) indexes