Commit b074d137 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[CLEAN/MV] Graph Tools.

parent a26bdc84
......@@ -58,7 +58,7 @@ import Gargantext.Database.Types.Node
import Gargantext.Database.Types.Node (CorpusId, ContactId)
import Gargantext.Database.Utils -- (Cmd, CmdM)
import Gargantext.Prelude
import Gargantext.Text.Flow (cooc2graph)
import Gargantext.Viz.Graph.Tools (cooc2graph)
import Gargantext.Viz.Graph hiding (Node)-- (Graph(_graph_metadata),LegendField(..), GraphMetadata(..),readGraphFromJson,defaultGraph)
import Servant
import Test.QuickCheck (elements)
......
......@@ -11,40 +11,19 @@ From text to viz, all the flow of texts in Gargantext.
-}
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Flow
where
--import qualified Data.Array.Accelerate as A
--import qualified Data.Set as DS
--import Control.Monad.Reader
import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
import Data.Map.Strict (Map)
--import Data.Maybe (catMaybes)
import qualified Data.Map.Strict as Map
import qualified Data.Text as T
import Data.Text (Text)
--import Data.Text.IO (readFile)
import Database.PostgreSQL.Simple (Connection)
import GHC.IO (FilePath)
--import Gargantext.Core (Lang)
import Gargantext.Core.Types (CorpusId)
--import Gargantext.Database.Schema.Node
--import Gargantext.Database.Types.Node
import Gargantext.Prelude
--import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
--import Gargantext.Text.Metrics (filterCooc, FilterConfig(..), Clusters(..), SampleBins(..), DefaultValue(..), MapListSize(..), InclusionSize(..))
--import Gargantext.Text.Metrics.Count (coocOn)
--import Gargantext.Text.Parsers.CSV
--import Gargantext.Text.Terms (TermType, extractTerms)
import Gargantext.Viz.Graph (Graph(..), data2graph)
import Gargantext.Viz.Graph.Bridgeness (bridgeness)
import Gargantext.Viz.Graph.Distances.Matrice (measureConditional)
--import Gargantext.Viz.Graph.Distances.Matrice (distributional)
import Gargantext.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map)
{-
____ _ _
/ ___| __ _ _ __ __ _ __ _ _ __ | |_ _____ _| |_
......@@ -115,53 +94,3 @@ textFlow' termType contexts = do
pure g
-}
-- TODO use Text only here instead of [Text]
cooc2graph :: (Map (Text, Text) Int) -> IO Graph
cooc2graph myCooc = do
--printDebug "myCooc" myCooc
-- Filtering terms with inclusion/Exclusion and Specificity/Genericity scores
{-
let myCooc3 = filterCooc ( FilterConfig (MapListSize 350 )
(InclusionSize 500 )
(SampleBins 10 )
(Clusters 3 )
(DefaultValue 0 )
) myCooc
--} --printDebug "myCooc3 size" $ M.size myCooc3
-- Cooc -> Matrix
let (ti, _) = createIndices myCooc
--printDebug "ti size" $ M.size ti
--printDebug "ti" ti
let myCooc4 = toIndex ti myCooc
--printDebug "myCooc4 size" $ M.size myCooc4
--printDebug "myCooc4" myCooc4
let matCooc = map2mat (0) (Map.size ti) myCooc4
--printDebug "matCooc shape" $ A.arrayShape matCooc
--printDebug "matCooc" matCooc
-- Matrix -> Clustering
let distanceMat = measureConditional matCooc
--let distanceMat = distributional matCooc
--printDebug "distanceMat shape" $ A.arrayShape distanceMat
--printDebug "distanceMat" distanceMat
--let distanceMap = M.filter (>0) $ mat2map distanceMat
let distanceMap = Map.map (\_ -> 1) $ Map.filter (>0) $ mat2map distanceMat
--printDebug "distanceMap size" $ M.size distanceMap
--printDebug "distanceMap" distanceMap
--let distance = fromIndex fi distanceMap
--printDebug "distance" $ M.size distance
partitions <- case Map.size distanceMap > 0 of
True -> cLouvain distanceMap
False -> panic "Text.Flow: DistanceMap is empty"
-- Building : -> Graph -> JSON
--printDebug "partitions" $ DS.size $ DS.fromList $ map (l_community_id) partitions
--printDebug "partitions" partitions
let distanceMap' = bridgeness 300 partitions distanceMap
pure $ data2graph (Map.toList ti) myCooc4 distanceMap' partitions
......@@ -16,35 +16,23 @@ Portability : POSIX
module Gargantext.Viz.Graph
where
------------------------------------------------------------------------
import Control.Lens (makeLenses)
import Control.Monad.IO.Class (MonadIO(liftIO))
import GHC.IO (FilePath)
import GHC.Generics (Generic)
import Data.Aeson.TH (deriveJSON)
import qualified Data.Aeson as DA
import Data.ByteString.Lazy as DBL (readFile, writeFile)
import Data.Text (Text, pack)
import qualified Text.Read as T
import qualified Data.Text as T
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M
import Data.Swagger
import Gargantext.Prelude
import Data.Text (Text, pack)
import GHC.Generics (Generic)
import GHC.IO (FilePath)
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Types.Node (NodeId)
import Data.Graph.Clustering.Louvain.CplusPlus (LouvainNode(..))
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import Gargantext.Prelude
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import qualified Data.Aeson as DA
import qualified Data.Text as T
import qualified Text.Read as T
------------------------------------------------------------------------
data TypeNode = Terms | Unknown
deriving (Show, Generic)
......@@ -124,13 +112,13 @@ instance ToSchema Graph where
defaultSchemaOptions {fieldLabelModifier = \fieldLabel -> drop 7 fieldLabel}
defaultGraph :: Graph
defaultGraph = Graph {_graph_nodes = [Node {node_size = 4, node_type = Terms, node_id = pack "0", node_label = pack "animal", node_attributes = Attributes {clust_default = 0}},Node {node_size = 3, node_type = Terms, node_id = pack "1", node_label = pack "bird", node_attributes = Attributes {clust_default = 0}},Node {node_size = 2, node_type = Terms, node_id = pack "2", node_label = pack "boy", node_attributes = Attributes {clust_default = 1}},Node {node_size = 2, node_type = Terms, node_id = pack "3", node_label = pack "dog", node_attributes = Attributes {clust_default = 0}},Node {node_size = 2, node_type = Terms, node_id = pack "4", node_label = pack "girl", node_attributes = Attributes {clust_default = 1}},Node {node_size = 4, node_type = Terms, node_id = pack "5", node_label = pack "human body", node_attributes = Attributes {clust_default = 1}},Node {node_size = 3, node_type = Terms, node_id = pack "6", node_label = pack "object", node_attributes = Attributes {clust_default = 2}},Node {node_size = 2, node_type = Terms, node_id = pack "7", node_label = pack "pen", node_attributes = Attributes {clust_default = 2}},Node {node_size = 2, node_type = Terms, node_id = pack "8", node_label = pack "table", node_attributes = Attributes {clust_default = 2}}], _graph_edges = [Edge {edge_source = pack "0", edge_target = pack "0", edge_weight = 1.0, edge_id = pack "0"},Edge {edge_source = pack "1", edge_target = pack "0", edge_weight = 1.0, edge_id = pack "1"},Edge {edge_source = pack "1", edge_target = pack "1", edge_weight = 1.0, edge_id = pack "2"},Edge {edge_source = pack "2", edge_target = pack "2", edge_weight = 1.0, edge_id = pack "3"},Edge {edge_source = pack "2", edge_target = pack "5", edge_weight = 1.0, edge_id = pack "4"},Edge {edge_source = pack "3", edge_target = pack "0", edge_weight = 1.0, edge_id = pack "5"},Edge {edge_source = pack "3", edge_target = pack "1", edge_weight = 1.0, edge_id = pack "6"},Edge {edge_source = pack "3", edge_target = pack "3", edge_weight = 1.0, edge_id = pack "7"},Edge {edge_source = pack "4", edge_target = pack "4", edge_weight = 1.0, edge_id = pack "8"},Edge {edge_source = pack "4", edge_target = pack "5", edge_weight = 1.0, edge_id = pack "9"},Edge {edge_source = pack "5", edge_target = pack "5", edge_weight = 1.0, edge_id = pack "10"},Edge {edge_source = pack "6", edge_target = pack "6", edge_weight = 1.0, edge_id = pack "11"},Edge {edge_source = pack "7", edge_target = pack "6", edge_weight = 1.0, edge_id = pack "12"},Edge {edge_source = pack "7", edge_target = pack "7", edge_weight = 1.0, edge_id = pack "13"},Edge {edge_source = pack "8", edge_target = pack "6", edge_weight = 1.0, edge_id = pack "14"},Edge {edge_source = pack "8", edge_target = pack "7", edge_weight = 1.0, edge_id = pack "15"},Edge {edge_source = pack "8", edge_target = pack "8", edge_weight = 1.0, edge_id = pack "16"}], _graph_metadata = Nothing}
-- | Intances for the mack
instance Arbitrary Graph where
arbitrary = elements $ [defaultGraph]
defaultGraph :: Graph
defaultGraph = Graph {_graph_nodes = [Node {node_size = 4, node_type = Terms, node_id = pack "0", node_label = pack "animal", node_attributes = Attributes {clust_default = 0}},Node {node_size = 3, node_type = Terms, node_id = pack "1", node_label = pack "bird", node_attributes = Attributes {clust_default = 0}},Node {node_size = 2, node_type = Terms, node_id = pack "2", node_label = pack "boy", node_attributes = Attributes {clust_default = 1}},Node {node_size = 2, node_type = Terms, node_id = pack "3", node_label = pack "dog", node_attributes = Attributes {clust_default = 0}},Node {node_size = 2, node_type = Terms, node_id = pack "4", node_label = pack "girl", node_attributes = Attributes {clust_default = 1}},Node {node_size = 4, node_type = Terms, node_id = pack "5", node_label = pack "human body", node_attributes = Attributes {clust_default = 1}},Node {node_size = 3, node_type = Terms, node_id = pack "6", node_label = pack "object", node_attributes = Attributes {clust_default = 2}},Node {node_size = 2, node_type = Terms, node_id = pack "7", node_label = pack "pen", node_attributes = Attributes {clust_default = 2}},Node {node_size = 2, node_type = Terms, node_id = pack "8", node_label = pack "table", node_attributes = Attributes {clust_default = 2}}], _graph_edges = [Edge {edge_source = pack "0", edge_target = pack "0", edge_weight = 1.0, edge_id = pack "0"},Edge {edge_source = pack "1", edge_target = pack "0", edge_weight = 1.0, edge_id = pack "1"},Edge {edge_source = pack "1", edge_target = pack "1", edge_weight = 1.0, edge_id = pack "2"},Edge {edge_source = pack "2", edge_target = pack "2", edge_weight = 1.0, edge_id = pack "3"},Edge {edge_source = pack "2", edge_target = pack "5", edge_weight = 1.0, edge_id = pack "4"},Edge {edge_source = pack "3", edge_target = pack "0", edge_weight = 1.0, edge_id = pack "5"},Edge {edge_source = pack "3", edge_target = pack "1", edge_weight = 1.0, edge_id = pack "6"},Edge {edge_source = pack "3", edge_target = pack "3", edge_weight = 1.0, edge_id = pack "7"},Edge {edge_source = pack "4", edge_target = pack "4", edge_weight = 1.0, edge_id = pack "8"},Edge {edge_source = pack "4", edge_target = pack "5", edge_weight = 1.0, edge_id = pack "9"},Edge {edge_source = pack "5", edge_target = pack "5", edge_weight = 1.0, edge_id = pack "10"},Edge {edge_source = pack "6", edge_target = pack "6", edge_weight = 1.0, edge_id = pack "11"},Edge {edge_source = pack "7", edge_target = pack "6", edge_weight = 1.0, edge_id = pack "12"},Edge {edge_source = pack "7", edge_target = pack "7", edge_weight = 1.0, edge_id = pack "13"},Edge {edge_source = pack "8", edge_target = pack "6", edge_weight = 1.0, edge_id = pack "14"},Edge {edge_source = pack "8", edge_target = pack "7", edge_weight = 1.0, edge_id = pack "15"},Edge {edge_source = pack "8", edge_target = pack "8", edge_weight = 1.0, edge_id = pack "16"}], _graph_metadata = Nothing}
-----------------------------------------------------------
-- V3 Gargantext Version
......@@ -160,29 +148,6 @@ data GraphV3 = GraphV3 { go_links :: [EdgeV3]
deriving (Show, Generic)
$(deriveJSON (unPrefix "go_") ''GraphV3)
----------------------------------------------------------
-- | From data to Graph
-- FIXME: distance should not be a map since we just "toList" it (same as cLouvain)
data2graph :: [(Text, Int)] -> Map (Int, Int) Int
-> Map (Int, Int) Double
-> [LouvainNode]
-> Graph
data2graph labels coocs distance partitions = Graph nodes edges Nothing
where
community_id_by_node_id = M.fromList [ (n, c) | LouvainNode n c <- partitions ]
nodes = [ Node { node_size = maybe 0 identity (M.lookup (n,n) coocs)
, node_type = Terms -- or Unknown
, node_id = cs (show n)
, node_label = l
, node_attributes =
Attributes { clust_default = maybe 0 identity
(M.lookup n community_id_by_node_id) } }
| (l, n) <- labels ]
edges = [ Edge { edge_source = cs (show s)
, edge_target = cs (show t)
, edge_weight = w
, edge_id = cs (show i) }
| (i, ((s,t), w)) <- zip ([0..]::[Integer]) (M.toList distance) ]
-----------------------------------------------------------
-----------------------------------------------------------
......
{-|
Module : Gargantext.Viz.Graph.Tools
Description : Tools to build Graph
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Viz.Graph.Tools
where
import Data.Graph.Clustering.Louvain.CplusPlus (LouvainNode(..))
import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
import Data.Map (Map)
import Data.Text (Text)
import Gargantext.Prelude
import Gargantext.Viz.Graph (Graph(..))
import Gargantext.Viz.Graph -- (Graph(..))
import Gargantext.Viz.Graph.Bridgeness (bridgeness)
import Gargantext.Viz.Graph.Distances.Matrice (measureConditional)
import Gargantext.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map)
import qualified Data.Map as Map
cooc2graph :: (Map (Text, Text) Int) -> IO Graph
cooc2graph myCooc = do
let (ti, _) = createIndices myCooc
myCooc4 = toIndex ti myCooc
matCooc = map2mat (0) (Map.size ti) myCooc4
distanceMat = measureConditional matCooc
distanceMap = Map.map (\_ -> 1) $ Map.filter (>0) $ mat2map distanceMat
partitions <- case Map.size distanceMap > 0 of
True -> cLouvain distanceMap
False -> panic "Text.Flow: DistanceMap is empty"
let distanceMap' = bridgeness 300 partitions distanceMap
pure $ data2graph (Map.toList ti) myCooc4 distanceMap' partitions
----------------------------------------------------------
-- | From data to Graph
-- FIXME: distance should not be a map since we just "toList" it (same as cLouvain)
data2graph :: [(Text, Int)] -> Map (Int, Int) Int
-> Map (Int, Int) Double
-> [LouvainNode]
-> Graph
data2graph labels coocs distance partitions = Graph nodes edges Nothing
where
community_id_by_node_id = Map.fromList [ (n, c) | LouvainNode n c <- partitions ]
nodes = [ Node { node_size = maybe 0 identity (Map.lookup (n,n) coocs)
, node_type = Terms -- or Unknown
, node_id = cs (show n)
, node_label = l
, node_attributes =
Attributes { clust_default = maybe 0 identity
(Map.lookup n community_id_by_node_id) } }
| (l, n) <- labels ]
edges = [ Edge { edge_source = cs (show s)
, edge_target = cs (show t)
, edge_weight = w
, edge_id = cs (show i) }
| (i, ((s,t), w)) <- zip ([0..]::[Integer]) (Map.toList distance) ]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment