Commit 8fab2571 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

[graph] add ngrams to graph

This is not the optimal solution since it duplicates so much
data. Best would be to add "children" in the node directly.
parent ee353325
Pipeline #1993 failed with stage
in 10 minutes and 24 seconds
......@@ -7,6 +7,9 @@ Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
A Node Story is a Map between NodeId and an Archive (with state,
version and history) for that node.
TODO:
- remove
- filter
......
......@@ -16,6 +16,7 @@ module Gargantext.Core.Viz.Graph
where
import Data.ByteString.Lazy as DBL (readFile, writeFile)
import Data.HashMap.Strict (HashMap)
import Data.Text (pack)
import GHC.IO (FilePath)
......@@ -23,10 +24,11 @@ import qualified Data.Aeson as DA
import qualified Data.Text as T
import qualified Text.Read as T
import Gargantext.API.Ngrams.Types (NgramsTerm, NgramsRepoElement)
import Gargantext.Core.Methods.Distances (GraphMetric)
import Gargantext.Core.Types (ListId)
import Gargantext.Database.Admin.Types.Hyperdata.Prelude
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Core.Methods.Distances (GraphMetric)
import Gargantext.Prelude
......@@ -113,6 +115,7 @@ makeLenses ''GraphMetadata
data Graph = Graph { _graph_nodes :: [Node]
, _graph_edges :: [Edge]
, _graph_metadata :: Maybe GraphMetadata
, _graph_ngrams :: Maybe (HashMap NgramsTerm NgramsRepoElement)
}
deriving (Show, Generic)
$(deriveJSON (unPrefix "_graph_") ''Graph)
......@@ -126,7 +129,7 @@ instance Arbitrary Graph where
arbitrary = elements $ [defaultGraph]
defaultGraph :: Graph
defaultGraph = Graph {_graph_nodes = [Node {node_x_coord=0, node_y_coord=0, node_size = 4, node_type = Terms, node_id = pack "0", node_label = pack "animal", node_attributes = Attributes {clust_default = 0}},Node {node_x_coord=0, node_y_coord=0, node_size = 3, node_type = Terms, node_id = pack "1", node_label = pack "bird", node_attributes = Attributes {clust_default = 0}},Node {node_x_coord=0, node_y_coord=0, node_size = 2, node_type = Terms, node_id = pack "2", node_label = pack "boy", node_attributes = Attributes {clust_default = 1}},Node {node_x_coord=0, node_y_coord=0, node_size = 2, node_type = Terms, node_id = pack "3", node_label = pack "dog", node_attributes = Attributes {clust_default = 0}},Node {node_x_coord=0, node_y_coord=0, node_size = 2, node_type = Terms, node_id = pack "4", node_label = pack "girl", node_attributes = Attributes {clust_default = 1}},Node {node_x_coord=0, node_y_coord=0, node_size = 4, node_type = Terms, node_id = pack "5", node_label = pack "human body", node_attributes = Attributes {clust_default = 1}},Node {node_x_coord=0, node_y_coord=0, node_size = 3, node_type = Terms, node_id = pack "6", node_label = pack "object", node_attributes = Attributes {clust_default = 2}},Node {node_x_coord=0, node_y_coord=0, node_size = 2, node_type = Terms, node_id = pack "7", node_label = pack "pen", node_attributes = Attributes {clust_default = 2}},Node {node_x_coord=0, node_y_coord=0, node_size = 2, node_type = Terms, node_id = pack "8", node_label = pack "table", node_attributes = Attributes {clust_default = 2}}], _graph_edges = [Edge {edge_source = pack "0", edge_target = pack "0", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "0"},Edge {edge_source = pack "1", edge_target = pack "0", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "1"},Edge {edge_source = pack "1", edge_target = pack "1", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "2"},Edge {edge_source = pack "2", edge_target = pack "2", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "3"},Edge {edge_source = pack "2", edge_target = pack "5", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "4"},Edge {edge_source = pack "3", edge_target = pack "0", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "5"},Edge {edge_source = pack "3", edge_target = pack "1", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "6"},Edge {edge_source = pack "3", edge_target = pack "3", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "7"},Edge {edge_source = pack "4", edge_target = pack "4", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "8"},Edge {edge_source = pack "4", edge_target = pack "5", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "9"},Edge {edge_source = pack "5", edge_target = pack "5", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "10"},Edge {edge_source = pack "6", edge_target = pack "6", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "11"},Edge {edge_source = pack "7", edge_target = pack "6", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "12"},Edge {edge_source = pack "7", edge_target = pack "7", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "13"},Edge {edge_source = pack "8", edge_target = pack "6", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "14"},Edge {edge_source = pack "8", edge_target = pack "7", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "15"},Edge {edge_source = pack "8", edge_target = pack "8", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "16"}], _graph_metadata = Nothing}
defaultGraph = Graph {_graph_nodes = [Node {node_x_coord=0, node_y_coord=0, node_size = 4, node_type = Terms, node_id = pack "0", node_label = pack "animal", node_attributes = Attributes {clust_default = 0}},Node {node_x_coord=0, node_y_coord=0, node_size = 3, node_type = Terms, node_id = pack "1", node_label = pack "bird", node_attributes = Attributes {clust_default = 0}},Node {node_x_coord=0, node_y_coord=0, node_size = 2, node_type = Terms, node_id = pack "2", node_label = pack "boy", node_attributes = Attributes {clust_default = 1}},Node {node_x_coord=0, node_y_coord=0, node_size = 2, node_type = Terms, node_id = pack "3", node_label = pack "dog", node_attributes = Attributes {clust_default = 0}},Node {node_x_coord=0, node_y_coord=0, node_size = 2, node_type = Terms, node_id = pack "4", node_label = pack "girl", node_attributes = Attributes {clust_default = 1}},Node {node_x_coord=0, node_y_coord=0, node_size = 4, node_type = Terms, node_id = pack "5", node_label = pack "human body", node_attributes = Attributes {clust_default = 1}},Node {node_x_coord=0, node_y_coord=0, node_size = 3, node_type = Terms, node_id = pack "6", node_label = pack "object", node_attributes = Attributes {clust_default = 2}},Node {node_x_coord=0, node_y_coord=0, node_size = 2, node_type = Terms, node_id = pack "7", node_label = pack "pen", node_attributes = Attributes {clust_default = 2}},Node {node_x_coord=0, node_y_coord=0, node_size = 2, node_type = Terms, node_id = pack "8", node_label = pack "table", node_attributes = Attributes {clust_default = 2}}], _graph_edges = [Edge {edge_source = pack "0", edge_target = pack "0", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "0"},Edge {edge_source = pack "1", edge_target = pack "0", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "1"},Edge {edge_source = pack "1", edge_target = pack "1", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "2"},Edge {edge_source = pack "2", edge_target = pack "2", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "3"},Edge {edge_source = pack "2", edge_target = pack "5", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "4"},Edge {edge_source = pack "3", edge_target = pack "0", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "5"},Edge {edge_source = pack "3", edge_target = pack "1", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "6"},Edge {edge_source = pack "3", edge_target = pack "3", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "7"},Edge {edge_source = pack "4", edge_target = pack "4", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "8"},Edge {edge_source = pack "4", edge_target = pack "5", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "9"},Edge {edge_source = pack "5", edge_target = pack "5", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "10"},Edge {edge_source = pack "6", edge_target = pack "6", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "11"},Edge {edge_source = pack "7", edge_target = pack "6", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "12"},Edge {edge_source = pack "7", edge_target = pack "7", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "13"},Edge {edge_source = pack "8", edge_target = pack "6", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "14"},Edge {edge_source = pack "8", edge_target = pack "7", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "15"},Edge {edge_source = pack "8", edge_target = pack "8", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "16"}], _graph_metadata = Nothing, _graph_ngrams = Nothing}
-----------------------------------------------------------
......@@ -211,7 +214,7 @@ instance FromField HyperdataGraphAPI
-----------------------------------------------------------
graphV3ToGraph :: GraphV3 -> Graph
graphV3ToGraph (GraphV3 links nodes) = Graph (map nodeV32node nodes) (zipWith linkV32edge [1..] links) Nothing
graphV3ToGraph (GraphV3 links nodes) = Graph (map nodeV32node nodes) (zipWith linkV32edge [1..] links) Nothing Nothing
where
nodeV32node :: NodeV3 -> Node
nodeV32node (NodeV3 no_id' (AttributesV3 cl') no_s' no_lb')
......
......@@ -33,6 +33,7 @@ import Gargantext.Core.Viz.Graph
import Gargantext.Core.Viz.Graph.GEXF ()
import Gargantext.Core.Viz.Graph.Tools -- (cooc2graph)
import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsOnlyUser)
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Node (mkNodeWithParent)
import Gargantext.Database.Admin.Config
import Gargantext.Database.Admin.Types.Node
......@@ -78,7 +79,11 @@ graphAPI u n = getGraph u n
:<|> graphVersionsAPI u n
------------------------------------------------------------------------
getGraph :: UserId -> NodeId -> GargNoServer HyperdataGraphAPI
--getGraph :: UserId -> NodeId -> GargServer HyperdataGraphAPI
getGraph :: FlowCmdM env err m
=> UserId
-> NodeId
-> m HyperdataGraphAPI
getGraph _uId nId = do
nodeGraph <- getNodeWith nId (Proxy :: Proxy HyperdataGraph)
......@@ -109,7 +114,12 @@ getGraph _uId nId = do
HyperdataGraphAPI graph' camera
recomputeGraph :: UserId -> NodeId -> Maybe GraphMetric -> GargNoServer Graph
--recomputeGraph :: UserId -> NodeId -> Maybe GraphMetric -> GargNoServer Graph
recomputeGraph :: FlowCmdM env err m
=> UserId
-> NodeId
-> Maybe GraphMetric
-> m Graph
recomputeGraph _uId nId maybeDistance = do
nodeGraph <- getNodeWith nId (Proxy :: Proxy HyperdataGraph)
let
......@@ -122,7 +132,7 @@ recomputeGraph _uId nId maybeDistance = do
_ -> maybeDistance
let
cId = maybe (panic "[G.V.G.API.recomputeGraph] Node has no parent")
cId = maybe (panic "[G.C.V.G.API.recomputeGraph] Node has no parent")
identity
$ nodeGraph ^. node_parent_id
similarity = case graphMetric of
......@@ -151,12 +161,18 @@ recomputeGraph _uId nId maybeDistance = do
-- TODO use Database Monad only here ?
computeGraph :: HasNodeError err
--computeGraph :: HasNodeError err
-- => CorpusId
-- -> Distance
-- -> NgramsType
-- -> NodeListStory
-- -> Cmd err Graph
computeGraph :: FlowCmdM env err m
=> CorpusId
-> Distance
-> NgramsType
-> NodeListStory
-> Cmd err Graph
-> m Graph
computeGraph cId d nt repo = do
lId <- defaultList cId
lIds <- selectNodesWithUsername NodeList userMaster
......@@ -172,9 +188,11 @@ computeGraph cId d nt repo = do
-- printDebug "myCooc" myCooc
-- saveAsFileDebug "debug/my-cooc" myCooc
listNgrams <- getListNgrams [lId] NgramsTerms
graph <- liftBase $ cooc2graphWith Spinglass d 0 myCooc
-- saveAsFileDebug "debug/graph" graph
pure graph
pure $ set graph_ngrams (Just listNgrams) graph
defaultGraphMetadata :: HasNodeError err
......@@ -214,10 +232,15 @@ graphAsync u n =
JobFunction (\_ log' -> graphRecompute u n (liftBase . log'))
graphRecompute :: UserId
--graphRecompute :: UserId
-- -> NodeId
-- -> (JobLog -> GargNoServer ())
-- -> GargNoServer JobLog
graphRecompute :: FlowCmdM env err m
=> UserId
-> NodeId
-> (JobLog -> GargNoServer ())
-> GargNoServer JobLog
-> (JobLog -> m ())
-> m JobLog
graphRecompute u n logStatus = do
logStatus JobLog { _scst_succeeded = Just 0
, _scst_failed = Just 0
......@@ -274,7 +297,11 @@ graphVersions n nId = do
pure $ GraphVersions { gv_graph = listVersion
, gv_repo = v }
recomputeVersions :: UserId -> NodeId -> GargNoServer Graph
--recomputeVersions :: UserId -> NodeId -> GargNoServer Graph
recomputeVersions :: FlowCmdM env err m
=> UserId
-> NodeId
-> m Graph
recomputeVersions uId nId = recomputeGraph uId nId Nothing
------------------------------------------------------------
......@@ -300,9 +327,13 @@ graphClone uId pId (HyperdataGraphAPI { _hyperdataAPIGraph = graph
pure nId
------------------------------------------------------------
getGraphGexf :: UserId
--getGraphGexf :: UserId
-- -> NodeId
-- -> GargNoServer (Headers '[Servant.Header "Content-Disposition" Text] Graph)
getGraphGexf :: FlowCmdM env err m
=> UserId
-> NodeId
-> GargNoServer (Headers '[Servant.Header "Content-Disposition" Text] Graph)
-> m (Headers '[Servant.Header "Content-Disposition" Text] Graph)
getGraphGexf uId nId = do
HyperdataGraphAPI { _hyperdataAPIGraph = graph } <- getGraph uId nId
pure $ addHeader "attachment; filename=graph.gexf" graph
......
......@@ -205,7 +205,10 @@ data2graph :: ToComId a
-> Map (Int, Int) Double
-> [a]
-> Graph
data2graph labels coocs bridge conf partitions = Graph nodes edges Nothing
data2graph labels coocs bridge conf partitions = Graph { _graph_nodes = nodes
, _graph_edges = edges
, _graph_metadata = Nothing
, _graph_ngrams = Nothing }
where
community_id_by_node_id = Map.fromList $ map nodeId2comId partitions
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment