Commit 69e7c039 authored by Romain Loth

WIP making facets work in bipartite case + further factorizing

Facets (i.e. clusters and other searchable/colorable attributes) should now work in the bipartite case too, but it meant factorizing facetsBinning() and updateValueFacets() out of the code from commit 71507331
parent d06a320b
@@ -21,11 +21,17 @@ This will still evolve but the main steps for any graph initialization messily u

 #### About source data
-- doc/sem typing: follows the node property "`category`"
+- doc/sem typing: follows the node property "`type`" or, if absent, "`category`"
   - if the category name is "document" => catSoc (type 0)
   - if the category name contains the str "term" => catSem (type 1)
-- coloration: "`age`" "`growth_rate`" + any attribute of type float or int
-- clustering: "`cluster_index`" or a name listed in `TW.nodeClusAtt`
+- `somenode.attributes`: the `attributes` property is always an object
+  - any attribute listed in `sourcenode.attributes` will be indexed if the `TW.scanClusters` flag is true
+  - the mapping from attribute values to matching nodes is in `TW.Clusters.aType.anAttr.aValue.map` (see the sketch after this hunk)
+- coloration: "`age`" "`growth_rate`" + any attribute of type float or int
+- clustering: "`cluster_index`" or a name listed in `TW.nodeClusAtt`
+- vocabulary: (in progress) any attribute of type string where the number of distinct values is < `TW.somesettings`

 ## User interaction mechanisms
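For orientation, here is a minimal sketch of the faceting index that the bullets above describe, as returned by the new `facetsBinning()`. The type name `Document`, the attribute names and the node ids are illustrative placeholders, and a `TW` global is assumed as in the rest of the codebase:

```js
// Hypothetical illustration of the shape of TW.Clusters after parsing:
// TW.Clusters[nodeType][attrName] is an array of value-class objects,
// each holding an inverted index (value => matching node ids).
TW.Clusters = {
  'Document': {                      // a node type (cf. catSoc/catSem typing)
    'cluster_index': [               // few distinct values => discrete facets
      { labl: 'Document||cluster_index||3', val: '3', nids: ['n1', 'n8'] }
    ],
    'age': [                         // many distinct values => binned facets
      { labl: 'Document||age||[0 ; 2.5]', range: [0, 2.5], nids: ['n2', 'n5'] }
    ]
  }
}
```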
@@ -262,6 +262,175 @@ function sortNodeTypes(observedTypesDict) {
   return {'categories': observedTypes, 'lookup_dict': catDict}
 }

+// sorting out n.attributes + binning them if too many distinct values
+// --------------------------------------------------------------------
+// @arg valuesIdx:
+//        a census of the present attribute values, with a 4+ tier structure:
+//          by nodetype
+//            => by attribute
+//              => {vals: [allpossiblevalues...],
+//                  map:  {eachvalue:  [matchingnodeids],
+//                         eachvalue2: [matchingnodeids]...}}
+//        NB vals and map are both useful and complementary
+function facetsBinning (valuesIdx, Atts_2_Exclude) {
+  let facetIdx = {}
+
+  if (TW.debugFlags.logFacets) {
+    console.warn("valuesIdx", valuesIdx)
+    console.log('facetsBinning: begin TW.Clusters')
+    var classvalues_deb = performance.now()
+  }
+
+  // var gotClusters = false
+  // for (var nodecat in valuesIdx) {
+  //   gotClusters = gotClusters || (valuesIdx[nodecat]['cluster_index'] || valuesIdx[nodecat][TW.nodeClusAtt])
+  // }
+
+  // all scanned attributes get an inverted index
+  for (var cat in valuesIdx) {
+    if (!facetIdx[cat]) facetIdx[cat] = {}
+
+    for (var at in valuesIdx[cat]) {
+      // console.log(`======= ${cat}::${at} =======`)
+
+      // skip non-numeric or already done
+      if (Atts_2_Exclude[at] || at == "clust_default") {
+        continue
+      }
+
+      // array of valueclass/interval/bin objects
+      facetIdx[cat][at] = []
+
+      // if there are few enough distinct values, no need to binify
+      if (Object.keys(valuesIdx[cat][at].map).length <= TW.maxDiscreteValues) {
+        for (var pval in valuesIdx[cat][at].map) {
+          facetIdx[cat][at].push({
+            'labl': `${cat}||${at}||${pval}`,
+            'val': pval,
+            // val2ids
+            'nids': valuesIdx[cat][at].map[pval]
+          })
+        }
+      }
+      // otherwise binify
+      else {
+        var len = valuesIdx[cat][at].vals.length
+
+        // sort out vals
+        valuesIdx[cat][at].vals.sort(function (a,b) {
+          return Number(a)-Number(b)
+        })
+
+        // (enhanced intervalsInventory)
+        // => creates bins, bin labels and an inverted index per bin
+        var legendRefTicks = []
+
+        // how many bins for this attribute ?
+        var nBins = 3
+        if (TW.customLegendsBins && TW.customLegendsBins[at]) {
+          nBins = TW.customLegendsBins[at]
+        }
+        else if (TW.legendsBins) {
+          nBins = TW.legendsBins
+        }
+
+        // create tick thresholds
+        for (var l=0 ; l < nBins ; l++) {
+          let nthVal = Math.floor(len * l / nBins)
+          legendRefTicks.push(valuesIdx[cat][at].vals[nthVal])
+        }
+
+        if (TW.debugFlags.logFacets) console.debug("intervals for", at, legendRefTicks)
+
+        var nTicks = legendRefTicks.length
+        var sortedDistinctVals = Object.keys(valuesIdx[cat][at].map).sort(function(a,b){return Number(a)-Number(b)})
+        var nDistinctVals = sortedDistinctVals.length
+        var lastCursor = 0
+
+        // create ticks objects with retrieved full info
+        for (var l in legendRefTicks) {
+          l = Number(l)
+          let lowThres = Number(legendRefTicks[l])
+          let hiThres = null
+          if (l < nTicks-1) {
+            hiThres = Number(legendRefTicks[l+1])
+          }
+          else {
+            hiThres = Infinity
+          }
+
+          var newTick = {
+            'labl':'',
+            'nids':[],
+            'range':[lowThres, hiThres]
+          }
+
+          // 1) union of idmaps
+          for (var k = lastCursor ; k < nDistinctVals ; k++) {
+            var val = Number(sortedDistinctVals[k])
+            if (val < lowThres) {
+              console.error("mixup !!", val, lowThres, at)
+            }
+            else if ((val >= lowThres) && (val < hiThres)) {
+              if (!valuesIdx[cat][at].map[val]) {
+                console.error("unscanned val2ids mapping", val, at)
+              }
+              else {
+                // eg bin2ids map for 2 <= val < 3
+                //    will be U(val2ids maps for 2, 2.1, 2.2,...,2.9)
+                for (var j in valuesIdx[cat][at].map[val]) {
+                  newTick.nids.push(valuesIdx[cat][at].map[val][j])
+                }
+              }
+            }
+            // we're over the interval's upper bound:
+            // just remember where we were for the next interval
+            else if (val >= hiThres) {
+              lastCursor = k
+              break
+            }
+          }
+
+          // create label
+          // round %.6f for display
+          var labLowThres = Math.round(lowThres*1000000)/1000000
+          var labHiThres = (l==nTicks-1)? '+ ∞' : Math.round(hiThres*1000000)/1000000
+          newTick.labl = `${cat}||${at}||[${labLowThres} ; ${labHiThres}]`

+          // save these bins as the cluster index (aka faceting)
+          if (newTick.nids.length) {
+            facetIdx[cat][at].push(newTick)
+          }
+        }
+      }
+    }
+
+    // 'clust_default' is an alias to the user-defined default clustering
+    if (TW.nodeClusAtt != undefined
+        && facetIdx[cat][TW.nodeClusAtt]      // <= if found in data
+        && !facetIdx[cat]['clust_default']    // <= and if an attr named 'clust_default' was not already in data
+       ) {
+      facetIdx[cat]['clust_default'] = facetIdx[cat][TW.nodeClusAtt]
+    }
+  }
+
+  if (TW.debugFlags.logFacets) {
+    var classvalues_fin = performance.now()
+    console.log('end TW.Clusters, own time:', classvalues_fin-classvalues_deb)
+  }
+
+  return facetIdx
+}
+
 // Level-00
 // for {1,2}partite graphs
 function dictfyGexf( gexf , categories ){
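To make the binning branch above concrete, here is a small standalone sketch (sample values are made up) of how the tick thresholds are picked: the l-th of nBins ticks is the value found at position floor(len * l / nBins) of the sorted value list, i.e. an approximate quantile:

```js
// Standalone illustration of the threshold choice in facetsBinning()
// (hypothetical sample data; the real code reads valuesIdx[cat][at].vals).
var vals = [1, 2, 2, 3, 5, 8, 13, 21, 34].sort(function (a, b) {
  return Number(a) - Number(b)
});
var nBins = 3;
var legendRefTicks = [];
for (var l = 0; l < nBins; l++) {
  // index of the value opening the l-th bin
  let nthVal = Math.floor(vals.length * l / nBins);
  legendRefTicks.push(vals[nthVal]);
}
console.log(legendRefTicks);
// => [1, 3, 13]: the bins are [1;3[, [3;13[ and [13;+∞[
// each value then lands in the bin whose range contains it, and the bin
// inherits the union of the matching values' node-id lists
```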
@@ -382,7 +551,7 @@ function dictfyGexf( gexf , categories ){
       var attributes = []
       var attvalueNodes = elNode.getElementsByTagName('attvalue');
       var atts={};
-      for(k=0; k<attvalueNodes.length; k++){
+      for(var k=0; k<attvalueNodes.length; k++){
         var attvalueNode = attvalueNodes[k];
         var attr = attvalueNode.getAttribute('for');
         var val = attvalueNode.getAttribute('value');
@@ -424,25 +593,9 @@ function dictfyGexf( gexf , categories ){
           maxNodeSize= node.size;

         // console.debug("node.attributes", node.attributes)

+        // creating a faceted index from node.attributes
         if (TW.scanClusters) {
-          if (!tmpVals[node_cat]) tmpVals[node_cat]={}
-          for (var at in node.attributes) {
-            if (!tmpVals[node_cat][at]) tmpVals[node_cat][at]={'vals':[],'map':{}}
-            let someval = Number(node.attributes[at])
-            // Identifying the attribute datatype: exclude strings and objects
-            if ( isNaN(someval) ) {
-              if (!Atts_2_Exclude[at]) Atts_2_Exclude[at]=true;
-            }
-            // numeric attr => build facets
-            else {
-              if (!tmpVals[node_cat][at].map[someval]) tmpVals[node_cat][at].map[someval] = []
-              tmpVals[node_cat][at].vals.push(someval)           // for ordered scale
-              tmpVals[node_cat][at].map[someval].push(node.id)   // inverted index
-            }
-          }
+          [tmpVals, Atts_2_Exclude] = updateValueFacets(tmpVals, Atts_2_Exclude, node)
         }
       } // finish nodes loop
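Both parsers now share the same two-step pattern: updateValueFacets() accumulates the per-type value census inside the node loop, then facetsBinning() turns the census into TW.Clusters. A condensed sketch of that flow, with a hypothetical node array:

```js
// Minimal sketch of the factorized faceting flow (hypothetical input data).
var tmpVals = {};          // census: type => attr => {vals, map}
var Atts_2_Exclude = {};   // attrs detected as non-numeric

var someNodes = [
  {id: 'n1', type: 'Document', attributes: {age: '3'}},
  {id: 'n2', type: 'Document', attributes: {age: '7'}}
];

for (var i in someNodes) {
  // one pass per node: fills vals + the value => node ids inverted map
  [tmpVals, Atts_2_Exclude] = updateValueFacets(tmpVals, Atts_2_Exclude, someNodes[i]);
}

// one pass at the end: discretize or binify the census into facets
TW.Clusters = facetsBinning(tmpVals, Atts_2_Exclude);
```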
@@ -461,160 +614,10 @@ function dictfyGexf( gexf , categories ){
   // ------------- /debug: for local stats ----------------

-  if (TW.debugFlags.logFacets) {
-    console.log('dictfyGexf: begin TW.Clusters')
-    var classvalues_deb = performance.now()
-  }
-
-  var gotClusters = false
-  for (var nodecat in tmpVals) {
-    gotClusters = gotClusters || (tmpVals[nodecat]['cluster_index'] || tmpVals[nodecat][TW.nodeClusAtt])
-  }
-
   // clusters and other facets => type => name => [{label,val/range,nodeids}]
-  TW.Clusters = {}
+  TW.Clusters = facetsBinning(tmpVals, Atts_2_Exclude)

-  // sorting out properties in n.attributes ==> £TODO shared function createClusterIndex() up to classvalues_fin
-  // --------------------------------------
-  // all scanned attributes get an inverted index
-  for (var cat in tmpVals) {
-    if (!TW.Clusters[cat]) TW.Clusters[cat] = {}
-    for (var at in tmpVals[cat]) {
-      // console.log(`======= ${cat}::${at} =======`)
-      var allTicks = []
-      // skip non-numeric or already done
-      if (Atts_2_Exclude[at] || at == "clust_default") {
-        continue
-      }
-      // array of valueclass/interval/bin objects
-      TW.Clusters[cat][at] = []
-      // if n possible values doesn't need binify
-      if (Object.keys(tmpVals[cat][at].map).length <= TW.maxDiscreteValues) {
-        for (var pval in tmpVals[cat][at].map) {
-          TW.Clusters[cat][at].push({
-            'labl': `${cat}||${at}||${pval}`,
-            'val': pval,
-            // val2ids
-            'nids': tmpVals[cat][at].map[pval]
-          })
-        }
-      }
-      // if binify
-      else {
-        var len = tmpVals[cat][at].vals.length
-        // sort out vals
-        tmpVals[cat][at].vals.sort(function (a,b) {
-          return Number(a)-Number(b)
-        })
-        // (enhanced intervalsInventory)
-        // => creates bin, binlabels, inverted index per bins
-        var legendRefTicks = []
-        // how many bins for this attribute ?
-        var nBins = 3
-        if (TW.customLegendsBins && TW.customLegendsBins[at]) {
-          nBins = TW.customLegendsBins[at]
-        }
-        else if (TW.legendsBins) {
-          nBins = TW.legendsBins
-        }
-        // create tick thresholds
-        for (var l=0 ; l < nBins ; l++) {
-          let nthVal = Math.floor(len * l / nBins)
-          legendRefTicks.push(tmpVals[cat][at].vals[nthVal])
-        }
-        if (TW.debugFlags.logFacets) console.debug("intervals for", at, legendRefTicks)
-        var nTicks = legendRefTicks.length
-        var sortedDistinctVals = Object.keys(tmpVals[cat][at].map).sort(function(a,b){return Number(a)-Number(b)})
-        var nDistinctVals = sortedDistinctVals.length
-        var lastCursor = 0
-        // create ticks objects with retrieved full info
-        for (var l in legendRefTicks) {
-          l = Number(l)
-          let lowThres = Number(legendRefTicks[l])
-          let hiThres = null
-          if (l < nTicks-1) {
-            hiThres = Number(legendRefTicks[l+1])
-          }
-          else {
-            hiThres = Infinity
-          }
-          var newTick = {
-            'labl':'',
-            'nids':[],
-            'range':[lowThres, hiThres]
-          }
-          // 1) union of idmaps
-          for (var k = lastCursor ; k <= nDistinctVals ; k++) {
-            var val = Number(sortedDistinctVals[k])
-            if (val < lowThres) {
-              console.error("mixup !!", val, lowThres, at)
-            }
-            else if ((val >= lowThres) && (val < hiThres)) {
-              if (!tmpVals[cat][at].map[val]) {
-                console.error("unscanned val2ids mapping", val, at)
-              }
-              else {
-                // eg bin2ids map for 2 <= val < 3
-                //    will be U(val2ids maps for 2, 2.1, 2.2,...,2.9)
-                for (var j in tmpVals[cat][at].map[val]) {
-                  newTick.nids.push(tmpVals[cat][at].map[val][j])
-                }
-              }
-            }
-            // we're over the interval upper bound
-            // we just need to remember where we were for next interval
-            else if (val >= hiThres) {
-              lastCursor = k
-              break
-            }
-          }
-          // create label
-          // round %.6f for display
-          var labLowThres = Math.round(lowThres*1000000)/1000000
-          var labHiThres = (l==nTicks-1)? '+ ∞' : Math.round(hiThres*1000000)/1000000
-          newTick.labl = `${cat}||${at}||[${labLowThres} ; ${labHiThres}]`
-          // save these bins as the cluster index (aka faceting)
-          if (newTick.nids.length) {
-            TW.Clusters[cat][at].push(newTick)
-          }
-        }
-      }
-    }
-    // 'clust_default' is an alias to the user-defined default clustering
-    if (TW.nodeClusAtt != undefined
-        && TW.Clusters[cat][TW.nodeClusAtt]      // <= if found in data
-        && !TW.Clusters[cat]['clust_default']    // <= and if an attr named 'clust_default' was not already in data
-       ) {
-      TW.Clusters[cat]['clust_default'] = TW.Clusters[cat][TW.nodeClusAtt]
-    }
-  }
-
-  if (TW.debugFlags.logFacets) {
-    var classvalues_fin = performance.now()
-    console.log('end TW.Clusters, own time:', classvalues_fin-classvalues_deb)
-  }

   //New scale for node size: now, between 2 and 5 instead [1,70]
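Both the removed inline code and the new facetsBinning() emit facet labels with the same `||` convention noted in the context comment above (`type||attribute||value-or-range`); a tiny hypothetical helper showing how such a label splits back into its parts:

```js
// Hypothetical helper (not in the commit) illustrating the label convention
// used by the facet objects: "<nodetype>||<attribute>||<value or [range]>"
function parseFacetLabel(labl) {
  var parts = labl.split('||');
  return {nodetype: parts[0], attribute: parts[1], value: parts[2]};
}

// example with a discrete facet label as built in facetsBinning()
console.log(parseFacetLabel('Document||cluster_index||3'));
// => {nodetype: 'Document', attribute: 'cluster_index', value: '3'}
```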
@@ -745,6 +748,37 @@ function updateRelations(typedRelations, edgeCateg, srcId, tgtId){
 }

+// To fill the reverse map: values => nodeids of a given type
+function updateValueFacets(facetIdx, Atts_2_Exclude, aNode){
+  if (!facetIdx[aNode.type]) facetIdx[aNode.type]={}
+
+  for (var at in aNode.attributes) {
+    if (!facetIdx[aNode.type][at]) facetIdx[aNode.type][at]={'vals':[],'map':{}}
+
+    let castVal = Number(aNode.attributes[at])
+
+    // Identifying the attribute datatype: exclude strings and objects
+    if ( isNaN(castVal) ) {
+      if (!Atts_2_Exclude[at]) Atts_2_Exclude[at]=true;
+      // TODO: this old Atts_2_Exclude strategy could be replaced:
+      //       instead of excluding, store the datatype somewhere like facetIdx[aNode.type][at].dtype
+      //       => the datatype would become a condition (no bins if not numeric, etc.)
+      //       => it would also allow indexing text values
+      //          with a "most frequent distinct values" + "others" strategy,
+      //          which would be useful (eg country, affiliation, etc.) !!!
+    }
+    // numeric attr => build facets
+    else {
+      if (!facetIdx[aNode.type][at].map[castVal]) facetIdx[aNode.type][at].map[castVal] = []
+      facetIdx[aNode.type][at].vals.push(castVal)            // for ordered scale
+      facetIdx[aNode.type][at].map[castVal].push(aNode.id)   // inverted index
+    }
+  }
+  return [facetIdx, Atts_2_Exclude]
+}
+
 // creates and updates nodes1 nodes2 and bipartiteN2D and bipartiteD2N
 // but seems useless because all info is already in each nodes.type and edge.categ
 // (especially when changeType uses a loop on all nodes anyway)
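The TODO above sketches replacing the exclusion list with a per-attribute datatype; here is a rough, entirely hypothetical variant following the comment's own naming (`dtype` on the facet entry), to show how non-numeric values could stay indexed instead of being dropped:

```js
// Hypothetical variant sketched from the TODO: record a dtype per attribute
// instead of excluding non-numeric attributes globally.
function updateValueFacetsWithDtype(facetIdx, aNode) {
  if (!facetIdx[aNode.type]) facetIdx[aNode.type] = {};
  for (var at in aNode.attributes) {
    if (!facetIdx[aNode.type][at]) {
      facetIdx[aNode.type][at] = {'vals': [], 'map': {}, 'dtype': 'num'};
    }
    var facet = facetIdx[aNode.type][at];
    var val = aNode.attributes[at];
    // demote the whole attribute to 'str' on the first non-numeric value
    if (isNaN(Number(val))) facet.dtype = 'str';
    // keep indexing either way: the dtype decides later whether to binify
    if (!facet.map[val]) facet.map[val] = [];
    facet.vals.push(val);           // ordered scale (meaningful in the 'num' case)
    facet.map[val].push(aNode.id);  // inverted index: value => node ids
  }
  return facetIdx;
}
```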
@@ -873,6 +907,10 @@ function dictfyJSON( data , categories ) {
     nodes2={}, bipartiteD2N={}, bipartiteN2D={}
   }

+  // if scanClusters, we'll also use:
+  var tmpVals = {}
+  var Atts_2_Exclude = {}
+
   for(var nid in data.nodes) {
     let n = data.nodes[nid];
     let node = {}
@@ -901,8 +939,17 @@ function dictfyJSON( data , categories ) {
       catCount[node.type]++;

       nodes[node.id] = node;

+      if (TW.debugFlags.logFacets) console.log(n.attributes)
+
+      // creating a faceted index from node.attributes
+      if (TW.scanClusters) {
+        [tmpVals, Atts_2_Exclude] = updateValueFacets(tmpVals, Atts_2_Exclude, node)
+      }
   }

+  TW.Clusters = facetsBinning(tmpVals, Atts_2_Exclude)
+
   colorList.sort(function(){ return Math.random()-0.5; });

   for (var i in nodes ){
     if (nodes[i].color=="#FFFFFF") {
@@ -816,17 +816,24 @@ function clustersBy(daclass) {
     for(var nid in NodeID_Val) {
       var newval_color = Math.round( ( Min_color+(NodeID_Val[nid]["round"]-real_min)*((Max_color-Min_color)/(real_max-real_min)) ) );
       var hex_color = rgbToHex(255, (255-newval_color) , 0)
-      TW.partialGraph.graph.nodes(nid).color = hex_color
-      TW.partialGraph.graph.nodes(nid).customAttrs.alt_color = hex_color
-      // FIXME not used ?
-      TW.partialGraph.graph.nodes(nid).customAttrs.altgrey_color = false
+      let n = TW.partialGraph.graph.nodes(nid)
+      if (n && !n.hidden) {
+        n.color = hex_color
+        n.customAttrs.alt_color = hex_color
+        // FIXME not used ?
+        n.customAttrs.altgrey_color = false
+
+        // £TODO SETTING SIZE HERE SHOULD BE OPTIONAL
+        var newval_size = Math.round( ( Min_size+(NodeID_Val[nid]["round"]-real_min)*((Max_size-Min_size)/(real_max-real_min)) ) );
+        n.size = newval_size;
+        n.label = "("+NodeID_Val[nid]["real"].toFixed(min_pow)+") "+TW.Nodes[nid].label
+      }
-      var newval_size = Math.round( ( Min_size+(NodeID_Val[nid]["round"]-real_min)*((Max_size-Min_size)/(real_max-real_min)) ) );
-      TW.partialGraph.graph.nodes(nid).size = newval_size;
       // console.log("real:"+ NodeID_Val[i]["real"] + " | newvalue: "+newval_size)
-      TW.partialGraph.graph.nodes(nid).label = "("+NodeID_Val[nid]["real"].toFixed(min_pow)+") "+TW.Nodes[nid].label
     }
     // [ / Scaling node colours(0-255) and sizes(2-7) ]
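The color and size assignments above are the same affine rescale applied to two output ranges; a standalone sketch with made-up numbers:

```js
// Standalone sketch of the affine rescale used above for colors and sizes:
// maps v from [realMin, realMax] onto [outMin, outMax].
function rescale(v, realMin, realMax, outMin, outMax) {
  return Math.round(outMin + (v - realMin) * ((outMax - outMin) / (realMax - realMin)));
}

// eg attribute values between 0 and 40 mapped to the 0-255 color channel:
var newvalColor = rescale(10, 0, 40, 0, 255);   // => 64
// red-to-yellow ramp as in clustersBy(): rgbToHex(255, 255 - newvalColor, 0)
```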
@@ -855,15 +862,18 @@ function repaintEdges() {
       }
       else {
         let e = TW.partialGraph.graph.edges(eid)
-        let src = TW.partialGraph.graph.nodes(idPair[0])
-        let tgt = TW.partialGraph.graph.nodes(idPair[1])
+        if (e) {
+          let src = TW.partialGraph.graph.nodes(idPair[0])
+          let tgt = TW.partialGraph.graph.nodes(idPair[1])
           if (src && tgt) {
             let src_color = src.customAttrs.alt_color || '#555'
             let tgt_color = tgt.customAttrs.alt_color || '#555'
             e.customAttrs.alt_rgb = sigmaTools.edgeRGB(src_color,tgt_color)
             // we don't set e.color because opacity may vary if selected or not
           }
+        }
       }
     }
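sigmaTools.edgeRGB() itself is not shown in this commit; purely as an assumption about what such a blend can do, here is a sketch that averages two 6-digit hex colors channel by channel (note the real src/tgt colors may also be short forms like '#555', which this sketch does not handle):

```js
// Hypothetical channel-wise average of two "#rrggbb" colors, illustrating
// the kind of src/tgt blend that an edgeRGB-style helper could perform.
function blendHexColors(hexA, hexB) {
  var a = parseInt(hexA.slice(1), 16);
  var b = parseInt(hexB.slice(1), 16);
  var r  = Math.round((((a >> 16) & 255) + ((b >> 16) & 255)) / 2);
  var g  = Math.round((((a >>  8) & 255) + ((b >>  8) & 255)) / 2);
  var bl = Math.round(((a & 255) + (b & 255)) / 2);
  // re-pack to "#rrggbb" with zero-padding via the 1<<24 trick
  return '#' + ((1 << 24) + (r << 16) + (g << 8) + bl).toString(16).slice(1);
}

console.log(blendHexColors('#ff0000', '#0000ff'));  // => '#800080'
```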