WIP facet legends 1/2 (samerange)

added binning mode 'samerange' + new syntax for choosing it in settings + finished replacing atts_2_exclude + possibilities for better string cluster mapping TODO: color functions per settings

WIP facet legends 1/2 (samerange)
added binning mode 'samerange' + new syntax for choosing it in settings + finished replacing atts_2_exclude + possibilities for better string cluster mapping TODO: color functions per settings
afe77960 · Romain Loth · 0c79ed7e · afe77960 · afe77960 · afe77960
Commit afe77960 authored Jun 01, 2017 by Romain Loth
Show whitespace changes
Inline Side-by-side

Showing with 259 additions and 139 deletions

developer_manual.md doc/developer_manual.md +11 -3

settings_explorerjs.js settings_explorerjs.js +28 -4

sigma.parseCustom.js tinawebJS/sigma.parseCustom.js +220 -132

No files found.
--- a/doc/developer_manual.md
+++ b/doc/developer_manual.md
@@ -43,10 +43,18 @@ This will still evolve but the main steps for any graph initialization messily u

 - `somenode.attributes`: the `attributes` property is always an object
   - any attribute listed in the sourcenode.attributes will be indexed if the TW.scanClusters flag is true
+   - data type and style of processing (for heatmap, or for classes, etc.) should be stipulated in settings
   - the mapping from attribute values to matching nodes is in TW.Clusters.aType.anAttr.aValue.map
-   - coloration:     "`age`" "`growth_rate`" + any attribute of type float or int
-   - clustering:     "`cluster_index`" ou nom figurant dans `TW.conf.nodeClusAtt`
-   - vocabulary: (en cours) any attribute of type string and where the amount of distinct values is < TW.somesettings
+
+   - finally, in the GUI, we can associate 3 types of coloration with the attributes
+    - `"gradient"` coloration
+      - available for any attribute that looks like a continuous metric
+    - `"heatmap"` coloration
+      - colors from cold to hot centered on a white "neutral" color
+      - applied for attributes stipulated in settings: eg "`age`" "`growth_rate`"
+    - `"cluster"` coloration for str or num classes like modularity_class, affiliation, etc.
+      - we use contrasted values from colorList
+      - automatically applied for "`cluster_index`" or any name in `TW.conf.nodeClusAtt`


 ## User interaction mecanisms

--- a/settings_explorerjs.js
+++ b/settings_explorerjs.js
@@ -64,6 +64,30 @@ TW.conf = (function(TW){
    'growth_rate': 12
  }

+  // facetOptions: choose here the visual result of your node attributes
+  // 3 possible coloring functions
+  //   - cluster   (contrasted colors for attributes describing *classes*)
+  //   - gradient  (uniform map from a numeric attribute to red/yellow gradient)
+  //   - heatmap   (from blue to red/brown, centered on a white "neutral" color)
+  // 2 possible binning modes
+  //   - samerange: constant intervals between each bin
+  //   - samepop:   constant cardinality inside each class (~ quantiles)
+  // Cases with no binning: if the type is not numeric or if there are at most n distinct values
+  TWConf.facetOptions = {
+
+    // attribute     |    coloring     |  number |    binning
+    //   name        |    function     | of bins |     mode
+    // --------------------------------------------------------------------
+    'numuniform'   : {'col': "gradient", 'n': 3,  'binmode': 'samerange'},
+    'numpareto'    : {'col': "gradient", 'n': 8,  'binmode': 'samepop'  },
+    'intfewvalues' : {'col': "heatmap" , 'n': 4,  'binmode': 'samerange'},
+    'countryuniform':{'col': "cluster" },
+  }
+
+  // other POSS option: display attribute value in label or not ?
+
+
+
  // default clustering attribute (<---> used for initial node colors)
  TWConf.nodeClusAtt = "modularity_class"

@@ -101,7 +125,7 @@ TW.conf = (function(TW){
  // -----------------------------
  TWConf.filterSliders = true     // show sliders for nodes/edges subsets

-  TWConf.colorsByAtt = false;     // show "Set colors" menu
+  TWConf.colorsByAtt = true;      // show "Set colors" menu

  TWConf.deselectOnclickStage = true   // click on background remove selection ?
                                       // (except when dragging)
@@ -152,7 +176,7 @@ TW.conf = (function(TW){
      // nodes
      defaultNodeColor: "#333",
      twNodeRendBorderSize: 1,           // node borders (only iff ourRendering)
-      twNodeRendBorderColor: "#eee",
+      twNodeRendBorderColor: "#222",

      // edges
      minEdgeSize: 2,                    // in fact used in tina as edge size
@@ -176,7 +200,7 @@ TW.conf = (function(TW){


      // selected nodes <=> special label
-      twSelectedColor: "node",     // "node" for a label bg like the node color,
+      twSelectedColor: "default",     // "node" for a label bg like the node color,
                                   // "default" for note-like yellow

      // not selected <=> grey
@@ -221,7 +245,7 @@ TW.conf = (function(TW){
    // show verbose console logs...
    logFetchers: false,              // ...about ajax/fetching of graph data
    logParsers: false,               // ...about parsing said data
-    logFacets: false,                // ...about parsing node attribute:value facets
+    logFacets: true,                // ...about parsing node attribute:value facets
    logSettings: false,              // ...about settings at Tina and Sigma init time
    logSelections: false
  }

--- a/tinawebJS/sigma.parseCustom.js
+++ b/tinawebJS/sigma.parseCustom.js
@@ -107,11 +107,10 @@ function gexfCheckAttributesMap (someXMLContent) {
    //   (...)


-      // THIS SEGMENT USED TO BE IN dictifyGexf
    // Census of the conversions between attr and some attr name
    var i, j, k;
-      var nodesAttributes = [];   // The list of attributes of the nodes of the graph that we build in json
-      var edgesAttributes = [];   // The list of attributes of the edges of the graph that we build in json
+    var nodesAttributes = {};
+    var edgesAttributes = {};

    // In the gexf (that is an xml), the list of xml nodes 'attributes' (note the plural 's')
    var attributesNodes = someXMLContent.getElementsByTagName('attributes');
@@ -136,7 +135,7 @@ function gexfCheckAttributesMap (someXMLContent) {
                    title:title,
                    type:type
                };
-                  nodesAttributes.push(attribute);
+                nodesAttributes[id] = attribute;

            }
        } else if(attributesNode.getAttribute('class') == 'edge'){
@@ -153,16 +152,16 @@ function gexfCheckAttributesMap (someXMLContent) {
                    title:title,
                    type:type
                };
-                  edgesAttributes.push(attribute);
+                edgesAttributes[id] = attribute;

            }
        }
-      } //out: nodesAttributes Array
+    }

-      // console.debug('>>> tr: nodesAttributes', nodesAttributes)
-      // console.debug('>>> tr: edgesAttributes', edgesAttributes)
+    // console.debug('gexf declared nodesAttributes:', nodesAttributes)
+    // console.debug('gexf declared edgesAttributes:', edgesAttributes)

-      return {nAttrs: nodesAttributes, eAttrs: edgesAttributes}
+    return {nodeAttrs: nodesAttributes, edgeAttrs: edgesAttributes}
 }

 // Level-00
@@ -189,16 +188,9 @@ function scanGexf(gexfContent) {

                // some attrs are gexf-local indices refering to an <attributes> declaration
                // so if it matches declared we translate their integer in title
-                // FIXME use a dict by id in gexfCheckAttributesMap for loop rm
-                if(Number.isInteger(Number(attr))) {
-                  // mini loop inside declared node attrs (eg substitute 0 for 'centrality')
-                  for (var l=0;l<declaredAttrs.nAttrs.length;l++) {
-                    let declared = declaredAttrs.nAttrs[l]
-                    if (declared.id == attr) {
-                      attr = declared.title
-                    }
-                  }
-                }
+                if (! isUndef(declaredAttrs.nodeAttrs[attr]))
+                  attr = declaredAttrs.nodeAttrs[attr].title
+
                // console.log('attr', attr)

                // THIS WILL BECOME catDict (if ncats == 1 => monopart)
@@ -272,18 +264,21 @@ function sortNodeTypes(observedTypesDict) {
 //                => by attribute
 //                       => {vals:[allpossiblevalues...],
 //                           map:{eachvalue:[matchingnodeids],
-//                                eachvalue2:[matchingnodeids]...}
+//                                eachvalue2:[matchingnodeids]...},
+//                           vtypes:{vstr: nbstringvaluesforthisattr,
+//                                   vnum: nbnumericvaluesforthisattr}
+//                           }

 // NB vals and map are both useful and complementary

-function facetsBinning (valuesIdx, Atts_2_Exclude) {
+function facetsBinning (valuesIdx) {

-  console.warn("valuesIdx", valuesIdx)
+  console.debug("facetsBinning: valuesIdx", valuesIdx)

  let facetIdx = {}

  if (TW.conf.debug.logFacets) {
-    console.log('dictfyGexf: begin TW.Clusters')
+    console.log('facetsBinning: begin TW.Clusters')
    var classvalues_deb = performance.now()
  }

@@ -299,28 +294,56 @@ function facetsBinning (valuesIdx, Atts_2_Exclude) {
    for (var at in valuesIdx[cat]) {
      // console.log(`======= ${cat}::${at} =======`)

-      // skip non-numeric or already done
-      // £TODO finish changes to Atts_2_Exclude from 69e7c039
-      if (Atts_2_Exclude[at] || at == "clust_default") {
-        continue
+      // new array of valueclass/interval/bin objects
+      facetIdx[cat][at] = []
+
+      // POSSible: auto-detect if vtypes ==> condition
+
+
+
+      // diagnosed data type replacing previous Atts_2_Exclude (more polyvalent)
+      let dataType = 'num'
+      if (valuesIdx[cat][at].vtypes.vnum = 0) {
+        // FIXME the test above uses '=' (an assignment: always falsy, and it resets vnum to 0); it should compare, ideally vnum << vstr rather than vnum == 0
+        dataType = 'str'
      }

-      // array of valueclass/interval/bin objects
-      facetIdx[cat][at] = []
+      // default options
+      let maxDiscreteValues = TW.conf.maxDiscreteValues
+      let nBins = TW.conf.legendsBins
+      let binningMode = 'samepop'
+
+      // read stipulated options in user settings
+      // ----------------------------------------
+      if (TW.conf.facetOptions[at]) {
+        binningMode = TW.conf.facetOptions[at]["binmode"]
+        nBins = TW.conf.facetOptions[at]["n"]
+        maxDiscreteValues = nBins

-      // if n possible values doesn't need binify
-      if (Object.keys(valuesIdx[cat][at].map).length <= TW.conf.maxDiscreteValues) {
+        if (nBins == 0) {
+          console.warn(`Can't use user-specified number of bins value 0 for attribute ${at}, using TW.conf.legendsBins ${TW.conf.legendsBins} instead`)
+          nBins = TW.conf.legendsBins
+        }
+      }
+
+      // if small number of distinct values doesn't need binify
+      if (Object.keys(valuesIdx[cat][at].map).length <= maxDiscreteValues) {
        for (var pval in valuesIdx[cat][at].map) {
+
+          var idList = valuesIdx[cat][at].map[pval]
          facetIdx[cat][at].push({
-            'labl': `${cat}||${at}||${pval}`,
+            // simple label
+            'labl': `${pval} (${idList.length})`,
+            // verbose label
+            'fullLabl': `${cat}||${at}||${pval} (${idList.length})`,
            'val': pval,
            // val2ids
-            'nids': valuesIdx[cat][at].map[pval]
+            'nids': idList
          })
        }
      }
      // if binify
-      else {
+      else if (dataType == 'num') {
        var len = valuesIdx[cat][at].vals.length

        // sort out vals
@@ -332,26 +355,39 @@ function facetsBinning (valuesIdx, Atts_2_Exclude) {
        // => creates bin, binlabels, inverted index per bins
        var legendRefTicks = []

-        // how many bins for this attribute ?
-        var nBins = 3
-        if (TW.conf.customLegendsBins && TW.conf.customLegendsBins[at]) {
-          nBins = TW.conf.customLegendsBins[at]
+        var lastUpperBound = null
+
+        if (binningMode == 'samerange') {
+          // minimax
+          let vMin = valuesIdx[cat][at].vals[0]
+          let vMax = valuesIdx[cat][at].vals.slice(-1)[0]
+          lastUpperBound = vMax
+
+          // same interval each time
+          let step = (vMax - vMin) / nBins
+
+          for (var k=vMin ; k < vMax ; k += step ){
+            legendRefTicks.push(k)
          }
-        else if (TW.conf.legendsBins) {
-          nBins = TW.conf.legendsBins
+          // NB these ticks are *minimums* so we stop one step *before* vMax
+          //    and simply include it in last interval
        }

+        else if (binningMode == 'samepop') {
          // create tick thresholds
          for (var l=0 ; l < nBins ; l++) {
            let nthVal = Math.floor(len * l / nBins)
            legendRefTicks.push(valuesIdx[cat][at].vals[nthVal])
          }
+        }

-        if (TW.conf.debug.logFacets)    console.debug("intervals for", at, legendRefTicks)
+        if (TW.conf.debug.logFacets)    console.debug("intervals for", at, legendRefTicks, "(list of minima)")

-        var nTicks = legendRefTicks.length
+        // the unique-d array will allow us to group ranges
        var sortedDistinctVals = Object.keys(valuesIdx[cat][at].map).sort(function(a,b){return Number(a)-Number(b)})

+        var nTicks = legendRefTicks.length
+
        var nDistinctVals = sortedDistinctVals.length
        var lastCursor = 0

@@ -361,15 +397,21 @@ function facetsBinning (valuesIdx, Atts_2_Exclude) {

          let lowThres = Number(legendRefTicks[l])
          let hiThres = null
+
          if (l < nTicks-1) {
            hiThres = Number(legendRefTicks[l+1])
          }
-          else {
+          else if (binningMode == 'samepop') {
            hiThres = Infinity
          }
+          else {
+            // in 'samerange' mode
+            hiThres = lastUpperBound
+          }

          var newTick = {
            'labl':'',
+            'fullLabl':'',
            'nids':[],
            'range':[lowThres, hiThres]
          }
@@ -393,24 +435,52 @@ function facetsBinning (valuesIdx, Atts_2_Exclude) {
              }
            }
            // we're over the interval upper bound
-            // we just need to remember where we were for next interval
            else if (val >= hiThres) {
+
+              // normal case
+              if (binningMode != 'samerange' || l != nTicks-1 ) {
+                // we just need to remember where we were for next interval
                lastCursor = k
                break
              }
+
+              // samerange && last interval case: inclusive last interval upper bound
+              else {
+                for (var j in valuesIdx[cat][at].map[val]) {
+                  newTick.nids.push(valuesIdx[cat][at].map[val][j])
+                }
+              }
+
+            }
          }

          // create label
-          // round %.6f for display
-          var labLowThres = Math.round(lowThres*1000000)/1000000
-          var labHiThres = (l==nTicks-1)? '+ ∞' : Math.round(hiThres*1000000)/1000000
-          newTick.labl = `${cat}||${at}||[${labLowThres} ; ${labHiThres}]`
+          // round %.3f for display
+          var labLowThres = Math.round(lowThres*1000)/1000
+          var labHiThres = ''
+          var bracket = '['
+
+          if (l < nTicks-1) {
+            labHiThres = Math.round(hiThres*1000)/1000
+          }
+          // last bound is +Inf if samepop
+          else if (binningMode == 'samepop') {
+            labHiThres = '+ ∞'
+          }
+          else if (binningMode == 'samerange') {
+            labHiThres = Math.round(hiThres*1000)/1000
+            bracket = ']'
+          }
+
+          newTick.labl = `[${labLowThres} ; ${labHiThres}${bracket} (${newTick.nids.length})`
+          newTick.fullLabl = `${cat}||${at}||[${labLowThres} ; ${labHiThres}${bracket} (${newTick.nids.length})`

          // save these bins as the cluster index (aka faceting)
          if (newTick.nids.length) {
            facetIdx[cat][at].push(newTick)
          }
        }
+
      }
    }

@@ -453,9 +523,10 @@ function dictfyGexf( gexf , categories ){
    }

    var declaredAtts = gexfCheckAttributesMap(gexf)
-    var nodesAttributes = declaredAtts.nAttrs
+    var nodesAttributes = declaredAtts.nodeAttrs
    // var edgesAttributes = declaredAtts.eAttrs

+
    var elsNodes = gexf.getElementsByTagName('nodes') // The list of xml nodes 'nodes' (plural)
    TW.labels = [];

@@ -473,7 +544,6 @@ function dictfyGexf( gexf , categories ){
                            // (to inventory subclasses for a given attr)
                            //   if < maxDiscreteValues: keep all in legend
                            //   else:  show intervals in legend
-    var Atts_2_Exclude = {} // to exclude strings that don't convert to number

    // usually there is only 1 <nodes> element...
    for(i=0; i<elsNodes.length; i++) {
@@ -558,7 +628,9 @@ function dictfyGexf( gexf , categories ){
                var attr = attvalueNode.getAttribute('for');
                var val = attvalueNode.getAttribute('value');

-                if(nodesAttributes[attr]) attr = atts[nodesAttributes[attr]]=val
+                if(! isUndef(nodesAttributes[attr])) {
+                  atts[nodesAttributes[attr].title]=val
+                }
                else atts[attr]=val;
            }
            node.attributes = atts;
@@ -597,14 +669,24 @@ function dictfyGexf( gexf , categories ){
            // console.debug("node.attributes", node.attributes)
            // creating a faceted index from node.attributes
            if (TW.conf.scanClusters) {
-              [tmpVals, Atts_2_Exclude] = updateValueFacets(tmpVals, Atts_2_Exclude, node)
+
+              tmpVals = updateValueFacets(tmpVals, node)
            }

        } // finish nodes loop
    }

    // console.warn ('parseCustom output nodes', nodes)
-    // console.warn ('parseCustom inverted index: vals to srcType', tmpVals)
+
+
+
+
+
+    console.warn ('parseCustom inverted index: vals to srcType', tmpVals)
+
+
+
+

    // -------------- debug: for local stats ----------------
    // allSizes.sort();
@@ -619,8 +701,7 @@ function dictfyGexf( gexf , categories ){


    // clusters and other facets => type => name => [{label,val/range,nodeids}]
-    // £TODO finish changes to Atts_2_Exclude from 69e7c039 (new specif: dtype str is accepted for classes)
-    TW.Clusters = facetsBinning(tmpVals, Atts_2_Exclude)
+    TW.Clusters = facetsBinning(tmpVals)


    // linear rescale node sizes
@@ -754,34 +835,42 @@ function updateRelations(typedRelations, edgeCateg, srcId, tgtId){


 // To fill the reverse map: values => nodeids of a given type
-function updateValueFacets(facetIdx, Atts_2_Exclude, aNode) {
+function updateValueFacets(facetIdx, aNode) {

  if (!facetIdx[aNode.type])      facetIdx[aNode.type]={}
  for (var at in aNode.attributes) {
-    if (!facetIdx[aNode.type][at])  facetIdx[aNode.type][at]={'vals':[],'map':{}}
+    let val = aNode.attributes[at]

-    let castVal = Number(aNode.attributes[at])
-    // Identifying the attribute datatype: exclude strings and objects
-    // if ( isNaN(castVal) ) {
-    //     if (!Atts_2_Exclude[at]) Atts_2_Exclude[at]=true;
-    //
-    //     // TODO: this old Atts_2_Exclude strategy could be replaced,
-    //     //       not to exclude but to store the datatype somewhere like facetIdx[aNode.type][at].dtype
-    //     //  => the datatype would be a condition (no bins if not numeric, etc.)
-    //     //  => it would also allow to index text values (eg country, affiliation, etc.)
-    //     //     with the strategy "most frequent distinct values" + "others"
-    //     //     which would be useful (eg country, affiliation, etc.) !!!
-    //
-    // }
-    // numeric attr => build facets
-    // else {
-      if (!facetIdx[aNode.type][at].map[castVal]) facetIdx[aNode.type][at].map[castVal] = []
+    if (!facetIdx[aNode.type][at])  facetIdx[aNode.type][at]={'vals':[],'map':{}, 'vtypes': {'vstr':0, 'vnum':0}}

-      facetIdx[aNode.type][at].vals.push(castVal)      // for ordered scale
-      facetIdx[aNode.type][at].map[castVal].push(aNode.id)  // inverted index
-    // }
+    // shortcut
+    var indx = facetIdx[aNode.type][at]
+
+    // determine observed type of this single value
+    let castVal = Number(val)
+
+    // this discovered datatype will be a condition (no bins if not numeric)
+    if (isNaN(castVal)) {
+      indx.vtypes["vstr"]++
    }
-  return [facetIdx, Atts_2_Exclude]
+    else {
+      indx.vtypes["vnum"]++
+      val = castVal           // we keep it as number
+    }
+
+    if (!indx.map[val]) indx.map[val] = []
+
+    indx.vals.push(val)               // for ordered scale
+    indx.map[val].push(aNode.id)      // inverted index
+
+
+    // POSSIBLE with the discovered datatype
+    //  => it would also allow to index text values (eg country, affiliation, etc.)
+    //     with the strategy "most frequent distinct values" + "others"
+    //     which would be useful (eg country, affiliation, etc.) !!!
+
+  }
+  return facetIdx
 }


@@ -919,7 +1008,6 @@ function dictfyJSON( data , categories ) {

    // if scanClusters, we'll also use:
    var tmpVals = {}
-    var Atts_2_Exclude = {}

    for(var nid in data.nodes) {
        let n = data.nodes[nid];
@@ -958,14 +1046,14 @@ function dictfyJSON( data , categories ) {

        // creating a faceted index from node.attributes
        if (TW.scanClusters) {
-          [tmpVals, Atts_2_Exclude] = updateValueFacets(tmpVals, Atts_2_Exclude, node)
+          tmpVals = updateValueFacets(tmpVals, node)
        }
    }

    // test: json with string facet (eg lab affiliation in comex)
    console.log(tmpVals['Document'])

-    TW.Clusters = facetsBinning (tmpVals, Atts_2_Exclude)
+    TW.Clusters = facetsBinning (tmpVals)

    // £TODO ask if wanted
    // if we wanted linear rescale node sizes like dictfyGexf: