Commit 96e3608a authored by Romain Loth

'majority_type': smarter defaults when stipulated types for node0 and node1 are not found

parents fb0485ac 7b091348
......@@ -193,7 +193,10 @@ function scanGexf(gexfContent) {
attr = declaredAttrs.nodeAttrs[attr].title
// THIS WILL BECOME catDict (if ncats == 1 => monopart)
if (attr=="category") categoriesDict[val]=val;
if (attr=="category" || attr=="type") {
if (!categoriesDict[val]) categoriesDict[val] = 0
categoriesDict[val]++;
}
}
}
}
......@@ -211,40 +214,58 @@ function scanGexf(gexfContent) {
// ex: ISItermsriskV2_140 & ISItermsriskV2_140
/**
 * Sorts the node types observed in a graph into at most two categories.
 *
 * Types are ranked by decreasing count. Category 0 (nodes0) is the type
 * stipulated by TW.conf.catSem if it was observed, otherwise the most
 * frequent type not already taken by category 1. Category 1 (nodes1) is the
 * type stipulated by TW.conf.catSoc if it was observed, joined by '/' with
 * every remaining type ("all the rest" group).
 *
 * @param {Object} observedTypesDict - maps each observed type name to the
 *        number of nodes having that type
 * @returns {{categories: string[], lookup_dict: Object}} `categories` holds
 *          one or two category labels; `lookup_dict` maps every observed
 *          type name to its category code (0 or 1)
 */
function sortNodeTypes(observedTypesDict) {
  // most frequent type first
  const observedTypes = Object.keys(observedTypesDict);
  observedTypes.sort(function(a, b) {
    return observedTypesDict[b] - observedTypesDict[a];
  });

  const newcats = [];
  const catDict = {};
  const nTypes = observedTypes.length;

  if (nTypes === 0) {
    // nothing observed: single default category
    newcats[0] = "Terms";
    catDict["Terms"] = 0;
  }
  else if (nTypes === 1) {
    // if we have only one category, it gets code 0 as Terms
    newcats[0] = observedTypes[0];
    catDict[observedTypes[0]] = 0;
    if (TW.conf.debug.logParsers)
      console.log(`cat unique (${observedTypes[0]}) =>0`);
  }
  else {
    // allows multiple node types, with an "all the rest" node1
    // NB: only 2 cat labels are allowed by this

    // own-property test so that type names like "constructor" are not
    // mistaken for already-assigned entries
    const isAssigned = function(typeName) {
      return Object.prototype.hasOwnProperty.call(catDict, typeName);
    };

    // try stipulated cats, then fallbacks
    if (observedTypesDict[TW.conf.catSem]) {
      newcats[0] = TW.conf.catSem;
      catDict[TW.conf.catSem] = 0;
    }
    if (observedTypesDict[TW.conf.catSoc]) {
      newcats[1] = TW.conf.catSoc;
      catDict[TW.conf.catSoc] = 1;
    }

    // NB: type for nodes0 will be the majoritary by default, unless taken
    if (newcats[0] === undefined) {
      newcats[0] = (observedTypes[0] != newcats[1])
        ? observedTypes[0]    // 0 is the most frequent here
        : observedTypes[1];   // 1 is second most frequent
      // the fallback type must be registered too, otherwise its nodes
      // have no category code in the lookup
      catDict[newcats[0]] = 0;
    }

    // all the rest goes to the nodes1 group
    // (POSS extend to multitypes)
    for (const c of observedTypes) {
      // membership is tested on catDict, not on the newcats[1] label:
      // that label becomes a joined string as soon as one type is appended
      if (isAssigned(c)) continue;
      newcats[1] = (newcats[1] === undefined) ? c : newcats[1] + '/' + c;
      catDict[c] = 1;
    }
  }

  return {'categories': newcats, 'lookup_dict': catDict};
}
......@@ -990,8 +1011,11 @@ function scanJSON( data ) {
var nodes = data.nodes;
for(var i in nodes) {
let n = nodes[i];
if(n.type) categoriesDict[n.type]=n.type;
let ntype = nodes[i].type;
if(ntype) {
if (!categoriesDict[ntype]) categoriesDict[ntype] = 0
categoriesDict[ntype]++;
}
}
// sorting observed json node types into Sem (=> 0)/Soc (=> 1)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment