php part of new db.json spec

php requires fewer initial url parameters and reads every variable from the db.json conf (the columns to index and to search in, and the db table itself)... + documentation on the same; NB: the only things that stay hardcoded are the non-changing output tables from cortext-style dbs

php part of new db.json spec
php requires fewer initial url parameters and reads every variable from the db.json conf (the columns to index and to search in, and the db table itself)... + documentation on the same; NB: the only things that stay hardcoded are the non-changing output tables from cortext-style dbs
9fbbbd43 · Romain Loth · 673e1d53 · 9fbbbd43 · 9fbbbd43 · 9fbbbd43
Commit 9fbbbd43 authored Jul 05, 2017 by Romain Loth
10 changed files
--- a/00.DOCUMENTATION/A-Introduction/servermenu_config.md
+++ b/00.DOCUMENTATION/A-Introduction/servermenu_config.md
 ## Servermenu Configuration

+The servermenu file associates some metadata with each graph file.

-#### minimal config
+It is used if `sourcemode="servermenu"` is found in the url params or the settings file.
+
+By default, the file is called `./db.json` (this can be modified in settings under `TW.conf.paths.sourceMenu`)
+
+The `db.json` file of this distribution contains many examples.
+
+------------------------------------------------------
+#### Minimal Config
+
+One minimal servermenu entry contains:
+  - a data dir path (**the project**)
+  - for each project: a list of graph files subpaths (**the graph source**)
+  - for each graph file: a list of expected node types starting with 'node0' (**the nodetypes**)
+
+```json
+  "$$data/yourprojectdir": {
+    "graphs": {
+      "$$graph_source.gexf":{
+        "node0": {"name": "$$a_typename_of_nodes"}
+      },
+      "$$another_graph_source.json":{
+        "node0": {"name": "$$a_typename_of_nodes"}
+      }
+    }
+  }
+```
+The value **a_typename_of_nodes** should match the `type` or `category` attribute value of your nodes in the source gexf or json. It acts as a filter specifying which nodes will be displayed in the ProjectExplorer GUI.
+
+##### For a bipartite graph
+If the source file has 2 types of nodes, the config should look like this:
 ```json
  "$$data/yourprojectdir": {
    "graphs": {
-      "$$something.gexf":{
-        "node0": {"name": "$$blabla"}
+      "$$source_file.ext":{
+        "node0": {"name": "$$typename_of_term_nodes"},
+        "node1": {"name": "$$typename_of_context_nodes"}
      }
    }
  }
 ```

+NB: giving an empty string as the value of the `node1.name` property will group all other types found into an "other" category.
+
+Having a node0.name entry and optionally a node1.name is enough to display the graph.

+###### Real life example
+```json
+"data/comexjsons": {
+  "graphs": {
+    "graph_example.json": {
+      "node0": { "name": "NGram" },
+      "node1": { "name": "Document" }
+    }
+  }
+}
+```

-#### to activate relatedDocs LocalDB queries
+------------------------------------------------------
+#### Activating relatedDocs LocalDB queries

-For a relatedDocs query, you need to add to your node entry the `reldbfile` key:
+The servermenu file also allows configuration of associated queries for selected node(s): **relatedDocs**

+To enable it, you need to add to your node entry the `reldbfile` key:

 ```json
  "node0": {
@@ -25,3 +72,47 @@ For a relatedDocs query, you need to add to your node entry the `reldbfile` key:
    "reldbfile": "$$relpath/to/csv/or/sqlite"
  }
 ```
+
+The presence of the `reldbfile` property in a node entry of db.json is what activates the relatedDocs API for that node type.
+
+##### More relatedDocs settings
+In addition, for full configuration, the following entries can be set under node0 or node1.
+
+###### => for a CSV doc-by-doc table
+Expected type is `"csv"` and you should list the columns to search in.
+```json
+"reldbtype": "csv",
+"reldbqcols": ["list", "of", "columns", "to", "search", "in", "for", "node0"]
+```
+
+###### Real life example
+```json
+"data/gargistex": {
+    "first": "shale_and_ice.gexf",
+    "graphs": {
+      "shale_and_ice.gexf": {
+        "node0": {
+          "name": "terms",
+          "reldbtype": "csv",
+          "reldbfile": "shale_and_ice.csv",
+          "reldbqcols": ["title", "abstract"]
+        }
+      },
+      "model_calibration.gexf": {
+        "node0": {
+          "name": "terms",
+          "reldbtype": "csv",
+          "reldbfile": "model_calibration.csv",
+          "reldbqcols": ["title", "abstract"]
+        }
+      }
+    }
+}
+```
+
+###### => for CortextDB SQL tables
+Expected type is `"CortextDB"` and you should give the name of the table to search in.
+```json
+"reldbtype": "CortextDB",
+"reldbqtable": "$$name_of_the_table_to_search_in"
+```
--- a/db.json
+++ b/db.json
@@ -71,7 +71,7 @@
      "Maps_S_800.gexf": {
          "node0": {
            "name": "ISItermsWhitelistV2Oct_5 &amp; ISItermsWhitelistV2Oct_5",
-            "reldbtable": "ISItermsWhitelistV2Oct_5",
+            "reldbqtable": "ISItermsWhitelistV2Oct_5",
            "reldbfile" : "wos_climate-change_title_2014-2015.db",
            "reldbtype": "CortextDB"
          }

--- a/settings_explorerjs.js
+++ b/settings_explorerjs.js
@@ -19,12 +19,6 @@ TW.conf = (function(TW){
  // the graph input depends on TWConf.sourcemode (or manual url arg 'sourcemode')
  TWConf.sourcemode = "servermenu"   // accepted: "api" | "serverfile" | "servermenu" | "localfile"

-  // server-side .gexf|.json default source
-  TWConf.sourceFile = ""
-
-  // ...or server-side gexf default source list
-  TWConf.sourceMenu = "db.json"
-
  // ...or remote bridge to default source api ajax queries
  TWConf.sourceAPI={};
  TWConf.sourceAPI["forNormalQuery"] = "services/api/graph";
@@ -165,7 +159,9 @@ TW.conf = (function(TW){
  // ------------
  TWConf.paths = {
    'ourlibs': 'twlibs',
-    'modules': 'twmodules'
+    'modules': 'twmodules',
+    'sourceFile': "",           // server-side .gexf|.json default source
+    'sourceMenu': "db.json"     // ...or server-side gexf default source list
  }
  Object.freeze(TWConf.paths)  // /!\ to prevent path modification before load


--- a/twbackends/phpAPI/csv_indexation.php
+++ b/twbackends/phpAPI/csv_indexation.php
@@ -55,14 +55,14 @@ function parse_and_index_csv($filename, $typed_cols_to_index, $separator, $quote
          $base[$rowid][$colname] = $line_fields[$c];

          // fill our search index if the type+col was asked in postings
-          foreach (['semantic', 'social'] as $swtype){
-            if (array_key_exists($swtype, $postings)) {
-              if (array_key_exists($colname, $postings[$swtype])) {
+          for ($ndtypeid = 0 ; $ndtypeid < $GLOBALS["ntypes"] ; $ndtypeid++) {
+            if (array_key_exists($ndtypeid, $postings)) {
+              if (array_key_exists($colname, $postings[$ndtypeid])) {
                // basic tokenisation (TODO specify tokenisation delimiters etc.)
                $tokens = preg_split("/\W/", $line_fields[$c]);

                // for debug
-                // echo("indexing column:".$colname." under type:".$swtype.'<br>');
+                // echo("indexing column:".$colname." under type:".$ndtypeid.'<br>');
                // var_dump($tokens);

                foreach($tokens as $tok) {
@@ -70,16 +70,16 @@ function parse_and_index_csv($filename, $typed_cols_to_index, $separator, $quote
                  if (strlen($tok)) {

                    // POSS : stopwords could be used here
-                    if (! array_key_exists($tok, $postings[$swtype][$colname])) {
-                      $postings[$swtype][$colname][$tok] = array();
+                    if (! array_key_exists($tok, $postings[$ndtypeid][$colname])) {
+                      $postings[$ndtypeid][$colname][$tok] = array();
                    }
                    // in a csv, rowid is a pointer to the document
-                    if (array_key_exists($docid, $postings[$swtype][$colname][$tok])) {
+                    if (array_key_exists($docid, $postings[$ndtypeid][$colname][$tok])) {
                      // we keep the frequencies
-                      $postings[$swtype][$colname][$tok][$docid]++ ;
+                      $postings[$ndtypeid][$colname][$tok][$docid]++ ;
                    }
                    else {
-                      $postings[$swtype][$colname][$tok][$docid] = 1;
+                      $postings[$ndtypeid][$colname][$tok][$docid] = 1;
                    }
                  }
                }

--- a/twbackends/phpAPI/default_div.php
+++ b/twbackends/phpAPI/default_div.php
 <?php

 // default informations
-$TITLE="ISITITLE";              // <=== hardcoded Cortext table /!\
 $query = str_replace( '__and__', '&', $_GET["query"] );
 $elems = json_decode($query);

-// hardcoded CortextDB table /!\

-$table = "";
-$column = "";
-$id="";
+// the table used as search perimeter is from db.json conf
+$table = $my_conf[$ntid]['reldbqtable'] ;

-// echo("count(elems): ".count($elems)."<br/>");
-// echo("elems[0]: ".$elems[0]."<br/>");
-// echo("is_array($elems): ".is_array($elems)."<br/>");
+// values for CortextDB that seem to never change: /!\ hardcoded here /!\
+// the column accessors
+$column = "data";
+$id = "id";

-if($ndtype=="social"){
-  $table = "ISIAUTHOR";       // <== hardcoded CortextDB table /!\
-  $column = "data";
-  $id = "id";
-  $restriction='';
-  $factor=10;// factor for normalisation of stars
-}
+// the output tables
+$author_table = "ISIAUTHOR";
+$titles_table = "ISITITLE";

-if($ndtype=="semantic"){
-  $table = $_GET["index"];//"ISItermsfirstindexing";
-  $column = "data";
-  $id = "id";
-  $restriction='';
-  $factor=10;
-}
+$factor=10;// factor for normalisation of stars
 $restriction='';

+
 $sql="";
 //////////
 if (count($elems)==1){// un seul mot est sélectionné, on compte les mots multiples
@@ -105,8 +94,7 @@ foreach ($wos_ids as $id => $score) {
 			if ($count<=$max_item_displayed){
 				$count+=1;

-        // hardcoded CortextDB table /!\
-				$sql = 'SELECT data FROM ISITITLE WHERE id='.$id.' group by data';
+				$sql = 'SELECT data FROM '.$titles_table.' WHERE id='.$id.' group by data';

 				foreach ($base->query($sql) as $row) {
 					$external_link="<a href=http://google.com/webhp?#q=".urlencode('"'.$row['data'].'"')." target=blank>".' <img width=15px src="'.$our_libs_root.'/img/google.png"></a>';
@@ -115,8 +103,8 @@ foreach ($wos_ids as $id => $score) {
 					$output.='<a href="JavaScript:newPopup(\''.$our_php_root.'/default_doc_details.php?gexf='.urlencode($gexf).'&index='.$table.'&query='.urlencode($query).'&type='.urlencode($_GET["type"]).'&id='.$id.'	\')">'.$row['data']." </a> ";
 				}

-				// get the authors /!\ hardcoded CortextDB table /!\
-				$sql = 'SELECT data FROM ISIAUTHOR WHERE id='.$id;
+				// get the authors
+				$sql = 'SELECT data FROM '.$author_table.' WHERE id='.$id;
 				foreach ($base->query($sql) as $row) {
 					$output.=($row['data']).', ';
 				}

--- a/twbackends/phpAPI/info_div.php
+++ b/twbackends/phpAPI/info_div.php
@@ -7,24 +7,50 @@ ini_set('display_startup_errors',1);
 // exemple call:
 // http://blabla/LOCALDB/info_div.php?type=semantic&bi=0&query=[%22Monte%20Carlo%22]&gexf=%22line/AXA/RiskV2PageRank1000.gexf%22&index=ISItermsAxa_2015

+include('tools.php');
 include('parameters_details.php');

-if ($_GET['dbtype'] == "CortextDB") {
-  $base = new PDO("sqlite:".$mainpath.$graphdb);
-  include('default_div.php');
+$dbtype = null;
+if (array_key_exists('reldbtype', $my_conf[$ntid])) {
+  $dbtype = $my_conf[$ntid]['reldbtype'];
 }
-
 else {
-  // to index: the "searchable columns"
-  if (! array_key_exists('toindex', $_GET)) {
-    echo('<br> info_div.php (csv mode): please provide columns to index <br>');
+  $guess_src = '';
+  if (array_key_exists('dbtype', $_GET))  {
+    $dbtype = $_GET['dbtype'];
+    $guess_src = "via url parameters";
  }
  else {
-    $idxcolsbytype = json_decode($_GET['toindex']);
+    $dbtype = 'csv'; // new default
+    $guess_src = "by default";

-    // echodump("columns to index",$idxcolsbytype);
+  }
+  errmsg("not filled", "$gexf -> node$ntid -> 'reldbtype'", "...Assuming dbtype is $dbtype ($guess_src).");
+}


+if ($dbtype == "CortextDB") {
+  $base = new PDO("sqlite:".$mainpath.$graphdb);
+  include('default_div.php');
+}
+
+else {
+  // to index: the union of "searchable columns" qcols for all nodetypes
+  $idxcolsbytype = [];
+  for ($i = 0; $i < $ntypes ; $i++) {
+    if ($my_conf[$i]['active']) {
+      $idxcolsbytype[$i] = [];
+      $idxcolsbytype[$i] = $my_conf[$i]['reldbqcols'];
+    }
+    // else {
+    //   echo("no nodetype ".$i."<br>");
+    // }
+  }
+
+  if (! $idxcolsbytype) {
+    echo('<br> info_div.php (csv mode): please provide reldbqcols param in db.json <br>');
+  }
+  else {
    // DO THE INDEXATION (or RETRIEVE CACHED ONE)
    // we use cache if memcached is present (and if we indexed the csv already)
    include('csv_indexation.php');
@@ -68,7 +94,7 @@ else {

    // DO THE SEARCH
    // -------------
-    $searchcols = json_decode($_GET['searchin']);
+    $searchcols = $my_conf[$ntid]['reldbqcols'];

    // a - split the query
    $qtokens = preg_split('/\W/', $_GET["query"]);
@@ -93,7 +119,7 @@ else {
      for ($l=0 ; $l < count($searchcols) ; $l++) {

        // set of values we could find a match in
-        $searchable = $postings[$_GET['type']][$searchcols[$l]];
+        $searchable = $postings[$ntid][$searchcols[$l]];

        if (array_key_exists($tok, $searchable)) {


--- a/twbackends/phpAPI/parameters_details.php
+++ b/twbackends/phpAPI/parameters_details.php
@@ -32,60 +32,8 @@ $memport = 11211;

 // CONFIGURATION PARAMS
 // --------------------
-// reading db.json associations
-//    source graph file <=> (db, dbtype, cols) as relatedDocs php API
-$project_menu_fh = fopen($mainpath.$project_menu_path, "r");
-$json_st = '';
-while (!feof($project_menu_fh)) {
-  $json_st .= fgets($project_menu_fh);
-}
-fclose($project_menu_fh);
-
-$project_menu = json_decode($json_st);
-
-// echodump("== db.json menu ==", $project_menu);
-
 // parse db.json project menu and create a conf by file
-$conf = array();
-foreach ($project_menu as $project_dir => $dir_items){
-  // NB access by obj property (and not array key)
-  if (! property_exists($dir_items, 'graphs')) {
-    error_log("tw/phpAPI skip error: conf file ($project_menu_path)
-               has no 'graphs' entry for project '$project_dir' !");
-    continue;
-  }
-  foreach ($dir_items->graphs as $graph_file => $graph_conf){
-
-    // echodump("== $graph_file ==", $graph_conf);
-
-    $gpath = $project_dir.'/'.$graph_file;
-
-    // NB a graph conf can now have different settings for each nodetype
-    // node0 <=> classic type 'semantic'
-    // node1 <=> classic type 'social'
-
-    $conf[$gpath] = array($ntypes);
-
-    for ($i = 0 ; $i < $ntypes ; $i++) {
-      // check node0, node1, etc to see if they at least have a reldbfile
-      if (! property_exists($graph_conf, 'node'.$i)
-          || ! property_exists($graph_conf->{'node'.$i}, 'reldbfile') ) {
-        $conf[$gpath][$i] = array('active' => false);
-        continue;
-      }
-      else {
-        // we have a file for this type: copy entire conf
-        $conf[$gpath][$i] = (array)$graph_conf->{'node'.$i};
-
-        $conf[$gpath][$i]['active'] = true;
-        $conf[$gpath][$i]['dir'] = $project_dir;
-      }
-      // POSS here info on higher level may be propagated for lower ones
-      //     (ex: if dbtype is on the project level, its value should count
-      //          for each source file in the project unless overridden)
-    }
-  }
-}
+$conf = read_conf($mainpath.$project_menu_path, $ntypes);

 // =======================================
 // echodump("== READ CONF ==<br>", $conf);
@@ -101,13 +49,12 @@ if ($ndtype == 'semantic') {  $ntid = 0;  }
 else                       {  $ntid = 1;  }

 if (! $conf[$gexf][$ntid]['active']) {
-  echo("The relatedDocs configuration for your graph ($gexf) isn't active
-  (please read 00.DOCUMENTATION/A-Introduction/servermenu_config.md).<br>");
+  errmsg("not active", "your graph ($gexf)");
  exit(1);
 }
 else {
-  $my_conf = $conf[$gexf][$ntid];
-  $graphdb = $my_conf['dir'].'/'.$my_conf['reldbfile'];
+  $my_conf = $conf[$gexf];
+  $graphdb = $my_conf[$ntid]['dir'].'/'.$my_conf[$ntid]['reldbfile'];
 }

 // echodump("params: reldb", $graphdb);

--- a/twbackends/phpAPI/tools.php
+++ b/twbackends/phpAPI/tools.php
@@ -15,6 +15,60 @@ function errmsg($message, $context, $more = "") {
  (please read A-Introduction/servermenu_config.md).<br>$more</p>";
 }

+/**
+ * Reads the db.json project menu and builds the relatedDocs conf of each graph.
+ *
+ * Associates: source graph file <=> (db, dbtype, cols) as relatedDocs php API
+ *
+ * @param string $filepath  path of the json project menu (db.json)
+ * @param int    $ntypes    number of nodetypes to look for (node0, node1, ...)
+ * @return array  $conf["<project_dir>/<graph_file>"][<nodetype id>] => settings,
+ *                always with an 'active' bool; active entries also hold a copy
+ *                of the nodeN conf and a 'dir' key (the project dir).
+ *                Empty array if the menu can't be read or parsed.
+ */
+function read_conf($filepath, $ntypes) {
+  // one guarded read instead of a fopen + feof loop: with a failed fopen,
+  // feof() never returns true and the loop would not terminate
+  $json_st = is_readable($filepath) ? file_get_contents($filepath) : false;
+  if ($json_st === false) {
+    error_log("tw/phpAPI error: conf file ($filepath) could not be read !");
+    return array();
+  }
+
+  // NB decoded as objects (and not assoc arrays)
+  $project_menu = json_decode($json_st);
+  if (! is_object($project_menu)) {
+    error_log("tw/phpAPI error: conf file ($filepath) is not a valid json object !");
+    return array();
+  }
+
+  // echodump("== db.json menu ==", $project_menu);
+
+  $conf = array();
+  foreach ($project_menu as $project_dir => $dir_items){
+    // NB access by obj property (and not array key)
+    if (! is_object($dir_items) || ! property_exists($dir_items, 'graphs')) {
+      // $filepath is the only path known in this scope
+      error_log("tw/phpAPI skip error: conf file ($filepath)
+                 has no 'graphs' entry for project '$project_dir' !");
+      continue;
+    }
+    foreach ($dir_items->graphs as $graph_file => $graph_conf){
+      // echodump("== $graph_file ==", $graph_conf);
+
+      $gpath = $project_dir.'/'.$graph_file;
+
+      // NB a graph conf can now have different settings for each nodetype
+      // node0 <=> classic type 'semantic'
+      // node1 <=> classic type 'social'
+      $conf[$gpath] = array();
+
+      for ($i = 0 ; $i < $ntypes ; $i++) {
+        $nodekey = 'node'.$i;
+
+        // a nodetype is active only if its entry at least has a reldbfile
+        $has_reldb = is_object($graph_conf)
+                      && property_exists($graph_conf, $nodekey)
+                      && is_object($graph_conf->{$nodekey})
+                      && property_exists($graph_conf->{$nodekey}, 'reldbfile');
+
+        if (! $has_reldb) {
+          $conf[$gpath][$i] = array('active' => false);
+          continue;
+        }
+
+        // we have a file for this type: copy entire conf
+        $conf[$gpath][$i] = (array)$graph_conf->{$nodekey};
+        $conf[$gpath][$i]['active'] = true;
+        $conf[$gpath][$i]['dir'] = $project_dir;
+
+        // POSS here info on higher level may be propagated for lower ones
+        //     (ex: if dbtype is on the project level, its value should count
+        //          for each source file in the project unless overridden)
+      }
+    }
+  }
+  return $conf;
+}

 function imagestar($score,$factor,$static_libs) {
 // produit le html des images de score

--- a/twmain/extras_explorerjs.js
+++ b/twmain/extras_explorerjs.js
@@ -468,32 +468,7 @@ function topPapersFetcher(swType, qWords, priorHtml, cbNext){
      //          or 'csv' (like gargantext exports)

      // POSS object + join.map(join)
-      let urlParams = "type="+swType+"&query="+joinedQ+"&gexf="+TW.File+"&n="+TW.conf.relatedDocsMax+"&dbtype="+thisRelDocsConf.reldbtype
-
-      if (thisRelDocsConf.reldbtype == "CortextDB") {
-        var qIndex = thisRelDocsConf.reldbtable    // a table
-        urlParams += `&index=${qIndex}`
-      }
-      else {
-        // a list of csv columns to search in
-        // ex: for semantic nodes matching we look in 'title', 'keywords' cols
-        //     for social nodes matching we look in 'authors' col... etc.
-        let joinedSearchCols = JSON.stringify(thisRelDocsConf.reldbqcols)
-        urlParams += `&searchin=${joinedSearchCols}`
-
-        // HIGHER LEVEL SCOPE (whole indexation directive) WILL BE MOVED TO PHP
-        let allCols = {}
-
-        if (TW.gmenuInfos[TW.File][0])
-          allCols.semantic = TW.gmenuInfos[TW.File][0].reldbqcols
-
-        if (TW.gmenuInfos[TW.File][1])
-          allCols.social = TW.gmenuInfos[TW.File][1].reldbqcols
-
-        let joinedAllCols = JSON.stringify(allCols)
-        urlParams += `&toindex=${joinedAllCols}`
-        // POSS use a direct access from php to db.json to avoid toindex
-      }
+      let urlParams = "type="+swType+"&query="+joinedQ+"&gexf="+TW.File+"&n="+TW.conf.relatedDocsMax ;

      $.ajax({
          type: 'GET',

--- a/twmain/main.js
+++ b/twmain/main.js
@@ -215,7 +215,7 @@ function syncRemoteGraphData () {
  // cases            (2)       and     (3) : we'll read a file from server
  // sourcemode == "serverfile" or "servermenu" (several files with <select>)
  else {
-    console.log("input case: server-side file, using TW.conf.sourceMenu or getUrlParam.file or TW.conf.sourceFile")
+    console.log("input case: server-side file, using TW.conf.paths.sourceMenu or getUrlParam.file or TW.conf.paths.sourceFile")

    // -> @mode is servermenu, files are listed in db.json file (preRes ajax)
    //      --> if @file also in url, choose the db.json one matching
@@ -223,13 +223,13 @@ function syncRemoteGraphData () {

    // -> @mode is serverfile
    //      -> gexf file path is in the urlparam @file
-    //      -> gexf file path is already specified in TW.conf.sourceFile
+    //      -> gexf file path is already specified in TW.conf.paths.sourceFile

    // menufile case : a list of source files in ./db.json
    if (sourcemode == 'servermenu') {
-        console.log("reading from FILEMENU TW.conf.sourceMenu")
+        console.log("reading from FILEMENU TW.conf.paths.sourceMenu")
        // we'll first retrieve the menu of available files in db.json, then get the real data in a second ajax
-        var infofile = TW.conf.sourceMenu
+        var infofile = TW.conf.paths.sourceMenu

        if (TW.conf.debug.logFetchers)  console.info(`attempting to load filemenu ${infofile}`)
        var preRES = AjaxSync({ url: infofile, datatype:"json" });
@@ -307,12 +307,12 @@ function syncRemoteGraphData () {
      TW.File = getUrlParam.file
    }
    // direct file fallback case: specified file in settings_explorer
-    else if (TW.conf.sourceFile && linkCheck(TW.conf.sourceFile)) {
+    else if (TW.conf.paths.sourceFile && linkCheck(TW.conf.paths.sourceFile)) {
      console.log("no @file arg: trying TW.conf.sourceFile from settings")
-      TW.File = TW.conf.sourceFile;
+      TW.File = TW.conf.paths.sourceFile;
    }
    else {
-      console.error(`No specified input and neither db.json nor TW.conf.sourceFile ${TW.conf.sourceFile} are present`)
+      console.error(`No specified input and neither db.json nor TW.conf.paths.sourceFile ${TW.conf.paths.sourceFile} are present`)
    }

    var finalRes = AjaxSync({ url: TW.File });