adapt dbdatapi for new org tables (+ gui autocomplete 1/2)

72859dbf · Romain Loth · 9fb58f01 · 72859dbf · 72859dbf · 72859dbf
Commit 72859dbf authored Mar 14, 2017 by Romain Loth
8 changed files
--- a/php_library/directory_content.php
+++ b/php_library/directory_content.php
@@ -189,34 +189,42 @@ $loop = 0;


 // all lab orgids except _NULL
-$ids_str = implode(',', array_keys($lab_counts));
-
-// print_r("all lab ids here:");
-// print_r($ids_str);
-// print_r("<br/>");
+$lab_ids = array_filter(array_keys($lab_counts));
+sort($lab_ids);

+// all lab infos to retrieve
 $labs = array();

-// normal query would be enough for everything except parent org
-// POSS page the request in nb of ids >= mysql technical limit for IN
-// $sql = 'SELECT * FROM orgs WHERE orgid IN ('.$ids_str.') ORDER BY name, acro' ;
-
-// variant with parent org
-// unique org1 (=> unique pairs (sch_org => sch_org2)
-//              => org2 info)
-//
-// it's much longer in code but fast because of indexes
-//
-// a POSS alternative would be to
-//        create an org_org table
-//        at record time
-
-$sql = <<< LABSQLEXTENDED
-SELECT orgs.*,
+// paging
+$step = 2000;
+$n_steps = ceil(count($lab_ids)/$step);
+
+for($i = 0; $i < $n_steps; $i++) {
+    $batch = array_slice($lab_ids, $step * $i, $step);
+    $ids_str = implode(',', $batch);
+
+    // print_r("<br>step: ".$i." / ids_str".$ids_str."<br>");
+
+    // normal query would be enough for everything except parent org
+    // POSS page the request in nb of ids >= mysql technical limit for IN
+    // $sql = 'SELECT * FROM orgs WHERE orgid IN ('.$ids_str.')'; // ORDER BY name, acro' ;
+
+    // variant query with parent org
+    // unique org1 (=> unique pairs (sch_org => sch_org2)
+    //              => org2 info)
+    //
+    // it's much longer in code but fast because of indexes
+    //
+    // a POSS alternative would be to
+    //        create an org_org table
+    //        at record time
+    //
+    $sql = <<< LABSQLEXTENDED
+    SELECT orgs.*,
           GROUP_CONCAT( tgt_tostring ORDER BY tgt_freq DESC SEPARATOR '%%%')
            AS related_insts
-FROM orgs
-LEFT JOIN (
+    FROM orgs
+    LEFT JOIN (
        SELECT sch_org.orgid AS src_orgid,
              sch_org2.orgid AS tgt_orgid,
              orgs2.tostring AS tgt_tostring,
@@ -230,15 +238,15 @@ LEFT JOIN (
        AND  sch_org.orgid != sch_org2.orgid
        GROUP BY sch_org.orgid, sch_org2.orgid
        ) AS lab_relationship_to_inst_via_scholars ON src_orgid = orgs.orgid
-WHERE orgs.orgid IN ( {$ids_str} )
-AND orgs.name != '_NULL'
-GROUP BY orgs.orgid
-ORDER BY orgs.name, orgs.acro
+    WHERE orgs.orgid IN ( {$ids_str} )
+    AND orgs.name != '_NULL'
+    GROUP BY orgs.orgid
+    ORDER BY orgs.name, orgs.acro
 LABSQLEXTENDED;

-// print_r($sql);
+    // print_r($sql);

-foreach ($base->query($sql) as $row) {
+    foreach ($base->query($sql) as $row) {
        $info = array();
        $info['unique_id'] = $row['orgid'];

@@ -272,7 +280,13 @@ foreach ($base->query($sql) as $row) {
        }
        // print_r($info);
        $labs[$row['orgid']] = $info;
+
+        // finished batch
+    }
+    // finished all labs
 }
+
+
 //
 // print_r("all labs here:");
 // print_r($labs);
@@ -284,7 +298,7 @@ foreach ($base->query($sql) as $row) {

 // debug
 // $content .= var_dump($all_orga_list) ;
-//
+
 // $organiz = array();
 // sort($all_orga_list);
 // foreach ($all_orga_list as $name) {
@@ -318,7 +332,7 @@ foreach ($base->query($sql) as $row) {
 //     }
 //
 // }
-//
+


 ///////////////////////////////////////////////////////////////

--- a/php_library/stat-prep_from_array.php
+++ b/php_library/stat-prep_from_array.php
@@ -6,17 +6,19 @@
 */


+// parameters : threshold to display orgs (labs / institutions) diagrams
+$MIN_DISTINCT_LABS = 1 ;
+$MIN_DISTINCT_LABS_SCHOLARS_SHARE = .25;

-// paramters : threshold to display orgs (labs / institutions) diagrams
-$MIN_DISTINCT_LABS = 5 ;
-$MIN_DISTINCT_INSTS = 4 ;
+$MIN_DISTINCT_INSTS = 1 ;
+$MIN_DISTINCT_INSTS_SCHOLARS_SHARE = .20;


+
+// main vars
 $country_list = array();
 $position_list = array();
 $title_list = array();
-
-
 // not needed already factorized in lab_counts, inst_counts
 // $labs_list = array();
 // $insts_list = array();
@@ -129,16 +131,18 @@ asort($inst_counts);

 // TODO factorize all this

+// we are creating highcharts' arguments for pie chart
+// eg position_data: data: [["senior researcher",11],["assistant professor",23],["lecturer",25],["engineer",26],["associate professor",28],["student",28],["post-doc",48],["professor",51],["phd student",53],["research director",64],["researcher",68],["Missing data",467],["Others",210]]
+
 // NB escaping: no need to do htmlspeciazlchars($key, ENT_HTML5 | ENT_QUOTES, 'UTF-8'); because the target language is js (doesn't need html entities)

 // données des pays
 $country_data = "data: [";
 foreach ($country_list as $key => $value) {

-        $key = addslashes($key);
-
-        if ($value > min(9, count($country_list) / 10)) {
-            $country_data.='["' . $key . '",' . $value . '],';
+        $thresh = min(9, count($country_list) / 10);
+        if ($value > $thresh) {
+            $country_data.='["' . addslashes($key) . '",' . $value . '],';
        } else {
            $other_country+=$value;
        }
@@ -162,9 +166,9 @@ $country_data.=']';
 // données des position
 $position_data = "data: [";
 foreach ($position_list as $key => $value) {
-    $key = addslashes($key);
-    if ($value > min(9, count($position_list) / 10)) {
-        $position_data.='["' . $key . '",' . $value . '],';
+    $thresh = min(9, count($position_list) / 10);
+    if ($value > $thresh) {
+        $position_data.='["' . addslashes($key) . '",' . $value . '],';
    } else {
        $other_position+=$value;
    }
@@ -207,16 +211,26 @@ $title_data.=']';
 $labs_data = "data: [";
 $n_labs = count($lab_counts);
 $n_shown_labs = 0 ;
+$tot_shown_labs = 0;
+$labs_total_responses = 0;
 foreach ($lab_counts as $key => $value) {
+        // $key is the orgid, but we need the name
+        $label = $org_id_to_label[$key];
+        $thresh = min(9, $n_labs / 15);

-        $key = addslashes($key);
-        if ($value > min(9, $n_labs / 15)) {
-            $labs_data.='["' . $key . '",' . $value . '],';
+        if (!$label || $label == "_NULL") {
+            $missing_labs += $value;
+        }
+        elseif ($value > $thresh) {
+            $labs_data.='["' . addslashes($label) . '",' . $value . '],';
            $n_shown_labs += 1;
+            $tot_shown_labs += $value;
        } else {
            $other_labs+=$value;
        }

+        # doesn't include missing, but we can compare to n_scholars to know
+        $labs_total_responses += $value;
 }
 if ($missing_labs>0){
    $labs_data.='["Missing data",' . $missing_labs . '],';
@@ -230,19 +244,31 @@ if ($other_labs>0){
 $labs_data.=']';


+// $share_of_shown_labs = sprintf("%.6f", $tot_shown_labs/$labs_total_responses);
+$share_of_shown_labs = sprintf("%.6f", $tot_shown_labs/count($scholars));
+
 $insts_data = "data: [";
 $n_insts = count($inst_counts);
 $n_shown_insts = 0 ;
+$tot_shown_insts = 0;
+$insts_total_responses = 0;
 foreach ($inst_counts as $key => $value) {
+        $label = $org_id_to_label[$key];
+        $thresh = min(9, $n_insts / 15);

-        $key = addslashes($key);
-        if ($value > min(9, $n_insts / 15)) {
-            $insts_data.='["' . $key . '",' . $value . '],';
+        if (!$label) {
+            $missing_insts += $value;
+        }
+        elseif ($value > $thresh) {
+            $insts_data.='["' . addslashes($label) . '",' . $value . '],';
            $n_shown_insts += 1;
+            $tot_shown_insts += $value;
        } else {
            $other_insts+=$value;
        }

+        $insts_total_responses+=$value;
+
 }
 if ($missing_insts>0){
    $insts_data.='["Missing data",' . $missing_insts . '],';
@@ -256,7 +282,10 @@ if ($other_labs>0){

 $insts_data.=']';

+// $share_of_shown_insts = sprintf("%.6f", $tot_shown_insts/$insts_total_responses);
+$share_of_shown_insts = sprintf("%.6f", $tot_shown_insts/count($scholars));

+// print_r("shown_insts_total % ".$share_of_shown_insts);

 // TODO separate this Highcharts js to factorize and expose as functions
 //      (or replace it by D3 and also separate)
@@ -372,7 +401,13 @@ $(document).ready(function() {
         '}]
 	});

-    if (parseInt('.$n_shown_labs.') >= parseInt('.$MIN_DISTINCT_LABS.')) {
+    var MIN_DISTINCT_LABS = parseInt('.$MIN_DISTINCT_LABS.')
+    var MIN_DISTINCT_LABS_SCHOLARS_SHARE = parseFloat('.$MIN_DISTINCT_LABS_SCHOLARS_SHARE.')
+
+    if (
+        parseInt('.$n_shown_labs.') >= MIN_DISTINCT_LABS
+        && parseFloat('.$share_of_shown_labs.') >= MIN_DISTINCT_LABS_SCHOLARS_SHARE
+        ) {

        labs= new Highcharts.Chart({
    		chart: {
@@ -412,7 +447,12 @@ $(document).ready(function() {
        document.getElementById("labs_div").style.display = "none"
    }

-    if (parseInt('.$n_shown_insts.') >= parseInt('.$MIN_DISTINCT_INSTS.')) {
+    var MIN_DISTINCT_INSTS = parseInt('.$MIN_DISTINCT_INSTS.')
+    var MIN_DISTINCT_INSTS_SCHOLARS_SHARE = parseFloat('.$MIN_DISTINCT_INSTS_SCHOLARS_SHARE.')
+
+    if ( parseInt('.$n_shown_insts.') >= MIN_DISTINCT_INSTS
+            && parseFloat('.$share_of_shown_insts.') >= MIN_DISTINCT_INSTS_SCHOLARS_SHARE
+            ) {

        insts= new Highcharts.Chart({
    		chart: {

--- a/print_directory.php
+++ b/print_directory.php
@@ -3,8 +3,6 @@ include ("php_library/comex_library.php");
 include ("php_library/parametres.php");
 include ("php_library/normalize.php");

-//include("../common/library/fonctions_php.php");
-
 $meta = '<!DOCTYPE html>
 <html lang="en">
    <head>
@@ -86,6 +84,7 @@ function objectToArray($d) {

 $data = objectToArray($data);

+// REST query params
 $categorya = $data["categorya"] ?? [];
 $categoryb = $data["categoryb"] ?? [];
 $countries = $data["countries"] ?? [];
@@ -195,9 +194,6 @@ if ($countries) {
    $f .= ")  ";
 }

-
-
-// £TODO_ORGS FILTER x 2
 if ($laboratories) {
    // debug
    // echo '<p style="color:white">MATCHING ON labs<p>';
@@ -211,7 +207,7 @@ if ($laboratories) {
        if ($lab == "") continue;
        if ($i > 0)
            $f .= " OR ";
-        $f .= 'team_lab LIKE "%' . $lab . '%" ';
+        $f .= 'labs_list LIKE "%' . $lab . '%" ';
        $query_details.=$lab.', ';
        $i++;
    }
@@ -230,11 +226,11 @@ if ($organizations) {
    foreach ($organizations as $org) {
        // echo '<p style="color:white">========> org =====> '. $org ."<p>";
        $org = sanitize_input(trim(strtolower($org)));
-
        if ($org == "") continue;
+        if ($i > 0)
+            $f .= " OR ";
+        $f .= 'insts_list LIKE "%' . $org . '%" ';
        $query_details.=$org.', ';
-        $f .= 'org LIKE "%' . $org . '%" ';
-                //'affiliation LIKE "%' . $org . '% OR affiliation2 LIKE "%' . $org . '%"';
        $i++;
    }
    $f .= ")  ";
@@ -242,6 +238,9 @@ if ($organizations) {

 $query_details.='</ul>';

+// debug SQL filters
+// print_r("query filters: ". $f);
+
 $base = new PDO($dsn, $user, $pass, $opt);
 $termsMatrix = array(); // liste des termes présents chez les scholars avec leurs cooc avec les autres termes
 $scholarsMatrix = array(); // liste des scholars avec leurs cooc avec les autres termes
@@ -338,7 +337,7 @@ SELECT * FROM (
 END_QUERY;

 // debug
-// echo '<p style="color:white">query:'. $sql ."<p>";
+// echo '<p style="color:grey;">query:<br>'. $sql ."<p>";

 // liste des chercheurs
 $scholars = array();
@@ -491,11 +490,8 @@ $header = '<div class="row" id="welcome">
 <br/>
 <br/>
 <p>
-This directory presents the profiles of <a href="#scholars">'.  count($scholars).' scholars</a> and <a href="#labs">'.  count($labs).' labs</a> in the field of Complex Systems';
-
+This directory presents the profiles of <a href="#scholars">'.  count($scholars).' scholars</a>, <a href="#labs">'.  count($labs).' labs</a> and <a href="#orga">'.$orga_count.' organizations</a> in the field of Complex Systems';

-// TODO restore old version before duplicate lab/orga
-// This directory presents the profiles of <a href="#scholars">'.  count($scholars).' scholars</a>, <a href="#labs">'.  count($labs).' labs</a> and <a href="#orga">'.$orga_count.' organizations</a> in the field of Complex Systems';


 if (strlen(trim($query_details))>3){

--- a/print_scholar_directory.php
+++ b/print_scholar_directory.php
@@ -70,6 +70,13 @@ $base = new PDO($dsn, $user, $pass, $opt);
 // liste des chercheurs
 $scholars = array();

+// these stats are useful BOTH in stat-prep and directory_content
+// => should be prepared right now (the label mapping contains all orgs, i.e. both labs and institutions)
+$lab_counts = array();
+$inst_counts = array();
+$org_id_to_label = array();
+
+
 if ($userid) {

    // query idea:
@@ -118,27 +125,30 @@ if ($userid) {
            FROM (
                SELECT
                    scholars.*,
-                    -- GROUP_CONCAT(labs.orgid SEPARATOR ',') AS labs_ids,
+                    GROUP_CONCAT(labs.orgid SEPARATOR ',') AS labs_ids,
                    GROUP_CONCAT(labs.tostring SEPARATOR '%%%') AS labs_list
                FROM scholars
                LEFT JOIN sch_org AS map_labs
                        ON map_labs.uid = luid
-                JOIN orgs AS labs
+                    LEFT JOIN (
+                        SELECT * FROM orgs WHERE class='lab'
+                    ) AS labs
                         ON map_labs.orgid = labs.orgid
                    WHERE (record_status = 'active'
                            OR (record_status = 'legacy' AND valid_date >= NOW()))
-                AND labs.class = 'lab'
                    GROUP BY luid
                    ) AS scholars_and_labs
                LEFT JOIN sch_org AS map_insts
                    ON map_insts.uid = luid
-            JOIN orgs AS insts
+                LEFT JOIN (
+                    SELECT * FROM orgs WHERE class='inst'
+                ) AS insts
                    ON map_insts.orgid = insts.orgid

-            AND insts.class = 'inst'
                GROUP BY luid
    ) AS scholars_and_orgs

+    -- expansion (+kw info)
    LEFT JOIN sch_kw AS second_level
        ON second_level.uid = scholars_and_orgs.luid
    JOIN sch_kw ON sch_kw.kwid = second_level.kwid
@@ -172,18 +182,14 @@ HERE_QUERY;
        $info['country'] = $row['country'];
        $info['homepage'] = $row['home_url'];

-
-        // TODO recreate difference between lab and org --------->8--------
+        // recreated arrays
        $info['labs'] = explode('%%%', $row['labs_list'] ?? "") ;
        $info['institutions'] = explode('%%%', $row['insts_list'] ?? "") ;

-        // right now duplicate treatment short-circuited like this
-        // (effect visible in stat-prep_from_array)
-        $info['affiliation'] = $row['org'] . $row['team_lab'];
-        $info['affiliation_id'] = $row['affiliation_id'];
-        // ----------------------------------------------------->8---------
-        // $info['lab2'] = $row['lab2'];
-        // $info['affiliation2'] = $row['affiliation2'];
+        $info['labs_ids'] = explode(',', $row['labs_ids'] ?? "") ;
+        $info['insts_ids'] = explode(',', $row['insts_ids'] ?? "") ;
+
+
        $info['title'] = $row['hon_title'];
        $info['position'] = $row['position'];
        $info['pic_src'] = $row['pic_fname'] ? '/data/shared_user_img/'.$row['pic_fname'] : $row['pic_url']  ;
@@ -196,11 +202,53 @@ HERE_QUERY;
        // $info['fax'] = $row['fax'];
        // $info['affiliation_acronym'] = $row['affiliation_acronym'];
        $scholars[$row['luid']] = $info;
+
+        // we prepare the aggregated lab and institution stats in this loop too
+        foreach ( array(
+                    array('labs','labs_ids', &$lab_counts),
+                    array('institutions','insts_ids', &$inst_counts)
+                  ) as $cat) {
+
+            // var_dump($cat);
+
+            $namekey = $cat[0];
+            $idkey = $cat[1];
+            $counthash_ref = &$cat[2];
+
+            // £TODO_ORGS we'll need a missing_labs
+
+            $j = -1 ;
+            foreach ($info[$idkey] as $org_id) {
+
+                $j++;
+                $org_label = $info[$namekey][$j];
+                $org_label = trim($org_label);
+
+                if (strcmp($org_label, "") == 0) {
+                    $org_label = null;
+                } else {
+                    $org_label = weedout_alt_nulls($org_label);
+                }
+
+                // all non-values are there as null
+                $org_id_to_label[$org_id] = $org_label;
+
+
+                if (array_key_exists($org_id, $counthash_ref)) {
+                    $counthash_ref[$org_id]+=1;
+                } else {
+                    $counthash_ref[$org_id] = 1;
+                }
+            }
+        }
    }
 }

+// both our stats have been filled
+// var_dump($lab_counts) ;
+// var_dump($inst_counts) ;

-// creates js for stats visualisations
+// creates js for stats visualisations and counts (we re-use the orgs counts)
 include ("php_library/stat-prep_from_array.php");

 // debug
@@ -211,8 +259,6 @@ include ("php_library/directory_content.php");



-
-
 $content .= '</div>';
 $content .= '</div>
            <footer style="color:white">
@@ -261,12 +307,10 @@ $header = '<div class="row" id="welcome">
 <br/>
 <br/>
 <p>
-This directory presents the profiles of <a href="#scholars">'.  count($scholars).' scholars</a> and <a href="#labs">'.  count($labs).' labs</a> in the field of Complex Systems
+This directory presents the profiles of <a href="#scholars">'.  count($scholars).' scholars</a>, <a href="#labs">'.  count($labs).' labs</a> and <a href="#orga">'.$orga_count.' organizations</a> in the field of Complex Systems
 <br/>
 Scholars have been selected from the complex systems directory when sharing common keywords with '.$target_name.'

-<!-- TODO restore old version before duplicate lab/orga with $orga_count -->
-
 </p>
 <h4>About the complex systems directory</h4>
 <p>
@@ -287,7 +331,11 @@ Contributions and ideas are welcome to improve this directory.
 <div id="country" style="width: 800px; height: 300px; margin: 0 auto"></div>
 <div id="title" style="width: 800px; height: 300px; margin: 0 auto"></div>
 <div id="position" style="width: 800px; height: 300px; margin: 0 auto"></div>
-<div id="organizations" style="width: 800px; height: 300px; margin: 0 auto"></div>
+
+<!-- these two are displayed only if the distribution has
+     at least 3 big groups (cf. n_shown in stats-prep) -->
+<div id="labs_div" style="width: 800px; height: 300px; margin: 0 auto"></div>
+<div id="insts_div" style="width: 800px; height: 300px; margin: 0 auto"></div>


 <br/>
@@ -301,6 +349,8 @@ Contributions and ideas are welcome to improve this directory.

 echo $meta.' '.$stats.'</head>';
 if (count($scholars)==0){
+
+// TODO message in modal panel
 echo  '<h2>Sorry, '.$target_name.' did not mention any keywords ... we cannot process its network.</h2><br/>
    If you are '.$target_name.', you can  <a href="/services/user/profile"  target="_BLANK">modify your profile</a> and see your
        network in few minutes.';
@@ -308,5 +358,6 @@ echo  '<h2>Sorry, '.$target_name.' did not mention any keywords ... we cannot pr
 echo $header;
 echo $content;
 }
+exit(0);

 ?>
--- a/services/dbdatapi.py
+++ b/services/dbdatapi.py
@@ -14,6 +14,7 @@ from math      import floor, log, log1p
 from cgi       import escape
 from re        import sub, match
 from traceback import format_tb
+from json      import loads

 if __package__ == 'services':
    from services.tools import mlog, REALCONFIG
@@ -257,6 +258,24 @@ def find_scholar(some_key, some_str_value, cmx_db = None):
    return luid


+class Org:
+    " tiny helper class to serialize/deserialize orgs TODO use more OOP :) "
+
+    def __init__(self, org_array, org_class=None):
+        if len(org_array) < 3:
+            raise ValueError("Org is implemented for at least [name, acr, loc]")
+        self.name = org_array[0]
+        self.acro = org_array[1]
+        self.locname = org_array[2]
+        self.org_class = org_class
+
+        # DB specifications say that at least one of name||acr is NOT NULL
+        self.any = self.acro if self.acro else self.name
+        self.tostring = (  ( self.name if self.name else "")
+                        + ((' ('+self.acro+')') if self.acro else "")
+                        + ((', '+self.locname) if self.locname else "")
+                        )
+

 class BipartiteExtractor:
    """
@@ -405,7 +424,6 @@ class BipartiteExtractor:
                            (record_status = 'legacy' AND valid_date >= NOW())
                        )
                    """
-
                else:
                    # query is a set of filters like: key <=> array of values
                    # (expressed as rest parameters: "keyA[]=valA1&keyB[]=valB1&keyB[]=valB2")
@@ -426,41 +444,28 @@ class BipartiteExtractor:
                            continue
                        else:
                            known_filter = key
-                            sql_column = FIELDS_FRONTEND_TO_SQL[key]['col']
+                            sql_field = FIELDS_FRONTEND_TO_SQL[key]['grouped']

                            # "LIKE_relation" or "EQ_relation"
                            rel_type = FIELDS_FRONTEND_TO_SQL[key]['type']

-
-                        # pre-treatment: rewrite tables' names if they're inside the sub-query
-
-                        # exemple:
-                        # scholars.country   ~~~~~> scholars_n_hashtags.country
-                        # hashtags.htstr     ~~~~~> scholars_n_hashtags.htstr
-                        # (see cascaded join below for explanation)
-
-                        if match("scholars", sql_column):
-                            (sql_table, sql_field) = sql_column.split('.')
-                            sql_column = 'scholars_n_hashtags.'+sql_field
-
-                            mlog('DBG', "rewrote sql col", sql_column)
-                        elif match("hashtags.htstr", sql_column):
-                            sql_column = 'scholars_n_hashtags.hashtags_list'
-
-                            mlog('DBG', "rewrote sql col", sql_column)
-
                        # now create the constraints
                        val = filter_dict[known_filter]

                        if len(val):
-                            # clause type          clause is full
-                            #  IN (val1, val2)       False
-                            # "= val"                False
-                            # "col LIKE '%val%'"     True
-                            clause_is_full = False
-                            rhsclause = ""
-                            fullclause = ""
-                            if (isinstance(val, list) or isinstance(val, tuple)):
+                            # clause examples
+                            # "col IN (val1, val2)"
+                            # "col = val"
+                            # "col LIKE '%escapedval%'"
+
+                            if (not isinstance(val, list)
+                              and not isinstance(val, tuple)):
+                                mlog("WARNING", "direct graph api query without tina")
+                                clause = sql_field + type_to_sql_filter(val)
+
+                            # normal case
+                            # tina sends an array of str filters
+                            else:
                                tested_array = [x for x in val if x]
                                mlog("DEBUG", "tested_array", tested_array)
                                if len(tested_array):
@@ -468,33 +473,20 @@ class BipartiteExtractor:
                                        qwliststr = repr(tested_array)
                                        qwliststr = sub(r'^\[', '(', qwliststr)
                                        qwliststr = sub(r'\]$', ')', qwliststr)
-                                        clause = 'IN '+qwliststr
+                                        clause = sql_field + ' IN '+qwliststr
+                                        # ex: country IN ('France', 'USA')
+
                                    elif rel_type == "LIKE_relation":
                                        like_clauses = []
                                        for singleval in tested_array:
                                            if type(singleval) == str and len(singleval):
                                                like_clauses.append(
-                                                   sql_column+" LIKE '%"+singleval+"%'"
+                                                   sql_field+" LIKE '%"+quotestr(singleval)+"%'"
                                                )
                                        clause = " OR ".join(like_clauses)
-                                        # clause already includes col name
-                                        clause_is_full = True
-
-                            elif isinstance(val, int):
-                                clause = '= %i' % val
-                            elif isinstance(val, float):
-                                clause = '= %f' % val
-                            # elif isinstance(val, str):
-                            #     clause = '= "%s"' % val
-                            elif isinstance(val, str):
-                                clause = 'LIKE "%'+val+'%"'
-                                clause_is_full = True

                            if len(clause):
-                                if clause_is_full:
                                sql_constraints.append("(%s)" % clause)
-                                else:
-                                    sql_constraints.append("(%s %s)" % (sql_column, clause))

                    # debug
                    mlog("INFO", "SELECTing active users with sql_constraints", sql_constraints)
@@ -502,44 +494,48 @@ class BipartiteExtractor:
                    # use constraints as WHERE-clause

                    # NB we must cascade join because
-                    #    both hashtags and keywords are one-to-many
-                    #   => it renames scholars and hashtag tables
-                    #      into 'scholars_n_hashtags'
+                    #    orgs, hashtags and keywords are one-to-many
+                    #   => it renames tables into 'full_scholar'
                    sql_query = """
+                    SELECT * FROM (
                        SELECT
-                            scholars_n_hashtags.luid,
-                            scholars_n_hashtags.affiliation_id,
+                            sch_org_n_tags.*,

                            -- kws info
                            GROUP_CONCAT(keywords.kwstr) AS keywords_list

                        FROM (
                            SELECT
-                                scholars.*,
+                                scholars_and_orgs.*,
                                -- hts info
                                GROUP_CONCAT(hashtags.htstr) AS hashtags_list

+                            FROM (
+                              SELECT scholars.*,
+                                     -- org info
+                                     -- GROUP_CONCAT(orgs.orgid) AS orgs_ids_list,
+                                     GROUP_CONCAT(orgs_set.tostring) AS orgs_list
                              FROM scholars
+                              LEFT JOIN sch_org ON luid = sch_org.uid
+                              LEFT JOIN (
+                                SELECT * FROM orgs
+                              ) AS orgs_set ON sch_org.orgid = orgs_set.orgid
+                              GROUP BY luid
+                            ) AS scholars_and_orgs
                            LEFT JOIN sch_ht
                                ON uid = luid
                            JOIN hashtags
                                ON sch_ht.htid = hashtags.htid
                            GROUP BY luid
-                        ) AS scholars_n_hashtags
+                        ) AS sch_org_n_tags

                        -- two step JOIN for keywords
                        LEFT JOIN sch_kw
                            ON uid = luid
                        JOIN keywords
                            ON sch_kw.kwid = keywords.kwid
-                        -- we still must keep affiliations in case it's used in the WHERE-clause...
-                        LEFT JOIN affiliations
-                            ON affiliation_id = affid

-                        -- our filtering constraints fit here
-                        WHERE  %s
-
-                        AND (
+                        WHERE (
                            record_status = 'active'
                            OR
                            (record_status = 'legacy' AND valid_date >= NOW())
@@ -547,7 +543,13 @@ class BipartiteExtractor:

                        GROUP BY luid

-                    """ % (" AND ".join(sql_constraints))
+                    ) AS full_scholar
+                    -- our filtering constraints fit here
+                    WHERE  %s
+
+                    """ % " AND ".join(sql_constraints)
+
+                mlog("DEBUGSQL", "getScholarsList SELECT:  ", sql_query)

                # in both cases "*" or constraints
                self.cursor.execute(sql_query)
@@ -573,32 +575,55 @@ class BipartiteExtractor:
        Adding each connected scholar per unique_id

        (getting details for selected scholars into graph object)
-        # TODO do it along with previous step getScholarsList
+        # POSS if filters, could do it along with previous step getScholarsList
        # (less modular but a lot faster)
+
+        NB here scholar_array is actually a dict :/ ...
        """
        # debug
        # mlog("DEBUG", "MySQL extract scholar_array:", scholar_array)
+        # scholar_array = list(scholar_array.keys())[0:3]

+        # TODO loop could be after SELECT
        for scholar_id in scholar_array:
            sql3='''
                SELECT
-                    scholars_and_affiliations.*,
+                    scholars_and_orgs.*,
                    COUNT(keywords.kwid) AS keywords_nb,
                    GROUP_CONCAT(keywords.kwid) AS keywords_ids,
                    GROUP_CONCAT(kwstr) AS keywords_list
+                FROM (
+                    SELECT
+                        scholars_and_insts.*,
+                        -- small serializations here to avoid 2nd query
+                        GROUP_CONCAT(
+                          JSON_ARRAY(labs.name, labs.acro, labs.locname)
+                        ) AS labs_list
                    FROM (
                        SELECT
                            scholars.*,
-                        affiliations.*
-                    FROM scholars
-                    LEFT JOIN affiliations
-                        ON scholars.affiliation_id = affiliations.affid
+                            GROUP_CONCAT(
+                              JSON_ARRAY(insts.name, insts.acro, insts.locname)
+                            ) AS insts_list
+                        FROM
+                            scholars
+                            LEFT JOIN sch_org ON luid = sch_org.uid
+                            LEFT JOIN (
+                                SELECT * FROM orgs WHERE class = 'inst'
+                            ) AS insts ON sch_org.orgid = insts.orgid
                        WHERE (record_status = 'active'
                            OR (record_status = 'legacy' AND valid_date >= NOW()))
-                ) AS scholars_and_affiliations
+                        GROUP BY luid
+                    ) AS scholars_and_insts
+                    LEFT JOIN sch_org ON luid = sch_org.uid
+                    LEFT JOIN (
+                        SELECT * FROM orgs WHERE class = 'lab'
+                    ) AS labs ON sch_org.orgid = labs.orgid
+                    GROUP BY luid
+                ) AS scholars_and_orgs

                LEFT JOIN sch_kw
-                    ON sch_kw.uid = scholars_and_affiliations.luid
+                    ON sch_kw.uid = scholars_and_orgs.luid
                LEFT JOIN keywords
                    ON sch_kw.kwid = keywords.kwid
                WHERE luid = %s
@@ -623,6 +648,23 @@ class BipartiteExtractor:
                else:
                    pic_src = ''

+                # NB instead of secondary query for orgs.*, we can
+                # simply parse orgs infos
+                # and take labs[0] and insts[0]
+                labs  = list(map(
+                        lambda arr: Org(arr, org_class='lab'),
+                        loads('['+res3['labs_list'] +']')
+                ))
+                insts = list(map(
+                        lambda arr: Org(arr, org_class='insts'),
+                        loads('['+res3['insts_list']+']')
+                ))
+                mlog("DEBUGSQL", "main lab:", labs[0])
+                mlog("DEBUGSQL", "main inst:", insts[0])
+                # each lab is an array [name, acronym, location]
+
+
+                # all detailed node data
                ide="D::"+res3['initials']+("/%05i"%int(res3['luid']));
                info['id'] = ide;
                info['luid'] = res3['luid'];
@@ -635,13 +677,16 @@ class BipartiteExtractor:
                info['keywords_ids'] = res3['keywords_ids'].split(',') if res3['keywords_ids'] else [];
                info['keywords_list'] = res3['keywords_list'];
                info['country'] = res3['country'];
-                # info['ACR'] = res3['org_acronym']       # TODO create
+                info['ACR'] = labs[0].acro if labs[0].acro else labs[0].any
                #info['CC'] = res3['norm_country'];
                info['home_url'] = res3['home_url'];
-                info['team_lab'] = res3['team_lab'];
-                info['org'] = res3['org'];
-                # info['lab2'] = res3['lab2'];                 # TODO restore
-                # info['affiliation2'] = res3['affiliation2'];
+                info['team_lab'] = labs[0].tostring;
+                info['org'] = insts[0].tostring;
+
+                if len(labs) > 1:
+                    info['lab2'] = labs[1].tostring
+                if len(insts) > 1:
+                    info['affiliation2'] = insts[1].tostring
                info['hon_title'] = res3['hon_title'] if res3['hon_title'] else ""
                info['position'] = res3['position'];
                info['job_looking'] = res3['job_looking'];
@@ -975,14 +1020,13 @@ class BipartiteExtractor:
                    content += '<b>Position: </b>' +self.scholars[idNode]['position'].replace("&"," and ")+ '</br>'

                affiliation=""
-                if self.scholars[idNode]['team_lab'] and self.scholars[idNode]['team_lab'] != "":
+                if self.scholars[idNode]['team_lab'] and self.scholars[idNode]['team_lab'] not in ["", "_NULL"]:
                    affiliation += self.scholars[idNode]['team_lab']+ ','
                if self.scholars[idNode]['org'] and self.scholars[idNode]['org'] != "":
                    affiliation += self.scholars[idNode]['org']

-                # TODO restore if not redundant with org
-                # if self.scholars[idNode]['affiliation'] != "" or self.scholars[idNode]['lab'] != "":
-                #     content += '<b>Affiliation: </b>' + affiliation.replace("&"," and ") + '</br>'
+                if affiliation != "":
+                    content += '<b>Affiliation: </b>' + escape(affiliation) + '</br>'

                if len(self.scholars[idNode]['keywords_list']) > 3:
                    content += '<b>Keywords: </b>' + self.scholars[idNode]['keywords_list'].replace(",",", ")+'.</br>'
@@ -1009,8 +1053,6 @@ class BipartiteExtractor:
                else: node["CC"]="-"

                # Affiliation
-                # TODO restore with org_acronym
-                # node["ACR"] = self.scholars[idNode]["ACR"]
                node["ACR"] = self.scholars[idNode]["org"]
                if node["ACR"]=="": node["ACR"]="-"

@@ -1089,3 +1131,23 @@ class BipartiteExtractor:
        # mlog("DEBUG", "nodes2",edgesB)
        # mlog("DEBUG", "bipartite",edgesAB)
        return graph
+
+
+
+def quotestr(a_str):
+    """
+    Helper function if we need to quote values ourselves.
+
+    Backslash-escapes every single or double quote of a_str that is not
+    already escaped, so the result can be embedded in an SQL string
+    literal delimited by either quote character.
+
+    A quote counts as "already escaped" only when it is preceded by an
+    odd number of backslashes: in `\\'` the two backslashes form an
+    escaped backslash, so the quote that follows is bare and must still
+    be escaped.
+
+    NB: relies on backslash escapes being enabled on the SQL side.
+
+    @param a_str: the raw string value
+    @return: the same string with its bare quotes backslash-escaped
+    """
+    # group 1 = an even run of backslashes (escaped backslashes, kept as is)
+    # group 2 = the bare quote character to escape
+    return sub(r"""(?<!\\)((?:\\\\)*)(['"])""", r"\1\\\2", a_str)
+
+
+def type_to_sql_filter(val):
+    """
+    Helper function if we need to build test filters ourselves:
+    returns the right-hand side of an SQL comparison for a python value.
+
+      int   => '= <val>'
+      float => '= <val>'  (repr formatting, so that small values such as
+                           1e-07 are not flattened to 0.000000 by '%f')
+      str   => "LIKE '%<val>%'"  (substring match)
+
+    NB: the LIKE wildcards % and _ occurring inside val are not escaped
+        and keep their wildcard meaning.
+
+    @param val: an int, float or str
+    @return: the SQL fragment to append after the column name
+    @raise TypeError: if val is of any other type
+    """
+    if isinstance(val, int):
+        rhs = '= %i' % val
+    elif isinstance(val, float):
+        rhs = '= %r' % val
+    elif isinstance(val, str):
+        # (an exact match variant would be an '=' test instead of LIKE)
+        # single-quoted literal: the quote character that quotestr() escapes
+        rhs = "LIKE '%" + quotestr(val) + "%'"
+    else:
+        # explicit error instead of an unbound 'rhs' at the return line
+        raise TypeError(
+            "type_to_sql_filter: unsupported value type %s" % type(val).__name__
+        )
+    return rhs
--- a/setup/toolbox/org.tostring.sql
+++ b/setup/toolbox/org.tostring.sql
+
+-- if serialization must be parsable, separators need to be absent tokens
+-- (ie sequences that never occur inside name, acro or locname)
+SELECT
+    -- our convention (eg in dbdatapi.extract)
+    -- NB: CONCAT returns NULL as soon as one argument is NULL, so each
+    --     nullable column is replaced by '' to keep the other segments
+    CONCAT(IFNULL(name, ''), '((', IFNULL(acro, ''), '))', ';;', IFNULL(locname, ''))
+FROM orgs
+ORDER BY RAND()
+LIMIT 10;
+
+
+
+-- if serialization is just for display : for human-readable labels
+-- with CONCAT_WS => nice because removes null segments eg '('+NULL+')'
+--
+-- How it works: each inner CONCAT builds one segment and is NULL when its
+-- column is NULL; CONCAT_WS (here with an empty separator) then skips the
+-- NULL segments and glues the remaining ones together.
+--
+-- NOTE(review): the name segment always ends with a space, so a row with
+-- a name and a locname but no acro presumably renders as 'name , locname'
+-- (space before the comma) -- confirm whether this is acceptable.
+SELECT
+    name,
+    acro,
+    locname,
+    CONCAT_WS( '',
+               CONCAT(name, ' '),
+               CONCAT('(',acro,')'),
+               CONCAT(', ', locname) )
+FROM orgs
+ORDER BY RAND()
+LIMIT 10;
+
+
+
+-- with CASE
+-- Builds a display label "acro (name), locname" from whichever of the
+-- three columns are not NULL. Branches are tested top to bottom and the
+-- first match wins, so each branch may assume that the more specific
+-- NULL combinations above it did not match.
+SELECT
+    name,
+    acro,
+    locname,
+
+    -- 3 vars NULL or not => 8 cases
+    -- but by def either acro or name is not null => 7 cases
+    -- NOTE(review): the case excluded "by def" (name and acro both NULL,
+    -- locname set) is not handled explicitly: it reaches the branch
+    -- "WHEN name IS NULL" below, where CONCAT with a NULL acro presumably
+    -- yields NULL -- confirm the constraint really holds in the table.
+    CASE
+        -- nothing at all: placeholder label
+        WHEN name IS NULL AND acro IS NULL AND locname IS NULL
+        THEN "_NULL"
+
+        -- only the acronym is known
+        WHEN name IS NULL AND locname IS NULL
+        THEN acro
+
+        -- only the name is known
+        WHEN acro IS NULL AND locname IS NULL
+        THEN name
+
+        -- acronym and name, no location
+        WHEN locname IS NULL
+        THEN CONCAT (acro, ' (' ,name,')')
+
+        -- locname cases
+        -- (locname is not NULL from here on, in the handled cases)
+        WHEN name IS NULL
+        THEN CONCAT (acro, ', ', locname)
+
+        WHEN acro IS NULL
+        THEN CONCAT (name, ', ', locname)
+
+        -- all three columns are set
+        -- eg "I3S (Laboratoire d'Informatique, Signaux et Systèmes), Sophia Antipolis, France"
+        ELSE CONCAT (acro, ' (' ,name,'), ', locname)
+    END AS tostring
+FROM orgs
+ORDER BY RAND()
+LIMIT 10;
+
+
+-- EXAMPLES:
+-- +-----------------------------------------------------+-------------+--------------------------+----------------------------------------------------------------------------------+
+-- | name                                                | acro        | locname                  | tostring                                                                         |
+-- +-----------------------------------------------------+-------------+--------------------------+----------------------------------------------------------------------------------+
+-- | Dynamiques et écologie des paysages agroforestiers  | DYNAFOR     | NULL                     | DYNAFOR (Dynamiques et écologie des paysages agroforestiers)                     |
+-- | University of Waterloo                              | NULL        | Waterloo, Canada         | University of Waterloo, Waterloo, Canada                                         |
+-- | University of Arizona                               | NULL        | Tucson, Arizona, USA     | University of Arizona, Tucson, Arizona, USA                                      |
+-- | Laboratoire d'Informatique, Signaux et Systèmes     | I3S         | Sophia Antipolis, France | I3S (Laboratoire d'Informatique, Signaux et Systèmes), Sophia Antipolis, France  |
+-- | Visvesvaraya National Institute of Technology       | NULL        | NULL                     | Visvesvaraya National Institute of Technology                                    |
+-- | Sciences Po                                         | NULL        | Paris, France            | Sciences Po, Paris, France                                                       |
+-- | School of Human Evolution and Social Change         | SHESC       | NULL                     | SHESC (School of Human Evolution and Social Change)                              |
+-- | NULL                                                | DSSCQ       | NULL                     | DSSCQ                                                                            |
+-- +-----------------------------------------------------+-------------+--------------------------+----------------------------------------------------------------------------------+
--- a/templates/base_layout.html
+++ b/templates/base_layout.html
@@ -123,6 +123,11 @@
                               onclick='$(this).parents(".dropdown-menu").toggle();'>
                               Filter by laboratory</a>
                        </li>
+                        <li>
+                            <a id="addfilterorganization" href="#"
+                               onclick='$(this).parents(".dropdown-menu").toggle();'>
+                               Filter by organization</a>
+                        </li>
                    </ul>
                </li>
                <li class="comex-nav-item">

--- a/templates/rootindex.html
+++ b/templates/rootindex.html
@@ -160,6 +160,5 @@
        //               ---------
        var uinfo = {{ (current_user.json_info | safe) if current_user.info else ("null" | safe) }};
    </script>
-    <script src="{{ url_for('static', filename='js/comex_page_rootindex.js') }}"></script>

 {% endblock %}