Commit 9b74f9fd authored by Romain Loth's avatar Romain Loth

org autocomplete 2/2 + fix string parsing + harmonize varnames: orgs is all,...

org autocomplete 2/2 + fix string parsing + harmonize varnames: orgs is all, inst is big org, lab is small org
parent 122ac64e
...@@ -4,5 +4,6 @@ ...@@ -4,5 +4,6 @@
logs/ logs/
logs/services.log logs/services.log
data/shared_user_img/* data/shared_user_img/*
services/.idea/
.agignore .agignore
__pycache__/ __pycache__/
### List of features that were deactivated
During the rewrite we deactivated some data retrieval to match the new SQL tables from registration:
- the organizations and labs tables are no longer separate and their detailed addresses don't exist => TODO RESTORE in `directory_content.php` (and therefore incomplete info is passed to `labs_list.php` and `orga_list.php`)
- in `print_directory.php`, all the following columns are now ignored: 'css_voter', 'css_member', 'status', 'lab2', 'affiliation2', 'address', 'city', 'postal_code', 'phone', 'mobile', 'fax', 'affiliation_acronym' => TODO RESTORE
- in `print_scholar_directory.php`
- similar changes as above
...@@ -15,13 +15,13 @@ SELECT * FROM ( ...@@ -15,13 +15,13 @@ SELECT * FROM (
SELECT SELECT
scholars_and_labs.*, scholars_and_labs.*,
-- GROUP_CONCAT(insts.orgid SEPARATOR ',') AS insts_ids, -- GROUP_CONCAT(insts.orgid SEPARATOR ',') AS insts_ids,
GROUP_CONCAT(insts.tostring SEPARATOR '%%%') AS insts_list GROUP_CONCAT(insts.label SEPARATOR '%%%') AS insts_list
FROM ( FROM (
SELECT SELECT
scholars.*, scholars.*,
-- GROUP_CONCAT(labs.orgid SEPARATOR ',') AS labs_ids, -- GROUP_CONCAT(labs.orgid SEPARATOR ',') AS labs_ids,
GROUP_CONCAT(labs.tostring SEPARATOR '%%%') AS labs_list GROUP_CONCAT(labs.label SEPARATOR '%%%') AS labs_list
FROM scholars FROM scholars
LEFT JOIN sch_org AS map_labs LEFT JOIN sch_org AS map_labs
ON map_labs.uid = luid ON map_labs.uid = luid
......
...@@ -3,13 +3,13 @@ ...@@ -3,13 +3,13 @@
-- (for suggestions and/or than mapping) -- (for suggestions and/or than mapping)
SELECT orgs.*, SELECT orgs.*,
GROUP_CONCAT( tgt_tostring ORDER BY tgt_freq DESC SEPARATOR '%%%') GROUP_CONCAT( tgt_label ORDER BY tgt_freq DESC SEPARATOR '%%%')
AS related_insts AS related_insts
FROM orgs FROM orgs
LEFT JOIN ( LEFT JOIN (
SELECT sch_org.orgid AS src_orgid, SELECT sch_org.orgid AS src_orgid,
sch_org2.orgid AS tgt_orgid, sch_org2.orgid AS tgt_orgid,
orgs2.tostring AS tgt_tostring, orgs2.label AS tgt_label,
count(*) AS tgt_freq count(*) AS tgt_freq
FROM sch_org FROM sch_org
LEFT JOIN sch_org AS sch_org2 LEFT JOIN sch_org AS sch_org2
......
<?php <?php
/* /*
* To change this template, choose Tools | Templates * Common utility functions
* and open the template in the editor.
*/ */
function clean_exp($string){ function clean_exp($string){
......
...@@ -31,143 +31,141 @@ $loop = 0; ...@@ -31,143 +31,141 @@ $loop = 0;
// NB this array was prepared in print_directory or print_scholar_directory // NB this array was prepared in print_directory or print_scholar_directory
// ----------------------------------------------- uncomment foreach ($scholars as $scholar) {
// foreach ($scholars as $scholar) {
// $scholar['position'] = weedout_alt_nulls($scholar['position']) ;
// $scholar['position'] = weedout_alt_nulls($scholar['position']) ;
// // debug
// // debug // var_dump($scholar);
// // var_dump($scholar);
// if ($loop % 100){
// if ($loop % 100){ set_time_limit(20);
// set_time_limit(20); }
// } $loop+=1;
// $loop+=1; $content.= '<div class="row">
// $content.= '<div class="row"> <div class="span12">
// <div class="span12"> <div class="row">
// <div class="row"> <div class="span9" align="justify">';
// <div class="span9" align="justify">'; $content .= '<div>';
// $content .= '<div>';
// // remote pictures url 'http://some.org/path/blabla.png'
// // remote pictures url 'http://some.org/path/blabla.png' // or local '/data/shared_user_img/blabla.png'
// // or local '/data/shared_user_img/blabla.png' if ($scholar['pic_src'] != null) {
// if ($scholar['pic_src'] != null) { $pic_src = $scholar['pic_src'] ;
// $pic_src = $scholar['pic_src'] ; if ($_SERVER['REQUEST_SCHEME'] == 'https') {
// if ($_SERVER['REQUEST_SCHEME'] == 'https') { $pic_src = preg_replace('/^http:/i', 'https:', $pic_src) ;
// $pic_src = preg_replace('/^http:/i', 'https:', $pic_src) ; }
// } $content .= '<img style="margin: 7px 10px 10px 0px" src="'. $pic_src . '" width="' . $imsize . 'px" align="left">';
// $content .= '<img style="margin: 7px 10px 10px 0px" src="'. $pic_src . '" width="' . $imsize . 'px" align="left">'; }
// } else {
// else { if (count($scholars) < 2000) {
// if (count($scholars) < 2000) { $im_id = floor(rand(0, 11));
// $im_id = floor(rand(0, 11)); $content .= '<img style="margin: 7px 10px 10px 0px" src="static/img/' . $im_id . '.png" width="' . $imsize . 'px" align="left">';
// $content .= '<img style="margin: 7px 10px 10px 0px" src="static/img/' . $im_id . '.png" width="' . $imsize . 'px" align="left">'; }
// } }
// }
// $content .= '<h2 >' . $scholar['title'] . ' ' . $scholar['first_name'] . ' ' . $scholar['mid_initial'] . ' ' . $scholar['last_name'] .
// $content .= '<h2 >' . $scholar['title'] . ' ' . $scholar['first_name'] . ' ' . $scholar['mid_initial'] . ' ' . $scholar['last_name'] . ' <small> - ' . $scholar['country'] . '</small></h2>';
// ' <small> - ' . $scholar['country'] . '</small></h2>';
//
// if (($scholar['position'] != null)||count($scholar['labs'])||count($scholar['institutions'])) {
// if (($scholar['position'] != null)||count($scholar['labs'])||count($scholar['institutions'])) { $content .= '<dl>';
// $content .= '<dl>'; }
// }
// if ($scholar['position'] != null) {
// if ($scholar['position'] != null) { $content .= '<dt>' . $scholar['position'] . '</dt>';
// $content .= '<dt>' . $scholar['position'] . '</dt>'; }
// } $lab = '';
// $lab = '';
// // new way: list of org.label values
// // new way: list of org.tostring values if (count($scholar['labs'])) {
// if (count($scholar['labs'])) { $labs_html = implode(
// $labs_html = implode( '<br>',
// '<br>', array_map(
// array_map( "clean_exp",
// "clean_exp", array_map("esc_html",
// array_map("esc_html", array_map(
// array_map( "weedout_alt_nulls",
// "weedout_alt_nulls", $scholar['labs']
// $scholar['labs'] )
// ) )
// ) )
// ) );
// ); $content .= '<dd class="labs-of-scholar">' ;
// $content .= '<dd class="labs-of-scholar">' ; $content .= $labs_html ;
// $content .= $labs_html ; $content .= '</dd> ';
// $content .= '</dd> ';
// # we don't need to recount the organisations globally,
// # we don't need to recount the organisations globally, # because we already have $lab_counts (per id)
// # because we already have $lab_counts (per id) }
// }
// // new way: list of org.label values
// // new way: list of org.tostring values if (count($scholar['institutions'])) {
// if (count($scholar['institutions'])) { $institutions_html = implode(
// $institutions_html = implode( '<br>',
// '<br>', array_map(
// array_map( "clean_exp",
// "clean_exp", array_map("esc_html",
// array_map("esc_html", $scholar['institutions']
// $scholar['institutions'] )
// ) )
// ) );
// );
// $content .= '<dd class="institutions-of-scholar">' ;
// $content .= '<dd class="institutions-of-scholar">' ; $content .= $institutions_html ;
// $content .= $institutions_html ; $content .= '</dd> ';
// $content .= '</dd> ';
// # here also we already have $insts_counts (per id)
// # here also we already have $insts_counts (per id) }
// }
//
// // POSS: url of lab as link, if filled in DB
// // POSS: url of lab as link, if filled in DB
// if (($scholar['position'] != null)
// if (($scholar['position'] != null) ||count($scholar['labs'])
// ||count($scholar['labs']) ||count($scholar['institutions'])
// ||count($scholar['institutions']) ) {
// ) { $content .= '</dl>';
// $content .= '</dl>'; }
// }
//
// $content .= '</div>';
// $content .= '</div>';
//
// if ($scholar['interests'] != null) {
// if ($scholar['interests'] != null) {
// $htmlsafe_interests = str_replace('%%%', '<br/>',
// $htmlsafe_interests = str_replace('%%%', '<br/>', htmlspecialchars($scholar['interests'],
// htmlspecialchars($scholar['interests'], ENT_HTML5, 'UTF-8')
// ENT_HTML5, 'UTF-8') );
// ); $content .= '<div>';
// $content .= '<div>'; $content .= '<h4>Research</h4>';
// $content .= '<h4>Research</h4>'; $content .= '<p>' . $htmlsafe_interests . '</p>';
// $content .= '<p>' . $htmlsafe_interests . '</p>'; $content .= '</div>';
// $content .= '</div>'; }
// }
// $content .= '</div>';
// $content .= '</div>';
//
// if ($scholar['keywords'] != null) {
// if ($scholar['keywords'] != null) { $content .= '<div class="span3" align="left">';
// $content .= '<div class="span3" align="left">';
// if ($scholar['keywords'] != null){
// if ($scholar['keywords'] != null){ $content .= '<i class="icon-tags"></i> ' . clean_exp($scholar['keywords']). '.<br/><br/>';
// $content .= '<i class="icon-tags"></i> ' . clean_exp($scholar['keywords']). '.<br/><br/>'; }
// } $content .= '</div>';
// $content .= '</div>'; }
// } $content .= '</div>';
// $content .= '</div>';
// $content .= '</div>';
// $content .= '</div>'; $content .= '</div>';
// $content .= '</div>';
// $content .= '
// $content .= ' <center><img src="static/img/bar.png"></center>';
// <center><img src="static/img/bar.png"></center>'; $content .= '<br/>';
// $content .= '<br/>'; $content .= '<br/>';
// $content .= '<br/>'; // fin du profil
// // fin du profil }
// }
// ----------------------------------------------- uncomment
// if (strcmp(substr($lab_query, 0,2),'OR')==0){ // if (strcmp(substr($lab_query, 0,2),'OR')==0){
// $lab_query=substr($lab_query,2); // $lab_query=substr($lab_query,2);
...@@ -221,13 +219,13 @@ for($i = 0; $i < $n_steps; $i++) { ...@@ -221,13 +219,13 @@ for($i = 0; $i < $n_steps; $i++) {
// //
$sql = <<< LABSQLEXTENDED $sql = <<< LABSQLEXTENDED
SELECT orgs.*, SELECT orgs.*,
GROUP_CONCAT( tgt_tostring ORDER BY tgt_freq DESC SEPARATOR '%%%') GROUP_CONCAT( tgt_label ORDER BY tgt_freq DESC SEPARATOR '%%%')
AS related_insts AS related_insts
FROM orgs FROM orgs
LEFT JOIN ( LEFT JOIN (
SELECT sch_org.orgid AS src_orgid, SELECT sch_org.orgid AS src_orgid,
sch_org2.orgid AS tgt_orgid, sch_org2.orgid AS tgt_orgid,
orgs2.tostring AS tgt_tostring, orgs2.label AS tgt_label,
count(*) AS tgt_freq count(*) AS tgt_freq
FROM sch_org FROM sch_org
LEFT JOIN sch_org AS sch_org2 LEFT JOIN sch_org AS sch_org2
......
...@@ -292,13 +292,13 @@ SELECT * FROM ( ...@@ -292,13 +292,13 @@ SELECT * FROM (
SELECT SELECT
scholars_and_labs.*, scholars_and_labs.*,
GROUP_CONCAT(insts.orgid SEPARATOR ',') AS insts_ids, GROUP_CONCAT(insts.orgid SEPARATOR ',') AS insts_ids,
GROUP_CONCAT(insts.tostring SEPARATOR '%%%') AS insts_list GROUP_CONCAT(insts.label SEPARATOR '%%%') AS insts_list
FROM ( FROM (
SELECT SELECT
scholars.*, scholars.*,
GROUP_CONCAT(labs.orgid SEPARATOR ',') AS labs_ids, GROUP_CONCAT(labs.orgid SEPARATOR ',') AS labs_ids,
GROUP_CONCAT(labs.tostring SEPARATOR '%%%') AS labs_list GROUP_CONCAT(labs.label SEPARATOR '%%%') AS labs_list
FROM scholars FROM scholars
LEFT JOIN sch_org AS map_labs LEFT JOIN sch_org AS map_labs
ON map_labs.uid = luid ON map_labs.uid = luid
......
...@@ -120,13 +120,13 @@ if ($userid) { ...@@ -120,13 +120,13 @@ if ($userid) {
SELECT SELECT
scholars_and_labs.*, scholars_and_labs.*,
-- GROUP_CONCAT(insts.orgid SEPARATOR ',') AS insts_ids, -- GROUP_CONCAT(insts.orgid SEPARATOR ',') AS insts_ids,
GROUP_CONCAT(insts.tostring SEPARATOR '%%%') AS insts_list GROUP_CONCAT(insts.label SEPARATOR '%%%') AS insts_list
FROM ( FROM (
SELECT SELECT
scholars.*, scholars.*,
GROUP_CONCAT(labs.orgid SEPARATOR ',') AS labs_ids, GROUP_CONCAT(labs.orgid SEPARATOR ',') AS labs_ids,
GROUP_CONCAT(labs.tostring SEPARATOR '%%%') AS labs_list GROUP_CONCAT(labs.label SEPARATOR '%%%') AS labs_list
FROM scholars FROM scholars
LEFT JOIN sch_org AS map_labs LEFT JOIN sch_org AS map_labs
ON map_labs.uid = luid ON map_labs.uid = luid
......
...@@ -42,20 +42,22 @@ USER_COLS = [ ...@@ -42,20 +42,22 @@ USER_COLS = [
("record_status", False, 25) ("record_status", False, 25)
] ]
# NAME, NOT NULL, MAXCHARS KEY elt
ORG_COLS = [ ORG_COLS = [
("class", False, 25), # "lab" or "inst" ("class", False, 25, True), # "lab" or "inst"
("name", False, 120), ("name", False, 120, True),
("acro", False, 30), # acronym or short name ("acro", False, 30, True), # acronym or short name
("locname", False, 120), ("locname", False, 120, False),
("inst_type", False, 50), ("inst_type", False, 50, None), # key elt only for inst
("lab_code", False, 25), # not in GUI yet ("lab_code", False, 25, False), # not in GUI yet
("url", False, 180), # not in GUI yet ("url", False, 180, False), # not in GUI yet
("contact_name", False, 80), # not in GUI yet ("contact_name", False, 80, False), # not in GUI yet
("contact_email", False, 255) # not in GUI yet ("contact_email", False, 255, False) # not in GUI yet
# also in concatenations: # also in concatenations:
# label = name + acro # label = name + acro
# tostring = name + acro + locname # toarray = json [name, acro, locname]
] ]
...@@ -148,7 +150,7 @@ def rm_scholar(luid, cmx_db = None): ...@@ -148,7 +150,7 @@ def rm_scholar(luid, cmx_db = None):
def get_full_scholar(uid, cmx_db = None): def get_full_scholar(uid, cmx_db = None):
""" """
uid : str uid : int or int str
local user id aka luid local user id aka luid
Autonomous function to be used by User class Autonomous function to be used by User class
...@@ -165,9 +167,9 @@ def get_full_scholar(uid, cmx_db = None): ...@@ -165,9 +167,9 @@ def get_full_scholar(uid, cmx_db = None):
db = connect_db() db = connect_db()
db_c = db.cursor(DictCursor) db_c = db.cursor(DictCursor)
#
print('DBG', 'uid', uid) # print('DBG', 'uid', uid)
print('DBG', 'type(uid)', type(uid)) # print('DBG', 'type(uid)', type(uid))
# one user + all linked infos concatenated in one row # one user + all linked infos concatenated in one row
# <= 3 LEFT JOINS sequentially GROUPed # <= 3 LEFT JOINS sequentially GROUPed
...@@ -326,7 +328,7 @@ def get_full_scholar(uid, cmx_db = None): ...@@ -326,7 +328,7 @@ def get_full_scholar(uid, cmx_db = None):
else: else:
org_info = """SELECT name, acro, locname, org_info = """SELECT name, acro, locname,
inst_type, lab_code, inst_type, lab_code,
tostring label
FROM orgs WHERE orgid IN (%s)""" % ','.join(id_list) FROM orgs WHERE orgid IN (%s)""" % ','.join(id_list)
mlog('DEBUGSQL', "org_info stmt :", org_info) mlog('DEBUGSQL', "org_info stmt :", org_info)
...@@ -336,6 +338,9 @@ def get_full_scholar(uid, cmx_db = None): ...@@ -336,6 +338,9 @@ def get_full_scholar(uid, cmx_db = None):
urow_dict[orgclass] = new_cursor.fetchall() urow_dict[orgclass] = new_cursor.fetchall()
print("get_full_scholar orgs::", urow_dict[orgclass])
# print('===urow_dict with orgs[]===') # print('===urow_dict with orgs[]===')
# print(urow_dict) # print(urow_dict)
# print('==/urow_dict with orgs[]===') # print('==/urow_dict with orgs[]===')
...@@ -593,6 +598,22 @@ def get_or_create_tokitems(tok_list, cmx_db, tok_table='keywords'): ...@@ -593,6 +598,22 @@ def get_or_create_tokitems(tok_list, cmx_db, tok_table='keywords'):
return found_ids return found_ids
def rm_sch_org_links(luid, cmx_db = None):
if cmx_db:
db = cmx_db
else:
db = connect_db()
db_c = db.cursor(DictCursor)
luid = int(luid)
db_c.execute(
'DELETE FROM sch_org WHERE uid = %i' % luid
)
if not cmx_db:
db.close()
def record_sch_org_link(luid, orgid, cmx_db = None): def record_sch_org_link(luid, orgid, cmx_db = None):
if cmx_db: if cmx_db:
db = cmx_db db = cmx_db
...@@ -618,18 +639,17 @@ def record_org_org_link(orgid_src, orgid_tgt, cmx_db = None): ...@@ -618,18 +639,17 @@ def record_org_org_link(orgid_src, orgid_tgt, cmx_db = None):
""" """
pass pass
def get_or_create_org(org_info, cmx_db = None): def get_or_create_org(org_info, oclass, cmx_db = None):
""" """
(scholar's parent org(s)) ---> lookup/add to *orgs* table -> orgid (scholar's parent org(s)) ---> lookup/add to *orgs* table -> orgid
1) query to *orgs* table 1) query to *orgs* table
<= unicity constraint is oclass + name + acro + org_type (<=> is_key)
=> £TODO if institution almost matches API to send suggestion
- then TODO also allow completing existing entry
2) return id 2) return id
=> TODO if institution almost matches API to send suggestion
=> unicity constraint on institution + lab + org_type
=> if an institution matches return orgid => if an institution matches return orgid
=> if no institution matches create new and return orgid => if no institution matches create new and return orgid
! WIP !
""" """
if cmx_db: if cmx_db:
db = cmx_db db = cmx_db
...@@ -642,28 +662,44 @@ def get_or_create_org(org_info, cmx_db = None): ...@@ -642,28 +662,44 @@ def get_or_create_org(org_info, cmx_db = None):
db_qstrvals = [] db_qstrvals = []
db_constraints = [] db_constraints = []
if oclass:
org_info['class'] = oclass
mlog("INFO", "get_or_create_org, org_info:", org_info) mlog("INFO", "get_or_create_org, org_info:", org_info)
for colinfo in ORG_COLS: for colinfo in ORG_COLS:
colname = colinfo[0] colname = colinfo[0]
# is_key <=> field is part of the distinctive "signature" of a known org
if colname == 'inst_type':
is_key = (oclass == "inst")
else:
is_key = colinfo[3]
val = org_info.get(colname, None) val = org_info.get(colname, None)
if val != None: if val != None:
val = str(normalize_forms(normalize_chars(val, rm_qt=True))) val = str(normalize_forms(normalize_chars(val, rm_qt=True)))
quotedstrval = "'"+val+"'"
# for insert if val and len(val):
quotedstrval = "'"+val+"'"
# for insert, if needed later
db_tgtcols.append(colname) db_tgtcols.append(colname)
db_qstrvals.append(quotedstrval) db_qstrvals.append(quotedstrval)
# for select if is_key:
db_constraints.append("%s = %s" % (colname, quotedstrval)) # for select
db_constraints.append("%s = %s" % (colname, quotedstrval))
# being NULL is also a distinctive feature if is_key
else: else:
db_constraints.append("%s IS NULL" % colname) if is_key:
db_constraints.append("%s IS NULL" % colname)
db_cursor = cmx_db.cursor() db_cursor = cmx_db.cursor()
mlog("DEBUGSQL", "SELECT org.. WHERE %s" % ("\n AND ".join(db_constraints))) mlog("INFO", "SELECT org.. WHERE %s" % ("\n AND ".join(db_constraints)))
n_matched = db_cursor.execute( n_matched = db_cursor.execute(
'SELECT orgid FROM orgs WHERE %s' % 'SELECT orgid FROM orgs WHERE %s' %
...@@ -673,7 +709,7 @@ def get_or_create_org(org_info, cmx_db = None): ...@@ -673,7 +709,7 @@ def get_or_create_org(org_info, cmx_db = None):
# ok existing affiliation => row id # ok existing affiliation => row id
if n_matched == 1: if n_matched == 1:
the_aff_id = db_cursor.fetchone()[0] the_aff_id = db_cursor.fetchone()[0]
mlog("DEBUG", "Found affiliation (orgid %i) (WHERE %s)" % (the_aff_id, " AND ".join(db_constraints))) mlog("INFO", "Found affiliation (orgid %i) (WHERE %s)" % (the_aff_id, " AND ".join(db_constraints)))
# no matching affiliation => add => row id # no matching affiliation => add => row id
elif n_matched == 0: elif n_matched == 0:
...@@ -684,7 +720,7 @@ def get_or_create_org(org_info, cmx_db = None): ...@@ -684,7 +720,7 @@ def get_or_create_org(org_info, cmx_db = None):
) )
the_aff_id = db_cursor.lastrowid the_aff_id = db_cursor.lastrowid
cmx_db.commit() cmx_db.commit()
mlog("DEBUG", "dbcrud: added org '%s'" % str(db_qstrvals)) mlog("INFO", "dbcrud: added org '%s'" % str(db_qstrvals))
else: else:
raise Exception("ERROR: get_or_create_org non-unique match '%s'" % str(db_qstrvals)) raise Exception("ERROR: get_or_create_org non-unique match '%s'" % str(db_qstrvals))
......
...@@ -46,12 +46,16 @@ FIELDS_FRONTEND_TO_SQL = { ...@@ -46,12 +46,16 @@ FIELDS_FRONTEND_TO_SQL = {
'type': "EQ_relation", 'type': "EQ_relation",
'grouped': "gender"}, 'grouped': "gender"},
"organizations": {'col':"orgs.tostring", "organizations": {'col':"orgs.label",
'class': "inst", 'class': "*", # all organizations
'type': "LIKE_relation", 'type': "LIKE_relation",
'grouped': "orgs_list"}, 'grouped': "orgs_list"},
"laboratories": {'col':"orgs.tostring", "institutions": {'col':"orgs.label",
'class': "lab", 'class': "inst", # <= local where clause
'type': "LIKE_relation",
'grouped': "orgs_list"},
"laboratories": {'col':"orgs.label",
'class': "lab", # <= idem
'type': "LIKE_relation", 'type': "LIKE_relation",
'grouped': "orgs_list"}, 'grouped': "orgs_list"},
# TODO use # TODO use
...@@ -271,7 +275,7 @@ class Org: ...@@ -271,7 +275,7 @@ class Org:
# DB specifications say that at least one of name||acr is NOT NULL # DB specifications say that at least one of name||acr is NOT NULL
self.any = self.acro if self.acro else self.name self.any = self.acro if self.acro else self.name
self.tostring = ( ( self.name if self.name else "") self.label = ( ( self.name if self.name else "")
+ ((' ('+self.acro+')') if self.acro else "") + ((' ('+self.acro+')') if self.acro else "")
+ ((', '+self.locname) if self.locname else "") + ((', '+self.locname) if self.locname else "")
) )
...@@ -489,6 +493,7 @@ class BipartiteExtractor: ...@@ -489,6 +493,7 @@ class BipartiteExtractor:
sql_constraints.append("(%s)" % clause) sql_constraints.append("(%s)" % clause)
# debug # debug
# £TODO_ORG rm
mlog("INFO", "SELECTing active users with sql_constraints", sql_constraints) mlog("INFO", "SELECTing active users with sql_constraints", sql_constraints)
# use constraints as WHERE-clause # use constraints as WHERE-clause
...@@ -514,7 +519,7 @@ class BipartiteExtractor: ...@@ -514,7 +519,7 @@ class BipartiteExtractor:
SELECT scholars.*, SELECT scholars.*,
-- org info -- org info
-- GROUP_CONCAT(orgs.orgid) AS orgs_ids_list, -- GROUP_CONCAT(orgs.orgid) AS orgs_ids_list,
GROUP_CONCAT(orgs_set.tostring) AS orgs_list GROUP_CONCAT(orgs_set.label) AS orgs_list
FROM scholars FROM scholars
LEFT JOIN sch_org ON luid = sch_org.uid LEFT JOIN sch_org ON luid = sch_org.uid
LEFT JOIN ( LEFT JOIN (
...@@ -680,13 +685,13 @@ class BipartiteExtractor: ...@@ -680,13 +685,13 @@ class BipartiteExtractor:
info['ACR'] = labs[0].acro if labs[0].acro else labs[0].any info['ACR'] = labs[0].acro if labs[0].acro else labs[0].any
#info['CC'] = res3['norm_country']; #info['CC'] = res3['norm_country'];
info['home_url'] = res3['home_url']; info['home_url'] = res3['home_url'];
info['team_lab'] = labs[0].tostring; info['team_lab'] = labs[0].label;
info['org'] = insts[0].tostring; info['org'] = insts[0].label;
if len(labs) > 1: if len(labs) > 1:
info['lab2'] = labs[1].tostring info['lab2'] = labs[1].label
if len(insts) > 1: if len(insts) > 1:
info['affiliation2'] = insts[1].tostring info['affiliation2'] = insts[1].label
info['hon_title'] = res3['hon_title'] if res3['hon_title'] else "" info['hon_title'] = res3['hon_title'] if res3['hon_title'] else ""
info['position'] = res3['position']; info['position'] = res3['position'];
info['job_looking'] = res3['job_looking']; info['job_looking'] = res3['job_looking'];
......
...@@ -101,12 +101,12 @@ SOURCE_FIELDS = [ ...@@ -101,12 +101,12 @@ SOURCE_FIELDS = [
("pic_file", False, None), # saved separately ("pic_file", False, None), # saved separately
# => for *scholars* table (optional) # => for *scholars* table (optional)
("lab_label", True, None), # ~ /acro (name)/ ("lab_label", True, "sorg"), # ~ /name (acro)?/
("lab_locname", True, None), # 'Paris, France' ("lab_locname", True, None), # 'Paris, France'
("inst_label", True, None), # ~ /acro (name)/ ("inst_label", True, "sorg"), # ~ /name (acro)?/
("inst_type", False, None), # predefined values ("inst_type", False, None), # predefined values
( "other_inst_type", True, None), # +=> org_type ( "other_inst_type", True, None), # +=> org_type
# => for *orgs* table via sort_affiliation_records # => for *orgs* table via parse_affiliation_records
("keywords", True, None), ("keywords", True, None),
# => for *keywords* table (after split str) # => for *keywords* table (after split str)
...@@ -765,7 +765,7 @@ def show_privacy(): ...@@ -765,7 +765,7 @@ def show_privacy():
########### SUBS ########### ########### SUBS ###########
def sort_affiliation_records(clean_records): def parse_affiliation_records(clean_records):
""" """
Transform GUI side input data into at most 2 orgs objects for DB Transform GUI side input data into at most 2 orgs objects for DB
...@@ -792,6 +792,7 @@ def sort_affiliation_records(clean_records): ...@@ -792,6 +792,7 @@ def sort_affiliation_records(clean_records):
- We return a map with 2 key/value submaps for lab and institutions - We return a map with 2 key/value submaps for lab and institutions
""" """
new_orgs = {'lab': None, 'inst': None} new_orgs = {'lab': None, 'inst': None}
print(clean_records)
for org_class in new_orgs: for org_class in new_orgs:
# can't create org without some kind of label # can't create org without some kind of label
if (org_class+"_label" not in clean_records if (org_class+"_label" not in clean_records
...@@ -804,30 +805,49 @@ def sort_affiliation_records(clean_records): ...@@ -804,30 +805,49 @@ def sort_affiliation_records(clean_records):
# 1) label analysis # 1) label analysis
clean_input = clean_records[org_class+"_label"] clean_input = clean_records[org_class+"_label"]
# custom split attempt # label split attempt
# eg 'CNRS (Centre National de la Recherche Scientifique)'
# vvvv vvvvvvvvvv
# acro name
test_two_groups = match( test_two_groups = match(
r'([^\(]{1,30}) \(([^\)]+)\)', r'([^\(]+)(?: *\(([^\)]{1,30})\))?',
clean_input clean_input
) )
if test_two_groups: if test_two_groups:
new_org_info['acro'] = test_two_groups.groups()[0] # ex 'Centre National de la Recherche Scientifique (CNRS)'
new_org_info['name'] = test_two_groups.groups()[1] # vvvvvvvvvvvvvvvv vvvv
# name acro
new_org_info['name'] = test_two_groups.groups()[0].strip()
new_org_info['acro'] = test_two_groups.groups()[1].strip()
# fallback cases mlog("DEBUG", "parse_affiliation_records found name='%s' and acro='%s'" % (new_org_info['name'], new_org_info['acro']))
elif len(clean_input) < 30:
new_org_info['acro'] = clean_input
else: else:
new_org_info['name'] = clean_input len_input = len(clean_input)
test_uppercase = sub(r'[^0-9A-ZÉ\.]', '')
uppercase_rate = len(test_uppercase) / len_input
# special case short and mostly uppercase => just acro
# POSS tune len and uppercase_rate
if (len_input <= 8 or
(len_input <= 20 and uppercase_rate > .7)):
# ex 'CNRS'
# vvvv
# acro
new_org_info['acro'] = clean_input
# normal fallback case => just name
else:
# ex 'Centre National de la Recherche Scientifique' None
# vvvvvvvvvvvvvvvv vvvv
# name acro
new_org_info['name'] = clean_input
# 2) enrich with any other optional org info # 2) enrich with any other optional org info
for detail_col in ['type', 'code', 'locname', for detail_col in ['inst_type', 'lab_code', 'locname',
'url', 'contact_email', 'contact_name']: 'url', 'contact_email', 'contact_name']:
# this is a convention in our templates if detail_col not in ['inst_type', 'lab_code']:
org_detail = org_class + '_' + detail_col # this is a convention in our templates
org_detail = org_class + '_' + detail_col
else:
org_detail = detail_col
if org_detail in clean_records: if org_detail in clean_records:
val = clean_records[org_detail] val = clean_records[org_detail]
...@@ -858,20 +878,25 @@ def save_form(clean_records, update_flag=False, previous_user_info=None): ...@@ -858,20 +878,25 @@ def save_form(clean_records, update_flag=False, previous_user_info=None):
reg_db = dbcrud.connect_db(config) reg_db = dbcrud.connect_db(config)
# B1) re-group the org fields into at most 2 org 'objects' # B1) re-group the org fields into at most 2 org 'objects'
declared_orgs = sort_affiliation_records(clean_records) declared_orgs = parse_affiliation_records(clean_records)
mlog('DBG', '=====> save_form: declared_orgs = ', declared_orgs)
# B2) check our constraint (cf. also E.) # B2) check our constraint (cf. also E.)
if (declared_orgs['lab'] is None or declared_orgs['inst'] is None): if (declared_orgs['lab'] is None and declared_orgs['inst'] is None):
raise ValueError("At least 1 org (lab or institution) must be filled") raise ValueError("At least 1 org (lab or institution) must be filled")
# B3) for each, read/fill the orgs table to get associated id(s) in DB # B3) for each, read/fill the orgs table to get associated id(s) in DB
orgids = [] orgids = []
for oclass in ['lab', 'inst']: for oclass in ['lab', 'inst']:
if (declared_orgs[oclass]): if (declared_orgs[oclass]):
orgids.append( orgids.append(
dbcrud.get_or_create_org(declared_orgs[oclass], reg_db) dbcrud.get_or_create_org(declared_orgs[oclass], oclass, reg_db)
) )
mlog('DBG orgids:', orgids)
# B4) save the org <=> org mappings TODO LATER (not a priority) # B4) save the org <=> org mappings TODO LATER (not a priority)
# dbcrud.record_org_org_link(src_orgid, tgt_orgid, reg_db) # dbcrud.record_org_org_link(src_orgid, tgt_orgid, reg_db)
...@@ -926,8 +951,11 @@ def save_form(clean_records, update_flag=False, previous_user_info=None): ...@@ -926,8 +951,11 @@ def save_form(clean_records, update_flag=False, previous_user_info=None):
map_table map_table
) )
# E) save the (uid <=> orgid) mapping(s) # E) overwrite the (uid <=> orgid) mapping(s)
dbcrud.rm_sch_org_links(luid, reg_db)
mlog("DBG", "removing all orgs for", luid)
for orgid in orgids: for orgid in orgids:
mlog("DBG", "recording orgs:", luid, orgid)
dbcrud.record_sch_org_link(luid, orgid, reg_db) dbcrud.record_sch_org_link(luid, orgid, reg_db)
# F) end connection # F) end connection
......
...@@ -9,43 +9,47 @@ else: ...@@ -9,43 +9,47 @@ else:
def sanitize(value, specific_type=None): def sanitize(value, specific_type=None):
""" """
simple and radical: leaves only alphanum and '@' '.' '-' ':' ',' '(', ')', '#', ' '
One of the main goals is to remove ';' One of the main goals is to remove ';'
POSS better POSS better
args: args:
@value: any string to santize @value: any string to santize
@specific_type: None or 'url' or 'date' @specific_type: None or one of {surl,sdate,sbool,sorg}
""" """
vtype = type(value) vtype = type(value)
if vtype not in [int, str]:
raise ValueError("Value has an incorrect type %s" % str(vtype))
str_val = str(value) str_val = str(value)
clean_val = sub(r'^\s+', '', str_val)
clean_val = sub(r'\s+$', '', clean_val)
if not specific_type: if specific_type == "sbool":
san_val = sub(r'[^\w@\.:,()# -]', '_', clean_val)
elif specific_type == "sbool":
# DB uses int(0) or int(1) # DB uses int(0) or int(1)
if match('^[01]$',clean_val): if match('^[01]$',str_val):
san_val = int(clean_val) san_val = int(str_val)
else: else:
san_val = 0 san_val = 0
# NB san_val_bool = bool(san_val) # NB san_val_bool = bool(san_val)
elif specific_type == "surl": elif specific_type == "surl":
san_val = sub(r'[^\w@\.: -/]', '_', clean_val) san_val = sub(r'[^\w@\.: -/]', '_', str_val)
elif specific_type == "sdate": elif specific_type == "sdate":
san_val = sub(r'[^0-9/-:]', '_', clean_val) san_val = sub(r'[^0-9/-:]', '_', str_val)
if vtype not in [int, str]: # free string types
raise ValueError("Value has an incorrect type %s" % str(vtype))
else: else:
# cast back to orginal type clean_val = normalize_forms(normalize_chars(str_val))
san_typed_val = vtype(san_val) san_val = sub(r'\b(?:drop|select|update|delete)\b', '_', clean_val)
return san_typed_val if not specific_type:
san_val = sub(r'[^\w@\.:,()# -]', '_', san_val)
elif specific_type == "sorg":
# most troublesome because we'll want to parse the label
# (to split name and acronym and perhaps suggest similar org)
san_val = sub(r'[\n;"\']', '_', san_val)
# cast back to orginal type
san_typed_val = vtype(san_val)
return san_typed_val
......
...@@ -69,16 +69,23 @@ CREATE TABLE orgs( ...@@ -69,16 +69,23 @@ CREATE TABLE orgs(
-- address... (...) -- address elements POSS NOT IMPLEMENTED -- address... (...) -- address elements POSS NOT IMPLEMENTED
reserved varchar(30), reserved varchar(30),
-- tostring: generated column
-- ex "Instituto de Fisica de Cantabria (IFCA), Santander, Spain" -- 1 generated columns for common uses as label
-- ex "Instituto de Fisica de Cantabria (IFCA)"
-- searchable + human readable, often useful for autocompletes etc -- searchable + human readable, often useful for autocompletes etc
tostring varchar(800) label varchar(800)
AS (CONCAT_WS( '', AS (CONCAT_WS( '',
CONCAT(name, ' '), CONCAT(name, ' '),
CONCAT('(',acro,')'), CONCAT('(',acro,')')) ),
CONCAT(', ', locname)) ),
-- 1 generated column for serialize
toarray varchar(800)
AS (JSON_ARRAY(name, acro, locname)),
PRIMARY KEY (orgid), PRIMARY KEY (orgid),
UNIQUE KEY full_org (name, acro, locname) INDEX class_index_orgs (class),
UNIQUE KEY full_org (class, name, acro, inst_type)
-- POSS add locname to UNIQUE KEY (but handle variants!!)
-- POSS FOREIGN KEY locname REFERENCES locs(locname) -- POSS FOREIGN KEY locname REFERENCES locs(locname)
-- (useful when we use the locs more in the app) -- (useful when we use the locs more in the app)
......
SELECT scholars.first_name, scholars.last_name, SELECT scholars.first_name, scholars.last_name,
GROUP_CONCAT(labs.tostring SEPARATOR '/') AS labs_list, GROUP_CONCAT(labs.label SEPARATOR '/') AS labs_list,
GROUP_CONCAT(insts.tostring SEPARATOR '/') AS insts_list GROUP_CONCAT(insts.label SEPARATOR '/') AS insts_list
FROM scholars FROM scholars
LEFT JOIN sch_org AS map_labs LEFT JOIN sch_org AS map_labs
ON map_labs.uid = luid ON map_labs.uid = luid
......
...@@ -32,45 +32,52 @@ function setupSavedItems(uinfo) { ...@@ -32,45 +32,52 @@ function setupSavedItems(uinfo) {
var colName = cmxClt.COLS[i][0] var colName = cmxClt.COLS[i][0]
var chosenV = uinfo[colName] var chosenV = uinfo[colName]
var tgtElt = document.getElementById(colName) // console.log('setupSavedItems', colName, '('+colType+')' , 'with', chosenV)
if (tgtElt && chosenV != null) {
// d <=> convert to YY/MM/DD from iso string YYYY-MM-DD
if (colType == 'd') {
// console.log('setting date', colName, 'with', chosenV)
tgtElt.value = chosenV.replace(/-/g,'/')
tgtElt.dispatchEvent(new CustomEvent('change'))
}
// m <=> select saved menus
if (colType == 'm') {
// console.log('setting menu', colName, 'with', chosenV)
var myOption = tgtElt.querySelector(`option[value="${chosenV}"]`)
// normal case
if (myOption) {
tgtElt.selectedIndex = myOption.index
tgtElt.dispatchEvent(new CustomEvent('change'))
}
// this case is really just for org_type right now // if the value is none => there's nothing to do
else if (tgtElt.querySelector(`option[value="other"]`)) { if (chosenV != undefined && chosenV != null) {
tgtElt.selectedIndex = tgtElt.querySelector(`option[value="other"]`).index
var tgtElt = document.getElementById(colName)
if (tgtElt != null) {
// d <=> convert to YY/MM/DD from iso string YYYY-MM-DD
if (colType == 'd') {
console.log('setting date', colName, 'with', chosenV)
tgtElt.value = chosenV.replace(/-/g,'/')
tgtElt.dispatchEvent(new CustomEvent('change')) tgtElt.dispatchEvent(new CustomEvent('change'))
}
// m <=> select saved menus
if (colType == 'm') {
// console.log('setting menu', colName, 'with', chosenV)
var myOption = tgtElt.querySelector(`option[value="${chosenV}"]`)
// normal case
if (myOption) {
tgtElt.selectedIndex = myOption.index
tgtElt.dispatchEvent(new CustomEvent('change'))
}
var relatedFreeTxt = document.getElementById('other_'+colName) // this case is really just for inst_type right now
if (relatedFreeTxt) { else if (tgtElt.querySelector(`option[value="other"]`)) {
relatedFreeTxt.value = chosenV console.log('setting menu option other for', colName, 'with', chosenV)
relatedFreeTxt.dispatchEvent(new CustomEvent('change')) tgtElt.selectedIndex = tgtElt.querySelector(`option[value="other"]`).index
tgtElt.dispatchEvent(new CustomEvent('change'))
var relatedFreeTxt = document.getElementById('other_'+colName)
if (relatedFreeTxt) {
relatedFreeTxt.value = chosenV
relatedFreeTxt.dispatchEvent(new CustomEvent('change'))
}
}
// fallback case
else {
var optionOthers =
console.warn(`setupSavedItems: couldn't find option: ${chosenV} for select element: ${colName}`)
} }
}
// fallback case
else {
var optionOthers =
console.warn(`setupSavedItems: couldn't find option: ${chosenV} for select element: ${colName}`)
} }
} }
} else {
else { console.warn("setupSavedItems: couldn't find element: "+colName)
console.warn("setupSavedItems: couldn't find element: "+colName) }
} }
} }
} }
......
...@@ -172,6 +172,6 @@ remoteAutocompleteInit('keywords') ...@@ -172,6 +172,6 @@ remoteAutocompleteInit('keywords')
remoteAutocompleteInit('lab_label', 1, 'laboratories') remoteAutocompleteInit('lab_label', 1, 'laboratories')
if (document.getElementById('hashtags')) remoteAutocompleteInit('hashtags') if (document.getElementById('hashtags')) remoteAutocompleteInit('hashtags')
if (document.getElementById('inst_label') ) remoteAutocompleteInit('inst_label', 1, 'organizations') if (document.getElementById('inst_label') ) remoteAutocompleteInit('inst_label', 1, 'institutions')
console.log("autocompletes load OK") console.log("autocompletes load OK")
...@@ -127,8 +127,11 @@ $(document).ready(function() { ...@@ -127,8 +127,11 @@ $(document).ready(function() {
$("#addfiltercountry").click(function() { $("#addfiltercountry").click(function() {
return popfilter("in", "countries", []); return popfilter("in", "countries", []);
}); });
$("#addfilterorganization").click(function() { // $("#addfilterorganization").click(function() {
return popfilter("from", "organizations", []); // return popfilter("from", "organizations", []);
// });
$("#addfilterinstitution").click(function() {
return popfilter("from", "institutions", []);
}); });
$("#addfilterlaboratory").click(function() { $("#addfilterlaboratory").click(function() {
var prefix; var prefix;
...@@ -211,7 +214,7 @@ $(document).ready(function() { ...@@ -211,7 +214,7 @@ $(document).ready(function() {
var value; var value;
// debug // debug
// console.log('collecting (filter '+k+') from elt:' + e) console.log('collecting (filter '+k+') from elt:' + e)
value = $(e).val(); value = $(e).val();
if (value != null && value != "") { if (value != null && value != "") {
...@@ -240,7 +243,7 @@ $(document).ready(function() { ...@@ -240,7 +243,7 @@ $(document).ready(function() {
} }
for (filterName of ["keywords", "countries", "laboratories", "tags", "organizations"]) { for (filterName of ["keywords", "countries", "laboratories", "tags", "institutions"]) {
var filterValuesArray = collect(filterName) var filterValuesArray = collect(filterName)
// we add only if something to add :) // we add only if something to add :)
......
...@@ -124,7 +124,7 @@ ...@@ -124,7 +124,7 @@
Filter by laboratory</a> Filter by laboratory</a>
</li> </li>
<li> <li>
<a id="addfilterorganization" href="#" <a id="addfilterinstitution" href="#"
onclick='$(this).parents(".dropdown-menu").toggle();'> onclick='$(this).parents(".dropdown-menu").toggle();'>
Filter by organization</a> Filter by organization</a>
</li> </li>
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
<label for="lab_label" class="smlabel input-group-addon">* Lab / Team / Dept</label> <label for="lab_label" class="smlabel input-group-addon">* Lab / Team / Dept</label>
<input id="lab_label" name="lab_label" maxlength="250" <input id="lab_label" name="lab_label" maxlength="250"
type="text" class="form-control" placeholder="More detailed affiliation, if relevant" type="text" class="form-control" placeholder="More detailed affiliation, if relevant"
value="{{ current_user.info.labs[0].tostring if current_user.info.labs|length > 0 }}"> value="{{ current_user.info.labs[0].label if current_user.info.labs|length > 0 }}">
</div> </div>
</div> </div>
...@@ -30,7 +30,7 @@ ...@@ -30,7 +30,7 @@
<label for="inst_label" class="smlabel input-group-addon">Parent Institution</label> <label for="inst_label" class="smlabel input-group-addon">Parent Institution</label>
<input id="inst_label" name="inst_label" maxlength="250" <input id="inst_label" name="inst_label" maxlength="250"
type="text" class="form-control autocomp" placeholder='eg "CNRS" or "University of Oxford"' type="text" class="form-control autocomp" placeholder='eg "CNRS" or "University of Oxford"'
value="{{ current_user.info.insts[0].tostring if current_user.info.insts|length > 0 }}"> value="{{ current_user.info.insts[0].label if current_user.info.insts|length > 0 }}">
</div> </div>
</div> </div>
...@@ -53,7 +53,7 @@ ...@@ -53,7 +53,7 @@
</select> </select>
</div> </div>
<!-- Other institution type <=> only if previous choice == 5 --> <!-- Other institution type <=> only if previous choice == 5 -->
<div class="question conditional-q" id="other_org_div"> <div class="question conditional-q" id="other_inst_div">
<div class="input-group"> <div class="input-group">
<label for="other_inst_type" class="smlabel input-group-addon">Other type</label> <label for="other_inst_type" class="smlabel input-group-addon">Other type</label>
<input id="other_inst_type" name="other_inst_type" maxlength="120" <input id="other_inst_type" name="other_inst_type" maxlength="120"
......
...@@ -172,7 +172,7 @@ ...@@ -172,7 +172,7 @@
<div class="input-group"> <div class="input-group">
<label for="lab_locname" class="smlabel input-group-addon">Lab city</label> <label for="lab_locname" class="smlabel input-group-addon">Lab city</label>
<input id="lab_locname" name="lab_locname" maxlength="50" <input id="lab_locname" name="lab_locname" maxlength="50"
type="text" class="form-control" placeholder="Ville de votre institution" type="text" class="form-control" placeholder="Ville de votre labo"
placeholder="lab_locname"> placeholder="lab_locname">
</div> </div>
</div> </div>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment