Commit 848e6bcf authored by Romain Loth's avatar Romain Loth

cleanup comments + better orga connection in php directories + idea for keyword connection

parent 9b74f9fd
......@@ -130,7 +130,7 @@ JOIN orgs
-- NB: scholars <=> orgs is a one-to-many relationship
-- so this can return multiple lines
-- => in the app we use GROUP_CONCAT(orgs.tostring)
-- => in the app we use GROUP_CONCAT(orgs.label) or GROUP_CONCAT(orgs.toarray)
-- to fit in the scholars cardinality)
......
../static/js/comex_page_claim_profile_controllers.js
\ No newline at end of file
../static/js/comex_page_login_controllers.js
\ No newline at end of file
../static/js/comex_page_profile_controllers.js
\ No newline at end of file
../static/js/comex_page_reg_controllers.js
\ No newline at end of file
../static/js/whoswho.js
\ No newline at end of file
-- keywords
-- --------
-- Keywords reachable from one org (orgid 3476 here) through its scholars:
--   orgs -> sch_org -> scholars -> sch_kw -> keywords
-- Grouped so that each (org, keyword) pair comes out once.
--
-- NB: each LEFT JOIN below is immediately followed by an INNER JOIN on the
--     table it introduced, so rows without a match end up dropped anyway.
-- NB: luid is used unqualified; presumably scholars.luid -- TODO confirm
SELECT
    orgs.label,
    keywords.kwstr,
    keywords.occs
FROM orgs
    -- transition via scholars
    LEFT JOIN sch_org
        ON orgs.orgid = sch_org.orgid
    INNER JOIN scholars
        ON luid = sch_org.uid
    -- linked keywords
    LEFT JOIN sch_kw
        ON luid = sch_kw.uid
    INNER JOIN keywords
        ON keywords.kwid = sch_kw.kwid
WHERE
    orgs.orgid = 3476
GROUP BY
    orgs.orgid,
    keywords.kwid
ORDER BY
    orgs.name,
    orgs.acro,
    keywords.occs DESC,
    keywords.kwstr
;
-- *champion keywords*
-- =====================
-- same with LOCAL threshold (ie >= local avg) on occs
-- (NB: a "concat" step was also announced here originally, but this query
--  does not concatenate anything -- TODO confirm whether it is still wanted)
-- !!! THIS IS REALLY INTERESTING IN ANY context <=> word occs SITUATION !!!
--
-- A keyword is kept for an org when its occs value is at least
-- GREATEST(2, thr), where thr is the average of keywords.occs over all the
-- joined rows linking that org to a keyword via its scholars (a keyword
-- shared by several scholars of the org weighs once per scholar).
--
-- NB: luid is used unqualified; presumably scholars.luid -- TODO confirm
SELECT orgs.label,
       keywords.kwstr,
       keywords.occs,
       thresholds.thr
FROM orgs
    -- transition via scholars
    LEFT JOIN sch_org
        ON sch_org.orgid = orgs.orgid
    JOIN scholars
        ON sch_org.uid = luid
    -- linked keywords
    LEFT JOIN sch_kw
        ON sch_kw.uid = luid
    JOIN keywords
        ON sch_kw.kwid = keywords.kwid
    LEFT JOIN (
        -- create the threshold: one row per org with its average occs
        SELECT orgs.orgid,
               avg(keywords.occs) AS thr
        FROM orgs
            -- transition via scholars
            LEFT JOIN sch_org
                ON sch_org.orgid = orgs.orgid
            JOIN scholars
                ON sch_org.uid = luid
            -- linked keywords
            LEFT JOIN sch_kw
                ON sch_kw.uid = luid
            JOIN keywords
                ON sch_kw.kwid = keywords.kwid
        GROUP BY orgs.orgid
    ) AS thresholds
        ON thresholds.orgid = orgs.orgid
WHERE orgs.orgid IN (3466, 3476, 3668, 3669,
                     3191, 3175, 3167)
  -- GREATEST() is the scalar "largest of its arguments" function;
  -- MAX() is an aggregate taking a single expression and is not allowed
  -- as a two-argument function (nor as an aggregate) in a WHERE clause
  AND keywords.occs >= GREATEST(2, thresholds.thr)
GROUP BY orgs.orgid
       , keywords.kwid
ORDER BY orgs.name, orgs.acro, keywords.occs DESC, keywords.kwstr
;
-- *correlated keywords*
-- POSSIBLE: same technique as for champion keywords,
-- but with a normalization (like TF-IDF)
-- to down-weight the tags that are common champions overall
-- TODO
......@@ -74,6 +74,29 @@ function web_search($a_query_string, $exact=false) {
return 'https://search.iscpif.fr/?q='.urlencode($a_query_string);
}
/**
 * Builds a web search URL from the descriptive fields of an org.
 *
 * @param array $org_info  map of org properties (key => value). The keys
 *                         'unique_id' and 'admin' are skipped; for
 *                         'related_insts' only the first element is used;
 *                         any other scalar value longer than 3 characters
 *                         is added to the search terms.
 * @return string          the URL returned by web_search() for the search
 *                         terms joined with ', '
 */
function org_info_to_search_link ($org_info) {
    $search_elements = array();
    foreach($org_info as $key => $val) {
        if ($key == 'unique_id' || $key == 'admin') {
            continue;
        }
        elseif ($key == 'related_insts' && is_array($val) && count($val)) {
            // we use only the first one for search context
            // NB: reset() rather than [0], because the list is not
            //     guaranteed to be indexed from 0 (array_filter() and
            //     friends preserve the original keys)
            $search_elements[] = reset($val);
        }
        else {
            // ... and we add all other strings (name, acro, lab_code, loc)
            // (is_scalar: strlen() must never receive an array or object)
            if (is_scalar($val) && strlen((string) $val) > 3) {
                $search_elements[] = $val;
            }
        }
    }
    // print_r($search_elements) ;
    return web_search(implode(', ', $search_elements));
}
// replace '@' and dots to avoid the email being harvested by robots/spiders
function safe_email($email_str) {
return preg_replace(
......
......@@ -19,10 +19,9 @@ $imsize = 150;
$content='';
// 2 lists for all the scholars' affiliations
// (used for "Lab's by alphabetical order" section)
$all_labs_list=array();
$all_orga_list=array();
// prepare the list of scholars' institutions
// (used for "Institutions by alphabetical order" section)
$additional_insts_ids=array();
// ajout des scholars
......@@ -219,13 +218,12 @@ for($i = 0; $i < $n_steps; $i++) {
//
$sql = <<< LABSQLEXTENDED
SELECT orgs.*,
GROUP_CONCAT( tgt_label ORDER BY tgt_freq DESC SEPARATOR '%%%')
GROUP_CONCAT( tgt_orgid ORDER BY tgt_freq DESC )
AS related_insts
FROM orgs
LEFT JOIN (
SELECT sch_org.orgid AS src_orgid,
sch_org2.orgid AS tgt_orgid,
orgs2.label AS tgt_label,
count(*) AS tgt_freq
FROM sch_org
LEFT JOIN sch_org AS sch_org2
......@@ -265,11 +263,11 @@ LABSQLEXTENDED;
// $info['keywords'] = $row['keywords'];
// most frequent parent orgs (max = 3)
$related_insts = array_slice(explode('%%%', $row['related_insts'] ?? ""),0,3) ;
$info['related_insts'] = $related_insts;
$related_insts_ids = array_slice(explode(',', $row['related_insts'] ?? ""),0,3) ;
$info['related_insts'] = array_filter($related_insts_ids);
// also add them to orga_list
$all_orga_list[] = $related_insts;
$additional_insts_ids[] = $related_insts_ids;
$info['admin'] = ucwords($row['contact_name'] ?? '');
if ($row['contact_email']) {
......@@ -294,42 +292,42 @@ LABSQLEXTENDED;
/// liste des organismes / affiliations institutionnelles ///
/////////////////////////////////////////////////////////////
// all direct institutions' orgids except '' (array_filter drops falsy keys)
$inst_ids = array_filter(array_keys($inst_counts));

// any other institutions we want (disabled for now)
// NOTE(review): $additional_insts_ids looks like a list of lists, so it
//               would need flattening before being merged here -- confirm
// $inst_ids[] = $additional_insts_ids;

// deduplicate, then sort (sort() also reindexes the array from 0)
// NB: the variable read and written here must be $inst_ids, the one that
//     the retrieval loop iterates over
$inst_ids = array_unique($inst_ids);
sort($inst_ids);
// all org with infos to retrieve
$organiz = array();
// debug
// $content .= var_dump($all_orga_list) ;
// $content .= var_dump($inst_ids) ;
// $organiz = array();
// sort($all_orga_list);
// foreach ($all_orga_list as $name) {
// if ((trim($name))!=NULL){
// $sql = "SELECT * FROM affiliations WHERE org='" . $name. "'";
//
// $temp=true;
// foreach ($base->query($sql) as $row) {
// if ($temp){
// $info = array();
// $info['unique_id'] = $row['affid'];
// $info['name'] = $row['org'];
// // TODO RESTORE
// // $info['acronym'] = $row['acronym'];
// // $info['homepage'] = $row['homepage'];
// // $info['keywords'] = $row['keywords'];
// // $info['country'] = $row['country'];
// // $info['street'] = $row['street'];
// // $info['city'] = $row['city'];
// // $info['state'] = $row['state'];
// // $info['postal_code'] = $row['postal_code'];
// // $info['fields'] = $row['fields'];
// // $info['admin'] = $row['admin'];
// // $info['phone'] = $row['phone'];
// // $info['fax'] = $row['fax'];
// // $info['login'] = $row['login'];
// $organiz[$row['affid']] = $info;
// $temp=false;
// }
// }
// }
//
// }
// Retrieve the infos of each institution and index them by orgid in $organiz
// NOTE(review): one query per id, built by string concatenation; fine as long
//               as the ids only come from the database -- confirm
foreach ($inst_ids as $inst_id) {
    $sql = "SELECT * FROM orgs WHERE orgid='{$inst_id}'";

    foreach ($base->query($sql) as $row) {
        $info = array(
            'unique_id' => $inst_id,
            'name'      => $row['name'],
            // optional columns default to an empty string
            'acronym'   => $row['acro'] ?? '',
            'homepage'  => $row['url'] ?? '',
            'inst_type' => $row['inst_type'] ?? '',
            // ex: 'Barcelona, Spain'
            //     'London, UK'
            //     'UK'
            'locname'   => $row['locname'] ?? '',
        );

        // TODO RESTORE keywords and contact
        // $info['keywords'] = $row['keywords'];
        // $info['admin'] = $row['admin'];

        $organiz[$inst_id] = $info;
    }
}
......@@ -341,7 +339,6 @@ $content .='<br/> <A NAME="labs"> </A>
<h1>Labs by alphabetical order</h1>
<p><i>List of teams or labs mentioned by the scholars</i></p>';
// TODO RESTORE
include('labs_list.php');
......@@ -352,8 +349,7 @@ $content .= '<br/> <A NAME="orga"> </A>
<h1>Institutions by alphabetical order</h1>
<br/>
<p><i>List of institutions to which scholars are affiliated</i></p>';
// £TODO_ORGS
// include('orga_list.php');
include('orga_list.php');
?>
......@@ -22,11 +22,19 @@ foreach ($labs as $lab) {
<div class="span9" align="justify">';
$content .= '<div>';
$content .= '<h2 >' . $lab['name'];
$content .= '<h2 >';
$www = org_info_to_search_link($lab);
$content .= '<a href="'.$www.'"><i class="icon-search"></i></a>&nbsp;';
$content .= $lab['name'];
if (strlen($lab['acronym'])){
$content.=' (<b>'.$lab['acronym'].'</b>)';
}
$content.=' <small> - ' . $lab['locname'] . '</small></h2>';
// the location is optional: the grey suffix is only added when present
if ($lab['locname'] != null) {
    // NB: the <small> element must be closed before its parent <span>
    $content.=' <span style="color:grey"><small> - ' . $lab['locname'] . '</small></span>';
}
$content.="</h2>";
// var_dump($lab);
......@@ -35,30 +43,6 @@ foreach ($labs as $lab) {
$www = homepage_to_alink($lab['homepage']);
$content .= '<dl><dd><i class="icon-home"></i>'.$www.'</dd></dl>';
}
else {
$search_elements = array();
foreach($lab as $key => $val) {
if ($key == 'unique_id' || $key == 'admin') {
continue;
}
elseif ($key == 'related_insts' && count($lab['related_insts'])) {
// we use only the most frequent one for search context
$search_elements[] = $lab['related_insts'][0];
}
else {
// ... and we add all other strings (name, acro, lab_code, loc)
if ($val && strlen($val) > 2) {
$search_elements[] = $val;
}
}
}
// print_r($search_elements) ;
$www = web_search(implode(', ', $search_elements));
// print_r($www);
$content .= '<dl><dd><a href="'.$www.'"><small>search</small></a><i class="icon-search"></i></dd></dl>';
}
$lab_code = '';
if (array_key_exists('lab_code', $lab) && strlen($lab['lab_code'])) {
......@@ -70,14 +54,30 @@ foreach ($labs as $lab) {
$n_related_insts = count($lab['related_insts']);
if ($n_related_insts) {
$content .= '<dl>
<dt>Institutions:</dt>';
$content .= '<br><h4 title="Frequently related institutions">Institutions:</h4>';
$content .= '<ul>';
// $content .= "<p>".$n_related_insts."</p>";
foreach ($lab['related_insts'] as $rel_inst_id) {
$content .= '<li class="parent-org"><a href="#org-'.$rel_inst_id.'">';
$rel_inst_info = $organiz[$rel_inst_id];
$has_acro = false ;
if (strlen($rel_inst_info['acronym'])) {
$content .= $rel_inst_info['acronym'];
$has_acro = true ;
} else {
$content .= $rel_inst_info['name'];
}
$content .= '</a>';
$content .= "</li>";
foreach ($lab['related_insts'] as $rinstitution) {
$content .= '<dd class="parent-org">' . $rinstitution . '</dd> ';
// $content .= '<dd class="parent-org">' . ['label'] . '</dd> ';
}
$content .= '</dl>';
$content .= '</ul>';
}
$content .= '</div>';
......
......@@ -10,7 +10,7 @@ $content .='<br/>
$orga_count = 0;
// debug
// $content .= var_dump($organiz) ;
// debug output kept disabled: var_dump() prints straight to the response and
// returns nothing, so concatenating it would add nothing to $content while
// dumping the whole $organiz array into the page
// $content .= var_dump($organiz) ;
foreach ($organiz as $orga) {
......@@ -26,12 +26,31 @@ foreach ($organiz as $orga) {
<div class="span9" align="justify">';
$content .= '<div>';
$content .= '<h2 >' . $orga['name'];
if ($orga['acronym'] != null) {
$content.=' (' . $orga['acronym'] . ')';
// unique anchor
$content .= '<a name="org-'.$orga['unique_id'].'"></a>';
// title
$content .= '<h2>' ;
$has_acro = false ;
if (strlen($orga['acronym'])) {
$content .= $orga['acronym'];
$has_acro = true ;
}
if ($orga['name']) {
if ($has_acro) $content .= ' <small>(';
$content .= $orga['name'];
if ($has_acro) $content .= ')</small>';
}
$content.=' <small> - ' . $orga['country'] . '</small></h2>';
if ($orga['locname'] != null) {
$content.=' <small> - ' . $orga['locname'] . '</small>';
}
$www = org_info_to_search_link($orga);
$content .= '<a href="'.$www.'"><i class="icon-search"></i></a>';
$content.="</h2>";
$www = '';
if (array_key_exists('homepage', $lab) && strlen($lab['homepage'])) {
......@@ -59,7 +78,7 @@ foreach ($organiz as $orga) {
}
$content .= '</div>';
if (($orga['keywords'] != null) || ($orga['address'] != null) || ($orga['phone'] != null)) {
if (($orga['keywords'] != null) || ($orga['admin'] != null)) {
$content .= '<div class="span3" align="justify">';
if ($orga['keywords'] != null) {
......@@ -67,29 +86,9 @@ foreach ($organiz as $orga) {
$content .= '<i class="icon-tags"></i> ' . $orga['keywords'] . '<br/><br/>';
}
if ($orga['admin'] != null) {
$content .= '<address><i class="icon-info-sign"></i> Administrative contact: ' . ucwords($orga['admin']) . '<br/></address>';
}
if (trim($orga['street']) != null) {
$address = $orga['street'] . ', ' . $orga['city'] . ', ' . $orga['postal_code']
. ', ' . $orga['state']. ', ' . $orga['country'];
$address = str_replace(", , , , ", ", ", $address);
$address = str_replace(", , , ", ", ", $address);
$address = str_replace(", , ", ", ", $address);
$content .= '<address><i class="icon-envelope"></i> ' . $address . '<br/></address>';
}
if (($orga['phone'] != null) || ($orga['fax'] != null)) {
$content .= '<address><strong>Phone</strong>: ' . $orga['phone'] . '<br/>';
if ($orga['fax'] != null) {
$content .='<strong>Fax</strong>: ' . $orga['fax'] . '<br/>';
}
}
$content .= '</div>';
}
......
......@@ -338,7 +338,7 @@ def get_full_scholar(uid, cmx_db = None):
urow_dict[orgclass] = new_cursor.fetchall()
print("get_full_scholar orgs::", urow_dict[orgclass])
# print("get_full_scholar orgs::", urow_dict[orgclass])
# print('===urow_dict with orgs[]===')
......@@ -665,7 +665,7 @@ def get_or_create_org(org_info, oclass, cmx_db = None):
if oclass:
org_info['class'] = oclass
mlog("INFO", "get_or_create_org, org_info:", org_info)
mlog("DEBUG", "get_or_create_org, org_info:", org_info)
for colinfo in ORG_COLS:
colname = colinfo[0]
......@@ -699,7 +699,7 @@ def get_or_create_org(org_info, oclass, cmx_db = None):
db_cursor = cmx_db.cursor()
mlog("INFO", "SELECT org.. WHERE %s" % ("\n AND ".join(db_constraints)))
mlog("DEBUGSQL", "SELECT org.. WHERE %s" % ("\n AND ".join(db_constraints)))
n_matched = db_cursor.execute(
'SELECT orgid FROM orgs WHERE %s' %
......@@ -709,7 +709,7 @@ def get_or_create_org(org_info, oclass, cmx_db = None):
# ok existing affiliation => row id
if n_matched == 1:
the_aff_id = db_cursor.fetchone()[0]
mlog("INFO", "Found affiliation (orgid %i) (WHERE %s)" % (the_aff_id, " AND ".join(db_constraints)))
mlog("INFO", "dbcrud: found affiliation (orgid %i) (WHERE %s)" % (the_aff_id, " AND ".join(db_constraints)))
# no matching affiliation => add => row id
elif n_matched == 0:
......
......@@ -492,10 +492,6 @@ class BipartiteExtractor:
if len(clause):
sql_constraints.append("(%s)" % clause)
# debug
# £TODO_ORG rm
mlog("INFO", "SELECTing active users with sql_constraints", sql_constraints)
# use constraints as WHERE-clause
# NB we must cascade join because
......
......@@ -792,7 +792,6 @@ def parse_affiliation_records(clean_records):
- We return a map with 2 key/value submaps for lab and institutions
"""
new_orgs = {'lab': None, 'inst': None}
print(clean_records)
for org_class in new_orgs:
# can't create org without some kind of label
if (org_class+"_label" not in clean_records
......@@ -880,7 +879,7 @@ def save_form(clean_records, update_flag=False, previous_user_info=None):
# B1) re-group the org fields into at most 2 org 'objects'
declared_orgs = parse_affiliation_records(clean_records)
mlog('DBG', '=====> save_form: declared_orgs = ', declared_orgs)
mlog('DEBUG', 'save_form: declared values for org =', declared_orgs)
# B2) check our constraint (cf. also E.)
if (declared_orgs['lab'] is None and declared_orgs['inst'] is None):
......@@ -895,7 +894,7 @@ def save_form(clean_records, update_flag=False, previous_user_info=None):
dbcrud.get_or_create_org(declared_orgs[oclass], oclass, reg_db)
)
mlog('DBG orgids:', orgids)
# log the whole list of org ids (orgids, plural: the list iterated below when recording the links)
mlog('DEBUG', 'save_form: found ids for orgs =', orgids)
# B4) save the org <=> org mappings TODO LATER (not a priority)
# dbcrud.record_org_org_link(src_orgid, tgt_orgid, reg_db)
......@@ -953,9 +952,9 @@ def save_form(clean_records, update_flag=False, previous_user_info=None):
# E) overwrite the (uid <=> orgid) mapping(s)
dbcrud.rm_sch_org_links(luid, reg_db)
mlog("DBG", "removing all orgs for", luid)
mlog("DEBUG", "removing all orgs for", luid)
for orgid in orgids:
mlog("DBG", "recording orgs:", luid, orgid)
mlog("DEBUG", "recording orgs:", luid, orgid)
dbcrud.record_sch_org_link(luid, orgid, reg_db)
# F) end connection
......
#! /bin/bash
#
# Rotate the logs: move every *.log file of the log directory into its
# 'archived' subdirectory, prefixing the file name with the current date.
#   ex: 'logs/services.log'     -> 'logs/archived/2017-03-10_services.log'
#       'logs/nginx_access.log' -> 'logs/archived/2017-03-10_nginx_access.log'
# nb: afterwards, it's good to do an app + nginx restart to recreate them

export PATH_TO_LOGDIR="./logs"

mkdir -p "$PATH_TO_LOGDIR/archived"

# assignment kept separate from export so that a failure of date is not masked
curdate=$(date +"%Y-%m-%d")
export curdate
echo "$curdate"

# iterate with a glob instead of parsing the output of ls, and quote every
# expansion, so that names with spaces or special characters are handled
for fname in "$PATH_TO_LOGDIR"/*.log
do
    # when nothing matches, the pattern is left unexpanded: nothing to rotate
    [ -e "$fname" ] || continue

    bname=$(basename -- "$fname")
    newname="${curdate}_${bname}"

    # -b: back up an already archived file of the same name instead of
    #     overwriting it ; -v: report each move
    mv -bv -- "$fname" "$PATH_TO_LOGDIR/archived/$newname"
done
......@@ -214,7 +214,7 @@ $(document).ready(function() {
var value;
// debug
console.log('collecting (filter '+k+') from elt:' + e)
// console.log('collecting (filter '+k+') from elt:' + e)
value = $(e).val();
if (value != null && value != "") {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment