Commit 9fb58f01 authored by Romain Loth's avatar Romain Loth

autocomplete: replace old search_filter.php by call to new api

parent a8ac0399
<?php
/*
 * Builds the aggregations used by the filters from the MySQL database.
 * TODO: replace the calls to search_filter with calls to api/aggs
 */
include("php_library/parametres.php");
include("php_library/normalize.php");

$base = new PDO($dsn, $user, $pass, $opt);

// Read the two GET parameters defensively: a missing or non-string value
// (e.g. ?category[]=x) is treated as an empty string instead of raising.
$rawCategory = (isset($_GET['category']) && is_string($_GET['category'])) ? $_GET['category'] : '';
$rawTerm = (isset($_GET['term']) && is_string($_GET['term'])) ? $_GET['term'] : '';

$category = trim(strtolower($rawCategory));
$term = trim(strtolower($rawTerm));

// LIKE pattern: the sanitized term wrapped in wildcards
$q = "%".sanitize_input($term)."%";

// Accepted category names (singular and plural forms) => column to aggregate.
// Only the values of this table ever reach the SQL text as column names.
$categoryColumns = array(
    'country'       => 'country',
    'countries'     => 'country',
    // POSSIBLE: `concat(institution, ", ", IFNULL(team_lab, ""))`
    // (change in $cat here and in print_directory args downstream)
    'organization'  => 'org',
    'organizations' => 'org',
    'keyword'       => 'keywords_list',
    'keywords'      => 'keywords_list',
    'tag'           => 'hashtags_list',
    'tags'          => 'hashtags_list',
    // this alias used to be tested against the bare constant `v` instead of
    // $category, so it could never match
    'labs'          => 'team_lab',
    'laboratories'  => 'team_lab',
    'laboratory'    => 'team_lab',
);

$cat = '';
$query = '';
if (!isset($categoryColumns[$category])) {
    // unknown or missing category: same plain-text error as before
    echo ("ERROR");
    exit();
}
$cat = $categoryColumns[$category];

// SQL condition applied to the aggregated column (identical for every category)
$query = 'LIKE upper(\''.strtoupper($q).'\')';

// Lowercased values that must never be proposed as completions
$filtered = array (
    "yes", "1", "0", "nvgfpmeilym", "no", "mr", "ms", "", " ", " "
);
/**
 * Tells whether a candidate completion should be kept.
 *
 * The stop list is the script-level $filtered array; it has to be imported
 * with `global`, otherwise it is undefined inside the function and no word
 * is ever filtered (PHP 7) or in_array() throws a TypeError (PHP 8).
 *
 * @param mixed $value candidate word (normally a string coming from the DB)
 * @return bool true to keep the word, false when it is in the stop list
 */
function filter_word($value) {
    global $filtered;

    // NOTE(review): null and "" are accepted here (loose comparison), even
    // though "" is also part of the stop list -- confirm this is intended.
    if ($value == null) return true;

    // tolerate a missing stop list instead of failing
    $stopList = is_array($filtered) ? $filtered : array();

    // strict comparison: both sides are strings, and this avoids numeric
    // juggling such as "00" matching "0"
    return ! in_array(strtolower($value), $stopList, true);
}
// TODO differentiate req's target cols earlier: above in "if ($category == X)"

// Aggregation query: one row per distinct value of the selected column with
// its number of scholars. Keywords and hashtags are first concatenated per
// scholar (GROUP_CONCAT), so for these two columns `clef` is a comma-separated
// list that is split again below.
// $cat comes from the fixed category mapping above; the search pattern is
// bound as a parameter instead of being concatenated into the SQL text.
$req = <<<END_SQL
SELECT
    {$cat} AS clef,
    count({$cat}) AS value
FROM (
    SELECT
        scholars_affiliations_and_keywords.*,
        GROUP_CONCAT(htstr) AS hashtags_list
    FROM (
        SELECT
            scholars_and_affiliations.*,
            GROUP_CONCAT(kwstr) AS keywords_list
        FROM (
            SELECT
                scholars.luid,
                scholars.country,
                affiliations.org,
                affiliations.team_lab
            FROM scholars
            LEFT JOIN affiliations
                ON scholars.affiliation_id = affiliations.affid
            WHERE (record_status = 'active'
                OR (record_status = 'legacy' AND valid_date >= NOW()))
        ) AS scholars_and_affiliations
        LEFT JOIN sch_kw
            ON sch_kw.uid = scholars_and_affiliations.luid
        LEFT JOIN keywords
            ON sch_kw.kwid = keywords.kwid
        GROUP BY luid
    ) AS scholars_affiliations_and_keywords
    LEFT JOIN sch_ht
        ON sch_ht.uid = luid
    LEFT JOIN hashtags
        ON sch_ht.htid = hashtags.htid
    GROUP BY luid
) AS scholars_info
WHERE {$cat} LIKE UPPER(:pattern)
GROUP BY {$cat}
ORDER BY value DESC ;
END_SQL;

// $q is the %-wrapped search term built at the top of the script.
// NOTE(review): $q already went through sanitize_input(); binding it keeps
// the query safe whatever that helper does -- confirm it does not add
// escaping that should now be dropped.
$stmt = $base->prepare($req);
$rows = ($stmt !== false && $stmt->execute(array(':pattern' => strtoupper($q))))
    ? $stmt
    : array();   // query failure: answer with an empty result list

$results = array();
$isListColumn = ($cat == "keywords_list" || $cat == "hashtags_list");

foreach ($rows as $row) {
    $nb = intval($row['value']);

    // 1) collect the candidate labels carried by this row
    $candidates = array();
    if ($isListColumn) {
        // the SQL filter matched the whole concatenated list: keep only the
        // individual items that contain the term. The SQL match is
        // case-insensitive and $term is lowercased, so compare without case.
        foreach (explode(",", $row["clef"]) as $word) {
            if ($term === '' || stripos($word, $term) !== false) {
                $candidates[] = $word;
            }
        }
    } else {
        $word = $row["clef"];
        if ($cat == "country") {
            $word = normalize_country($word);
        }
        $candidates[] = $word;
    }

    // 2) add the row count to every accepted label
    foreach ($candidates as $word) {
        if (!filter_word($word)) {
            continue;
        }
        if (array_key_exists($word, $results)) {
            $results[ $word ] += $nb;
        } else {
            $results[ $word ] = $nb;
        }
    }
}

$nbresults = count($results);

// Counts were merged across rows (list items, normalized countries), so the
// SQL ordering no longer holds: sort again before keeping the 20 best.
arsort($results);

// preserve_keys=true: labels are the array keys, and purely numeric labels
// (e.g. "2017") would otherwise be renumbered 0..19 by array_slice
$results = array_slice($results, 0, 20, true);
$nbresults2 = count($results);

$completion = array(
    "results" => array()
);
foreach ($results as $key => $value) {
    $completion["results"][] = array(
        // numeric labels come back from the array as ints: emit strings
        'id' => (string) $key,
        'label' => (string) $key,
        'score' => $value,
        // the metadata is repeated inside every result entry
        'category' => $cat,
        "term" => $term,
        "size" => $nbresults2,
        "total" => $nbresults,
        "remaining" => ($nbresults - $nbresults2)
    );
}

echo json_encode($completion);
?>
......@@ -25,24 +25,48 @@ else:
from text.utils import CountryConverter
# col are for str stats api
# grouped is for full_scholar filters
FIELDS_FRONTEND_TO_SQL = {
"keywords": {'col':"keywords.kwstr", "type": "LIKE_relation"},
"tags": {'col':"hashtags.htstr", 'type': "LIKE_relation"},
"hashtags": {'col':"hashtags.htstr", 'type': "LIKE_relation"},
"countries": {'col':"scholars.country", 'type': "EQ_relation"},
"gender": {'col':"scholars.gender", 'type': "EQ_relation"},
"organizations": {'col':"affiliations.org", 'type': "LIKE_relation"},
"laboratories": {'col':"affiliations.team_lab", 'type': "LIKE_relation"},
"cities": {'col':"affiliations.org_city", 'type': "EQ_relation"},
"keywords": {'col':"keywords.kwstr",
'type': "LIKE_relation",
'grouped': "keywords_list"},
"tags": {'col':"hashtags.htstr",
'type': "LIKE_relation",
'grouped': "hashtags_list"},
"hashtags": {'col':"hashtags.htstr",
'type': "LIKE_relation",
'grouped': "hashtags_list"},
"countries": {'col':"scholars.country",
'type': "EQ_relation",
'grouped': "country"},
"gender": {'col':"scholars.gender",
'type': "EQ_relation",
'grouped': "gender"},
"organizations": {'col':"orgs.tostring",
'class': "inst",
'type': "LIKE_relation",
'grouped': "orgs_list"},
"laboratories": {'col':"orgs.tostring",
'class': "lab",
'type': "LIKE_relation",
'grouped': "orgs_list"},
# TODO use
"cities": {'col':"orgs.locname",
'type': "LIKE_relation",
'grouped': "locnames_list",
'class': "*"},
"linked": {'col':"linked_ids.ext_id_type", 'type': "EQ_relation"}
}
# TODO also add paging as param and to postfilters
def get_field_aggs(a_field,
hapax_threshold=int(REALCONFIG['HAPAX_THRESHOLD']),
hapax_threshold = None,
search_filter_str = None,
users_status = "ALL"):
"""
Use case: /services/api/aggs?field=a_field
......@@ -62,6 +86,9 @@ def get_field_aggs(a_field,
POSS: allow other fields than those in the mapping
if they are already in sql table.col format?
search_filter_str: str
if present, select only results LIKE this %%str%%
hapax_threshold: int
for all data_types, categories with a total equal or below this will be excluded from results
TODO: put them in an 'others' category
......@@ -89,6 +116,10 @@ def get_field_aggs(a_field,
# constraints 2, if any
postfilters = []
if search_filter_str is not None and len(search_filter_str):
search_filter_str = quotestr(search_filter_str)
postfilters.append( "x LIKE '%%%s%%'" % search_filter_str)
if hapax_threshold > 0:
count_col = 'occs' if sql_tab in ['keywords', 'hashtags'] else 'n'
postfilters.append( "%s > %i" % (count_col, hapax_threshold) )
......@@ -113,18 +144,25 @@ def get_field_aggs(a_field,
ORDER BY n DESC
""" % {'col': sql_col, 'post_filter': post_where}
elif sql_tab == 'affiliations':
elif sql_tab == 'orgs':
sql_class = FIELDS_FRONTEND_TO_SQL[a_field]['class']
sql_class_clause = ""
if len(sql_class) and sql_class != "*":
sql_class_clause = "WHERE class='%s'" % sql_class
stmt = """
SELECT x, n FROM (
SELECT %(col)s AS x, COUNT(*) AS n
FROM scholars
FROM sch_org
-- 0 or 1
LEFT JOIN affiliations
ON scholars.affiliation_id = affiliations.affid
LEFT JOIN orgs
ON sch_org.orgid = orgs.orgid
%(class_clause)s
GROUP BY %(col)s
) AS allcounts
%(post_filter)s
ORDER BY n DESC
""" % {'col': sql_col, 'post_filter': post_where}
""" % {'col': sql_col, 'class_clause': sql_class_clause,
'post_filter': post_where}
elif sql_tab == 'linked_ids':
stmt = """
......
......@@ -31,7 +31,7 @@ from json import dumps
from datetime import timedelta
from urllib.parse import unquote
from flask import Flask, render_template, request, \
redirect, url_for, session
redirect, url_for, session, jsonify
from flask_login import fresh_login_required, login_required, \
current_user, login_user, logout_user
......@@ -190,22 +190,34 @@ def services():
def aggs_api():
"""
API to read DB aggregation data (ex: for autocompletes)
REST params
like:str an optional filter for select
hapax:int an optional min count threshold
"""
if 'field' in request.args:
search_filter = None
hap_thresh = None
if 'like' in request.args:
try:
search_filter = str(request.args['like'])
except:
pass
if 'hapax' in request.args:
try:
hap_thresh = int(request.args['hapax'])
except:
pass
if hap_thresh is not None:
# field name itself is tested by db module
result = dbdatapi.get_field_aggs(request.args['field'], hapax_threshold=hap_thresh)
else:
result = dbdatapi.get_field_aggs(request.args['field'])
return dumps(result)
if hap_thresh is None:
hap_thresh = int(config['HAPAX_THRESHOLD'])
# field name itself is tested by db module
result = dbdatapi.get_field_aggs(
request.args['field'],
search_filter_str=search_filter,
hapax_threshold=hap_thresh
)
return jsonify(result)
else:
raise TypeError("aggs API query is missing 'field' argument")
......
/**
* @fileoverview
* rootindex link fixing
* @todo
* - package.json
*
* @version 1
* @copyright ISCPIF-CNRS 2016
* @author romain.loth@iscpif.fr
*
*/
// Workaround for the way the profile/register iframe gets loaded over https.
// Context:
//   curiously, a relative path used as iframe src ('/services/user/register')
//   does not behave like other relative paths (which reproduce the current
//   scheme), so an absolute path is used and the domain name is filled in
//   at rendering time.
// NOTE(review): an iframe's .src property usually reflects the already
//   resolved absolute URL -- confirm the prefixed value is the intended one.
var relsrc = document.getElementById('inlink').src

// anything that does not already start with "https" gets prefixed with the
// https scheme and the current host
relsrc = /^https/.test(relsrc)
    ? relsrc
    : 'https://'+location.host+'/'+relsrc

// the src is written back in every case, changed or not
document.getElementById('inlink').src = relsrc

console.log("rootindex controllers load OK")
......@@ -72,20 +72,26 @@ $(document).ready(function() {
$('#' + id3).click(closeThisBox)
// debug
// console.log("whoswho.popfilter: adding autocomplete menu", $("#" + id1))
console.log("whoswho.popfilter: adding autocomplete menu", $("#" + id1))
$("#" + id2).autocomplete({
source: function (req, resp) {
$.ajax({
dataType: "json",
type: "GET",
url: "/search_filter.php",
// url: "/search_filter.php",
url: "/services/api/aggs",
data: {
"category": type,
"term": req.term,
"field": type,
"like": req.term,
},
success: function(data){
resp(data.results)
resp(data.map(function(info) {
return {
'label': info.x,
'score': info.n
}
}))
},
error: function(response) {
console.log("ERROR from search_filter AJAX", response)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment