Commit 5f17088b authored by Romain Loth

fix details of hashtags filter to sql constraints to map procedure

parent d8de98a5
......@@ -12,7 +12,7 @@ from networkx import Graph, DiGraph
from random import randint
from math import floor, log, log1p
from cgi import escape
from re import sub
from re import sub, match
from traceback import format_tb
if __package__ == 'services':
......@@ -392,6 +392,25 @@ class BipartiteExtractor:
# "LIKE_relation" or "EQ_relation"
rel_type = FIELDS_FRONTEND_TO_SQL[key]['type']
# pre-treatment: rewrite tables' names if they're inside the sub-query
# example:
# scholars.country ~~~~~> scholars_n_hashtags.country
# hashtags.htstr ~~~~~> scholars_n_hashtags.htstr
# (see cascaded join below for explanation)
if match("scholars", sql_column):
(sql_table, sql_field) = sql_column.split('.')
sql_column = 'scholars_n_hashtags.'+sql_field
mlog('DBG', "rewrote sql col", sql_column)
elif match("hashtags.htstr", sql_column):
sql_column = 'scholars_n_hashtags.hashtags_list'
mlog('DBG', "rewrote sql col", sql_column)
# now create the constraints
val = filter_dict[known_filter]
if len(val):
......@@ -442,19 +461,35 @@ class BipartiteExtractor:
mlog("INFO", "SELECTing active users with sql_constraints", sql_constraints)
# use constraints as WHERE-clause
# NB: we must cascade the joins because
# both hashtags and keywords are one-to-many relations
# => the sub-query merges the scholars and hashtags tables
# into a single derived table named 'scholars_n_hashtags'
sql_query = """
SELECT
scholars.luid,
scholars_n_hashtags.luid,
scholars_n_hashtags.affiliation_id,
-- kws info
COUNT(keywords.kwid) AS keywords_nb,
GROUP_CONCAT(keywords.kwstr) AS keywords_list,
GROUP_CONCAT(keywords.kwid) AS keywords_ids
FROM scholars
GROUP_CONCAT(keywords.kwstr) AS keywords_list
FROM (
SELECT
scholars.*,
-- hts info
GROUP_CONCAT(hashtags.htstr) AS hashtags_list
FROM scholars
LEFT JOIN sch_ht
ON uid = luid
JOIN hashtags
ON sch_ht.htid = hashtags.htid
GROUP BY luid
) AS scholars_n_hashtags
-- two step JOIN for keywords
JOIN sch_kw
LEFT JOIN sch_kw
ON uid = luid
JOIN keywords
ON sch_kw.kwid = keywords.kwid
......
......@@ -42,7 +42,6 @@ if __package__ == 'services':
from services import tools, dbcrud, dbdatapi
from services.user import User, login_manager, \
doors_login, doors_register
from services.dbdatapi import BipartiteExtractor
from services.text.utils import sanitize
else:
# when this script is run directly
......@@ -51,7 +50,6 @@ else:
import tools, dbcrud, dbdatapi
from user import User, login_manager, \
doors_login, doors_register
from dbdatapi import BipartiteExtractor
from text.utils import sanitize
# ============= app creation ============
......@@ -220,7 +218,13 @@ def graph_api():
(original author S. Castillo)
"""
if 'qtype' in request.args:
graphdb = BipartiteExtractor(config['SQL_HOST'])
graphdb = dbdatapi.BipartiteExtractor(config['SQL_HOST'])
# request.query_string
# => b'qtype=filters&tags[]=%23iscpif'
# tools.restparse(request.query_string.decode())
# => {'qtype': 'filters', 'tags': ['#iscpif']}
scholars = graphdb.getScholarsList(
request.args['qtype'],
tools.restparse(
......
......@@ -303,7 +303,7 @@ function bringTheNoise(sourceinfo,type){
var nameSubElts = []
for (var value of filteringKeyArrayPairs[fieldName]) {
// example: "countries[]=France"
restParams.push(fieldName+"[]="+value)
restParams.push(fieldName+'[]='+encodeURIComponent(value))
nameSubElts.push ('"'+value+'"')
}
nameElts.push("("+nameSubElts.join(" or ")+")")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment