Commit b1754f13 authored by Romain Loth

refine/extract: LIKE disjunctions for arrays of str filters

parent 1378cc7c
......@@ -48,17 +48,17 @@ ORG_COLS = [
FIELDS_FRONTEND_TO_SQL = {
"keywords": "keywords.kwstr",
"tags": "hashtags.htstr",
"keywords": {'col':"keywords.kwstr", "type": "LIKE_relation"},
"tags": {'col':"hashtags.htstr", 'type': "LIKE_relation"},
"countries": "scholars.country",
"gender": "scholars.gender",
"countries": {'col':"scholars.country", 'type': "EQ_relation"},
"gender": {'col':"scholars.gender", 'type': "EQ_relation"},
"organizations": "affiliations.org",
"laboratories": "affiliations.team_lab",
"cities": "affiliations.org_city",
"organizations": {'col':"affiliations.org", 'type': "LIKE_relation"},
"laboratories": {'col':"affiliations.team_lab", 'type': "LIKE_relation"},
"cities": {'col':"affiliations.org_city", 'type': "EQ_relation"},
"linked": "linked_ids.ext_id_type"
"linked": {'col':"linked_ids.ext_id_type", 'type': "EQ_relation"}
}
......@@ -139,7 +139,7 @@ def get_field_aggs(a_field,
if a_field in FIELDS_FRONTEND_TO_SQL:
sql_col = FIELDS_FRONTEND_TO_SQL[a_field]
sql_col = FIELDS_FRONTEND_TO_SQL[a_field]['col']
sql_tab = sql_col.split('.')[0]
mlog('INFO', "AGG API sql_col", sql_col)
......
......@@ -174,29 +174,56 @@ class MyExtractor:
continue
else:
known_filter = key
sql_column = FIELDS_FRONTEND_TO_SQL[key]
sql_column = FIELDS_FRONTEND_TO_SQL[key]['col']
# "LIKE_relation" or "EQ_relation"
rel_type = FIELDS_FRONTEND_TO_SQL[key]['type']
val = filter_dict[known_filter]
if len(val):
clause = ""
if isinstance(val, list) or isinstance(val, tuple):
tested_array = [x for x in val if x != '']
# clause type            | clause is full
# -----------------------+---------------
# IN (val1, val2)        | False
# "= val"                | False
# "col LIKE '%val%'"     | True
clause_is_full = False
rhsclause = ""
fullclause = ""
if (isinstance(val, list) or isinstance(val, tuple)):
tested_array = [x for x in val if x]
mlog("DEBUG", "tested_array", tested_array)
if len(tested_array):
qwliststr = repr(tested_array)
qwliststr = sub(r'^\[', '(', qwliststr)
qwliststr = sub(r'\]$', ')', qwliststr)
clause = 'IN '+qwliststr
if rel_type == "EQ_relation":
qwliststr = repr(tested_array)
qwliststr = sub(r'^\[', '(', qwliststr)
qwliststr = sub(r'\]$', ')', qwliststr)
clause = 'IN '+qwliststr
elif rel_type == "LIKE_relation":
like_clauses = []
for singleval in tested_array:
if type(singleval) == str and len(singleval):
like_clauses.append(
sql_column+" LIKE '%"+singleval+"%'"
)
clause = " OR ".join(like_clauses)
# clause already includes col name
clause_is_full = True
elif isinstance(val, int):
clause = '= %i' % val
elif isinstance(val, float):
clause = '= %f' % val
# elif isinstance(val, str):
# clause = '= "%s"' % val
elif isinstance(val, str):
clause = '= "%s"' % val
clause = 'LIKE "%'+val+'%"'
clause_is_full = True
if len(clause):
sql_constraints.append("(%s %s)" % (sql_column, clause))
if clause_is_full:
sql_constraints.append("(%s)" % clause)
else:
sql_constraints.append("(%s %s)" % (sql_column, clause))
# debug
mlog("INFO", "SELECTing active users with sql_constraints", sql_constraints)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment