Commit 990a5aed authored by Romain Loth's avatar Romain Loth

add a simple aggregations API

parent ed8ea3da
......@@ -26,3 +26,7 @@ SQL_PORT=3306
#; (used only in ajax context of sending credentials) #; TODO SSL
DOORS_HOST=172.18.0.3
DOORS_PORT=8989
[content]
# used for all aggregation queries
HAPAX_THRESHOLD = 0
......@@ -26,3 +26,7 @@ SQL_PORT=3306
#; (used only in ajax context of sending credentials) #; TODO SSL
DOORS_HOST=134.158.75.71
DOORS_PORT=80
[content]
# used for all aggregation queries
HAPAX_THRESHOLD = 0
......@@ -26,3 +26,7 @@ SQL_PORT=3306
#; (used only in ajax context of sending credentials) #; TODO SSL
DOORS_HOST=134.158.75.71
DOORS_PORT=80
[content]
# used for all aggregation queries
HAPAX_THRESHOLD = 0
......@@ -46,6 +46,21 @@ ORG_COLS = [
("org_city", False, 50)
]
# Frontend field names (as received in API queries) mapped to
# the SQL column they designate, always in "table.column" format
FIELDS_FRONTEND_TO_SQL = {
    "keywords":      "keywords.kwstr",
    "countries":     "scholars.country",
    "organizations": "affiliations.org",
    "laboratories":  "affiliations.team_lab",
    "tags":          "scholars.community_hashtags",

    # more recent fields
    "gender":        "scholars.gender",
    "cities":        "affiliations.org_city",
    "linked":        "linked_ids.ext_id_type",
}
def connect_db(config=REALCONFIG):
"""
Simple connection
......@@ -60,6 +75,109 @@ def connect_db(config=REALCONFIG):
db="comex_shared"
)
def get_field_aggs(a_field, hapax_threshold=int(REALCONFIG['HAPAX_THRESHOLD'])):
    """
    Use case: api/aggs?field=a_field
       => Retrieves the distinct values of a field, each with a count

       => about the count column (*n* vs *occs*):
           - for tables != keywords  count is scholar count (column n)
           - for table keywords      count is occurrences count (column occs)

    Args:
        a_field: frontend field name (a key of FIELDS_FRONTEND_TO_SQL)
        hapax_threshold: if it is an int > 0, only the values whose count
                         is strictly greater than it are kept

    Returns:
        the rows fetched through the DictCursor (columns x and n or occs),
        ordered by decreasing count, or [] if a_field is not in the mapping
        or if the query returned no rows

    Raises:
        ValueError: if the mapped column belongs to a table for which
                    no aggregation query is defined

    NB relies on FIELDS_FRONTEND_TO_SQL mapping

    POSS: allow other fields than those in the mapping
          if they are already in sql table.col format?
    """
    agg_rows = []

    if a_field in FIELDS_FRONTEND_TO_SQL:
        sql_col = FIELDS_FRONTEND_TO_SQL[a_field]
        sql_tab = sql_col.split('.')[0]

        mlog('DEBUG', "AGG API sql_col", sql_col)
        mlog('DEBUG', "AGG API sql_tab", sql_tab)

        # the statement is built before connecting, so that an unsupported
        # table can never leave an open connection behind
        stmt = _aggs_statement(sql_col, sql_tab, hapax_threshold)

        mlog("DEBUG", "get_field_aggs STATEMENT:\n-- SQL\n%s\n-- /SQL" % stmt)

        db = connect_db()
        try:
            db_c = db.cursor(DictCursor)

            # do it
            n_rows = db_c.execute(stmt)

            if n_rows > 0:
                agg_rows = db_c.fetchall()
        finally:
            # always release the connection, even if the query failed
            db.close()

    mlog('INFO', agg_rows)

    return agg_rows


def _aggs_statement(sql_col, sql_tab, hapax_threshold):
    """
    Builds the SQL aggregation statement for one "table.column".

    All the interpolated parts are either constants of this module
    (mapping values, join clauses below) or a checked int: nothing
    coming directly from the request is ever inserted in the SQL.
    """
    # JOIN clauses needed to reach each supported table from scholars
    joins_by_table = {
        'scholars': "",
        'affiliations': """
                -- 0 or 1
                LEFT JOIN affiliations
                    ON scholars.affiliation_id = affiliations.affid""",
        'linked_ids': """
                -- 0 or 1
                LEFT JOIN linked_ids
                    ON scholars.doors_uid = linked_ids.uid""",
        'keywords': """
                -- 0 or many
                LEFT JOIN sch_kw
                    ON scholars.doors_uid = sch_kw.uid
                JOIN keywords
                    ON sch_kw.kwid = keywords.kwid""",
    }

    if sql_tab not in joins_by_table:
        raise ValueError(
            "get_field_aggs: no aggregation query for table '%s'" % sql_tab
        )

    # keywords rows are counted as occurrences, all the others as scholars
    count_col = 'occs' if sql_tab == 'keywords' else 'n'

    # bool is a subclass of int: exclude it to keep accepting real ints only
    is_plain_int = (isinstance(hapax_threshold, int)
                    and not isinstance(hapax_threshold, bool))

    if is_plain_int and hapax_threshold > 0:
        where_clause = "WHERE %s > %i" % (count_col, hapax_threshold)
    else:
        where_clause = ""

    return """
        SELECT * FROM (
            SELECT %(col)s AS x, COUNT(*) AS %(count)s
            FROM scholars%(joins)s
            GROUP BY %(col)s
        ) AS allcounts
        %(filter)s
        ORDER BY %(count)s DESC
    """ % {
        'col': sql_col,
        'count': count_col,
        'joins': joins_by_table[sql_tab],
        'filter': where_clause,
    }
def get_full_scholar(uid):
"""
Autonomous function to be used by User class
......
......@@ -10,18 +10,12 @@ from .converter import CountryConverter
if __package__ == "services.db_to_tina_api":
from services.tools import mlog
from services.db import FIELDS_FRONTEND_TO_SQL
else:
from tools import mlog
from db import FIELDS_FRONTEND_TO_SQL
whoswhofilters_to_sqlnames = {
"keywords": "keywords.kwstr",
"countries": "scholars.country",
"organizations": "affiliations.org",
"laboratories": "affiliations.team_lab",
"tags": "scholars.community_hashtags"
}
class MyExtractor:
def __init__(self,dbhost):
......@@ -147,11 +141,11 @@ class MyExtractor:
known_filter = None
sql_column = None
if key not in whoswhofilters_to_sqlnames:
if key not in FIELDS_FRONTEND_TO_SQL:
continue
else:
known_filter = key
sql_column = whoswhofilters_to_sqlnames[key]
sql_column = FIELDS_FRONTEND_TO_SQL[key]
val = filter_dict[known_filter]
......
......@@ -36,7 +36,7 @@ if __package__ == 'services':
from services.user import User, login_manager, doors_login
from services.text import keywords
from services.tools import restparse, mlog, re_hash, REALCONFIG
from services.db import connect_db, get_or_create_keywords, save_pairs_sch_kw, get_or_create_affiliation, save_scholar
from services.db import connect_db, get_or_create_keywords, save_pairs_sch_kw, get_or_create_affiliation, save_scholar, get_field_aggs
from services.db_to_tina_api.extractDataCustom import MyExtractor as MySQL
else:
# when this script is run directly
......@@ -44,7 +44,7 @@ else:
from user import User, login_manager, doors_login
from text import keywords
from tools import restparse, mlog, re_hash, REALCONFIG
from db import connect_db, get_or_create_keywords, save_pairs_sch_kw, get_or_create_affiliation, save_scholar
from db import connect_db, get_or_create_keywords, save_pairs_sch_kw, get_or_create_affiliation, save_scholar, get_field_aggs
from db_to_tina_api.extractDataCustom import MyExtractor as MySQL
# ============= read config ============
......@@ -113,6 +113,19 @@ MIN_KW = 5
def services():
return redirect(url_for('login', _external=True))
# /services/api/aggs
@app.route(config['PREFIX'] + config['API_ROUTE'] + '/aggs')
def aggs_api():
    """
    Read-only API serving DB aggregation data (ex: for autocompletes)

    Expects a 'field' argument in the query string and returns
    the dumped result of get_field_aggs for that field.
    """
    # guard: nothing can be aggregated without a field name
    if 'field' not in request.args:
        raise TypeError("aggs API query is missing 'field' argument")

    # the field name itself is checked by the db module
    field_name = request.args['field']
    return dumps(get_field_aggs(field_name))
# /services/api/graph
@app.route(config['PREFIX'] + config['API_ROUTE'] + '/graph')
......
......@@ -38,7 +38,10 @@ CONFIGMENU = [
{"sec": 'backends', "var":'SQL_HOST', "def": '172.17.0.2' },
{"sec": 'backends', "var":'SQL_PORT', "def": '3306' },
{"sec": 'backends', "var":'DOORS_HOST', "def": '0.0.0.0' },
{"sec": 'backends', "var":'DOORS_PORT', "def": '8989' }
{"sec": 'backends', "var":'DOORS_PORT', "def": '8989' },
# data processing
{"sec": 'content', "var":'HAPAX_THRESHOLD', "def": '1 ' }
]
def home_path():
......
......@@ -416,10 +416,9 @@ function checkJobDateStatus() {
// £TODO1 move autocomp data to an autocomplete module
// -> local data for countries, jobtitles
// -> ajax fetcher for the scholars, kws and labs
// £TODO2 add a fetcher API on services side
// £TODO move autocomp data to an autocomplete module
// -> local data for countries, jobtitles
// -> use ajax aggs api for the scholars, kws and labs
// autocomplete countries
$(function() {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment