Commit 5c6bdc51 authored by Romain Loth's avatar Romain Loth

finish up service modules refacto

parent 7e7aefbb
...@@ -164,7 +164,7 @@ More info in `doc/` directory ...@@ -164,7 +164,7 @@ More info in `doc/` directory
This project was developed over several years with 3 main development phases. This project was developed over several years with 3 main development phases.
Scholars statistics and directory structure originally developed by D. Chavalarias (scholars data exploration in the `php_library` folder). Scholars statistics and directory structure originally developed by D. Chavalarias (scholars data exploration in the `php_library` folder).
Graph extraction logic originally developed by [S. Castillo](https://github.com/PkSM3/) (SubsetExtractor in the `dbdatapi` module) Graph extraction logic originally developed by [S. Castillo](https://github.com/PkSM3/) (BipartiteExtractor in the `dbdatapi` module)
Ports to python3+MySQL, user registration and merge of the various layers into one deployable docker by [R. Loth](https://github.com/rloth/) Ports to python3+MySQL, user registration and merge of the various layers into one deployable docker by [R. Loth](https://github.com/rloth/)
##### Copyright ##### Copyright
......
#### Structure
- `extractDataCustom.py`: starting from a query or a scholar's id, retrieves all related kw+neighbors data from DB, and builds a graph's json with the metadata and custom html in node's `content` property
- `converter.py`: normalizes country names and codes
#### Requirements
see global documentation
(this module no longer needs to be run independently; it is now part of the main run.sh)
#### History
Several modules related to graph community region extraction and graph force atlas layout have been moved to `graph_manipulation_unused` dir, because they are replaced by a client-side FA2 in tinawebJS.
Extraction logic originally developed by [S. Castillo](https://github.com/PkSM3/)
python3 port and merge with regcomex into ../comex_main_backend by [R. Loth](https://github.com/rloth/)
Copyright 2014-2016 ISCPIF/CNRS - UPS 3611
...@@ -18,11 +18,11 @@ from traceback import format_tb ...@@ -18,11 +18,11 @@ from traceback import format_tb
if __package__ == 'services': if __package__ == 'services':
from services.tools import mlog, REALCONFIG from services.tools import mlog, REALCONFIG
from services.dbcrud import connect_db from services.dbcrud import connect_db
from services.text.converter import CountryConverter from services.text.utils import CountryConverter
else: else:
from tools import mlog, REALCONFIG from tools import mlog, REALCONFIG
from dbcrud import connect_db from dbcrud import connect_db
from text.converter import CountryConverter from text.utils import CountryConverter
FIELDS_FRONTEND_TO_SQL = { FIELDS_FRONTEND_TO_SQL = {
...@@ -219,7 +219,16 @@ def find_scholar(some_key, some_str_value, cmx_db = None): ...@@ -219,7 +219,16 @@ def find_scholar(some_key, some_str_value, cmx_db = None):
class SubsetExtractor: class BipartiteExtractor:
"""
JSON FILTERS => SQL SELECT => scholars subset
||
VV
keywords
||
VV
neighboors
"""
def __init__(self,dbhost): def __init__(self,dbhost):
self.connection=connect( self.connection=connect(
......
...@@ -25,11 +25,11 @@ __status__ = "Dev" ...@@ -25,11 +25,11 @@ __status__ = "Dev"
# ============== imports ============== # ============== imports ==============
from re import sub, match from re import sub
from os import path from os import path
from json import dumps from json import dumps
from datetime import timedelta from datetime import timedelta
from urllib.parse import urlparse, urljoin, unquote from urllib.parse import unquote
from flask import Flask, render_template, request, \ from flask import Flask, render_template, request, \
redirect, url_for, session redirect, url_for, session
from flask_login import fresh_login_required, login_required, \ from flask_login import fresh_login_required, login_required, \
...@@ -42,9 +42,8 @@ if __package__ == 'services': ...@@ -42,9 +42,8 @@ if __package__ == 'services':
from services import tools, dbcrud, dbdatapi from services import tools, dbcrud, dbdatapi
from services.user import User, login_manager, \ from services.user import User, login_manager, \
doors_login, doors_register doors_login, doors_register
from services.dbdatapi import SubsetExtractor from services.dbdatapi import BipartiteExtractor
# TODO move sanitize there from services.text.utils import sanitize
# from services.text import keywords, sanitize
else: else:
# when this script is run directly # when this script is run directly
print("*** comex services (dev server mode) ***") print("*** comex services (dev server mode) ***")
...@@ -52,8 +51,8 @@ else: ...@@ -52,8 +51,8 @@ else:
import tools, dbcrud, dbdatapi import tools, dbcrud, dbdatapi
from user import User, login_manager, \ from user import User, login_manager, \
doors_login, doors_register doors_login, doors_register
from db_to_tina_api.extractDataCustom import MyExtractor from dbdatapi import BipartiteExtractor
# from text import keywords, sanitize from text.utils import sanitize
# ============= app creation ============ # ============= app creation ============
config = tools.REALCONFIG config = tools.REALCONFIG
...@@ -162,6 +161,9 @@ def unauthorized(): ...@@ -162,6 +161,9 @@ def unauthorized():
) )
def reroute(function_name_str):
    """
    Redirect the client to the absolute URL of one of our views.

    args:
        @function_name_str: endpoint name, as understood by flask's url_for

    returns: the response object built by flask's redirect
    """
    # _external=True makes url_for build an absolute URL
    absolute_url = url_for(function_name_str, _external=True)
    return redirect(absolute_url)
# ============= views ============= # ============= views =============
...@@ -180,18 +182,10 @@ def rootindex(): ...@@ -180,18 +182,10 @@ def rootindex():
"rootindex.html" "rootindex.html"
) )
# # /test_base
# @app.route('/test_base')
# def test_base():
# return render_template(
# "base_layout.html"
# )
# /services/ # /services/
@app.route(config['PREFIX']+'/') @app.route(config['PREFIX']+'/')
def services(): def services():
return redirect(url_for('login', _external=True)) return reroute('login')
# /services/api/aggs # /services/api/aggs
@app.route(config['PREFIX'] + config['API_ROUTE'] + '/aggs') @app.route(config['PREFIX'] + config['API_ROUTE'] + '/aggs')
...@@ -226,7 +220,7 @@ def graph_api(): ...@@ -226,7 +220,7 @@ def graph_api():
(original author S. Castillo) (original author S. Castillo)
""" """
if 'qtype' in request.args: if 'qtype' in request.args:
graphdb = SubsetExtractor(config['SQL_HOST']) graphdb = BipartiteExtractor(config['SQL_HOST'])
scholars = graphdb.getScholarsList( scholars = graphdb.getScholarsList(
request.args['qtype'], request.args['qtype'],
tools.restparse( tools.restparse(
...@@ -269,7 +263,7 @@ def user_api(): ...@@ -269,7 +263,7 @@ def user_api():
# /services/user/ # /services/user/
@app.route(config['PREFIX'] + config['USR_ROUTE']+'/', methods=['GET']) @app.route(config['PREFIX'] + config['USR_ROUTE']+'/', methods=['GET'])
def user(): def user():
return redirect(url_for('login', _external=True)) return reroute('login')
# /services/user/login/ # /services/user/login/
...@@ -380,7 +374,7 @@ def login(): ...@@ -380,7 +374,7 @@ def login():
elif user.empty: elif user.empty:
mlog('DEBUG',"empty user redirected to profile") mlog('DEBUG',"empty user redirected to profile")
# we go straight to empty profile for the person to create infos # we go straight to empty profile for the person to create infos
return(redirect(url_for('profile', _external=True))) return reroute('profile')
# normal call, normal user # normal call, normal user
else: else:
...@@ -388,7 +382,7 @@ def login(): ...@@ -388,7 +382,7 @@ def login():
next_url = request.args.get('next', None) next_url = request.args.get('next', None)
if not next_url: if not next_url:
return(redirect(url_for('profile', _external=True))) return reroute('profile')
else: else:
next_url = unquote(next_url) next_url = unquote(next_url)
mlog("DEBUG", "login with next_url:", next_url) mlog("DEBUG", "login with next_url:", next_url)
...@@ -404,7 +398,7 @@ def login(): ...@@ -404,7 +398,7 @@ def login():
else: else:
# server name is different than ours # server name is different than ours
# in next_url so we won't go there # in next_url so we won't go there
return(redirect(url_for('rootindex', _external=True))) return reroute('rootindex')
# /services/user/logout/ # /services/user/logout/
...@@ -412,7 +406,7 @@ def login(): ...@@ -412,7 +406,7 @@ def login():
def logout(): def logout():
logout_user() logout_user()
mlog('INFO', 'logged out previous user') mlog('INFO', 'logged out previous user')
return redirect(url_for('rootindex', _external=True)) return reroute('rootindex')
# /services/user/profile/ # /services/user/profile/
@app.route(config['PREFIX'] + config['USR_ROUTE'] + '/profile/', methods=['GET', 'POST']) @app.route(config['PREFIX'] + config['USR_ROUTE'] + '/profile/', methods=['GET', 'POST'])
...@@ -458,7 +452,8 @@ def profile(): ...@@ -458,7 +452,8 @@ def profile():
"executing DELETE scholar's data at the request of user %s" % str(the_id_to_delete)) "executing DELETE scholar's data at the request of user %s" % str(the_id_to_delete))
logout_user() logout_user()
dbcrud.rm_scholar(the_id_to_delete) dbcrud.rm_scholar(the_id_to_delete)
return(redirect(url_for('rootindex', _external=True)))
return reroute('rootindex')
else: else:
...@@ -863,53 +858,11 @@ def read_record_from_request(request): ...@@ -863,53 +858,11 @@ def read_record_from_request(request):
if hasattr(request, "files") and 'pic_file' in request.files and request.files['pic_file']: if hasattr(request, "files") and 'pic_file' in request.files and request.files['pic_file']:
new_fname = tools.pic_blob_to_filename(request.files['pic_file']) new_fname = tools.pic_blob_to_filename(request.files['pic_file'])
clean_records['pic_fname'] = new_fname clean_records['pic_fname'] = new_fname
mlog("DEBUG", "new_fname", new_fname) mlog("INFO", "new_fname", new_fname)
return clean_records return clean_records
# TODO move to text submodules
def sanitize(value, specific_type=None):
    """
    Simple and radical: replaces by '_' every character outside a small
    whitelist. One of the main goals is to remove ';'

    Whitelists per specific_type:
      - None    : word characters and '@' '.' ':' ',' '(' ')' '#' ' ' '-'
      - "sbool" : only "0" or "1" are accepted, anything else becomes 0
      - "surl"  : word characters, '@' '.' ':' and the ASCII range ' ' to '/'
      - "sdate" : digits and '/' ':' '-'

    POSS better

    args:
        @value: an int or a str to sanitize
        @specific_type: None or "sbool" or "surl" or "sdate"

    returns: the sanitized value, cast back to the type of @value

    raises: ValueError if @value is neither int nor str, or if
            @specific_type is not one of the values above
    """
    vtype = type(value)

    # exact type match on purpose: bool and other subclasses are rejected
    if vtype not in (int, str):
        raise ValueError("Value has an incorrect type %s" % str(vtype))

    # work on the string form, without leading/trailing whitespace
    clean_val = str(value).strip()

    if not specific_type:
        san_val = sub(r'[^\w@\.:,()# -]', '_', clean_val)

    elif specific_type == "sbool":
        # DB uses int(0) or int(1)
        san_val = int(clean_val) if clean_val in ("0", "1") else 0

    elif specific_type == "surl":
        # NOTE(review): ' -/' inside the class is a *range* (space to slash),
        # so it also lets through ! " # $ % & ' ( ) * + , - . /
        # Probably unintended, but kept as is because tightening it could
        # reject urls that are accepted today -- TODO confirm and restrict
        san_val = sub(r'[^\w@\.: -/]', '_', clean_val)

    elif specific_type == "sdate":
        # the hyphen must be last in the class to be a literal: the previous
        # pattern '/-:' was a range that excluded '-' and broke ISO dates
        san_val = sub(r'[^0-9/:-]', '_', clean_val)

    else:
        # previously fell through with san_val unbound (UnboundLocalError)
        raise ValueError("Unknown specific_type %s" % str(specific_type))

    # cast back to original type
    return vtype(san_val)
########### MAIN ########### ########### MAIN ###########
# this can only be used for debug # this can only be used for debug
# (in general use comex-run.sh to run the app) # (in general use comex-run.sh to run the app)
......
# TODO a keywords class gathering to factorize all cleanup and split operations
from sqlite3 import connect, Row from sqlite3 import connect, Row
from re import sub, match
if __package__ == "services.text": if __package__ == "services.text":
from services.tools import mlog from services.tools import mlog
...@@ -6,6 +7,48 @@ else: ...@@ -6,6 +7,48 @@ else:
from tools import mlog from tools import mlog
def sanitize(value, specific_type=None):
    """
    Simple and radical: replaces by '_' every character outside a small
    whitelist. One of the main goals is to remove ';'

    Whitelists per specific_type:
      - None    : word characters and '@' '.' ':' ',' '(' ')' '#' ' ' '-'
      - "sbool" : only "0" or "1" are accepted, anything else becomes 0
      - "surl"  : word characters, '@' '.' ':' and the ASCII range ' ' to '/'
      - "sdate" : digits and '/' ':' '-'

    POSS better

    args:
        @value: an int or a str to sanitize
        @specific_type: None or "sbool" or "surl" or "sdate"

    returns: the sanitized value, cast back to the type of @value

    raises: ValueError if @value is neither int nor str, or if
            @specific_type is not one of the values above
    """
    vtype = type(value)

    # exact type match on purpose: bool and other subclasses are rejected
    if vtype not in (int, str):
        raise ValueError("Value has an incorrect type %s" % str(vtype))

    # work on the string form, without leading/trailing whitespace
    clean_val = str(value).strip()

    if not specific_type:
        san_val = sub(r'[^\w@\.:,()# -]', '_', clean_val)

    elif specific_type == "sbool":
        # DB uses int(0) or int(1)
        san_val = int(clean_val) if clean_val in ("0", "1") else 0

    elif specific_type == "surl":
        # NOTE(review): ' -/' inside the class is a *range* (space to slash),
        # so it also lets through ! " # $ % & ' ( ) * + , - . /
        # Probably unintended, but kept as is because tightening it could
        # reject urls that are accepted today -- TODO confirm and restrict
        san_val = sub(r'[^\w@\.: -/]', '_', clean_val)

    elif specific_type == "sdate":
        # the hyphen must be last in the class to be a literal: the previous
        # pattern '/-:' was a range that excluded '-' and broke ISO dates
        san_val = sub(r'[^0-9/:-]', '_', clean_val)

    else:
        # previously fell through with san_val unbound (UnboundLocalError)
        raise ValueError("Unknown specific_type %s" % str(specific_type))

    # cast back to original type
    return vtype(san_val)
class CountryConverter: class CountryConverter:
def __init__(self,dbname,dbtable,dbcolumnID,dbcolumnName): def __init__(self,dbname,dbtable,dbcolumnID,dbcolumnName):
......
...@@ -124,6 +124,18 @@ read_config() ...@@ -124,6 +124,18 @@ read_config()
# ============================ other tools ===================================== # ============================ other tools =====================================
from urllib.parse import urlparse, urljoin, unquote
def is_safe_url(target, host_url):
    """
    Tell whether a redirect target stays on our own host.

    The target is first resolved against host_url (so relative targets
    are accepted), then it must use http or https and have the same
    netloc as host_url.

    cf. http://flask.pocoo.org/snippets/62/

    args:
        @target: the candidate url (absolute or relative)
        @host_url: the reference url of the current server

    returns: True if the target is ok for redirects, False otherwise
    """
    own_location = urlparse(host_url)
    resolved_target = urlparse(urljoin(host_url, target))

    # anything else than plain web schemes is refused (javascript:, data:...)
    if resolved_target.scheme not in ('http', 'https'):
        return False

    return own_location.netloc == resolved_target.netloc
def re_hash(userinput, salt="verylonverylongverylonverylongverylonverylong"): def re_hash(userinput, salt="verylonverylongverylonverylongverylonverylong"):
""" """
Build the captcha's verification hash server side Build the captcha's verification hash server side
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment