From 9604aecef543b08467db891e586af1de7f6dfd4b Mon Sep 17 00:00:00 2001 From: rloth <romain.loth@iscpif.fr> Date: Tue, 21 Feb 2017 17:36:44 +0100 Subject: [PATCH] specific sanitization for str types representing urls and dates --- services/main.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/services/main.py b/services/main.py index a9cec3f..858755a 100755 --- a/services/main.py +++ b/services/main.py @@ -98,7 +98,7 @@ SOURCE_FIELDS = [ ("hon_title", True, None), ("interests_text", True, None), ("gender", False, None), # M|F - ("job_looking_date", True, None), # def null: not looking for a job + ("job_looking_date", True, "date"), # def null: not looking for a job ("home_url", True, "url"), # scholar's homepage ("pic_url", True, "url"), ("pic_file", False, None), # saved separately @@ -114,7 +114,7 @@ SOURCE_FIELDS = [ ("keywords", True, None), # => for *keywords* table (after split str) - ("hashtags", True ) + ("hashtags", True, None) # => for *hashtags* table (after split str) ] @@ -733,10 +733,7 @@ def read_record_from_request(request): spec_type = field_info[2] if field in request.form: if do_sanitize: - val = sanitize( - request.form[field], - is_url = (spec_type == "url") - ) + val = sanitize(request.form[field], spec_type) if val != '': clean_records[field] = val else: @@ -777,22 +774,30 @@ def read_record_from_request(request): # TODO move to text submodules -def sanitize(value, is_url=False): +def sanitize(value, specific_type=None): """ simple and radical: leaves only alphanum and '@' '.' 
'-' ':' ',' '(', ')', '#', ' ' One of the main goals is to remove ';' POSS better + + + args: + @value: any string to sanitize + + @specific_type: None or 'url' or 'date' """ vtype = type(value) str_val = str(value) clean_val = sub(r'^\s+', '', str_val) clean_val = sub(r'\s+$', '', clean_val) - if is_url: - san_val = sub(r'[^\w@\.: -/]', '_', clean_val) - else: + if not specific_type: san_val = sub(r'[^\w@\.:,()# -]', '_', clean_val) + elif specific_type == "url": + san_val = sub(r'[^\w@\.: -/]', '_', clean_val) + elif specific_type == "date": + san_val = sub(r'[^0-9/-:]', '_', clean_val) if vtype not in [int, str]: raise ValueError("Value has an incorrect type %s" % str(vtype)) -- 2.21.0