Commit 9604aece authored by Romain Loth

specific sanitization for str types representing urls and dates

parent 39033a12
...@@ -98,7 +98,7 @@ SOURCE_FIELDS = [ ...@@ -98,7 +98,7 @@ SOURCE_FIELDS = [
("hon_title", True, None), ("hon_title", True, None),
("interests_text", True, None), ("interests_text", True, None),
("gender", False, None), # M|F ("gender", False, None), # M|F
("job_looking_date", True, None), # def null: not looking for a job ("job_looking_date", True, "date"), # def null: not looking for a job
("home_url", True, "url"), # scholar's homepage ("home_url", True, "url"), # scholar's homepage
("pic_url", True, "url"), ("pic_url", True, "url"),
("pic_file", False, None), # saved separately ("pic_file", False, None), # saved separately
...@@ -114,7 +114,7 @@ SOURCE_FIELDS = [ ...@@ -114,7 +114,7 @@ SOURCE_FIELDS = [
("keywords", True, None), ("keywords", True, None),
# => for *keywords* table (after split str) # => for *keywords* table (after split str)
("hashtags", True ) ("hashtags", True, None)
# => for *hashtags* table (after split str) # => for *hashtags* table (after split str)
] ]
...@@ -733,10 +733,7 @@ def read_record_from_request(request): ...@@ -733,10 +733,7 @@ def read_record_from_request(request):
spec_type = field_info[2] spec_type = field_info[2]
if field in request.form: if field in request.form:
if do_sanitize: if do_sanitize:
val = sanitize( val = sanitize(request.form[field], spec_type)
request.form[field],
is_url = (spec_type == "url")
)
if val != '': if val != '':
clean_records[field] = val clean_records[field] = val
else: else:
...@@ -777,22 +774,30 @@ def read_record_from_request(request): ...@@ -777,22 +774,30 @@ def read_record_from_request(request):
# TODO move to text submodules # TODO move to text submodules
def sanitize(value, is_url=False): def sanitize(value, specific_type=None):
""" """
simple and radical: leaves only alphanum and '@' '.' '-' ':' ',' '(', ')', '#', ' ' simple and radical: leaves only alphanum and '@' '.' '-' ':' ',' '(', ')', '#', ' '
One of the main goals is to remove ';' One of the main goals is to remove ';'
POSS better POSS better
args:
@value: any string to sanitize
@specific_type: None or 'url' or 'date'
""" """
vtype = type(value) vtype = type(value)
str_val = str(value) str_val = str(value)
clean_val = sub(r'^\s+', '', str_val) clean_val = sub(r'^\s+', '', str_val)
clean_val = sub(r'\s+$', '', clean_val) clean_val = sub(r'\s+$', '', clean_val)
if is_url: if not specific_type:
san_val = sub(r'[^\w@\.: -/]', '_', clean_val)
else:
san_val = sub(r'[^\w@\.:,()# -]', '_', clean_val) san_val = sub(r'[^\w@\.:,()# -]', '_', clean_val)
elif specific_type == "url":
san_val = sub(r'[^\w@\.: -/]', '_', clean_val)
elif specific_type == "date":
san_val = sub(r'[^0-9/-:]', '_', clean_val)
if vtype not in [int, str]: if vtype not in [int, str]:
raise ValueError("Value has an incorrect type %s" % str(vtype)) raise ValueError("Value has an incorrect type %s" % str(vtype))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment