Commit 5118824d authored by Romain Loth

risparser: fix handling of continuation lines

parent a4a744d7
...@@ -47,49 +47,66 @@ class RISParser(Parser): ...@@ -47,49 +47,66 @@ class RISParser(Parser):
for line in file: for line in file:
# bytes ~~> str # bytes ~~> str
line = line.decode("UTF-8").rstrip('\r\n') line = line.decode("UTF-8").rstrip('\r\n')
# print("RIS line:", line)
if len(line) >= 2 : if len(line) >= 2 :
# extract the parameter key... # print("(nonemptyline)")
parameter_key = line[:2]
# test if key line (otherwise: continuation line)
# ...and keep the rest for when we know what to do with it if match(r'[A-Z][A-Z0-9]\s', line):
current_value = line[self._begin:] parameter_key = line[:2]
# print("(matchparamline:"+parameter_key+")")
# it's a new key => therefore the previous key is finished
if parameter_key != last_key: # we can now be sure that the value is rest of the line
# (keep it for when we know what to do with it)
if last_key in self._parameters: current_value = line[self._begin:]
# translate key
parameter = self._parameters[last_key] # it's a new key => therefore the previous key is finished
# 1 - we record the previous value array... if parameter_key != last_key:
if parameter["type"] == "hyperdata":
separator = parameter["separator"] if "separator" in parameter else "" if last_key in self._parameters:
final_value = separator.join(last_values) # translate key
if last_key != 'PY': parameter = self._parameters[last_key]
hyperdata[parameter["key"]] = final_value # 1 - we record the previous value array...
else: if parameter["type"] == "hyperdata":
hyperdata = PY_values_decompose_and_save(final_value, hyperdata) separator = parameter["separator"] if "separator" in parameter else ""
final_value = separator.join(last_values)
#... or even finish the record (rare here, most often after empty line) if last_key != 'PY':
elif parameter["type"] == "delimiter": hyperdata[parameter["key"]] = final_value
if 'language_fullname' not in hyperdata.keys(): else:
if 'language_iso3' not in hyperdata.keys(): hyperdata = PY_values_decompose_and_save(final_value, hyperdata)
if 'language_iso2' not in hyperdata.keys(): # print("{saved previous"+last_key+"}")
hyperdata['language_iso2'] = 'en'
yield hyperdata #... or even finish the record (rare here, most often after empty line)
last_key = None elif parameter["type"] == "delimiter":
hyperdata = {} if 'language_fullname' not in hyperdata.keys():
if 'language_iso3' not in hyperdata.keys():
# 2 - new key: also we start a new value array and move on to the next key if 'language_iso2' not in hyperdata.keys():
last_values = [] hyperdata['language_iso2'] = 'en'
last_key = parameter_key yield hyperdata
# print("{saved previous record}")
# 3 - new key or old: in any case we pass contents to last_key = None
hyperdata = {}
# 2 - new key: also we start a new value array and move on to the next key
last_values = []
last_key = parameter_key
# continuation line: values start from position 0
else:
current_value = line
# print("(continuationline)")
# 3 - new key or old or no key: in any case we pass contents to
# the value array buffer (=> for the next loop only) # the value array buffer (=> for the next loop only)
last_values.append(current_value) last_values.append(current_value)
current_value = None current_value = None
# empty line => we need to check if PREVIOUS LINE was record delimiter # empty line => we need to check if PREVIOUS LINE was record delimiter
else: else:
# print("(emptyline)")
if last_key in self._parameters: if last_key in self._parameters:
if parameter["type"] == "delimiter": if parameter["type"] == "delimiter":
if 'language_fullname' not in hyperdata.keys(): if 'language_fullname' not in hyperdata.keys():
...@@ -97,6 +114,7 @@ class RISParser(Parser): ...@@ -97,6 +114,7 @@ class RISParser(Parser):
if 'language_iso2' not in hyperdata.keys(): if 'language_iso2' not in hyperdata.keys():
hyperdata['language_iso2'] = 'en' hyperdata['language_iso2'] = 'en'
yield hyperdata yield hyperdata
# print("{saved previous record}")
last_key = None last_key = None
hyperdata = {} hyperdata = {}
# [end of loop per lines] # [end of loop per lines]
...@@ -111,6 +129,7 @@ class RISParser(Parser): ...@@ -111,6 +129,7 @@ class RISParser(Parser):
hyperdata[parameter["key"]] = final_value hyperdata[parameter["key"]] = final_value
else: else:
hyperdata = PY_values_decompose_and_save(final_value, hyperdata) hyperdata = PY_values_decompose_and_save(final_value, hyperdata)
# print("{saved previous"+last_key+"}")
# if a hyperdata object is left in memory, yield it as well # if a hyperdata object is left in memory, yield it as well
if hyperdata: if hyperdata:
...@@ -119,8 +138,7 @@ class RISParser(Parser): ...@@ -119,8 +138,7 @@ class RISParser(Parser):
if 'language_iso2' not in hyperdata.keys(): if 'language_iso2' not in hyperdata.keys():
hyperdata['language_iso2'] = 'en' hyperdata['language_iso2'] = 'en'
yield hyperdata yield hyperdata
# print("{saved previous record}")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment