Commit 5118824d authored by Romain Loth's avatar Romain Loth

risparser: fix handling of continuation lines

parent a4a744d7
......@@ -47,49 +47,66 @@ class RISParser(Parser):
for line in file:
# bytes ~~> str
line = line.decode("UTF-8").rstrip('\r\n')
# print("RIS line:", line)
if len(line) >= 2 :
# extract the parameter key...
parameter_key = line[:2]
# ...and keep the rest for when we know what to do with it
current_value = line[self._begin:]
# it's a new key => therefore the previous key is finished
if parameter_key != last_key:
if last_key in self._parameters:
# translate key
parameter = self._parameters[last_key]
# 1 - we record the previous value array...
if parameter["type"] == "hyperdata":
separator = parameter["separator"] if "separator" in parameter else ""
final_value = separator.join(last_values)
if last_key != 'PY':
hyperdata[parameter["key"]] = final_value
else:
hyperdata = PY_values_decompose_and_save(final_value, hyperdata)
#... or even finish the record (rare here, most often after empty line)
elif parameter["type"] == "delimiter":
if 'language_fullname' not in hyperdata.keys():
if 'language_iso3' not in hyperdata.keys():
if 'language_iso2' not in hyperdata.keys():
hyperdata['language_iso2'] = 'en'
yield hyperdata
last_key = None
hyperdata = {}
# 2 - new key: also we start a new value array and move on to the next key
last_values = []
last_key = parameter_key
# 3 - new key or old: in any case we pass contents to
# print("(nonemptyline)")
# test if key line (otherwise: continuation line)
if match(r'[A-Z][A-Z0-9]\s', line):
parameter_key = line[:2]
# print("(matchparamline:"+parameter_key+")")
# we can now be sure that the value is rest of the line
# (keep it for when we know what to do with it)
current_value = line[self._begin:]
# it's a new key => therefore the previous key is finished
if parameter_key != last_key:
if last_key in self._parameters:
# translate key
parameter = self._parameters[last_key]
# 1 - we record the previous value array...
if parameter["type"] == "hyperdata":
separator = parameter["separator"] if "separator" in parameter else ""
final_value = separator.join(last_values)
if last_key != 'PY':
hyperdata[parameter["key"]] = final_value
else:
hyperdata = PY_values_decompose_and_save(final_value, hyperdata)
# print("{saved previous"+last_key+"}")
#... or even finish the record (rare here, most often after empty line)
elif parameter["type"] == "delimiter":
if 'language_fullname' not in hyperdata.keys():
if 'language_iso3' not in hyperdata.keys():
if 'language_iso2' not in hyperdata.keys():
hyperdata['language_iso2'] = 'en'
yield hyperdata
# print("{saved previous record}")
last_key = None
hyperdata = {}
# 2 - new key: also we start a new value array and move on to the next key
last_values = []
last_key = parameter_key
# continuation line: values start from position 0
else:
current_value = line
# print("(continuationline)")
# 3 - new key or old or no key: in any case we pass contents to
# the value array buffer (=> for the next loop only)
last_values.append(current_value)
current_value = None
# empty line => we need to check if PREVIOUS LINE was record delimiter
else:
# print("(emptyline)")
if last_key in self._parameters:
if parameter["type"] == "delimiter":
if 'language_fullname' not in hyperdata.keys():
......@@ -97,6 +114,7 @@ class RISParser(Parser):
if 'language_iso2' not in hyperdata.keys():
hyperdata['language_iso2'] = 'en'
yield hyperdata
# print("{saved previous record}")
last_key = None
hyperdata = {}
# [end of loop per lines]
......@@ -111,6 +129,7 @@ class RISParser(Parser):
hyperdata[parameter["key"]] = final_value
else:
hyperdata = PY_values_decompose_and_save(final_value, hyperdata)
# print("{saved previous"+last_key+"}")
# if a hyperdata object is left in memory, yield it as well
if hyperdata:
......@@ -119,8 +138,7 @@ class RISParser(Parser):
if 'language_iso2' not in hyperdata.keys():
hyperdata['language_iso2'] = 'en'
yield hyperdata
# print("{saved previous record}")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment