Commit 0426d57e authored by Mathieu Rodic

[BUGFIX] The last metadata record of RIS and ISI files is now appended to the corpus

https://forge.iscpif.fr/issues/1368
parent 38a39f84
...@@ -20,29 +20,31 @@ class RisFileParser(FileParser): ...@@ -20,29 +20,31 @@ class RisFileParser(FileParser):
metadata = {} metadata = {}
last_key = None last_key = None
last_values = [] last_values = []
# browse every line of the file
for line in file: for line in file:
if len(line) > 2: if len(line) > 2:
# extract the parameter key
parameter_key = line[:2] parameter_key = line[:2]
if parameter_key != b' ' and parameter_key != last_key: if parameter_key != b' ' and parameter_key != last_key:
if last_key in self._parameters: if last_key in self._parameters:
# translate the parameter key
parameter = self._parameters[last_key] parameter = self._parameters[last_key]
if parameter["type"] == "metadata": if parameter["type"] == "metadata":
separator = parameter["separator"] if "separator" in parameter else "" separator = parameter["separator"] if "separator" in parameter else ""
metadata[parameter["key"]] = separator.join(last_values) metadata[parameter["key"]] = separator.join(last_values)
elif parameter["type"] == "delimiter": elif parameter["type"] == "delimiter":
try: if 'language_fullname' not in metadata.keys():
if 'language_fullname' not in metadata.keys(): if 'language_iso3' not in metadata.keys():
if 'language_iso3' not in metadata.keys(): if 'language_iso2' not in metadata.keys():
if 'language_iso2' not in metadata.keys(): metadata['language_iso2'] = 'en'
metadata['language_iso2'] = 'en' yield metadata
yield metadata metadata = {}
metadata = {}
except:
pass
last_key = parameter_key last_key = parameter_key
last_values = [] last_values = []
try: try:
last_values.append(line[3:-1].decode()) last_values.append(line[3:-1].decode())
except Exception as error: except Exception as error:
print(error) print(error)
pass # if a metadata object is left in memory, yield it as well
if metadata:
yield metadata
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment