Commit 1263684b authored by Romain Loth

ris parser: finish code clean up

parent e05a8281
from ._Parser import Parser from ._Parser import Parser
from gargantext.util.languages import languages from gargantext.util.languages import languages
#from admin.utils import PrintException
class RISParser(Parser): class RISParser(Parser):
# def __init__(self, language_cache=None):
#
# #super(Parser, self).__init__()
# #super(Parser, self).__init__()
# self._languages_cache = LanguagesCache() if language_cache is None else language_cache
_begin = 6 _begin = 6
_parameters = { _parameters = {
"ER": {"type": "delimiter"}, # the record delimiter "ER": {"type": "delimiter"}, # the record delimiter
...@@ -33,6 +23,7 @@ class RISParser(Parser): ...@@ -33,6 +23,7 @@ class RISParser(Parser):
hyperdata = {} hyperdata = {}
last_key = None last_key = None
last_values = [] last_values = []
current_value = None
for line in file: for line in file:
# bytes ~~> str # bytes ~~> str
...@@ -69,15 +60,13 @@ class RISParser(Parser): ...@@ -69,15 +60,13 @@ class RISParser(Parser):
last_values = [] last_values = []
last_key = parameter_key last_key = parameter_key
# 3 - new key or old: in any case we feed the value array "buffer" # 3 - new key or old: in any case we pass contents to
try: # the value array buffer (=> for the next loop only)
last_values.append(current_value) last_values.append(current_value)
except Exception as error: current_value = None
print(error)
# empty line => we still need to check if PREVIOUS LINE was record delimiter # empty line => we need to check if PREVIOUS LINE was record delimiter
else: else:
# print("\n\n\nEMPTY LINE, with last_key", last_key)
if last_key in self._parameters: if last_key in self._parameters:
if parameter["type"] == "delimiter": if parameter["type"] == "delimiter":
if 'language_fullname' not in hyperdata.keys(): if 'language_fullname' not in hyperdata.keys():
...@@ -87,9 +76,15 @@ class RISParser(Parser): ...@@ -87,9 +76,15 @@ class RISParser(Parser):
yield hyperdata yield hyperdata
last_key = None last_key = None
hyperdata = {} hyperdata = {}
# [end of loop per lines] # [end of loop per lines]
# if we have any values left on previous line => put them in hd
if last_key in self._parameters:
parameter = self._parameters[last_key]
if parameter["type"] == "hyperdata":
separator = parameter["separator"] if "separator" in parameter else ""
hyperdata[parameter["key"]] = separator.join(last_values)
# if a hyperdata object is left in memory, yield it as well # if a hyperdata object is left in memory, yield it as well
if hyperdata: if hyperdata:
if 'language_fullname' not in hyperdata.keys(): if 'language_fullname' not in hyperdata.keys():
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment