Commit 9f535e17 authored by Romain Loth

fix RIS parser encoding problem

parent 401ab95a
...@@ -15,18 +15,18 @@ class RISParser(Parser): ...@@ -15,18 +15,18 @@ class RISParser(Parser):
_begin = 6 _begin = 6
_parameters = { _parameters = {
b"ER": {"type": "delimiter"}, "ER": {"type": "delimiter"},
b"TI": {"type": "hyperdata", "key": "title", "separator": " "}, "TI": {"type": "hyperdata", "key": "title", "separator": " "},
b"ST": {"type": "hyperdata", "key": "subtitle", "separator": " "}, "ST": {"type": "hyperdata", "key": "subtitle", "separator": " "},
b"AU": {"type": "hyperdata", "key": "authors", "separator": "\n"}, "AU": {"type": "hyperdata", "key": "authors", "separator": "\n"},
b"T2": {"type": "hyperdata", "key": "journal"}, "T2": {"type": "hyperdata", "key": "journal"},
b"UR": {"type": "hyperdata", "key": "doi"}, "UR": {"type": "hyperdata", "key": "doi"},
b"PY": {"type": "hyperdata", "key": "publication_year"}, "PY": {"type": "hyperdata", "key": "publication_year"},
b"PD": {"type": "hyperdata", "key": "publication_month"}, "PD": {"type": "hyperdata", "key": "publication_month"},
b"N1": {"type": "hyperdata", "key": "references", "separator": ", "}, "N1": {"type": "hyperdata", "key": "references", "separator": ", "},
b"LA": {"type": "hyperdata", "key": "language_iso2"}, "LA": {"type": "hyperdata", "key": "language_iso2"},
b"AB": {"type": "hyperdata", "key": "abstract", "separator": " "}, "A": {"type": "hyperdata", "key": "abstract", "separator": " "},
b"WC": {"type": "hyperdata", "key": "fields"}, "WC": {"type": "hyperdata", "key": "fields"},
} }
def parse(self, file): def parse(self, file):
...@@ -34,13 +34,13 @@ class RISParser(Parser): ...@@ -34,13 +34,13 @@ class RISParser(Parser):
hyperdata = {} hyperdata = {}
last_key = None last_key = None
last_values = [] last_values = []
# browse every line of the file
for line in file: for line in file:
# bytes ~~> str
line = line.decode("UTF-8").rstrip('\r\n')
if len(line) > 2 : if len(line) > 2 :
# extract the parameter key # extract the parameter key
parameter_key = line[:2] parameter_key = line[:2]
if parameter_key != b' ' and parameter_key != last_key: if parameter_key != ' ' and parameter_key != last_key:
if last_key in self._parameters: if last_key in self._parameters:
# translate the parameter key # translate the parameter key
parameter = self._parameters[last_key] parameter = self._parameters[last_key]
...@@ -57,7 +57,7 @@ class RISParser(Parser): ...@@ -57,7 +57,7 @@ class RISParser(Parser):
last_key = parameter_key last_key = parameter_key
last_values = [] last_values = []
try: try:
last_values.append(line[self._begin:-1].decode()) last_values.append(line[self._begin:])
except Exception as error: except Exception as error:
print(error) print(error)
# if a hyperdata object is left in memory, yield it as well # if a hyperdata object is left in memory, yield it as well
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment