Commit b5eb0323 authored by delanoe

[FIX] Date-parsing bug for WOS detected and fixed.

parent f6c9e87c
......@@ -11,7 +11,7 @@ class RISParser(Parser):
# #super(Parser, self).__init__()
# #super(Parser, self).__init__()
# self._languages_cache = LanguagesCache() if language_cache is None else language_cache
_begin = 6
_parameters = {
......@@ -29,13 +29,13 @@ class RISParser(Parser):
}
def parse(self, file):
hyperdata = {}
last_key = None
last_values = []
# browse every line of the file
for line in file:
if len(line) > 2:
if len(line) > 2 :
# extract the parameter key
parameter_key = line[:2]
if parameter_key != b' ' and parameter_key != last_key:
......@@ -60,11 +60,4 @@ class RISParser(Parser):
print(error)
# if a hyperdata object is left in memory, yield it as well
if hyperdata:
# try:
# if hyperdata['date_to_parse']:
# print(hyperdata['date_to_parse'])
# except:
# pass
#
#print(hyperdata['title'])
yield hyperdata
......@@ -3,6 +3,7 @@ import dateutil.parser
import zipfile
import re
import dateparser as date_parser
from gargantext.util.languages import languages
......@@ -50,7 +51,7 @@ class Parser:
default=DEFAULT_DATE
).strftime("%Y-%m-%d %H:%M:%S")
except Exception as error:
print(error, 'Parser Zotero, Date not parsed for:', date_string)
print(error, 'Date not parsed for:', date_string)
hyperdata['publication_date'] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
......@@ -75,8 +76,21 @@ class Parser:
date_string += ":" + hyperdata[key]
try:
hyperdata[prefix + "_date"] = dateutil.parser.parse(date_string).strftime("%Y-%m-%d %H:%M:%S")
except:
pass
except Exception as error:
try:
print(error, date_string)
# Date format: 1994 NOV-DEC
hyperdata[prefix + "_date"] = date_parser.parse(str(date_string)[:8]).strftime("%Y-%m-%d %H:%M:%S")
except Exception as error:
try:
print(error)
# FIXME Date format: 1994 SPR
# By default, we take the year only
hyperdata[prefix + "_date"] = date_parser.parse(str(date_string)[:4]).strftime("%Y-%m-%d %H:%M:%S")
except Exception as error:
print(error)
else:
print("WARNING: Date unknown at _Parser level, using now()")
hyperdata['publication_date'] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment