Commit 2b482bec authored by Administrator

[FEAT] Adding Zotero date parser.

parent 407b96ab
import collections import collections
import datetime
import dateutil.parser import dateutil.parser
import zipfile import zipfile
import chardet import chardet
import re
from ..Caches import LanguagesCache from ..Caches import LanguagesCache
DEFAULT_DATE = datetime.datetime(datetime.MINYEAR, 1, 1)
class FileParser: class FileParser:
"""Base class for performing files parsing depending on their type. """Base class for performing files parsing depending on their type.
""" """
...@@ -29,34 +34,43 @@ class FileParser: ...@@ -29,34 +34,43 @@ class FileParser:
""" """
# First, check the split dates... # First, check the split dates...
prefixes = [key[:-5] for key in hyperdata.keys() if key[-5:] == "_year"] date_to_parse = hyperdata.get('publication_date_to_parse', None)
for prefix in prefixes: if date_to_parse is not None:
date_string = hyperdata[prefix + "_year"] date_string = re.sub('\/+', '', date_to_parse)
key = prefix + "_month" hyperdata['publication' + "_date"] = dateutil.parser.parse(
if key in hyperdata: date_string,
date_string += " " + hyperdata[key] default=DEFAULT_DATE
key = prefix + "_day" ).strftime("%Y-%m-%d %H:%M:%S")
else:
prefixes = [key[:-5] for key in hyperdata.keys() if key[-5:] == "_year"]
for prefix in prefixes:
date_string = hyperdata[prefix + "_year"]
key = prefix + "_month"
if key in hyperdata: if key in hyperdata:
date_string += " " + hyperdata[key] date_string += " " + hyperdata[key]
key = prefix + "_hour" key = prefix + "_day"
if key in hyperdata: if key in hyperdata:
date_string += " " + hyperdata[key] date_string += " " + hyperdata[key]
key = prefix + "_minute" key = prefix + "_hour"
if key in hyperdata: if key in hyperdata:
date_string += ":" + hyperdata[key] date_string += " " + hyperdata[key]
key = prefix + "_second" key = prefix + "_minute"
if key in hyperdata: if key in hyperdata:
date_string += ":" + hyperdata[key] date_string += ":" + hyperdata[key]
try: key = prefix + "_second"
hyperdata[prefix + "_date"] = dateutil.parser.parse(date_string).strftime("%Y-%m-%d %H:%M:%S") if key in hyperdata:
except: date_string += ":" + hyperdata[key]
pass try:
hyperdata[prefix + "_date"] = dateutil.parser.parse(date_string).strftime("%Y-%m-%d %H:%M:%S")
except:
pass
# ...then parse all the "date" fields, to parse it into separate elements # ...then parse all the "date" fields, to parse it into separate elements
prefixes = [key[:-5] for key in hyperdata.keys() if key[-5:] == "_date"] prefixes = [key[:-5] for key in hyperdata.keys() if key[-5:] == "_date"]
for prefix in prefixes: for prefix in prefixes:
date = dateutil.parser.parse(hyperdata[prefix + "_date"]) date = dateutil.parser.parse(hyperdata[prefix + "_date"])
print('date') #print(date)
hyperdata[prefix + "_year"] = date.strftime("%Y") hyperdata[prefix + "_year"] = date.strftime("%Y")
hyperdata[prefix + "_month"] = date.strftime("%m") hyperdata[prefix + "_month"] = date.strftime("%m")
......
...@@ -13,7 +13,7 @@ class ZoteroFileParser(RisFileParser): ...@@ -13,7 +13,7 @@ class ZoteroFileParser(RisFileParser):
b"TI": {"type": "hyperdata", "key": "title", "separator": " "}, b"TI": {"type": "hyperdata", "key": "title", "separator": " "},
b"AU": {"type": "hyperdata", "key": "authors", "separator": ", "}, b"AU": {"type": "hyperdata", "key": "authors", "separator": ", "},
b"UR": {"type": "hyperdata", "key": "doi"}, b"UR": {"type": "hyperdata", "key": "doi"},
b"DA": {"type": "hyperdata", "key": "publication_date"}, b"DA": {"type": "hyperdata", "key": "publication_date_to_parse"},
b"PY": {"type": "hyperdata", "key": "publication_year"}, b"PY": {"type": "hyperdata", "key": "publication_year"},
b"PD": {"type": "hyperdata", "key": "publication_month"}, b"PD": {"type": "hyperdata", "key": "publication_month"},
b"LA": {"type": "hyperdata", "key": "language_iso2"}, b"LA": {"type": "hyperdata", "key": "language_iso2"},
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment