Commit 6d6d0baf authored by sim

Little refactor of date parsing

parent 282cc8f2
......@@ -2,6 +2,7 @@ import os
from gargantext.settings import MEDIA_ROOT
from datetime import MINYEAR
from dateutil.parser import parse as parse_datetime_flexible
from django.utils.dateparse import parse_datetime
from django.utils.timezone import datetime as _datetime, utc as UTC, now as utcnow
......@@ -19,7 +20,8 @@ class datetime(_datetime):
@staticmethod
def parse(s):
dt = parse_datetime(s)
dt = parse_datetime(s) or \
parse_datetime_flexible(s, default=datetime(MINYEAR, 1, 1))
return dt.astimezone(UTC) if dt.tzinfo else dt.replace(tzinfo=UTC)
......
......@@ -3,10 +3,7 @@ import zipfile
import re
import dateparser as date_parser
from gargantext.util.languages import languages
from gargantext.util import datetime, convert_to_datetime, MINYEAR
DEFAULT_DATE = datetime(MINYEAR, 1, 1)
from gargantext.util import datetime, convert_to_datetime
class Parser:
......@@ -47,10 +44,7 @@ class Parser:
if date_string is not None:
date_string = re.sub(r'\/\/+(\w*|\d*)', '', date_string)
try:
hyperdata['publication_date'] = dateutil.parser.parse(
date_string,
default=DEFAULT_DATE
)
hyperdata['publication_date'] = datetime.parse(date_string)
except Exception as error:
print(error, 'Date not parsed for:', date_string)
hyperdata['publication_date'] = datetime.now()
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment