Commit 6d6d0baf authored by sim

Little refactor of date parsing

parent 282cc8f2
...@@ -2,6 +2,7 @@ import os ...@@ -2,6 +2,7 @@ import os
from gargantext.settings import MEDIA_ROOT from gargantext.settings import MEDIA_ROOT
from datetime import MINYEAR from datetime import MINYEAR
from dateutil.parser import parse as parse_datetime_flexible
from django.utils.dateparse import parse_datetime from django.utils.dateparse import parse_datetime
from django.utils.timezone import datetime as _datetime, utc as UTC, now as utcnow from django.utils.timezone import datetime as _datetime, utc as UTC, now as utcnow
...@@ -19,7 +20,8 @@ class datetime(_datetime): ...@@ -19,7 +20,8 @@ class datetime(_datetime):
@staticmethod @staticmethod
def parse(s): def parse(s):
dt = parse_datetime(s) dt = parse_datetime(s) or \
parse_datetime_flexible(s, default=datetime(MINYEAR, 1, 1))
return dt.astimezone(UTC) if dt.tzinfo else dt.replace(tzinfo=UTC) return dt.astimezone(UTC) if dt.tzinfo else dt.replace(tzinfo=UTC)
......
...@@ -3,10 +3,7 @@ import zipfile ...@@ -3,10 +3,7 @@ import zipfile
import re import re
import dateparser as date_parser import dateparser as date_parser
from gargantext.util.languages import languages from gargantext.util.languages import languages
from gargantext.util import datetime, convert_to_datetime, MINYEAR from gargantext.util import datetime, convert_to_datetime
DEFAULT_DATE = datetime(MINYEAR, 1, 1)
class Parser: class Parser:
...@@ -47,10 +44,7 @@ class Parser: ...@@ -47,10 +44,7 @@ class Parser:
if date_string is not None: if date_string is not None:
date_string = re.sub(r'\/\/+(\w*|\d*)', '', date_string) date_string = re.sub(r'\/\/+(\w*|\d*)', '', date_string)
try: try:
hyperdata['publication_date'] = dateutil.parser.parse( hyperdata['publication_date'] = datetime.parse(date_string)
date_string,
default=DEFAULT_DATE
)
except Exception as error: except Exception as error:
print(error, 'Date not parsed for:', date_string) print(error, 'Date not parsed for:', date_string)
hyperdata['publication_date'] = datetime.now() hyperdata['publication_date'] = datetime.now()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment