Commit e2243575 authored by sim

Better utilities to parse and convert to datetime

parent d76f0f82
......@@ -37,7 +37,7 @@ import re
import importlib
from django.conf import settings
from gargantext.utils.lists import *
from gargantext.utils.dates import datetime, convert_to_datetime
from gargantext.utils.dates import datetime, to_datetime
# types & models (nodes, lists, hyperdata, resource) ---------------------------------------------
......@@ -110,8 +110,8 @@ INDEXED_HYPERDATA = {
'publication_date':
{ 'id' : 2
, 'type' : datetime
, 'convert_to_db' : convert_to_datetime
, 'convert_from_db': convert_to_datetime
, 'convert_to_db' : to_datetime
, 'convert_from_db': to_datetime
},
'title':
......
from datetime import MINYEAR
import re
from numbers import Number
from dateutil.parser import parse as parse_datetime_flexible
from django.utils.dateparse import parse_datetime
from django.utils.timezone import datetime as _datetime, utc as UTC, now as utcnow
__all__ = ['convert_to_datetime', 'datetime', 'MINYEAR']
__all__ = ['DEFAULT_DATETIME', 'SEASONS', 'MONTHS', 'SEASONS_ABBR',
'MONTHS_ABBR', 'datetime', 'to_datetime']
# Naive datetime (1900-01-01 00:00) used as the default value of the `default`
# argument of datetime.parse and to_datetime, which hand it to the dateutil
# parser as its own `default` argument.
DEFAULT_DATETIME = _datetime(1900, 1, 1)
# Lowercase English season name -> month number substituted for it.
# Only valid for northern hemisphere, do something about this?
SEASONS = {
'spring': 3,
'summer': 6,
'fall': 9,
'autumn': 9,
'winter': 12,
}
# Lowercase English month name -> month number (1-12).
MONTHS = {
'january': 1,
'february': 2,
'march': 3,
'april': 4,
'may': 5,
'june': 6,
'july': 7,
'august': 8,
'september': 9,
'october': 10,
'november': 11,
'december': 12,
}
# Same tables keyed by the first three letters of each name
# (eg. 'jan', 'sep', 'spr', 'aut').
MONTHS_ABBR = {k[:3]: v for k, v in MONTHS.items()}
SEASONS_ABBR = {k[:3]: v for k, v in SEASONS.items()}
# Case-insensitive patterns matching a season name (or its three-letter
# abbreviation) as a whole word; group 1 captures the matched name.
_RE_SEASON = re.compile(r'\b(%s)\b' % '|'.join(SEASONS.keys()), re.I)
_RE_SEASON_ABBR = re.compile(r'\b(%s)\b' % '|'.join(SEASONS_ABBR.keys()), re.I)
class datetime(_datetime):
......@@ -16,22 +57,70 @@ class datetime(_datetime):
return _datetime.utcfromtimestamp(ts).replace(tzinfo=UTC)
@staticmethod
def parse(s):
dt = parse_datetime(s) or \
parse_datetime_flexible(s, default=datetime(MINYEAR, 1, 1))
return dt.astimezone(UTC) if dt.tzinfo else dt.replace(tzinfo=UTC)
def parse(s, default=DEFAULT_DATETIME):
# Replace the first season name found (full or abbreviated) by the
# two-digit number of its first month, eg. "Spring" -> "03", "WIN" -> "12".
seasons = [(_RE_SEASON, SEASONS), (_RE_SEASON_ABBR, SEASONS_ABBR)]
for r, table in seasons:
s = r.sub(lambda m: '%0.2d' % table[m.group(1).lower()], s, count=1)
try:
# Try to parse first with Django parser utility, then with dateutil
dt = parse_datetime(s) or \
parse_datetime_flexible(s, default=default)
return dt.astimezone(UTC) if dt.tzinfo else dt.replace(tzinfo=UTC)
def convert_to_datetime(dt):
if isinstance(dt, (int, float)):
except ValueError:
# Second chance...
# For date intervals try taking first part only (eg. 2018-04 for
# "2018 Apr-May")
if '-' in s:
return datetime.parse(s.split('-', 1)[0])
# Otherwise, just fail
raise
def date_component_to_int(s):
    """Convert a single date component to an integer.

    Accepted inputs:
      * a number, which is truncated with ``int()``;
      * a string of decimal digits (surrounding whitespace is ignored);
      * an English month or season name, or its three-letter abbreviation,
        looked up case-insensitively in MONTHS, MONTHS_ABBR, SEASONS and
        SEASONS_ABBR (in that order).

    Returns the integer value, or None when the component cannot be
    converted, so that callers can detect unusable components.
    """
    if isinstance(s, Number):
        return int(s)
    if not isinstance(s, str):
        # Neither a number nor text: report it as unconvertible rather than
        # failing with AttributeError on .strip() below.
        return None
    s = s.strip()
    # isdecimal() rather than isnumeric(): the latter also accepts characters
    # such as '²' or '½', which int() cannot parse and would raise ValueError.
    if s.isdecimal():
        return int(s)
    s = s.lower()
    for table in (MONTHS, MONTHS_ABBR, SEASONS, SEASONS_ABBR):
        if s in table:
            return table[s]
    return None
def to_datetime(dt, default=DEFAULT_DATETIME):
if isinstance(dt, Number):
return datetime.utcfromtimestamp(dt)
elif isinstance(dt, str):
return datetime.parse(dt)
return datetime.parse(dt, default=default)
elif isinstance(dt, _datetime):
args = (dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second)
return datetime(*args, tzinfo=dt.tzinfo or UTC).astimezone(UTC)
else:
raise ValueError("Can't convert to datetime: %r" % dt)
elif isinstance(dt, (list, tuple)):
# Try to convert each component to a number
args = tuple(date_component_to_int(x) for x in dt)
if not any(component is None for component in args):
# A datetime needs at least three components: year, month and day
args = args + ((1,) * (3-len(dt))) if len(dt) < 3 else args
return datetime(*args, tzinfo=UTC).astimezone(UTC)
# Otherwise try to parse first item
if len(dt) > 0 and isinstance(dt[0], str):
return datetime.parse(dt[0], default=default)
raise ValueError("Can't convert to datetime: %r" % dt)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment