Commit 2fcbf674 authored by sim

Better date parsing

parent 249469c3
...@@ -45,6 +45,7 @@ SEASONS_ABBR = {k[:3]: v for k, v in SEASONS.items()} ...@@ -45,6 +45,7 @@ SEASONS_ABBR = {k[:3]: v for k, v in SEASONS.items()}
_RE_SEASON = re.compile(r'\b(%s)\b' % '|'.join(SEASONS.keys()), re.I) _RE_SEASON = re.compile(r'\b(%s)\b' % '|'.join(SEASONS.keys()), re.I)
_RE_SEASON_ABBR = re.compile(r'\b(%s)\b' % '|'.join(SEASONS_ABBR.keys()), re.I) _RE_SEASON_ABBR = re.compile(r'\b(%s)\b' % '|'.join(SEASONS_ABBR.keys()), re.I)
_RE_SLASHES = re.compile(r'//+')
class datetime(_datetime): class datetime(_datetime):
...@@ -64,6 +65,12 @@ class datetime(_datetime): ...@@ -64,6 +65,12 @@ class datetime(_datetime):
for r, table in seasons: for r, table in seasons:
s = r.sub(lambda m: '%0.2d' % table[m.group(1).lower()], s, count=1) s = r.sub(lambda m: '%0.2d' % table[m.group(1).lower()], s, count=1)
# To parse partial RIS dates (eg. 2018/4//)
s = _RE_SLASHES.sub('/', s).strip(' /')
if not s:
return default
try: try:
# Try to parse first with Django parser utility, then with dateutil # Try to parse first with Django parser utility, then with dateutil
dt = parse_datetime(s) or \ dt = parse_datetime(s) or \
...@@ -74,10 +81,13 @@ class datetime(_datetime): ...@@ -74,10 +81,13 @@ class datetime(_datetime):
except ValueError: except ValueError:
# Second chance... # Second chance...
# For date intervals try taking first part only (eg. 2018-04 for # This may be a date interval, try taking first part only
# "2018 Apr-May") # eg. 2018-04 for "2018 Apr-May"
if '-' in s: for sep in ('-', '/', None):
return datetime.parse(s.split('-', 1)[0]) if sep in s or not sep:
break
if sep:
return datetime.parse(s.split(sep, 1)[0])
# Otherwise, just fail # Otherwise, just fail
raise raise
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment