Commit 7dae83bd authored by Administrator

[FIX] Date parser improved for Europresse corpus.

parent 4a641c04
......@@ -54,7 +54,7 @@ class EuropressFileParser(FileParser):
if isinstance(text, bytes):
text = text.decode(encoding)
format_date_fr = re.compile('\d+\s*\w+\s+\d{4}', re.UNICODE)
format_date_fr = re.compile('\d*\s*\w+\s+\d{4}', re.UNICODE)
test_date_fr = format_date_fr.match(text)
format_date_en = re.compile('\w+\s+\d+,\s+\d{4}', re.UNICODE)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment