Commit 91e2c2b8 authored by delanoe

[FIX] Bug when files are empty + collective email address for Gargantext work.

parent 774060ae
......@@ -6,7 +6,7 @@
__author__ = "Gargantext Team"
__copyright__ = "Copyright 2014-16 ISCPIF-CNRS"
__version__ = "0.2"
__email__ = "romain.loth@iscpif.fr"
__email__ = "team@gargantext.org"
__status__ = "Test"
import re
......@@ -63,13 +63,13 @@ class EuropresseParser(Parser):
ValueError('Error while decoding from "latin1" to "%s"' % encoding)
try:
html_parser = etree.HTMLParser(encoding=codif)
html = etree.fromstring(contents, html_parser)
html_parser = html5parser.etree.HTMLParser(encoding=codif)
html = html5parser.etree.fromstring(contents, html_parser)
html_articles = html.xpath('//article')
except Exception as error:
html_articles = None
print ("Europresse lxml error:", error)
# all except detail_header are mandatory to parse the article
......@@ -113,6 +113,7 @@ class EuropresseParser(Parser):
# parse all the articles, one by one
if html_articles is not None:
for html_article in html_articles:
try:
# s'il n'y a pas du tout de header on doit skip
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment