[FIX] Bug when files are empty + collective email address for Gargantext work.

91e2c2b8 · delanoe · 774060ae · 91e2c2b8
Commit 91e2c2b8 authored Sep 13, 2016 by delanoe
Show whitespace changes
Inline Side-by-side

Showing with 150 additions and 149 deletions

EUROPRESSE.py gargantext/util/parsers/EUROPRESSE.py +150 -149

No files found.
--- a/gargantext/util/parsers/EUROPRESSE.py
+++ b/gargantext/util/parsers/EUROPRESSE.py
@@ -6,7 +6,7 @@
 __author__    = "Gargantext Team"
 __copyright__ = "Copyright 2014-16 ISCPIF-CNRS"
 __version__   = "0.2"
-__email__     = "romain.loth@iscpif.fr"
+__email__     = "team@gargantext.org"
 __status__    = "Test"
 import re
@@ -63,13 +63,13 @@ class EuropresseParser(Parser):
                ValueError('Error while decoding from "latin1" to "%s"' % encoding)
        try:
-            html_parser = etree.HTMLParser(encoding=codif)
-            html = etree.fromstring(contents, html_parser)
            html_parser = html5parser.etree.HTMLParser(encoding=codif)
            html = html5parser.etree.fromstring(contents, html_parser)
            html_articles = html.xpath('//article')
        except Exception as error:
+            html_articles = None
            print ("Europresse lxml error:", error)
        # all except detail_header are mandatory to parse the article
@@ -113,6 +113,7 @@ class EuropresseParser(Parser):
        # parse all the articles, one by one
+        if html_articles is not None:
            for html_article in html_articles:
                try:
                    # s'il n'y a pas du tout de header on doit skip