DEBUG mode for EUROPRESSE + RIS

4b47955a · c24b · 56843d67 · 4b47955a · 4b47955a
Commit 4b47955a authored Aug 26, 2016 by c24b
Hide whitespace changes
Inline Side-by-side

Showing with 4 additions and 5 deletions

EUROPRESSE.py gargantext/util/parsers/EUROPRESSE.py +3 -4

RIS.py gargantext/util/parsers/RIS.py +1 -1

No files found.
--- a/gargantext/util/parsers/EUROPRESSE.py
+++ b/gargantext/util/parsers/EUROPRESSE.py
@@ -32,7 +32,6 @@ from ._Parser import Parser
 class EuropresseParser(Parser):
    def parse(self, file):
-        #print("europr_parser file", file)
        localeEncoding          = "fr_FR"
        codif                   = "UTF-8"
@@ -55,7 +54,7 @@ class EuropresseParser(Parser):
        contents = file.read()
        encoding = self.detect_encoding(contents)
+        #print(encoding)
        if encoding != "utf-8":
            try:
                contents = contents.decode("latin1", errors='replace').encode(codif)
@@ -116,7 +115,7 @@ class EuropresseParser(Parser):
        try:
            for html_article in html_articles:
-                # print("==============================new article")
+                print("==============================new article")
                # s'il n'y a pas du tout de header on doit skip
                all_header = html_article.xpath(entire_header_xpath)
@@ -261,7 +260,7 @@ class EuropresseParser(Parser):
                        # most probably news_topic before beginning of date
                        hyperdata['rubrique']   = header_elts[0]
+                print(hyperdata)
                yield hyperdata
        except:

--- a/gargantext/util/parsers/RIS.py
+++ b/gargantext/util/parsers/RIS.py
@@ -30,7 +30,7 @@ class RISParser(Parser):
    }
    def parse(self, file):
-        print("=====> PARSING RIS")
+        print("=====> PARSING RIS", file)
        hyperdata = {}
        last_key = None
        last_values = []