Commit 4b47955a authored by c24b

DEBUG mode for EUROPRESSE + RIS

parent 56843d67
...@@ -32,7 +32,6 @@ from ._Parser import Parser ...@@ -32,7 +32,6 @@ from ._Parser import Parser
class EuropresseParser(Parser): class EuropresseParser(Parser):
def parse(self, file): def parse(self, file):
#print("europr_parser file", file)
localeEncoding = "fr_FR" localeEncoding = "fr_FR"
codif = "UTF-8" codif = "UTF-8"
...@@ -55,7 +54,7 @@ class EuropresseParser(Parser): ...@@ -55,7 +54,7 @@ class EuropresseParser(Parser):
contents = file.read() contents = file.read()
encoding = self.detect_encoding(contents) encoding = self.detect_encoding(contents)
#print(encoding)
if encoding != "utf-8": if encoding != "utf-8":
try: try:
contents = contents.decode("latin1", errors='replace').encode(codif) contents = contents.decode("latin1", errors='replace').encode(codif)
...@@ -116,7 +115,7 @@ class EuropresseParser(Parser): ...@@ -116,7 +115,7 @@ class EuropresseParser(Parser):
try: try:
for html_article in html_articles: for html_article in html_articles:
# print("==============================new article") print("==============================new article")
# s'il n'y a pas du tout de header on doit skip # s'il n'y a pas du tout de header on doit skip
all_header = html_article.xpath(entire_header_xpath) all_header = html_article.xpath(entire_header_xpath)
...@@ -261,7 +260,7 @@ class EuropresseParser(Parser): ...@@ -261,7 +260,7 @@ class EuropresseParser(Parser):
# most probably news_topic before beginning of date # most probably news_topic before beginning of date
hyperdata['rubrique'] = header_elts[0] hyperdata['rubrique'] = header_elts[0]
print(hyperdata)
yield hyperdata yield hyperdata
except: except:
......
...@@ -30,7 +30,7 @@ class RISParser(Parser): ...@@ -30,7 +30,7 @@ class RISParser(Parser):
} }
def parse(self, file): def parse(self, file):
print("=====> PARSING RIS") print("=====> PARSING RIS", file)
hyperdata = {} hyperdata = {}
last_key = None last_key = None
last_values = [] last_values = []
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment