Commit 0fc6cb82 authored by Administrator

[BUG FIX] A Europresse HTML page can have a separate tbody for each article.

parent 4b37259d
......@@ -39,7 +39,15 @@ class EuropressFileParser(FileParser):
try:
html_parser = etree.HTMLParser(encoding=codif)
html = etree.fromstring(contents, html_parser)
try:
html_articles = html.xpath('/html/body/table/tbody')
if len(html_articles) < 1:
html_articles = html.xpath('/html/body/table')
except Exception as error:
print(error)
except:
return []
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment