Commit 0fc6cb82 authored by Administrator

[BUG FIX] A Europresse.html page can have a tbody for each article.

parent 4b37259d
...@@ -39,7 +39,15 @@ class EuropressFileParser(FileParser): ...@@ -39,7 +39,15 @@ class EuropressFileParser(FileParser):
try: try:
html_parser = etree.HTMLParser(encoding=codif) html_parser = etree.HTMLParser(encoding=codif)
html = etree.fromstring(contents, html_parser) html = etree.fromstring(contents, html_parser)
html_articles = html.xpath('/html/body/table')
try:
html_articles = html.xpath('/html/body/table/tbody')
if len(html_articles) < 1:
html_articles = html.xpath('/html/body/table')
except Exception as error:
print(error)
except: except:
return [] return []
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment