[mod] add/modify image fetching for bing_news, qwant and twitter engines

f5128c7c · Alexandre Flament · Adam Tauber · 4cffd786 · f5128c7c · f5128c7c
Commit f5128c7c, authored Feb 12, 2017 by Alexandre Flament; committed by Adam Tauber on May 15, 2017
Showing 4 changed files with 27 additions and 10 deletions

bing_news.py searx/engines/bing_news.py +2 -3

qwant.py searx/engines/qwant.py +16 -3

twitter.py searx/engines/twitter.py +6 -1

test_bing_news.py tests/unit/engines/test_bing_news.py +3 -3

No files found.
--- a/searx/engines/bing_news.py
+++ b/searx/engines/bing_news.py
@@ -112,12 +112,11 @@ def response(resp):
        # append result
        if thumbnail is not None:
-            results.append({'template': 'videos.html',
+            results.append({'url': url,
-                            'url': url,
                            'title': title,
                            'publishedDate': publishedDate,
                            'content': content,
-                            'thumbnail': thumbnail})
+                            'img_src': thumbnail})
        else:
            results.append({'url': url,
                            'title': title,

--- a/searx/engines/qwant.py
+++ b/searx/engines/qwant.py
@@ -96,14 +96,27 @@ def response(resp):
                            'thumbnail_src': thumbnail_src,
                            'img_src': img_src})
-        elif (category_to_keyword.get(categories[0], '') == 'news' or
+        elif category_to_keyword.get(categories[0], '') == 'social':
-              category_to_keyword.get(categories[0], '') == 'social'):
            published_date = datetime.fromtimestamp(result['date'], None)
+            img_src = result.get('img', None)
+            results.append({'url': res_url,
+                            'title': title,
+                            'publishedDate': published_date,
+                            'content': content,
+                            'img_src': img_src})
+        elif category_to_keyword.get(categories[0], '') == 'news':
+            published_date = datetime.fromtimestamp(result['date'], None)
+            media = result.get('media', [])
+            if len(media) > 0:
+                img_src = media[0].get('pict', {}).get('url', None)
+            else:
+                img_src = None
            results.append({'url': res_url,
                            'title': title,
                            'publishedDate': published_date,
-                            'content': content})
+                            'content': content,
+                            'img_src': img_src})
    return results

--- a/searx/engines/twitter.py
+++ b/searx/engines/twitter.py
@@ -27,6 +27,7 @@ search_url = base_url + 'search?'
 # specific xpath variables
 results_xpath = '//li[@data-item-type="tweet"]'
+avatar_xpath = './/img[contains(@class, "avatar")]/@src'
 link_xpath = './/small[@class="time"]//a'
 title_xpath = './/span[contains(@class, "username")]'
 content_xpath = './/p[contains(@class, "tweet-text")]'
@@ -57,6 +58,8 @@ def response(resp):
        try:
            link = tweet.xpath(link_xpath)[0]
            content = extract_text(tweet.xpath(content_xpath)[0])
+            img_src = tweet.xpath(avatar_xpath)[0]
+            img_src = img_src.replace('_bigger', '_normal')
        except Exception:
            continue
@@ -71,12 +74,14 @@ def response(resp):
            results.append({'url': url,
                            'title': title,
                            'content': content,
+                            'img_src': img_src,
                            'publishedDate': publishedDate})
        else:
            # append result
            results.append({'url': url,
                            'title': title,
-                            'content': content})
+                            'content': content,
+                            'img_src': img_src})
    # return results
    return results
--- a/tests/unit/engines/test_bing_news.py
+++ b/tests/unit/engines/test_bing_news.py
@@ -81,11 +81,11 @@ class TestBingNewsEngine(SearxTestCase):
        self.assertEqual(results[0]['title'], 'Title')
        self.assertEqual(results[0]['url'], 'http://url.of.article/')
        self.assertEqual(results[0]['content'], 'Article Content')
-        self.assertEqual(results[0]['thumbnail'], 'https://www.bing.com/th?id=ON.13371337133713371337133713371337')
+        self.assertEqual(results[0]['img_src'], 'https://www.bing.com/th?id=ON.13371337133713371337133713371337')
        self.assertEqual(results[1]['title'], 'Another Title')
        self.assertEqual(results[1]['url'], 'http://another.url.of.article/')
        self.assertEqual(results[1]['content'], 'Another Article Content')
-        self.assertNotIn('thumbnail', results[1])
+        self.assertNotIn('img_src', results[1])
        html = """<?xml version="1.0" encoding="utf-8" ?>
 <rss version="2.0" xmlns:News="https://www.bing.com:443/news/search?q=python&amp;setmkt=en-US&amp;first=1&amp;format=RSS">
@@ -120,7 +120,7 @@ class TestBingNewsEngine(SearxTestCase):
        self.assertEqual(results[0]['title'], 'Title')
        self.assertEqual(results[0]['url'], 'http://another.url.of.article/')
        self.assertEqual(results[0]['content'], 'Article Content')
-        self.assertEqual(results[0]['thumbnail'], 'http://another.bing.com/image')
+        self.assertEqual(results[0]['img_src'], 'http://another.bing.com/image')
        html = """<?xml version="1.0" encoding="utf-8" ?>
 <rss version="2.0" xmlns:News="https://www.bing.com:443/news/search?q=python&amp;setmkt=en-US&amp;first=1&amp;format=RSS">