Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
searx-engine
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
1
Merge Requests
1
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
searx-engine
Commits
f5128c7c
Commit
f5128c7c
authored
Feb 12, 2017
by
Alexandre Flament
Committed by
Adam Tauber
May 15, 2017
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[mod] add/modify image fetching for bing_news, qwant and twitter engines
parent
4cffd786
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
27 additions
and
10 deletions
+27
-10
bing_news.py
searx/engines/bing_news.py
+2
-3
qwant.py
searx/engines/qwant.py
+16
-3
twitter.py
searx/engines/twitter.py
+6
-1
test_bing_news.py
tests/unit/engines/test_bing_news.py
+3
-3
No files found.
searx/engines/bing_news.py
View file @
f5128c7c
...
@@ -112,12 +112,11 @@ def response(resp):
...
@@ -112,12 +112,11 @@ def response(resp):
# append result
# append result
if
thumbnail
is
not
None
:
if
thumbnail
is
not
None
:
results
.
append
({
'template'
:
'videos.html'
,
results
.
append
({
'url'
:
url
,
'url'
:
url
,
'title'
:
title
,
'title'
:
title
,
'publishedDate'
:
publishedDate
,
'publishedDate'
:
publishedDate
,
'content'
:
content
,
'content'
:
content
,
'
thumbnail
'
:
thumbnail
})
'
img_src
'
:
thumbnail
})
else
:
else
:
results
.
append
({
'url'
:
url
,
results
.
append
({
'url'
:
url
,
'title'
:
title
,
'title'
:
title
,
...
...
searx/engines/qwant.py
View file @
f5128c7c
...
@@ -96,14 +96,27 @@ def response(resp):
...
@@ -96,14 +96,27 @@ def response(resp):
'thumbnail_src'
:
thumbnail_src
,
'thumbnail_src'
:
thumbnail_src
,
'img_src'
:
img_src
})
'img_src'
:
img_src
})
elif
(
category_to_keyword
.
get
(
categories
[
0
],
''
)
==
'news'
or
elif
category_to_keyword
.
get
(
categories
[
0
],
''
)
==
'social'
:
category_to_keyword
.
get
(
categories
[
0
],
''
)
==
'social'
):
published_date
=
datetime
.
fromtimestamp
(
result
[
'date'
],
None
)
published_date
=
datetime
.
fromtimestamp
(
result
[
'date'
],
None
)
img_src
=
result
.
get
(
'img'
,
None
)
results
.
append
({
'url'
:
res_url
,
'title'
:
title
,
'publishedDate'
:
published_date
,
'content'
:
content
,
'img_src'
:
img_src
})
elif
category_to_keyword
.
get
(
categories
[
0
],
''
)
==
'news'
:
published_date
=
datetime
.
fromtimestamp
(
result
[
'date'
],
None
)
media
=
result
.
get
(
'media'
,
[])
if
len
(
media
)
>
0
:
img_src
=
media
[
0
]
.
get
(
'pict'
,
{})
.
get
(
'url'
,
None
)
else
:
img_src
=
None
results
.
append
({
'url'
:
res_url
,
results
.
append
({
'url'
:
res_url
,
'title'
:
title
,
'title'
:
title
,
'publishedDate'
:
published_date
,
'publishedDate'
:
published_date
,
'content'
:
content
})
'content'
:
content
,
'img_src'
:
img_src
})
return
results
return
results
...
...
searx/engines/twitter.py
View file @
f5128c7c
...
@@ -27,6 +27,7 @@ search_url = base_url + 'search?'
...
@@ -27,6 +27,7 @@ search_url = base_url + 'search?'
# specific xpath variables
# specific xpath variables
results_xpath
=
'//li[@data-item-type="tweet"]'
results_xpath
=
'//li[@data-item-type="tweet"]'
avatar_xpath
=
'.//img[contains(@class, "avatar")]/@src'
link_xpath
=
'.//small[@class="time"]//a'
link_xpath
=
'.//small[@class="time"]//a'
title_xpath
=
'.//span[contains(@class, "username")]'
title_xpath
=
'.//span[contains(@class, "username")]'
content_xpath
=
'.//p[contains(@class, "tweet-text")]'
content_xpath
=
'.//p[contains(@class, "tweet-text")]'
...
@@ -57,6 +58,8 @@ def response(resp):
...
@@ -57,6 +58,8 @@ def response(resp):
try
:
try
:
link
=
tweet
.
xpath
(
link_xpath
)[
0
]
link
=
tweet
.
xpath
(
link_xpath
)[
0
]
content
=
extract_text
(
tweet
.
xpath
(
content_xpath
)[
0
])
content
=
extract_text
(
tweet
.
xpath
(
content_xpath
)[
0
])
img_src
=
tweet
.
xpath
(
avatar_xpath
)[
0
]
img_src
=
img_src
.
replace
(
'_bigger'
,
'_normal'
)
except
Exception
:
except
Exception
:
continue
continue
...
@@ -71,12 +74,14 @@ def response(resp):
...
@@ -71,12 +74,14 @@ def response(resp):
results
.
append
({
'url'
:
url
,
results
.
append
({
'url'
:
url
,
'title'
:
title
,
'title'
:
title
,
'content'
:
content
,
'content'
:
content
,
'img_src'
:
img_src
,
'publishedDate'
:
publishedDate
})
'publishedDate'
:
publishedDate
})
else
:
else
:
# append result
# append result
results
.
append
({
'url'
:
url
,
results
.
append
({
'url'
:
url
,
'title'
:
title
,
'title'
:
title
,
'content'
:
content
})
'content'
:
content
,
'img_src'
:
img_src
})
# return results
# return results
return
results
return
results
tests/unit/engines/test_bing_news.py
View file @
f5128c7c
...
@@ -81,11 +81,11 @@ class TestBingNewsEngine(SearxTestCase):
...
@@ -81,11 +81,11 @@ class TestBingNewsEngine(SearxTestCase):
self
.
assertEqual
(
results
[
0
][
'title'
],
'Title'
)
self
.
assertEqual
(
results
[
0
][
'title'
],
'Title'
)
self
.
assertEqual
(
results
[
0
][
'url'
],
'http://url.of.article/'
)
self
.
assertEqual
(
results
[
0
][
'url'
],
'http://url.of.article/'
)
self
.
assertEqual
(
results
[
0
][
'content'
],
'Article Content'
)
self
.
assertEqual
(
results
[
0
][
'content'
],
'Article Content'
)
self
.
assertEqual
(
results
[
0
][
'
thumbnail
'
],
'https://www.bing.com/th?id=ON.13371337133713371337133713371337'
)
self
.
assertEqual
(
results
[
0
][
'
img_src
'
],
'https://www.bing.com/th?id=ON.13371337133713371337133713371337'
)
self
.
assertEqual
(
results
[
1
][
'title'
],
'Another Title'
)
self
.
assertEqual
(
results
[
1
][
'title'
],
'Another Title'
)
self
.
assertEqual
(
results
[
1
][
'url'
],
'http://another.url.of.article/'
)
self
.
assertEqual
(
results
[
1
][
'url'
],
'http://another.url.of.article/'
)
self
.
assertEqual
(
results
[
1
][
'content'
],
'Another Article Content'
)
self
.
assertEqual
(
results
[
1
][
'content'
],
'Another Article Content'
)
self
.
assertNotIn
(
'
thumbnail
'
,
results
[
1
])
self
.
assertNotIn
(
'
img_src
'
,
results
[
1
])
html
=
"""<?xml version="1.0" encoding="utf-8" ?>
html
=
"""<?xml version="1.0" encoding="utf-8" ?>
<rss version="2.0" xmlns:News="https://www.bing.com:443/news/search?q=python&setmkt=en-US&first=1&format=RSS">
<rss version="2.0" xmlns:News="https://www.bing.com:443/news/search?q=python&setmkt=en-US&first=1&format=RSS">
...
@@ -120,7 +120,7 @@ class TestBingNewsEngine(SearxTestCase):
...
@@ -120,7 +120,7 @@ class TestBingNewsEngine(SearxTestCase):
self
.
assertEqual
(
results
[
0
][
'title'
],
'Title'
)
self
.
assertEqual
(
results
[
0
][
'title'
],
'Title'
)
self
.
assertEqual
(
results
[
0
][
'url'
],
'http://another.url.of.article/'
)
self
.
assertEqual
(
results
[
0
][
'url'
],
'http://another.url.of.article/'
)
self
.
assertEqual
(
results
[
0
][
'content'
],
'Article Content'
)
self
.
assertEqual
(
results
[
0
][
'content'
],
'Article Content'
)
self
.
assertEqual
(
results
[
0
][
'
thumbnail
'
],
'http://another.bing.com/image'
)
self
.
assertEqual
(
results
[
0
][
'
img_src
'
],
'http://another.bing.com/image'
)
html
=
"""<?xml version="1.0" encoding="utf-8" ?>
html
=
"""<?xml version="1.0" encoding="utf-8" ?>
<rss version="2.0" xmlns:News="https://www.bing.com:443/news/search?q=python&setmkt=en-US&first=1&format=RSS">
<rss version="2.0" xmlns:News="https://www.bing.com:443/news/search?q=python&setmkt=en-US&first=1&format=RSS">
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment