gargantext / searx-engine · Commits

Unverified commit a51b2b6c, authored Oct 16, 2019 by Mathieu Brunot, committed by GitHub on Oct 16, 2019.

    Merge branch 'master' into feature/accessibility

Parents: 4d17d453, 12f42d15

Showing 35 changed files with 378 additions and 478 deletions (+378 −478).
Changed files:

  Dockerfile                                                  +3   -1
  dockerfiles/docker-entrypoint.sh                            +4   -0
  searx/engines/__init__.py                                   +15  -12
  searx/engines/arxiv.py                                      +1   -0
  searx/engines/deviantart.py                                 +22  -25
  searx/engines/digg.py                                       +12  -24
  searx/engines/duckduckgo.py                                 +26  -9
  searx/engines/gigablast.py                                  +4   -3
  searx/engines/soundcloud.py                                 +1   -1
  searx/engines/startpage.py                                  +18  -8
  searx/engines/wolframalpha_noapi.py                         +1   -1
  searx/engines/www1x.py                                      +6   -29
  searx/exceptions.py                                         +1   -0
  searx/plugins/https_rewrite.py                              +3   -0
  searx/plugins/oa_doi_rewrite.py                             +3   -0
  searx/plugins/tracker_url_remover.py                        +19  -10
  searx/query.py                                              +1   -1
  searx/results.py                                            +45  -29
  searx/search.py                                             +61  -13
  searx/settings.yml                                          +9   -8
  searx/static/themes/courgette/css/style.css                 +1   -1
  searx/static/themes/courgette/less/style.less               +4   -0
  searx/static/themes/legacy/css/style.css                    +1   -1
  searx/static/themes/legacy/less/style.less                  +4   -0
  searx/templates/courgette/result_templates/key-value.html   +13  -0
  searx/templates/legacy/result_templates/key-value.html      +13  -0
  searx/templates/oscar/macros.html                           +9   -1
  searx/templates/oscar/result_templates/key-value.html       +19  -0
  searx/templates/simple/result_templates/key-value.html      +11  -0
  searx/utils.py                                              +18  -2
  searx/webapp.py                                             +5   -2
  tests/unit/engines/test_deviantart.py                       +0   -71
  tests/unit/engines/test_digg.py                             +0   -85
  tests/unit/engines/test_startpage.py                        +25  -98
  tests/unit/engines/test_www1x.py                            +0   -43
Dockerfile

@@ -11,7 +11,9 @@ ARG TIMESTAMP_UWSGI=0
 ARG LABEL_VCS_REF=
 ARG LABEL_VCS_URL=

-ENV BASE_URL= \
+ENV INSTANCE_NAME=searx \
+    AUTOCOMPLETE= \
+    BASE_URL= \
     MORTY_KEY= \
     MORTY_URL=

 EXPOSE 8080
dockerfiles/docker-entrypoint.sh

@@ -29,6 +29,8 @@ do
     printf "  -f Always update on the configuration files (existing files are renamed with the .old suffix)\n"
     printf "     Without this option, new configuration files are copied with the .new suffix\n"
     printf "\nEnvironment variables:\n\n"
+    printf "  INSTANCE_NAME settings.yml : general.instance_name\n"
+    printf "  AUTOCOMPLETE  settings.yml : search.autocomplete\n"
     printf "  BASE_URL settings.yml : server.base_url\n"
     printf "  MORTY_URL settings.yml : result_proxy.url\n"
     printf "  MORTY_KEY settings.yml : result_proxy.key\n"

@@ -53,6 +55,8 @@ patch_searx_settings() {
     # update settings.yml
     sed -i -e "s|base_url : False|base_url : ${BASE_URL}|g" \
+        -e "s/instance_name : \"searx\"/instance_name : \"${INSTANCE_NAME}\"/g" \
+        -e "s/autocomplete : \"\"/autocomplete : \"${AUTOCOMPLETE}\"/g" \
        -e "s/ultrasecretkey/$(openssl rand -hex 32)/g" \
        "${CONF}"
searx/engines/__init__.py

@@ -27,7 +27,7 @@ from json import loads
 from requests import get
 from searx import settings
 from searx import logger
-from searx.utils import load_module, match_language
+from searx.utils import load_module, match_language, get_engine_from_settings

 logger = logger.getChild('engines')

@@ -53,7 +53,8 @@ engine_default_args = {'paging': False,
                        'disabled': False,
                        'suspend_end_time': 0,
                        'continuous_errors': 0,
-                       'time_range_support': False}
+                       'time_range_support': False,
+                       'offline': False}


 def load_engine(engine_data):

@@ -128,14 +129,16 @@ def load_engine(engine_data):
     engine.stats = {
         'result_count': 0,
         'search_count': 0,
-        'page_load_time': 0,
-        'page_load_count': 0,
         'engine_time': 0,
         'engine_time_count': 0,
         'score_count': 0,
         'errors': 0
     }

+    if not engine.offline:
+        engine.stats['page_load_time'] = 0
+        engine.stats['page_load_count'] = 0
+
     for category_name in engine.categories:
         categories.setdefault(category_name, []).append(engine)

@@ -173,11 +176,6 @@ def get_engines_stats():
         results_num = \
             engine.stats['result_count'] / float(engine.stats['search_count'])

-        if engine.stats['page_load_count'] != 0:
-            load_times = engine.stats['page_load_time'] / float(engine.stats['page_load_count'])  # noqa
-        else:
-            load_times = 0
-
         if engine.stats['engine_time_count'] != 0:
             this_engine_time = engine.stats['engine_time'] / float(engine.stats['engine_time_count'])  # noqa
         else:

@@ -189,14 +187,19 @@ def get_engines_stats():
         else:
             score = score_per_result = 0.0

-        max_pageload = max(load_times, max_pageload)
+        if not engine.offline:
+            load_times = 0
+            if engine.stats['page_load_count'] != 0:
+                load_times = engine.stats['page_load_time'] / float(engine.stats['page_load_count'])  # noqa
+            max_pageload = max(load_times, max_pageload)
+            pageloads.append({'avg': load_times, 'name': engine.name})
+
         max_engine_times = max(this_engine_time, max_engine_times)
         max_results = max(results_num, max_results)
         max_score = max(score, max_score)
         max_score_per_result = max(score_per_result, max_score_per_result)
         max_errors = max(max_errors, engine.stats['errors'])

-        pageloads.append({'avg': load_times, 'name': engine.name})
         engine_times.append({'avg': this_engine_time, 'name': engine.name})
         results.append({'avg': results_num, 'name': engine.name})
         scores.append({'avg': score, 'name': engine.name})

@@ -255,7 +258,7 @@ def initialize_engines(engine_list):
     load_engines(engine_list)

     def engine_init(engine_name, init_fn):
-        init_fn()
+        init_fn(get_engine_from_settings(engine_name))
         logger.debug('%s engine: Initialized', engine_name)

     for engine_name, engine in engines.items():
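Note: with engine_init now passing each engine its settings.yml entry, an engine module can inspect its own configuration at startup. A minimal sketch of the new contract (module and settings keys below are hypothetical, not from this commit):

    # Hypothetical engine module: init() receives the engine's raw
    # settings.yml dict -- what get_engine_from_settings(name) returns.
    def init(engine_settings=None):
        if engine_settings is not None and 'timeout' in engine_settings:
            # an engine could adapt its behaviour to its configuration here
            pass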
searx/engines/arxiv.py

@@ -17,6 +17,7 @@ from searx.url_utils import urlencode

 categories = ['science']
+paging = True

 base_url = 'http://export.arxiv.org/api/query?search_query=all:'\
            + '{query}&start={offset}&max_results={number_of_results}'
searx/engines/deviantart.py

@@ -24,7 +24,7 @@ time_range_support = True
 # search-url
 base_url = 'https://www.deviantart.com/'
-search_url = base_url + 'browse/all/?offset={offset}&{query}'
+search_url = base_url + 'search?page={page}&{query}'
 time_range_url = '&order={range}'

 time_range_dict = {'day': 11,

@@ -37,9 +37,7 @@ def request(query, params):
     if params['time_range'] and params['time_range'] not in time_range_dict:
         return params

-    offset = (params['pageno'] - 1) * 24
-
-    params['url'] = search_url.format(offset=offset,
+    params['url'] = search_url.format(page=params['pageno'],
                                       query=urlencode({'q': query}))
     if params['time_range'] in time_range_dict:
         params['url'] += time_range_url.format(range=time_range_dict[params['time_range']])

@@ -57,28 +55,27 @@ def response(resp):
     dom = html.fromstring(resp.text)

-    regex = re.compile(r'\/200H\/')
-
     # parse results
-    for result in dom.xpath('.//span[@class="thumb wide"]'):
-        link = result.xpath('.//a[@class="torpedo-thumb-link"]')[0]
-        url = link.attrib.get('href')
-        title = extract_text(result.xpath('.//span[@class="title"]'))
-        thumbnail_src = link.xpath('.//img')[0].attrib.get('src')
-        img_src = regex.sub('/', thumbnail_src)
-
-        # http to https, remove domain sharding
-        thumbnail_src = re.sub(r"https?://(th|fc)\d+.", "https://th01.", thumbnail_src)
-        thumbnail_src = re.sub(r"http://", "https://", thumbnail_src)
-
-        url = re.sub(r"http://(.*)\.deviantart\.com/", "https://\\1.deviantart.com/", url)
-
-        # append result
-        results.append({'url': url,
-                        'title': title,
-                        'img_src': img_src,
-                        'thumbnail_src': thumbnail_src,
-                        'template': 'images.html'})
+    for row in dom.xpath('//div[contains(@data-hook, "content_row")]'):
+        for result in row.xpath('./div'):
+            link = result.xpath('.//a[@data-hook="deviation_link"]')[0]
+            url = link.attrib.get('href')
+            title = link.attrib.get('title')
+            thumbnail_src = result.xpath('.//img')[0].attrib.get('src')
+            img_src = thumbnail_src
+
+            # http to https, remove domain sharding
+            thumbnail_src = re.sub(r"https?://(th|fc)\d+.", "https://th01.", thumbnail_src)
+            thumbnail_src = re.sub(r"http://", "https://", thumbnail_src)
+
+            url = re.sub(r"http://(.*)\.deviantart\.com/", "https://\\1.deviantart.com/", url)
+
+            # append result
+            results.append({'url': url,
+                            'title': title,
+                            'img_src': img_src,
+                            'thumbnail_src': thumbnail_src,
+                            'template': 'images.html'})

     # return results
     return results
searx/engines/digg.py

@@ -15,7 +15,8 @@ import string
 from dateutil import parser
 from json import loads
 from lxml import html
-from searx.url_utils import quote_plus
+from searx.url_utils import urlencode
+from datetime import datetime

 # engine dependent config
 categories = ['news', 'social media']

@@ -23,7 +24,7 @@ paging = True
 # search-url
 base_url = 'https://digg.com/'
-search_url = base_url + 'api/search/{query}.json?position={position}&format=html'
+search_url = base_url + 'api/search/?{query}&from={position}&size=20&format=html'

 # specific xpath variables
 results_xpath = '//article'

@@ -38,9 +39,9 @@ digg_cookie_chars = string.ascii_uppercase + string.ascii_lowercase +\
 # do search-request
 def request(query, params):
-    offset = (params['pageno'] - 1) * 10
+    offset = (params['pageno'] - 1) * 20
     params['url'] = search_url.format(position=offset,
-                                      query=quote_plus(query))
+                                      query=urlencode({'q': query}))
     params['cookies']['frontend.auid'] = ''.join(random.choice(
         digg_cookie_chars) for _ in range(22))
     return params

@@ -52,30 +53,17 @@ def response(resp):
     search_result = loads(resp.text)

-    if 'html' not in search_result or search_result['html'] == '':
-        return results
-
-    dom = html.fromstring(search_result['html'])
-
-    # parse results
-    for result in dom.xpath(results_xpath):
-        url = result.attrib.get('data-contenturl')
-        thumbnail = result.xpath('.//img')[0].attrib.get('src')
-        title = ''.join(result.xpath(title_xpath))
-        content = ''.join(result.xpath(content_xpath))
-        pubdate = result.xpath(pubdate_xpath)[0].attrib.get('datetime')
-        publishedDate = parser.parse(pubdate)
-
-        # http to https
-        thumbnail = thumbnail.replace("http://static.digg.com", "https://static.digg.com")
-
+    # parse results
+    for result in search_result['mapped']:
+        published = datetime.strptime(result['created']['ISO'], "%Y-%m-%d %H:%M:%S")
         # append result
-        results.append({'url': url,
-                        'title': title,
-                        'content': content,
-                        'publishedDate': publishedDate,
-                        'thumbnail': thumbnail})
+        results.append({'url': result['url'],
+                        'title': result['title'],
+                        'content': result['excerpt'],
+                        'template': 'videos.html',
+                        'publishedDate': published,
+                        'thumbnail': result['images']['thumbImage']})

     # return results
     return results
searx/engines/duckduckgo.py

@@ -65,21 +65,36 @@ def get_region_code(lang, lang_list=[]):
 def request(query, params):
-    if params['time_range'] and params['time_range'] not in time_range_dict:
+    if params['time_range'] not in (None, 'None', '') and params['time_range'] not in time_range_dict:
         return params

     offset = (params['pageno'] - 1) * 30

     region_code = get_region_code(params['language'], supported_languages)
-    if region_code:
-        params['url'] = url.format(
-            query=urlencode({'q': query, 'kl': region_code}), offset=offset, dc_param=offset)
-    else:
-        params['url'] = url.format(
-            query=urlencode({'q': query}), offset=offset, dc_param=offset)
-
-    if params['time_range'] in time_range_dict:
-        params['url'] += time_range_url.format(range=time_range_dict[params['time_range']])
+    params['url'] = 'https://duckduckgo.com/html/'
+    if params['pageno'] > 1:
+        params['method'] = 'POST'
+        params['data']['q'] = query
+        params['data']['s'] = offset
+        params['data']['dc'] = 30
+        params['data']['nextParams'] = ''
+        params['data']['v'] = 'l'
+        params['data']['o'] = 'json'
+        params['data']['api'] = '/d.js'
+        if params['time_range'] in time_range_dict:
+            params['data']['df'] = time_range_dict[params['time_range']]
+        if region_code:
+            params['data']['kl'] = region_code
+    else:
+        if region_code:
+            params['url'] = url.format(
+                query=urlencode({'q': query, 'kl': region_code}), offset=offset, dc_param=offset)
+        else:
+            params['url'] = url.format(
+                query=urlencode({'q': query}), offset=offset, dc_param=offset)
+        if params['time_range'] in time_range_dict:
+            params['url'] += time_range_url.format(range=time_range_dict[params['time_range']])

     return params

@@ -91,7 +106,9 @@ def response(resp):
     doc = fromstring(resp.text)

     # parse results
-    for r in doc.xpath(result_xpath):
+    for i, r in enumerate(doc.xpath(result_xpath)):
+        if i >= 30:
+            break
         try:
             res_url = r.xpath(url_xpath)[-1]
         except:
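For illustration, the same paged request made standalone with the requests library (this is not searx code; the form field names are taken from the diff above, and the endpoint's behaviour is otherwise an assumption):

    import requests

    # Page 2 of results via the html endpoint's POST form, as in the diff:
    data = {'q': 'searx', 's': 30, 'dc': 30, 'nextParams': '',
            'v': 'l', 'o': 'json', 'api': '/d.js'}
    resp = requests.post('https://duckduckgo.com/html/', data=data)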
searx/engines/gigablast.py

@@ -35,8 +35,8 @@ search_string = 'search?{query}'\
                 '&ff={safesearch}'\
                 '&rxiec={rxieu}'\
                 '&ulse={ulse}'\
-                '&rand={rxikd}'  # current unix timestamp
+                '&rand={rxikd}'\
+                '&dbez={dbez}'

 # specific xpath variables
 results_xpath = '//response//result'
 url_xpath = './/url'

@@ -70,7 +70,8 @@ def request(query, params):
                                      rxieu=random.randint(1000000000, 9999999999),
                                      ulse=random.randint(100000000, 999999999),
                                      lang=language,
-                                     safesearch=safesearch)
+                                     safesearch=safesearch,
+                                     dbez=random.randint(100000000, 999999999))

     params['url'] = base_url + search_path
searx/engines/soundcloud.py

@@ -66,7 +66,7 @@ def get_client_id():
     return ""


-def init():
+def init(engine_settings=None):
     global guest_client_id
     # api-key
     guest_client_id = get_client_id()
searx/engines/startpage.py

@@ -15,6 +15,7 @@ from dateutil import parser
 from datetime import datetime, timedelta
 import re
 from searx.engines.xpath import extract_text
+from searx.languages import language_codes

 # engine dependent config
 categories = ['general']

@@ -22,7 +23,7 @@ categories = ['general']
 # (probably the parameter qid), require
 # storing of qid's between mulitble search-calls
-# paging = False
+paging = True
 language_support = True

 # search-url

@@ -32,23 +33,32 @@ search_url = base_url + 'do/search'
 # specific xpath variables
 # ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]
 # not ads: div[@class="result"] are the direct childs of div[@id="results"]
-results_xpath = '//li[contains(@class, "search-result") and contains(@class, "search-item")]'
-link_xpath = './/h3/a'
-content_xpath = './p[@class="search-item__body"]'
+results_xpath = '//div[@class="w-gl__result"]'
+link_xpath = './/a[@class="w-gl__result-title"]'
+content_xpath = './/p[@class="w-gl__description"]'


 # do search-request
 def request(query, params):
-    offset = (params['pageno'] - 1) * 10
-
     params['url'] = search_url
     params['method'] = 'POST'
-    params['data'] = {'query': query,
-                      'startat': offset}
+    params['data'] = {
+        'query': query,
+        'page': params['pageno'],
+        'cat': 'web',
+        'cmd': 'process_search',
+        'engine0': 'v1all',
+    }

-    # set language if specified
-    if params['language'] != 'all':
-        params['data']['with_language'] = ('lang_' + params['language'].split('-')[0])
+    language = 'english'
+    for lc, _, _, lang in language_codes:
+        if lc == params['language']:
+            language = lang
+    params['data']['language'] = language
+    params['data']['lui'] = language

     return params
searx/engines/wolframalpha_noapi.py

@@ -55,7 +55,7 @@ def obtain_token():
     return token


-def init():
+def init(engine_settings=None):
     obtain_token()
searx/engines/www1x.py

@@ -11,8 +11,8 @@
 """

 from lxml import html
-import re
 from searx.url_utils import urlencode, urljoin
+from searx.engines.xpath import extract_text

 # engine dependent config
 categories = ['images']

@@ -34,41 +34,18 @@ def request(query, params):
 def response(resp):
     results = []

-    # get links from result-text
-    regex = re.compile('(</a>|<a)')
-    results_parts = re.split(regex, resp.text)
-
-    cur_element = ''
-
-    # iterate over link parts
-    for result_part in results_parts:
-        # processed start and end of link
-        if result_part == '<a':
-            cur_element = result_part
-            continue
-        elif result_part != '</a>':
-            cur_element += result_part
-            continue
-
-        cur_element += result_part
-
-        # fix xml-error
-        cur_element = cur_element.replace('"></a>', '"/></a>')
-
-        dom = html.fromstring(cur_element)
-        link = dom.xpath('//a')[0]
-
+    dom = html.fromstring(resp.text)
+    for res in dom.xpath('//div[@class="List-item MainListing"]'):
+        link = res.xpath('//a')[0]
         url = urljoin(base_url, link.attrib.get('href'))
-        title = link.attrib.get('title', '')
-
-        thumbnail_src = urljoin(base_url, link.xpath('.//img')[0].attrib['src'])
+        title = extract_text(link)
+        thumbnail_src = urljoin(base_url, res.xpath('.//img')[0].attrib['src'])
         # TODO: get image with higher resolution
         img_src = thumbnail_src

-        # check if url is showing to a photo
-        if '/photo/' not in url:
-            continue
-
         # append result
         results.append({'url': url,
                         'title': title,
searx/exceptions.py

@@ -28,5 +28,6 @@ class SearxParameterException(SearxException):
         else:
             message = 'Invalid value "' + value + '" for parameter ' + name
         super(SearxParameterException, self).__init__(message)
+        self.message = message
         self.parameter_name = name
         self.parameter_value = value
searx/plugins/https_rewrite.py

@@ -225,6 +225,9 @@ def https_url_rewrite(result):
 def on_result(request, search, result):
+    if 'parsed_url' not in result:
+        return True
+
     if result['parsed_url'].scheme == 'http':
         https_url_rewrite(result)
     return True
searx/plugins/oa_doi_rewrite.py

@@ -35,6 +35,9 @@ def get_doi_resolver(args, preference_doi_resolver):
 def on_result(request, search, result):
+    if 'parsed_url' not in result:
+        return True
+
     doi = extract_doi(result['parsed_url'])
     if doi and len(doi) < 50:
         for suffix in ('/', '.pdf', '/full', '/meta', '/abstract'):
searx/plugins/tracker_url_remover.py

@@ -17,10 +17,10 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 from flask_babel import gettext
 import re
-from searx.url_utils import urlunparse
+from searx.url_utils import urlunparse, parse_qsl, urlencode

-regexes = {re.compile(r'utm_[^&]+&?'),
-           re.compile(r'(wkey|wemail)[^&]+&?'),
+regexes = {re.compile(r'utm_[^&]+'),
+           re.compile(r'(wkey|wemail)[^&]*'),
            re.compile(r'&$')}

 name = gettext('Tracker URL remover')

@@ -30,16 +30,25 @@ preference_section = 'privacy'
 def on_result(request, search, result):
+    if 'parsed_url' not in result:
+        return True
+
     query = result['parsed_url'].query

     if query == "":
         return True
-
-    for reg in regexes:
-        query = reg.sub('', query)
-
-    if query != result['parsed_url'].query:
-        result['parsed_url'] = result['parsed_url']._replace(query=query)
-        result['url'] = urlunparse(result['parsed_url'])
+    parsed_query = parse_qsl(query)
+
+    changed = False
+    for i, (param_name, _) in enumerate(list(parsed_query)):
+        for reg in regexes:
+            if reg.match(param_name):
+                parsed_query.pop(i)
+                changed = True
+                break
+
+    if changed:
+        result['parsed_url'] = result['parsed_url']._replace(query=urlencode(parsed_query))
+        result['url'] = urlunparse(result['parsed_url'])

     return True
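A standalone sketch of the new behaviour using the Python 3 stdlib equivalents of searx.url_utils (which wraps these for py2/py3 compatibility): tracker parameters are now dropped at the query-parameter level instead of by substring substitution on the raw query string. The sample URL is illustrative.

    import re
    from urllib.parse import urlparse, parse_qsl, urlencode, urlunparse

    regexes = {re.compile(r'utm_[^&]+'),
               re.compile(r'(wkey|wemail)[^&]*')}

    url = 'https://example.com/article?utm_source=feed&id=42'  # sample URL
    parsed = urlparse(url)
    # keep only query parameters whose name matches no tracker pattern
    query = [(name, value) for name, value in parse_qsl(parsed.query)
             if not any(reg.match(name) for reg in regexes)]
    print(urlunparse(parsed._replace(query=urlencode(query))))
    # -> https://example.com/article?id=42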
searx/query.py

@@ -184,7 +184,7 @@ class SearchQuery(object):
         self.lang = lang
         self.safesearch = safesearch
         self.pageno = pageno
-        self.time_range = time_range
+        self.time_range = None if time_range in ('', 'None', None) else time_range
         self.timeout_limit = timeout_limit

     def __str__(self):
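The normalization matters because the web form can submit the literal string 'None' when no time range is chosen; a quick standalone check of the expression:

    for raw in ('', 'None', None, 'day'):
        print(None if raw in ('', 'None', None) else raw)
    # None, None, None, day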
searx/results.py

@@ -197,6 +197,13 @@ class ResultContainer(object):
             self.infoboxes.append(infobox)

     def _merge_result(self, result, position):
+        if 'url' in result:
+            self.__merge_url_result(result, position)
+            return
+
+        self.__merge_result_no_url(result, position)
+
+    def __merge_url_result(self, result, position):
         result['parsed_url'] = urlparse(result['url'])

         # if the result has no scheme, use http as default

@@ -210,51 +217,60 @@ class ResultContainer(object):
         if result.get('content'):
             result['content'] = WHITESPACE_REGEX.sub(' ', result['content'])

         # check for duplicates
-        duplicated = False
+        duplicated = self.__find_duplicated_http_result(result)
+        if duplicated:
+            self.__merge_duplicated_http_result(duplicated, result, position)
+            return
+
+        # if there is no duplicate found, append result
+        result['positions'] = [position]
+        with RLock():
+            self._merged_results.append(result)
+
+    def __find_duplicated_http_result(self, result):
         result_template = result.get('template')
         for merged_result in self._merged_results:
+            if 'parsed_url' not in merged_result:
+                continue
             if compare_urls(result['parsed_url'], merged_result['parsed_url'])\
                and result_template == merged_result.get('template'):
                 if result_template != 'images.html':
                     # not an image, same template, same url : it's a duplicate
-                    duplicated = merged_result
-                    break
+                    return merged_result
                 else:
                     # it's an image
                     # it's a duplicate if the parsed_url, template and img_src are differents
                     if result.get('img_src', '') == merged_result.get('img_src', ''):
-                        duplicated = merged_result
-                        break
+                        return merged_result
+        return None

-        # merge duplicates together
-        if duplicated:
-            # using content with more text
-            if result_content_len(result.get('content', '')) > \
-                    result_content_len(duplicated.get('content', '')):
-                duplicated['content'] = result['content']
+    def __merge_duplicated_http_result(self, duplicated, result, position):
+        # using content with more text
+        if result_content_len(result.get('content', '')) > \
+                result_content_len(duplicated.get('content', '')):
+            duplicated['content'] = result['content']

-            # merge all result's parameters not found in duplicate
-            for key in result.keys():
-                if not duplicated.get(key):
-                    duplicated[key] = result.get(key)
+        # merge all result's parameters not found in duplicate
+        for key in result.keys():
+            if not duplicated.get(key):
+                duplicated[key] = result.get(key)

-            # add the new position
-            duplicated['positions'].append(position)
+        # add the new position
+        duplicated['positions'].append(position)

-            # add engine to list of result-engines
-            duplicated['engines'].add(result['engine'])
+        # add engine to list of result-engines
+        duplicated['engines'].add(result['engine'])

-            # using https if possible
-            if duplicated['parsed_url'].scheme != 'https' and result['parsed_url'].scheme == 'https':
-                duplicated['url'] = result['parsed_url'].geturl()
-                duplicated['parsed_url'] = result['parsed_url']
+        # using https if possible
+        if duplicated['parsed_url'].scheme != 'https' and result['parsed_url'].scheme == 'https':
+            duplicated['url'] = result['parsed_url'].geturl()
+            duplicated['parsed_url'] = result['parsed_url']

-        # if there is no duplicate found, append result
-        else:
-            result['positions'] = [position]
-            with RLock():
-                self._merged_results.append(result)
+    def __merge_result_no_url(self, result, position):
+        result['engines'] = set([result['engine']])
+        result['positions'] = [position]
+        with RLock():
+            self._merged_results.append(result)

     def order_results(self):
         for result in self._merged_results:
searx/search.py

@@ -77,7 +77,7 @@ def send_http_request(engine, request_params):
     return req(request_params['url'], **request_args)


-def search_one_request(engine, query, request_params):
+def search_one_http_request(engine, query, request_params):
     # update request parameters dependent on
     # search-engine (contained in engines folder)
     engine.request(query, request_params)

@@ -97,7 +97,53 @@ def search_one_request(engine, query, request_params):
     return engine.response(response)


+def search_one_offline_request(engine, query, request_params):
+    return engine.search(query, request_params)
+
+
 def search_one_request_safe(engine_name, query, request_params, result_container, start_time, timeout_limit):
+    if engines[engine_name].offline:
+        return search_one_offline_request_safe(engine_name, query, request_params, result_container, start_time, timeout_limit)  # noqa
+    return search_one_http_request_safe(engine_name, query, request_params, result_container, start_time, timeout_limit)
+
+
+def search_one_offline_request_safe(engine_name, query, request_params, result_container, start_time, timeout_limit):
+    engine = engines[engine_name]
+
+    try:
+        search_results = search_one_offline_request(engine, query, request_params)
+
+        if search_results:
+            result_container.extend(engine_name, search_results)
+
+            engine_time = time() - start_time
+            result_container.add_timing(engine_name, engine_time, engine_time)
+            with threading.RLock():
+                engine.stats['engine_time'] += engine_time
+                engine.stats['engine_time_count'] += 1
+
+    except ValueError as e:
+        record_offline_engine_stats_on_error(engine, result_container, start_time)
+        logger.exception('engine {0} : invalid input : {1}'.format(engine_name, e))
+    except Exception as e:
+        record_offline_engine_stats_on_error(engine, result_container, start_time)
+
+        result_container.add_unresponsive_engine((
+            engine_name,
+            u'{0}: {1}'.format(gettext('unexpected crash'), e),
+        ))
+        logger.exception('engine {0} : exception : {1}'.format(engine_name, e))
+
+
+def record_offline_engine_stats_on_error(engine, result_container, start_time):
+    engine_time = time() - start_time
+    result_container.add_timing(engine.name, engine_time, engine_time)
+
+    with threading.RLock():
+        engine.stats['errors'] += 1
+
+
+def search_one_http_request_safe(engine_name, query, request_params, result_container, start_time, timeout_limit):
     # set timeout for all HTTP requests
     requests_lib.set_timeout_for_thread(timeout_limit, start_time=start_time)
     # reset the HTTP total time

@@ -111,7 +157,7 @@ def search_one_http_request_safe(engine_name, query, request_params, result_container
     try:
         # send requests and parse the results
-        search_results = search_one_request(engine, query, request_params)
+        search_results = search_one_http_request(engine, query, request_params)

         # check if the engine accepted the request
         if search_results is not None:

@@ -427,19 +473,21 @@ class Search(object):
                 continue

             # set default request parameters
-            request_params = default_request_params()
-            request_params['headers']['User-Agent'] = user_agent
-            request_params['category'] = selected_engine['category']
-            request_params['pageno'] = search_query.pageno
+            request_params = {}
+            if not engine.offline:
+                request_params = default_request_params()
+                request_params['headers']['User-Agent'] = user_agent

-            if hasattr(engine, 'language') and engine.language:
-                request_params['language'] = engine.language
-            else:
-                request_params['language'] = search_query.lang
+                if hasattr(engine, 'language') and engine.language:
+                    request_params['language'] = engine.language
+                else:
+                    request_params['language'] = search_query.lang

-            # 0 = None, 1 = Moderate, 2 = Strict
-            request_params['safesearch'] = search_query.safesearch
-            request_params['time_range'] = search_query.time_range
+                # 0 = None, 1 = Moderate, 2 = Strict
+                request_params['safesearch'] = search_query.safesearch
+                request_params['time_range'] = search_query.time_range
+
+            request_params['category'] = selected_engine['category']
+            request_params['pageno'] = search_query.pageno

             # append request to list
             requests.append((selected_engine['name'], search_query.query, request_params))
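Putting the pieces together: an offline engine is a module that exposes search(query, request_params) and is flagged offline, and searx then skips HTTP setup and calls it directly in a worker thread. A minimal hypothetical sketch (module name, category, and result keys are illustrative; whether such dict results are rendered by the new key-value templates is an assumption based on the template changes later in this commit):

    # engines/example_offline.py -- hypothetical offline engine sketch.
    # With `offline : True` in its settings.yml entry (or set here),
    # search_one_request_safe() dispatches to the offline code path.
    offline = True
    categories = ['general']

    def init(engine_settings=None):
        pass  # an engine could open a local database here

    def search(query, request_params):
        # No network access: compute results locally. Results without a
        # 'url' key go through __merge_result_no_url() in results.py.
        return [{'query': query, 'answer': 'computed locally'}]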
searx/settings.yml

@@ -161,11 +161,12 @@ engines:
     weight : 2
     disabled : True

-  - name : digbt
-    engine : digbt
-    shortcut : dbt
-    timeout : 6.0
-    disabled : True
+# cloudflare protected
+#  - name : digbt
+#    engine : digbt
+#    shortcut : dbt
+#    timeout : 6.0
+#    disabled : True

   - name : digg
     engine : digg

@@ -703,9 +704,9 @@ engines:
     shortcut : vo
     categories : social media
     search_url : https://searchvoat.co/?t={query}
-    url_xpath : //div[@class="entry"]/p/a[contains(@class, "title")]/@href
-    title_xpath : //div[@class="entry"]/p/a[contains(@class, "title")]
-    content_xpath : //div[@class="entry"]/p/span[@class="domain"]/a/text()
+    url_xpath : //div[@class="entry"]//p[@class="title"]/a/@href
+    title_xpath : //div[@class="entry"]//p[@class="title"]/a/text()
+    content_xpath : //div[@class="entry"]//span[@class="domain"]/a/text()
     timeout : 10.0
     disabled : True
searx/static/themes/courgette/css/style.css

(diff collapsed in the original view; +1 −1)
searx/static/themes/courgette/less/style.less

@@ -325,6 +325,10 @@ a {
     font-size: 0.9em;
 }

+.result .engines {
+    text-align: right;
+}
+
 .result .content {
     margin: 0;
     color: #666;
searx/static/themes/legacy/css/style.css

(diff collapsed in the original view; +1 −1)
searx/static/themes/legacy/less/style.less

@@ -376,6 +376,10 @@ table {
     width: 100%;
 }

+.result-table {
+    margin-bottom: 10px;
+}
+
 td {
     padding: 0 4px;
 }
searx/templates/courgette/result_templates/key-value.html (new file, mode 100644)

<div class="result">
    <table>
        {% for key, value in result.items() %}
            {% if key in ['engine', 'engines', 'template', 'score', 'category', 'positions'] %}
                {% continue %}
            {% endif %}
            <tr>
                <td><b>{{ key|upper }}</b>: {{ value|safe }}</td>
            </tr>
        {% endfor %}
    </table>
    <p class="engines">{{ result.engines|join(', ') }}</p>
</div>
searx/templates/legacy/result_templates/key-value.html (new file, mode 100644)

<table class="result-table">
    {% for key, value in result.items() %}
        {% if key in ['engine', 'engines', 'template', 'score', 'category', 'positions'] %}
            {% continue %}
        {% endif %}
        <tr>
            <td><b>{{ key|upper }}</b>: {{ value|safe }}</td>
        </tr>
    {% endfor %}
    <tr>
        <td><b>ENGINES</b>: {{ result.engines|join(', ') }}</td>
    </tr>
</table>
searx/templates/oscar/macros.html

@@ -14,7 +14,7 @@
 <!-- Draw result header -->
 {% macro result_header(result, favicons) -%}
-<h4 class="result_header">{% if result.engine~".png" in favicons %}{{ draw_favicon(result.engine) }} {% endif %}{{ result_link(result.url, result.title|safe) }}</h4>
+<h4 class="result_header">{% if result.engine~".png" in favicons %}{{ draw_favicon(result.engine) }} {% endif %}{% if result.url %}{{ result_link(result.url, result.title|safe) }}{% else %}{{ result.title|safe}}{% endif %}</h4>
 {%- endmacro %}

 <!-- Draw result sub header -->

@@ -31,12 +31,16 @@
     {% for engine in result.engines %}
         <span class="label label-default">{{ engine }}</span>
     {% endfor %}
+    {% if result.url %}
     <small>{{ result_link("https://web.archive.org/web/" + result.url, icon('link') + _('cached'), "text-info") }}</small>
+    {% endif %}
     {% if proxify %}
         <small>{{ result_link(proxify(result.url), icon('sort') + _('proxied'), "text-info") }}</small>
     {% endif %}
 </div>
+{% if result.pretty_url %}
 <div class="external-link">{{ result.pretty_url }}</div>
+{% endif %}
 {%- endmacro %}

 <!-- Draw result footer -->

@@ -45,11 +49,15 @@
     {% for engine in result.engines %}
         <span class="label label-default">{{ engine }}</span>
     {% endfor %}
+    {% if result.url %}
     <small>{{ result_link("https://web.archive.org/web/" + result.url, icon('link') + _('cached'), "text-info") }}</small>
+    {% endif %}
     {% if proxify %}
         <small>{{ result_link(proxify(result.url), icon('sort') + _('proxied'), "text-info") }}</small>
     {% endif %}
+    {% if result.pretty_url %}
     <div class="external-link">{{ result.pretty_url }}</div>
+    {% endif %}
 {%- endmacro %}

 {% macro preferences_item_header(info, label, rtl) -%}
searx/templates/oscar/result_templates/key-value.html (new file, mode 100644)

{% from 'oscar/macros.html' import result_footer, result_footer_rtl with context %}
<div class="panel panel-default">
    <table class="table table-responsive table-bordered table-condensed">
        {% for key, value in result.items() %}
            {% if key in ['engine', 'engines', 'template', 'score', 'category', 'positions'] %}
                {% continue %}
            {% endif %}
            <tr>
                <td><b>{{ key|upper }}</b>: {{ value }}</td>
            </tr>
        {% endfor %}
    </table>
    {% if rtl %}
        {{ result_footer_rtl(result) }}
    {% else %}
        {{ result_footer(result) }}
    {% endif %}
</div>
searx/templates/simple/result_templates/key-value.html (new file, mode 100644)

<table>
    {% for key, value in result.items() %}
        {% if key in ['engine', 'engines', 'template', 'score', 'category', 'positions'] %}
            {% continue %}
        {% endif %}
        <tr>
            <td><b>{{ key|upper }}</b>: {{ value }}</td>
        </tr>
    {% endfor %}
</table>
<div class="engines">
    {% for engine in result.engines %}
        <span>{{ engine }}</span>
    {% endfor %}
</div>{{- '' -}}
searx/utils.py

@@ -308,14 +308,15 @@ def int_or_zero(num):
 def is_valid_lang(lang):
     is_abbr = (len(lang) == 2)
+    lang = lang.lower().decode('utf-8')
     if is_abbr:
         for l in language_codes:
-            if l[0][:2] == lang.lower():
+            if l[0][:2] == lang:
                 return (True, l[0][:2], l[3].lower())
         return False
     else:
         for l in language_codes:
-            if l[1].lower() == lang.lower():
+            if l[1].lower() == lang or l[3].lower() == lang:
                 return (True, l[0][:2], l[3].lower())
         return False

@@ -434,3 +435,18 @@ def ecma_unescape(s):
     # "%20" becomes " ", "%F3" becomes "ó"
     s = ecma_unescape2_re.sub(lambda e: unichr(int(e.group(1), 16)), s)
     return s
+
+
+def get_engine_from_settings(name):
+    """Return engine configuration from settings.yml of a given engine name"""
+
+    if 'engines' not in settings:
+        return {}
+
+    for engine in settings['engines']:
+        if 'name' not in engine:
+            continue
+        if name == engine['name']:
+            return engine
+
+    return {}
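Usage is straightforward: a lookup by engine name returns that engine's raw dict from settings.yml, or an empty dict when it is not configured (the engine name below is only an example):

    from searx.utils import get_engine_from_settings

    conf = get_engine_from_settings('digg')
    # e.g. {'name': 'digg', 'engine': 'digg', ...}, or {} if absent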
searx/webapp.py

@@ -124,6 +124,7 @@ app = Flask(
 app.jinja_env.trim_blocks = True
 app.jinja_env.lstrip_blocks = True
+app.jinja_env.add_extension('jinja2.ext.loopcontrols')
 app.secret_key = settings['server']['secret_key']

 if not searx_debug \

@@ -538,14 +539,16 @@ def index():
         if output_format == 'html':
             if 'content' in result and result['content']:
                 result['content'] = highlight_content(escape(result['content'][:1024]), search_query.query)
-            result['title'] = highlight_content(escape(result['title'] or u''), search_query.query)
+            if 'title' in result and result['title']:
+                result['title'] = highlight_content(escape(result['title'] or u''), search_query.query)
         else:
             if result.get('content'):
                 result['content'] = html_to_text(result['content']).strip()
             # removing html content and whitespace duplications
             result['title'] = ' '.join(html_to_text(result['title']).strip().split())

-        result['pretty_url'] = prettify_url(result['url'])
+        if 'url' in result:
+            result['pretty_url'] = prettify_url(result['url'])

         # TODO, check if timezone is calculated right
         if 'publishedDate' in result:
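The loopcontrols extension added here is what makes the {% continue %} tag in the new key-value templates legal Jinja2. A self-contained sketch of the same pattern (template string and data are illustrative):

    from jinja2 import Environment

    env = Environment(extensions=['jinja2.ext.loopcontrols'])
    tmpl = env.from_string(
        "{% for k, v in result.items() %}"
        "{% if k in ['engine', 'template'] %}{% continue %}{% endif %}"
        "{{ k }}={{ v }} "
        "{% endfor %}")
    print(tmpl.render(result={'engine': 'x', 'answer': 42}))  # "answer=42 "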
tests/unit/engines/test_deviantart.py

@@ -22,74 +22,3 @@ class TestDeviantartEngine(SearxTestCase):
         dicto['time_range'] = 'year'
         params = deviantart.request(query, dicto)
         self.assertEqual({}, params['url'])
-
-    def test_response(self):
-        self.assertRaises(AttributeError, deviantart.response, None)
-        self.assertRaises(AttributeError, deviantart.response, [])
-        self.assertRaises(AttributeError, deviantart.response, '')
-        self.assertRaises(AttributeError, deviantart.response, '[]')
-
-        response = mock.Mock(text='<html></html>')
-        self.assertEqual(deviantart.response(response), [])
-
-        response = mock.Mock(status_code=302)
-        self.assertEqual(deviantart.response(response), [])
-
-        html = """
-        <div id="page-1-results" class="page-results results-page-thumb torpedo-container">
-        <span class="thumb wide" href="http://amai911.deviantart.com/art/Horse-195212845"
-            data-super-full-width="900" data-super-full-height="600">
-        <a class="torpedo-thumb-link" href="https://url.of.image">
-            <img data-sigil="torpedo-img" src="https://url.of.thumbnail" />
-        </a>
-        <span class="info"><span class="title-wrap"><span class="title">Title of image</span></span>
-        </div>
-        """
-        response = mock.Mock(text=html)
-        results = deviantart.response(response)
-        self.assertEqual(type(results), list)
-        self.assertEqual(len(results), 1)
-        self.assertEqual(results[0]['title'], 'Title of image')
-        self.assertEqual(results[0]['url'], 'https://url.of.image')
-        self.assertNotIn('content', results[0])
-        self.assertEqual(results[0]['thumbnail_src'], 'https://url.of.thumbnail')
-
-        html = """
-        <span class="tt-fh-tc" style="width: 202px;">
-            <span class="tt-bb" style="width: 202px;">
-            </span>
-            <span class="shadow">
-                <a class="thumb" href="http://url.of.result/2nd.part.of.url"
-                    title="Behoimi BE Animation Test by test-0, Jan 4,
-                    2010 in Digital Art > Animation"> <i></i>
-                    <img width="200" height="200" alt="Test"
-                        src="http://url.of.thumbnail" data-src="http://th08.deviantart.net/test.jpg">
-                </a>
-            </span>
-            <!-- ^TTT -->
-        </span>
-        <span class="details">
-            <a href="http://test-0.deviantart.com/art/Test" class="t"
-                title="Behoimi BE Animation Test by test-0, Jan 4, 2010">
-                <span class="tt-fh-oe">Title of image</span> </a>
-            <small>
-                <span class="category">
-                    <span class="age">
-                        5 years ago
-                    </span>
-                    in <a title="Behoimi BE Animation Test by test-0, Jan 4, 2010"
-                        href="http://www.deviantart.com/browse/all/digitalart/animation/">Animation</a>
-                </span>
-                <div class="commentcount">
-                    <a href="http://test-0.deviantart.com/art/Test#comments">
-                        <span class="iconcommentsstats"></span>9 Comments</a>
-                </div>
-                <a class="mlt-link" href="http://www.deviantart.com/morelikethis/149167425">
-                    <span class="mlt-icon"></span> <span class="mlt-text">More Like This</span> </a>
-            </span>
-        </small> <!-- TTT$ -->
-        """
-        response = mock.Mock(text=html)
-        results = deviantart.response(response)
-        self.assertEqual(type(results), list)
-        self.assertEqual(len(results), 0)
tests/unit/engines/test_digg.py

@@ -14,88 +14,3 @@ class TestDiggEngine(SearxTestCase):
         self.assertIn('url', params)
         self.assertIn(query, params['url'])
         self.assertIn('digg.com', params['url'])
-
-    def test_response(self):
-        self.assertRaises(AttributeError, digg.response, None)
-        self.assertRaises(AttributeError, digg.response, [])
-        self.assertRaises(AttributeError, digg.response, '')
-        self.assertRaises(AttributeError, digg.response, '[]')
-
-        response = mock.Mock(text='{}')
-        self.assertEqual(digg.response(response), [])
-
-        response = mock.Mock(text='{"data": []}')
-        self.assertEqual(digg.response(response), [])
-
-        json = """
-        {
-          "status": "ok",
-          "num": 10,
-          "next_position": 20,
-          "html": "<article itemscope itemtype=\\"http://schema.org/Article\\"
-          class=\\"story-container digg-story-el hentry entry story-1sRANah col-1\\"
-          data-content-id=\\"1sRANah\\" data-contenturl=\\"http://url.of.link\\"
-          data-position=\\"0\\" data-diggs=\\"24\\" data-tweets=\\"69\\"
-          data-digg-score=\\"1190\\"> <div class=\\"story-image story-image-thumb\\">
-          <a data-position=\\"0\\" data-content-id=\\"1sRANah\\"
-          class=\\"story-link\\" href=\\"http://www.thedailybeast.com/\\"
-          target=\\"_blank\\"><img class=\\"story-image-img\\"
-          src=\\"http://url.of.image.jpeg\\" width=\\"312\\" height=\\"170\\"
-          alt=\\"\\" /> </a> </div> <div class=\\"story-content\\"><header
-          class=\\"story-header\\"> <div itemprop=\\"alternativeHeadline\\"
-          class=\\"story-kicker\\" >Kicker</div> <h2 itemprop=\\"headline\\"
-          class=\\"story-title entry-title\\"><a class=\\"story-title-link story-link\\"
-          rel=\\"bookmark\\" itemprop=\\"url\\" href=\\"http://www.thedailybeast.com/\\"
-          target=\\"_blank\\">Title of article</h2> <div class=\\"story-meta\\">
-          <div class=\\"story-score\\">
-          <div class=\\"story-score-diggscore diggscore-1sRANah\\">1190</div>
-          <div class=\\"story-score-details\\"> <div class=\\"arrow\\"></div>
-          <ul class=\\"story-score-details-list\\"> <li
-          class=\\"story-score-detail story-score-diggs\\"><span
-          class=\\"label\\">Diggs:</span> <span class=\\"count diggs-1sRANah\\">24</span>
-          </li> <li class=\\"story-score-detail story-score-twitter\\"><span
-          class=\\"label\\">Tweets:</span> <span class=\\"count tweets-1sRANah\\">69</span>
-          </li> <li class=\\"story-score-detail story-score-facebook\\"><span
-          class=\\"label\\">Facebook Shares:</span> <span
-          class=\\"count fb_shares-1sRANah\\">1097</span></li> </ul> </div> </div>
-          <span class=\\"story-meta-item story-source\\"> <a
-          itemprop=\\"publisher copyrightHolder sourceOrganization provider\\"
-          class=\\"story-meta-item-link story-source-link\\"
-          href=\\"/source/thedailybeast.com\\">The Daily Beast </a> </span>
-          <span class=\\"story-meta-item story-tag first-tag\\"> <a
-          itemprop=\\"keywords\\" rel=\\"tag\\"
-          class=\\"story-meta-item-link story-tag-link\\" href=\\"/tag/news\\">News</a>
-          </span> <abbr class=\\"published story-meta-item story-timestamp\\"
-          title=\\"2014-10-18 14:53:45\\"> <time datetime=\\"2014-10-18 14:53:45\\">18 Oct 2014</time>
-          </abbr> </div> </header> </div> <ul class=\\"story-actions\\"> <li
-          class=\\"story-action story-action-digg btn-story-action-container\\">
-          <a class=\\"target digg-1sRANah\\" href=\\"#\\">Digg</a></li> <li
-          class=\\"story-action story-action-save btn-story-action-container\\">
-          <a class=\\"target save-1sRANah\\" href=\\"#\\">Save</a></li> <li
-          class=\\"story-action story-action-share\\"><a
-          class=\\"target share-facebook\\" href=\\"https://www.facebook.com/\\">Facebook</a></li>
-          <li class=\\"story-action story-action-share\\"><a class=\\"target share-twitter\\"
-          href=\\"https://twitter.com/\\">Twitter</a></li> </ul> </article>"
-        }
-        """
-        json = json.replace('\r\n', '').replace('\n', '').replace('\r', '')
-        response = mock.Mock(text=json)
-        results = digg.response(response)
-        self.assertEqual(type(results), list)
-        self.assertEqual(len(results), 1)
-        self.assertEqual(results[0]['title'], 'Title of article')
-        self.assertEqual(results[0]['url'], 'http://url.of.link')
-        self.assertEqual(results[0]['thumbnail'], 'http://url.of.image.jpeg')
-        self.assertEqual(results[0]['content'], '')
-
-        json = """
-        {
-          "status": "error",
-          "num": 10,
-          "next_position": 20
-        }
-        """
-        response = mock.Mock(text=json)
-        results = digg.response(response)
-        self.assertEqual(type(results), list)
-        self.assertEqual(len(results), 0)
tests/unit/engines/test_startpage.py

@@ -18,12 +18,9 @@ class TestStartpageEngine(SearxTestCase):
         self.assertIn('data', params)
         self.assertIn('query', params['data'])
         self.assertIn(query, params['data']['query'])
-        self.assertIn('with_language', params['data'])
-        self.assertIn('lang_fr', params['data']['with_language'])

         dicto['language'] = 'all'
         params = startpage.request(query, dicto)
-        self.assertNotIn('with_language', params['data'])

     def test_response(self):
         self.assertRaises(AttributeError, startpage.response, None)

@@ -35,33 +32,32 @@ class TestStartpageEngine(SearxTestCase):
         self.assertEqual(startpage.response(response), [])

         html = """
-        <li class="search-result search-item">
-            <h3>
-                <a href='http://this.should.be.the.link/' id='title_2' name='title_2' >
-                    This should be the title
-                </a>
-                <span id='title_stars_2' name='title_stars_2'> </span>
-            </h3>
-            <p class="search-item__body">
-                This should be the content.
-            </p>
-            <p>
-                <span class='url'>www.speed<b>test</b>.net/fr/
-                </span>
-                -
-                <A class="proxy" id="proxy_link" HREF="https://ixquick-proxy.com/do/spg/proxy?ep=&edata=&ek=&ekdata="
-                    class='proxy'>
-                    Navigation avec Ixquick Proxy
-                </A>
-                -
-                <A HREF="https://ixquick-proxy.com/do/spg/highlight.pl?l=francais&c=hf&cat=web&q=test&rl=NONE&rid=
-                    &hlq=https://startpage.com/do/search&mtabp=-1&mtcmd=process_search&mtlanguage=francais&mtengine0=
-                    &mtcat=web&u=http:%2F%2Fwww.speedtest.net%2Ffr%2F" class='proxy'>
-                    Mis en surbrillance
-                </A>
-            </p>
-        </li>
-        """
+        <div class="w-gl__result">
+            <a
+                class="w-gl__result-title"
+                href="http://this.should.be.the.link/"
+                data-onw="1"
+                rel="noopener noreferrer"
+                target="_blank">
+                <h3>This should be the title</h3>
+            </a>
+            <div class="w-gl__result-second-line-container">
+                <div class="w-gl__result-url-container">
+                    <a
+                        class="w-gl__result-url"
+                        href="http://this.should.be.the.link/"
+                        rel="noopener noreferrer"
+                        target="_blank">https://www.cnbc.com/2019/10/12/dj-zedd-banned-in-china-for-liking-a-south-park-tweet.html</a>
+                </div>
+                <a
+                    class="w-gl__anonymous-view-url"
+                    href="https://eu-browse.startpage.com/do/proxy?ep=556b554d576b6f5054554546423167764b5445616455554d5342675441774659495246304848774f5267385453304941486b5949546c63704e33774f526b705544565647516d4a61554246304847674f4a556f6957415a4f436b455042426b6b4f7a64535a52784a56514a4f45307743446c567250445a4f4c52514e5677554e46776b4b545563704c7931554c5167465467644f42464d4f4255426f4d693152624634525741305845526c595746636b626d67494e42705743466c515252634f4267456e597a7346596b7856435134465345634f564249794b5752785643315863546769515773764a5163494c5877505246315865456f5141426b4f41774167596d6c5a4e30395758773442465251495677596c624770665a6b786344466b4151455663425249794d6a78525a55554157516f4342556766526b51314b57514e&ek=4q58686o5047786e6343527259445247576c6e38&ekdata=84abd523dc13cba5c65164d04d7d7263"
+                    target="_blank">Anonymous View</a>
+            </div>
+            <p class="w-gl__description">This should be the content.</p>
+        </div>
+        """  # noqa
         response = mock.Mock(text=html.encode('utf-8'))
         results = startpage.response(response)
         self.assertEqual(type(results), list)

@@ -69,72 +65,3 @@ class TestStartpageEngine(SearxTestCase):
         self.assertEqual(results[0]['title'], 'This should be the title')
         self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
         self.assertEqual(results[0]['content'], 'This should be the content.')
-
-        html = """
-        <li class="search-result search-item">
-            <h3>
-                <a href='http://www.google.com/aclk?sa=l&ai=C' id='title_2' name='title_2' >
-                    This should be the title
-                </a>
-                <span id='title_stars_2' name='title_stars_2'> </span>
-            </h3>
-            <p class="search-item__body">
-                This should be the content.
-            </p>
-            <p>
-                <span class='url'>www.speed<b>test</b>.net/fr/
-                </span>
-                -
-                <A class="proxy" id="proxy_link" HREF="https://ixquick-proxy.com/do/spg/proxy?ep=&edata=&ek=&ekdata="
-                    class='proxy'>
-                    Navigation avec Ixquick Proxy
-                </A>
-                -
-                <A HREF="https://ixquick-proxy.com/do/spg/highlight.pl?l=francais&c=hf&cat=web&q=test&rl=NONE&rid=
-                    &hlq=https://startpage.com/do/search&mtabp=-1&mtcmd=process_search&mtlanguage=francais&mtengine0=
-                    &mtcat=web&u=http:%2F%2Fwww.speedtest.net%2Ffr%2F" class='proxy'>
-                    Mis en surbrillance
-                </A>
-            </p>
-        </li>
-        <li class="search-result search-item">
-            <h3>
-                <span id='title_stars_2' name='title_stars_2'> </span>
-            </h3>
-            <p class="search-item__body">
-                This should be the content.
-            </p>
-            <p>
-                <span class='url'>www.speed<b>test</b>.net/fr/
-                </span>
-            </p>
-        </li>
-        <li class="search-result search-item">
-            <h3>
-                <a href='http://this.should.be.the.link/' id='title_2' name='title_2' >
-                    This should be the title
-                </a>
-                <span id='title_stars_2' name='title_stars_2'> </span>
-            </h3>
-            <p>
-                <span class='url'>www.speed<b>test</b>.net/fr/
-                </span>
-                -
-                <A class="proxy" id="proxy_link" HREF="https://ixquick-proxy.com/do/spg/proxy?ep=&edata=&ek=&ekdata="
-                    class='proxy'>
-                    Navigation avec Ixquick Proxy
-                </A>
-                -
-                <A HREF="https://ixquick-proxy.com/do/spg/highlight.pl?l=francais&c=hf&cat=web&q=test&rl=NONE&rid=
-                    &hlq=https://startpage.com/do/search&mtabp=-1&mtcmd=process_search&mtlanguage=francais&mtengine0=
-                    &mtcat=web&u=http:%2F%2Fwww.speedtest.net%2Ffr%2F" class='proxy'>
-                    Mis en surbrillance
-                </A>
-            </p>
-        </li>
-        """
-        response = mock.Mock(text=html.encode('utf-8'))
-        results = startpage.response(response)
-        self.assertEqual(type(results), list)
-        self.assertEqual(len(results), 1)
-        self.assertEqual(results[0]['content'], '')
tests/unit/engines/test_www1x.py

@@ -12,46 +12,3 @@ class TestWww1xEngine(SearxTestCase):
         self.assertTrue('url' in params)
         self.assertTrue(query in params['url'])
         self.assertTrue('1x.com' in params['url'])
-
-    def test_response(self):
-        self.assertRaises(AttributeError, www1x.response, None)
-        self.assertRaises(AttributeError, www1x.response, [])
-        self.assertRaises(AttributeError, www1x.response, '')
-        self.assertRaises(AttributeError, www1x.response, '[]')
-
-        response = mock.Mock(text='<html></html>')
-        self.assertEqual(www1x.response(response), [])
-        html = """
-        <?xml version="1.0" encoding="UTF-8"?><!DOCTYPE characters
-        [
-        <!ELEMENT characters (character*) >
-        <!ELEMENT character (#PCDATA ) >
-        <!ENTITY iexcl "¡" >
-        <!ENTITY cent "¢" >
-        <!ENTITY pound "£" >
-        ]
-        ><root><searchresult><![CDATA[<table border="0" cellpadding="0" cellspacing="0" width="100%">
-        <tr>
-            <td style="min-width: 220px;" valign="top">
-                <div style="font-size: 30px; margin: 0px 0px 20px 0px;">Photos</div>
-        <div>
-            <a href="/photo/123456" class="dynamiclink">
-                <img border="0" class="searchresult" src="/images/user/testimage-123456.jpg" style="width: 125px; height: 120px;">
-            </a>
-            <a title="sjoerd lammers street photography" href="/member/sjoerdlammers" class="dynamiclink">
-                <img border="0" class="searchresult" src="/images/profile/60c48b394c677d2fa4d9e7d263aabf44-square.jpg">
-            </a>
-        </div>
-        </td>
-        </table>
-        ]]></searchresult></root>
-        """
-        response = mock.Mock(text=html)
-        results = www1x.response(response)
-        self.assertEqual(type(results), list)
-        self.assertEqual(len(results), 1)
-        self.assertEqual(results[0]['url'], 'https://1x.com/photo/123456')
-        self.assertEqual(results[0]['thumbnail_src'], 'https://1x.com/images/user/testimage-123456.jpg')
-        self.assertEqual(results[0]['content'], '')
-        self.assertEqual(results[0]['template'], 'images.html')