Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
searx-engine
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
1
Merge Requests
1
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
searx-engine
Commits
a6f20caf
Commit
a6f20caf
authored
Sep 23, 2019
by
Noémi Ványi
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add initial support for offline engines && command engine
parent
2946c5f1
Changes
19
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
19 changed files
with
228 additions
and
61 deletions
+228
-61
__init__.py
searx/engines/__init__.py
+16
-12
soundcloud.py
searx/engines/soundcloud.py
+1
-1
wolframalpha_noapi.py
searx/engines/wolframalpha_noapi.py
+1
-1
https_rewrite.py
searx/plugins/https_rewrite.py
+3
-0
oa_doi_rewrite.py
searx/plugins/oa_doi_rewrite.py
+3
-0
tracker_url_remover.py
searx/plugins/tracker_url_remover.py
+3
-0
results.py
searx/results.py
+45
-29
search.py
searx/search.py
+61
-13
style.css
searx/static/themes/courgette/css/style.css
+1
-1
style.less
searx/static/themes/courgette/less/style.less
+4
-0
style.css
searx/static/themes/legacy/css/style.css
+1
-1
style.less
searx/static/themes/legacy/less/style.less
+4
-0
key-value.html
searx/templates/courgette/result_templates/key-value.html
+13
-0
key-value.html
searx/templates/legacy/result_templates/key-value.html
+13
-0
macros.html
searx/templates/oscar/macros.html
+9
-1
key-value.html
searx/templates/oscar/result_templates/key-value.html
+19
-0
key-value.html
searx/templates/simple/result_templates/key-value.html
+11
-0
utils.py
searx/utils.py
+15
-0
webapp.py
searx/webapp.py
+5
-2
No files found.
searx/engines/__init__.py
View file @
a6f20caf
...
...
@@ -27,7 +27,7 @@ from json import loads
from
requests
import
get
from
searx
import
settings
from
searx
import
logger
from
searx.utils
import
load_module
,
match_language
from
searx.utils
import
load_module
,
match_language
,
get_engine_from_settings
logger
=
logger
.
getChild
(
'engines'
)
...
...
@@ -53,7 +53,8 @@ engine_default_args = {'paging': False,
'disabled'
:
False
,
'suspend_end_time'
:
0
,
'continuous_errors'
:
0
,
'time_range_support'
:
False
}
'time_range_support'
:
False
,
'offline'
:
False
}
def
load_engine
(
engine_data
):
...
...
@@ -128,14 +129,17 @@ def load_engine(engine_data):
engine
.
stats
=
{
'result_count'
:
0
,
'search_count'
:
0
,
'page_load_time'
:
0
,
'page_load_count'
:
0
,
'engine_time'
:
0
,
'engine_time_count'
:
0
,
'score_count'
:
0
,
'errors'
:
0
}
if
not
engine
.
offline
:
engine
.
stats
[
'page_load_time'
]
=
0
engine
.
stats
[
'page_load_count'
]
=
0
for
category_name
in
engine
.
categories
:
categories
.
setdefault
(
category_name
,
[])
.
append
(
engine
)
...
...
@@ -173,11 +177,6 @@ def get_engines_stats():
results_num
=
\
engine
.
stats
[
'result_count'
]
/
float
(
engine
.
stats
[
'search_count'
])
if
engine
.
stats
[
'page_load_count'
]
!=
0
:
load_times
=
engine
.
stats
[
'page_load_time'
]
/
float
(
engine
.
stats
[
'page_load_count'
])
# noqa
else
:
load_times
=
0
if
engine
.
stats
[
'engine_time_count'
]
!=
0
:
this_engine_time
=
engine
.
stats
[
'engine_time'
]
/
float
(
engine
.
stats
[
'engine_time_count'
])
# noqa
else
:
...
...
@@ -189,14 +188,19 @@ def get_engines_stats():
else
:
score
=
score_per_result
=
0.0
max_pageload
=
max
(
load_times
,
max_pageload
)
if
not
engine
.
offline
:
load_times
=
0
if
engine
.
stats
[
'page_load_count'
]
!=
0
:
load_times
=
engine
.
stats
[
'page_load_time'
]
/
float
(
engine
.
stats
[
'page_load_count'
])
# noqa
max_pageload
=
max
(
load_times
,
max_pageload
)
pageloads
.
append
({
'avg'
:
load_times
,
'name'
:
engine
.
name
})
max_engine_times
=
max
(
this_engine_time
,
max_engine_times
)
max_results
=
max
(
results_num
,
max_results
)
max_score
=
max
(
score
,
max_score
)
max_score_per_result
=
max
(
score_per_result
,
max_score_per_result
)
max_errors
=
max
(
max_errors
,
engine
.
stats
[
'errors'
])
pageloads
.
append
({
'avg'
:
load_times
,
'name'
:
engine
.
name
})
engine_times
.
append
({
'avg'
:
this_engine_time
,
'name'
:
engine
.
name
})
results
.
append
({
'avg'
:
results_num
,
'name'
:
engine
.
name
})
scores
.
append
({
'avg'
:
score
,
'name'
:
engine
.
name
})
...
...
@@ -255,7 +259,7 @@ def initialize_engines(engine_list):
load_engines
(
engine_list
)
def
engine_init
(
engine_name
,
init_fn
):
init_fn
()
init_fn
(
get_engine_from_settings
(
engine_name
)
)
logger
.
debug
(
'
%
s engine: Initialized'
,
engine_name
)
for
engine_name
,
engine
in
engines
.
items
():
...
...
searx/engines/soundcloud.py
View file @
a6f20caf
...
...
@@ -66,7 +66,7 @@ def get_client_id():
return
""
def
init
():
def
init
(
engine_settings
=
None
):
global
guest_client_id
# api-key
guest_client_id
=
get_client_id
()
...
...
searx/engines/wolframalpha_noapi.py
View file @
a6f20caf
...
...
@@ -55,7 +55,7 @@ def obtain_token():
return
token
def
init
():
def
init
(
engine_settings
=
None
):
obtain_token
()
...
...
searx/plugins/https_rewrite.py
View file @
a6f20caf
...
...
@@ -225,6 +225,9 @@ def https_url_rewrite(result):
def
on_result
(
request
,
search
,
result
):
if
'parsed_url'
not
in
result
:
return
True
if
result
[
'parsed_url'
]
.
scheme
==
'http'
:
https_url_rewrite
(
result
)
return
True
...
...
searx/plugins/oa_doi_rewrite.py
View file @
a6f20caf
...
...
@@ -35,6 +35,9 @@ def get_doi_resolver(args, preference_doi_resolver):
def
on_result
(
request
,
search
,
result
):
if
'parsed_url'
not
in
result
:
return
True
doi
=
extract_doi
(
result
[
'parsed_url'
])
if
doi
and
len
(
doi
)
<
50
:
for
suffix
in
(
'/'
,
'.pdf'
,
'/full'
,
'/meta'
,
'/abstract'
):
...
...
searx/plugins/tracker_url_remover.py
View file @
a6f20caf
...
...
@@ -30,6 +30,9 @@ preference_section = 'privacy'
def
on_result
(
request
,
search
,
result
):
if
'parsed_url'
not
in
result
:
return
True
query
=
result
[
'parsed_url'
]
.
query
if
query
==
""
:
...
...
searx/results.py
View file @
a6f20caf
...
...
@@ -197,6 +197,13 @@ class ResultContainer(object):
self
.
infoboxes
.
append
(
infobox
)
def _merge_result(self, result, position):
    """Route a result to the merge strategy that fits it.

    Results without a 'url' key are stored as-is via the no-url path;
    every other result goes through the URL-based merge.
    """
    if 'url' not in result:
        self.__merge_result_no_url(result, position)
    else:
        self.__merge_url_result(result, position)
def
__merge_url_result
(
self
,
result
,
position
):
result
[
'parsed_url'
]
=
urlparse
(
result
[
'url'
])
# if the result has no scheme, use http as default
...
...
@@ -210,51 +217,60 @@ class ResultContainer(object):
if
result
.
get
(
'content'
):
result
[
'content'
]
=
WHITESPACE_REGEX
.
sub
(
' '
,
result
[
'content'
])
# check for duplicates
duplicated
=
False
duplicated
=
self
.
__find_duplicated_http_result
(
result
)
if
duplicated
:
self
.
__merge_duplicated_http_result
(
duplicated
,
result
,
position
)
return
# if there is no duplicate found, append result
result
[
'positions'
]
=
[
position
]
with
RLock
():
self
.
_merged_results
.
append
(
result
)
def
__find_duplicated_http_result
(
self
,
result
):
result_template
=
result
.
get
(
'template'
)
for
merged_result
in
self
.
_merged_results
:
if
'parsed_url'
not
in
merged_result
:
continue
if
compare_urls
(
result
[
'parsed_url'
],
merged_result
[
'parsed_url'
])
\
and
result_template
==
merged_result
.
get
(
'template'
):
if
result_template
!=
'images.html'
:
# not an image, same template, same url : it's a duplicate
duplicated
=
merged_result
break
return
merged_result
else
:
# it's an image
# it's a duplicate if the parsed_url, template and img_src are the same
if
result
.
get
(
'img_src'
,
''
)
==
merged_result
.
get
(
'img_src'
,
''
):
duplicated
=
merged_result
break
return
merged_result
return
None
# merge duplicates together
if
duplicated
:
# using content with more text
if
result_content_len
(
result
.
get
(
'content'
,
''
))
>
\
result_content_len
(
duplicated
.
get
(
'content'
,
''
)):
duplicated
[
'content'
]
=
result
[
'content'
]
def
__merge_duplicated_http_result
(
self
,
duplicated
,
result
,
position
):
# using content with more text
if
result_content_len
(
result
.
get
(
'content'
,
''
))
>
\
result_content_len
(
duplicated
.
get
(
'content'
,
''
)):
duplicated
[
'content'
]
=
result
[
'content'
]
# merge all result's parameters not found in duplicate
for
key
in
result
.
keys
():
if
not
duplicated
.
get
(
key
):
duplicated
[
key
]
=
result
.
get
(
key
)
# merge all result's parameters not found in duplicate
for
key
in
result
.
keys
():
if
not
duplicated
.
get
(
key
):
duplicated
[
key
]
=
result
.
get
(
key
)
# add the new position
duplicated
[
'positions'
]
.
append
(
position
)
# add the new position
duplicated
[
'positions'
]
.
append
(
position
)
# add engine to list of result-engines
duplicated
[
'engines'
]
.
add
(
result
[
'engine'
])
# add engine to list of result-engines
duplicated
[
'engines'
]
.
add
(
result
[
'engine'
])
# using https if possible
if
duplicated
[
'parsed_url'
]
.
scheme
!=
'https'
and
result
[
'parsed_url'
]
.
scheme
==
'https'
:
duplicated
[
'url'
]
=
result
[
'parsed_url'
]
.
geturl
()
duplicated
[
'parsed_url'
]
=
result
[
'parsed_url'
]
# using https if possible
if
duplicated
[
'parsed_url'
]
.
scheme
!=
'https'
and
result
[
'parsed_url'
]
.
scheme
==
'https'
:
duplicated
[
'url'
]
=
result
[
'parsed_url'
]
.
geturl
()
duplicated
[
'parsed_url'
]
=
result
[
'parsed_url'
]
# if there is no duplicate found, append result
else
:
result
[
'positions'
]
=
[
position
]
with
RLock
():
self
.
_merged_results
.
append
(
result
)
def __merge_result_no_url(self, result, position):
    """Store a result that has no URL; no duplicate lookup is performed.

    The result is annotated with a one-element 'engines' set built from
    its 'engine' key and a one-element 'positions' list, then appended
    to self._merged_results.
    """
    source_engine = result['engine']
    result['engines'] = {source_engine}
    result['positions'] = [position]
    with RLock():
        self._merged_results.append(result)
def
order_results
(
self
):
for
result
in
self
.
_merged_results
:
...
...
searx/search.py
View file @
a6f20caf
...
...
@@ -77,7 +77,7 @@ def send_http_request(engine, request_params):
return
req
(
request_params
[
'url'
],
**
request_args
)
def
search_one_request
(
engine
,
query
,
request_params
):
def
search_one_
http_
request
(
engine
,
query
,
request_params
):
# update request parameters dependent on
# search-engine (contained in engines folder)
engine
.
request
(
query
,
request_params
)
...
...
@@ -97,7 +97,53 @@ def search_one_request(engine, query, request_params):
return
engine
.
response
(
response
)
def search_one_offline_request(engine, query, request_params):
    """Run an offline engine by calling its search() and return the outcome."""
    offline_search = engine.search
    return offline_search(query, request_params)
def search_one_request_safe(engine_name, query, request_params, result_container, start_time, timeout_limit):
    """Dispatch a request to the offline or the HTTP safe-runner.

    The choice is made from the `offline` attribute of the engine
    registered under `engine_name`; the runner's return value is passed
    back unchanged.
    """
    if engines[engine_name].offline:
        runner = search_one_offline_request_safe
    else:
        runner = search_one_http_request_safe
    return runner(engine_name, query, request_params, result_container, start_time, timeout_limit)
def search_one_offline_request_safe(engine_name, query, request_params, result_container, start_time, timeout_limit):
    """Run an offline engine and record results, timing and errors.

    Exceptions raised by the engine are caught here: a ValueError is
    logged as invalid input, any other exception additionally marks the
    engine as unresponsive in `result_container`. `timeout_limit` is not
    used by this function.
    """
    engine = engines[engine_name]

    try:
        found = search_one_offline_request(engine, query, request_params)

        # NOTE(review): timing is only recorded when the engine returned
        # results -- confirm this is intended.
        if found:
            result_container.extend(engine_name, found)

            elapsed = time() - start_time
            result_container.add_timing(engine_name, elapsed, elapsed)
            # A new RLock is created here on every call.
            with threading.RLock():
                engine.stats['engine_time'] += elapsed
                engine.stats['engine_time_count'] += 1

    except ValueError as err:
        record_offline_engine_stats_on_error(engine, result_container, start_time)
        logger.exception('engine {0} : invalid input : {1}'.format(engine_name, err))
    except Exception as err:
        record_offline_engine_stats_on_error(engine, result_container, start_time)
        crash_message = u'{0}: {1}'.format(gettext('unexpected crash'), err)
        result_container.add_unresponsive_engine((engine_name, crash_message, ))
        logger.exception('engine {0} : exception : {1}'.format(engine_name, err))
def record_offline_engine_stats_on_error(engine, result_container, start_time):
    """Record the elapsed time and bump the error counter of a failed engine.

    The time elapsed since `start_time` is passed to
    `result_container.add_timing` twice (as both timing arguments), and
    `engine.stats['errors']` is incremented by one.
    """
    elapsed = time() - start_time
    result_container.add_timing(engine.name, elapsed, elapsed)

    with threading.RLock():
        engine.stats['errors'] = engine.stats['errors'] + 1
def
search_one_http_request_safe
(
engine_name
,
query
,
request_params
,
result_container
,
start_time
,
timeout_limit
):
# set timeout for all HTTP requests
requests_lib
.
set_timeout_for_thread
(
timeout_limit
,
start_time
=
start_time
)
# reset the HTTP total time
...
...
@@ -111,7 +157,7 @@ def search_one_request_safe(engine_name, query, request_params, result_container
try
:
# send requests and parse the results
search_results
=
search_one_request
(
engine
,
query
,
request_params
)
search_results
=
search_one_
http_
request
(
engine
,
query
,
request_params
)
# check if the engine accepted the request
if
search_results
is
not
None
:
...
...
@@ -427,19 +473,21 @@ class Search(object):
continue
# set default request parameters
request_params
=
default_request_params
()
request_params
[
'headers'
][
'User-Agent'
]
=
user_agent
request_params
[
'category'
]
=
selected_engine
[
'category'
]
request_params
[
'pageno'
]
=
search_query
.
pageno
request_params
=
{}
if
not
engine
.
offline
:
request_params
=
default_request_params
()
request_params
[
'headers'
][
'User-Agent'
]
=
user_agent
if
hasattr
(
engine
,
'language'
)
and
engine
.
language
:
request_params
[
'language'
]
=
engine
.
language
else
:
request_params
[
'language'
]
=
search_query
.
lang
if
hasattr
(
engine
,
'language'
)
and
engine
.
language
:
request_params
[
'language'
]
=
engine
.
language
else
:
request_params
[
'language'
]
=
search_query
.
lang
# 0 = None, 1 = Moderate, 2 = Strict
request_params
[
'safesearch'
]
=
search_query
.
safesearch
request_params
[
'time_range'
]
=
search_query
.
time_range
request_params
[
'safesearch'
]
=
search_query
.
safesearch
request_params
[
'time_range'
]
=
search_query
.
time_range
request_params
[
'category'
]
=
selected_engine
[
'category'
]
request_params
[
'pageno'
]
=
search_query
.
pageno
# append request to list
requests
.
append
((
selected_engine
[
'name'
],
search_query
.
query
,
request_params
))
...
...
searx/static/themes/courgette/css/style.css
View file @
a6f20caf
This diff is collapsed.
Click to expand it.
searx/static/themes/courgette/less/style.less
View file @
a6f20caf
...
...
@@ -325,6 +325,10 @@ a {
font-size: 0.9em;
}
.result .engines {
text-align: right;
}
.result .content {
margin: 0;
color: #666;
...
...
searx/static/themes/legacy/css/style.css
View file @
a6f20caf
This diff is collapsed.
Click to expand it.
searx/static/themes/legacy/less/style.less
View file @
a6f20caf
...
...
@@ -376,6 +376,10 @@ table {
width: 100%;
}
.result-table {
margin-bottom: 10px;
}
td {
padding: 0 4px;
}
...
...
searx/templates/courgette/result_templates/key-value.html
0 → 100644
View file @
a6f20caf
<div
class=
"result"
>
<table>
{% for key, value in result.items() %}
{% if key in ['engine', 'engines', 'template', 'score', 'category', 'positions'] %}
{% continue %}
{% endif %}
<tr>
<td><b>
{{ key|upper }}
</b>
: {{ value|safe }}
</td>
</tr>
{% endfor %}
</table>
<p
class=
"engines"
>
{{ result.engines|join(', ') }}
</p>
</div>
searx/templates/legacy/result_templates/key-value.html
0 → 100644
View file @
a6f20caf
<table
class=
"result-table"
>
{% for key, value in result.items() %}
{% if key in ['engine', 'engines', 'template', 'score', 'category', 'positions'] %}
{% continue %}
{% endif %}
<tr>
<td><b>
{{ key|upper }}
</b>
: {{ value|safe }}
</td>
</tr>
{% endfor %}
<tr>
<td><b>
ENGINES
</b>
: {{ result.engines|join(', ') }}
</td>
</tr>
</table>
searx/templates/oscar/macros.html
View file @
a6f20caf
...
...
@@ -14,7 +14,7 @@
<!-- Draw result header -->
{% macro result_header(result, favicons) -%}
<h4
class=
"result_header"
>
{% if result.engine~".png" in favicons %}{{ draw_favicon(result.engine) }} {% endif %}{
{ result_link(result.url, result.title|safe) }
}
</h4>
<h4
class=
"result_header"
>
{% if result.engine~".png" in favicons %}{{ draw_favicon(result.engine) }} {% endif %}{
% if result.url %}{{ result_link(result.url, result.title|safe) }}{% else %}{{ result.title|safe}}{% endif %
}
</h4>
{%- endmacro %}
<!-- Draw result sub header -->
...
...
@@ -31,12 +31,16 @@
{% for engine in result.engines %}
<span
class=
"label label-default"
>
{{ engine }}
</span>
{% endfor %}
{% if result.url %}
<small>
{{ result_link("https://web.archive.org/web/" + result.url, icon('link') + _('cached'), "text-info") }}
</small>
{% endif %}
{% if proxify %}
<small>
{{ result_link(proxify(result.url), icon('sort') + _('proxied'), "text-info") }}
</small>
{% endif %}
</div>
{% if result.pretty_url %}
<div
class=
"external-link"
>
{{ result.pretty_url }}
</div>
{% endif %}
{%- endmacro %}
<!-- Draw result footer -->
...
...
@@ -45,11 +49,15 @@
{% for engine in result.engines %}
<span
class=
"label label-default"
>
{{ engine }}
</span>
{% endfor %}
{% if result.url %}
<small>
{{ result_link("https://web.archive.org/web/" + result.url, icon('link') + _('cached'), "text-info") }}
</small>
{% endif %}
{% if proxify %}
<small>
{{ result_link(proxify(result.url), icon('sort') + _('proxied'), "text-info") }}
</small>
{% endif %}
{% if result.pretty_url %}
<div
class=
"external-link"
>
{{ result.pretty_url }}
</div>
{% endif %}
{%- endmacro %}
{% macro preferences_item_header(info, label, rtl) -%}
...
...
searx/templates/oscar/result_templates/key-value.html
0 → 100644
View file @
a6f20caf
{% from 'oscar/macros.html' import result_footer, result_footer_rtl with context %}
<div
class=
"panel panel-default"
>
<table
class=
"table table-responsive table-bordered table-condensed"
>
{% for key, value in result.items() %}
{% if key in ['engine', 'engines', 'template', 'score', 'category', 'positions'] %}
{% continue %}
{% endif %}
<tr>
<td><b>
{{ key|upper }}
</b>
: {{ value }}
</td>
</tr>
{% endfor %}
</table>
{% if rtl %}
{{ result_footer_rtl(result) }}
{% else %}
{{ result_footer(result) }}
{% endif %}
</div>
searx/templates/simple/result_templates/key-value.html
0 → 100644
View file @
a6f20caf
<table>
{% for key, value in result.items() %}
{% if key in ['engine', 'engines', 'template', 'score', 'category', 'positions'] %}
{% continue %}
{% endif %}
<tr>
<td><b>
{{ key|upper }}
</b>
: {{ value }}
</td>
</tr>
{% endfor %}
</table>
<div
class=
"engines"
>
{% for engine in result.engines %}
<span>
{{ engine }}
</span>
{% endfor %}
</div>
{{- '' -}}
searx/utils.py
View file @
a6f20caf
...
...
@@ -435,3 +435,18 @@ def ecma_unescape(s):
# "%20" becomes " ", "%F3" becomes "ó"
s
=
ecma_unescape2_re
.
sub
(
lambda
e
:
unichr
(
int
(
e
.
group
(
1
),
16
)),
s
)
return
s
def get_engine_from_settings(name):
    """Return engine configuration from settings.yml of a given engine name

    The first entry of settings['engines'] whose 'name' equals `name` is
    returned (the entry itself, not a copy). An empty dict is returned
    when there is no 'engines' key, when its value is empty or None, or
    when no entry matches. Entries without a 'name' key are skipped.
    """
    if 'engines' not in settings:
        return {}

    # An 'engines' key that is present but holds None (e.g. an empty key
    # in the settings file) must behave like "no engines", not raise.
    for engine in settings['engines'] or ():
        if 'name' not in engine:
            continue
        if name == engine['name']:
            return engine

    return {}
searx/webapp.py
View file @
a6f20caf
...
...
@@ -124,6 +124,7 @@ app = Flask(
app
.
jinja_env
.
trim_blocks
=
True
app
.
jinja_env
.
lstrip_blocks
=
True
app
.
jinja_env
.
add_extension
(
'jinja2.ext.loopcontrols'
)
app
.
secret_key
=
settings
[
'server'
][
'secret_key'
]
if
not
searx_debug
\
...
...
@@ -538,14 +539,16 @@ def index():
if
output_format
==
'html'
:
if
'content'
in
result
and
result
[
'content'
]:
result
[
'content'
]
=
highlight_content
(
escape
(
result
[
'content'
][:
1024
]),
search_query
.
query
)
result
[
'title'
]
=
highlight_content
(
escape
(
result
[
'title'
]
or
u''
),
search_query
.
query
)
if
'title'
in
result
and
result
[
'title'
]:
result
[
'title'
]
=
highlight_content
(
escape
(
result
[
'title'
]
or
u''
),
search_query
.
query
)
else
:
if
result
.
get
(
'content'
):
result
[
'content'
]
=
html_to_text
(
result
[
'content'
])
.
strip
()
# removing html content and whitespace duplications
result
[
'title'
]
=
' '
.
join
(
html_to_text
(
result
[
'title'
])
.
strip
()
.
split
())
result
[
'pretty_url'
]
=
prettify_url
(
result
[
'url'
])
if
'url'
in
result
:
result
[
'pretty_url'
]
=
prettify_url
(
result
[
'url'
])
# TODO, check if timezone is calculated right
if
'publishedDate'
in
result
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment