gargantext / searx-engine · Commits

Commit 731e3429 (Unverified)
Authored Dec 02, 2019 by Adam Tauber; committed by GitHub on Dec 02, 2019
Merge pull request #1744 from dalf/optimizations

[mod] speed optimization

Parents: 574cb25a, 85b37233
Showing 14 changed files with 106 additions and 103 deletions (+106 -103)
searx/engines/bing.py        +10 -10
searx/engines/dictzone.py     +4  -4
searx/engines/doku.py         +8  -7
searx/engines/duckduckgo.py   +5  -5
searx/engines/duden.py        +8  -7
searx/engines/gigablast.py    +3  -2
searx/engines/google.py      +16 -16
searx/engines/startpage.py    +5  -4
searx/engines/wikidata.py     +1 -17
searx/engines/xpath.py       +10 -10
searx/engines/yahoo.py        +9  -9
searx/results.py              +5  -3
searx/utils.py               +15  -0
searx/webapp.py               +7  -9
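
Nearly every hunk below is the same mechanical substitution: element.xpath(xpath_str) becomes eval_xpath(element, xpath_str). The helper pair behind it is added to searx/utils.py in this commit (it previously lived as a private helper in searx/engines/wikidata.py) and memoizes the compiled lxml.etree.XPath object per expression string, so repeated queries no longer re-parse the XPath on every call. Condensed from the searx/utils.py hunk further down, with the engine-side change shown as comments:

from lxml.etree import XPath

xpath_cache = dict()

def get_xpath(xpath_str):
    # compile each distinct expression once and memoize it
    result = xpath_cache.get(xpath_str, None)
    if result is None:
        result = XPath(xpath_str)
        xpath_cache[xpath_str] = result
    return result

def eval_xpath(element, xpath_str):
    # evaluate the cached, pre-compiled expression against an element
    xpath = get_xpath(xpath_str)
    return xpath(element)

# engine-side change, e.g. in searx/engines/bing.py:
#   old: for result in dom.xpath('//div[@class="sa_cc"]'):
#   new: for result in eval_xpath(dom, '//div[@class="sa_cc"]'):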
searx/engines/bing.py
@@ -18,7 +18,7 @@ from lxml import html
 from searx import logger, utils
 from searx.engines.xpath import extract_text
 from searx.url_utils import urlencode
-from searx.utils import match_language, gen_useragent
+from searx.utils import match_language, gen_useragent, eval_xpath

 logger = logger.getChild('bing engine')
@@ -65,11 +65,11 @@ def response(resp):
     dom = html.fromstring(resp.text)
     # parse results
-    for result in dom.xpath('//div[@class="sa_cc"]'):
-        link = result.xpath('.//h3/a')[0]
+    for result in eval_xpath(dom, '//div[@class="sa_cc"]'):
+        link = eval_xpath(result, './/h3/a')[0]
         url = link.attrib.get('href')
         title = extract_text(link)
-        content = extract_text(result.xpath('.//p'))
+        content = extract_text(eval_xpath(result, './/p'))
         # append result
         results.append({'url': url,
@@ -77,11 +77,11 @@ def response(resp):
                         'content': content})
     # parse results again if nothing is found yet
-    for result in dom.xpath('//li[@class="b_algo"]'):
-        link = result.xpath('.//h2/a')[0]
+    for result in eval_xpath(dom, '//li[@class="b_algo"]'):
+        link = eval_xpath(result, './/h2/a')[0]
         url = link.attrib.get('href')
         title = extract_text(link)
-        content = extract_text(result.xpath('.//p'))
+        content = extract_text(eval_xpath(result, './/p'))
         # append result
         results.append({'url': url,
@@ -89,7 +89,7 @@ def response(resp):
                         'content': content})
     try:
-        result_len_container = "".join(dom.xpath('//span[@class="sb_count"]/text()'))
+        result_len_container = "".join(eval_xpath(dom, '//span[@class="sb_count"]/text()'))
         result_len_container = utils.to_string(result_len_container)
         if "-" in result_len_container:
             # Remove the part "from-to" for paginated request ...
@@ -113,9 +113,9 @@ def response(resp):
 def _fetch_supported_languages(resp):
     supported_languages = []
     dom = html.fromstring(resp.text)
-    options = dom.xpath('//div[@id="limit-languages"]//input')
+    options = eval_xpath(dom, '//div[@id="limit-languages"]//input')
     for option in options:
-        code = option.xpath('./@id')[0].replace('_', '-')
+        code = eval_xpath(option, './@id')[0].replace('_', '-')
         if code == 'nb':
             code = 'no'
         supported_languages.append(code)

searx/engines/dictzone.py
@@ -11,7 +11,7 @@
 import re
 from lxml import html
-from searx.utils import is_valid_lang
+from searx.utils import is_valid_lang, eval_xpath
 from searx.url_utils import urljoin

 categories = ['general']
@@ -47,14 +47,14 @@ def response(resp):
     dom = html.fromstring(resp.text)

-    for k, result in enumerate(dom.xpath(results_xpath)[1:]):
+    for k, result in enumerate(eval_xpath(dom, results_xpath)[1:]):
         try:
-            from_result, to_results_raw = result.xpath('./td')
+            from_result, to_results_raw = eval_xpath(result, './td')
         except:
             continue

         to_results = []
-        for to_result in to_results_raw.xpath('./p/a'):
+        for to_result in eval_xpath(to_results_raw, './p/a'):
             t = to_result.text_content()
             if t.strip():
                 to_results.append(to_result.text_content())

searx/engines/doku.py
@@ -11,6 +11,7 @@
 from lxml.html import fromstring
 from searx.engines.xpath import extract_text
+from searx.utils import eval_xpath
 from searx.url_utils import urlencode

 # engine dependent config
@@ -45,16 +46,16 @@ def response(resp):
     # parse results
     # Quickhits
-    for r in doc.xpath('//div[@class="search_quickresult"]/ul/li'):
+    for r in eval_xpath(doc, '//div[@class="search_quickresult"]/ul/li'):
         try:
-            res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1]
+            res_url = eval_xpath(r, './/a[@class="wikilink1"]/@href')[-1]
         except:
             continue

         if not res_url:
             continue

-        title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title'))
+        title = extract_text(eval_xpath(r, './/a[@class="wikilink1"]/@title'))

         # append result
         results.append({'title': title,
@@ -62,13 +63,13 @@ def response(resp):
                         'url': base_url + res_url})

     # Search results
-    for r in doc.xpath('//dl[@class="search_results"]/*'):
+    for r in eval_xpath(doc, '//dl[@class="search_results"]/*'):
         try:
             if r.tag == "dt":
-                res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1]
-                title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title'))
+                res_url = eval_xpath(r, './/a[@class="wikilink1"]/@href')[-1]
+                title = extract_text(eval_xpath(r, './/a[@class="wikilink1"]/@title'))
             elif r.tag == "dd":
-                content = extract_text(r.xpath('.'))
+                content = extract_text(eval_xpath(r, '.'))

                 # append result
                 results.append({'title': title,

searx/engines/duckduckgo.py
@@ -18,7 +18,7 @@ from json import loads
 from searx.engines.xpath import extract_text
 from searx.poolrequests import get
 from searx.url_utils import urlencode
-from searx.utils import match_language
+from searx.utils import match_language, eval_xpath

 # engine dependent config
 categories = ['general']
@@ -106,19 +106,19 @@ def response(resp):
     doc = fromstring(resp.text)

     # parse results
-    for i, r in enumerate(doc.xpath(result_xpath)):
+    for i, r in enumerate(eval_xpath(doc, result_xpath)):
         if i >= 30:
             break
         try:
-            res_url = r.xpath(url_xpath)[-1]
+            res_url = eval_xpath(r, url_xpath)[-1]
         except:
             continue

         if not res_url:
             continue

-        title = extract_text(r.xpath(title_xpath))
-        content = extract_text(r.xpath(content_xpath))
+        title = extract_text(eval_xpath(r, title_xpath))
+        content = extract_text(eval_xpath(r, content_xpath))

         # append result
         results.append({'title': title,

searx/engines/duden.py
@@ -11,6 +11,7 @@
 from lxml import html, etree
 import re
 from searx.engines.xpath import extract_text
+from searx.utils import eval_xpath
 from searx.url_utils import quote, urljoin
 from searx import logger
@@ -52,9 +53,9 @@ def response(resp):
     dom = html.fromstring(resp.text)

     try:
-        number_of_results_string = re.sub('[^0-9]', '',
-            dom.xpath('//a[@class="active" and contains(@href,"/suchen/dudenonline")]/span/text()')[0])
+        number_of_results_string =\
+            re.sub('[^0-9]', '',
+                   eval_xpath(dom, '//a[@class="active" and contains(@href,"/suchen/dudenonline")]/span/text()')[0])

         results.append({'number_of_results': int(number_of_results_string)})
@@ -62,12 +63,12 @@ def response(resp):
         logger.debug("Couldn't read number of results.")
         pass

-    for result in dom.xpath('//section[not(contains(@class, "essay"))]'):
+    for result in eval_xpath(dom, '//section[not(contains(@class, "essay"))]'):
         try:
-            url = result.xpath('.//h2/a')[0].get('href')
+            url = eval_xpath(result, './/h2/a')[0].get('href')
             url = urljoin(base_url, url)
-            title = result.xpath('string(.//h2/a)').strip()
-            content = extract_text(result.xpath('.//p'))
+            title = eval_xpath(result, 'string(.//h2/a)').strip()
+            content = extract_text(eval_xpath(result, './/p'))
             # append result
             results.append({'url': url,
                             'title': title,

searx/engines/gigablast.py
@@ -15,6 +15,7 @@ from json import loads
 from time import time
 from lxml.html import fromstring
 from searx.url_utils import urlencode
+from searx.utils import eval_xpath

 # engine dependent config
 categories = ['general']
@@ -99,9 +100,9 @@ def response(resp):
 def _fetch_supported_languages(resp):
     supported_languages = []
     dom = fromstring(resp.text)
-    links = dom.xpath('//span[@id="menu2"]/a')
+    links = eval_xpath(dom, '//span[@id="menu2"]/a')
     for link in links:
-        href = link.xpath('./@href')[0].split('lang%3A')
+        href = eval_xpath(link, './@href')[0].split('lang%3A')
         if len(href) == 2:
             code = href[1].split('_')
             if len(code) == 2:

searx/engines/google.py
@@ -14,7 +14,7 @@ from lxml import html, etree
 from searx.engines.xpath import extract_text, extract_url
 from searx import logger
 from searx.url_utils import urlencode, urlparse, parse_qsl
-from searx.utils import match_language
+from searx.utils import match_language, eval_xpath

 logger = logger.getChild('google engine')
@@ -156,7 +156,7 @@ def parse_url(url_string, google_hostname):
 # returns extract_text on the first result selected by the xpath or None
 def extract_text_from_dom(result, xpath):
-    r = result.xpath(xpath)
+    r = eval_xpath(result, xpath)
     if len(r) > 0:
         return extract_text(r[0])
     return None
@@ -227,21 +227,21 @@ def response(resp):
     # convert the text to dom
     dom = html.fromstring(resp.text)

-    instant_answer = dom.xpath('//div[@id="_vBb"]//text()')
+    instant_answer = eval_xpath(dom, '//div[@id="_vBb"]//text()')
     if instant_answer:
         results.append({'answer': u' '.join(instant_answer)})
     try:
-        results_num = int(dom.xpath('//div[@id="resultStats"]//text()')[0]
+        results_num = int(eval_xpath(dom, '//div[@id="resultStats"]//text()')[0]
                           .split()[1].replace(',', ''))
         results.append({'number_of_results': results_num})
     except:
         pass

     # parse results
-    for result in dom.xpath(results_xpath):
+    for result in eval_xpath(dom, results_xpath):
         try:
-            title = extract_text(result.xpath(title_xpath)[0])
-            url = parse_url(extract_url(result.xpath(url_xpath), google_url), google_hostname)
+            title = extract_text(eval_xpath(result, title_xpath)[0])
+            url = parse_url(extract_url(eval_xpath(result, url_xpath), google_url), google_hostname)
             parsed_url = urlparse(url, google_hostname)

             # map result
@@ -250,7 +250,7 @@ def response(resp):
                 continue
             # if parsed_url.path.startswith(maps_path) or parsed_url.netloc.startswith(map_hostname_start):
             #     print "yooooo"*30
-            #     x = result.xpath(map_near)
+            #     x = eval_xpath(result, map_near)
             #     if len(x) > 0:
             #         # map : near the location
             #         results = results + parse_map_near(parsed_url, x, google_hostname)
@@ -287,11 +287,11 @@ def response(resp):
             continue

     # parse suggestion
-    for suggestion in dom.xpath(suggestion_xpath):
+    for suggestion in eval_xpath(dom, suggestion_xpath):
         # append suggestion
         results.append({'suggestion': extract_text(suggestion)})

-    for correction in dom.xpath(spelling_suggestion_xpath):
+    for correction in eval_xpath(dom, spelling_suggestion_xpath):
         results.append({'correction': extract_text(correction)})

     # return results
@@ -300,9 +300,9 @@ def response(resp):
 def parse_images(result, google_hostname):
     results = []
-    for image in result.xpath(images_xpath):
-        url = parse_url(extract_text(image.xpath(image_url_xpath)[0]), google_hostname)
-        img_src = extract_text(image.xpath(image_img_src_xpath)[0])
+    for image in eval_xpath(result, images_xpath):
+        url = parse_url(extract_text(eval_xpath(image, image_url_xpath)[0]), google_hostname)
+        img_src = extract_text(eval_xpath(image, image_img_src_xpath)[0])

         # append result
         results.append({'url': url,
@@ -389,10 +389,10 @@ def attributes_to_html(attributes):
 def _fetch_supported_languages(resp):
     supported_languages = {}
     dom = html.fromstring(resp.text)
-    options = dom.xpath('//*[@id="langSec"]//input[@name="lr"]')
+    options = eval_xpath(dom, '//*[@id="langSec"]//input[@name="lr"]')
     for option in options:
-        code = option.xpath('./@value')[0].split('_')[-1]
-        name = option.xpath('./@data-name')[0].title()
+        code = eval_xpath(option, './@value')[0].split('_')[-1]
+        name = eval_xpath(option, './@data-name')[0].title()
         supported_languages[code] = {"name": name}

     return supported_languages

searx/engines/startpage.py
@@ -16,6 +16,7 @@ from datetime import datetime, timedelta
 import re
 from searx.engines.xpath import extract_text
 from searx.languages import language_codes
+from searx.utils import eval_xpath

 # engine dependent config
 categories = ['general']
@@ -70,8 +71,8 @@ def response(resp):
     dom = html.fromstring(resp.text)

     # parse results
-    for result in dom.xpath(results_xpath):
-        links = result.xpath(link_xpath)
+    for result in eval_xpath(dom, results_xpath):
+        links = eval_xpath(result, link_xpath)
         if not links:
             continue
         link = links[0]
@@ -87,8 +88,8 @@ def response(resp):
         title = extract_text(link)

-        if result.xpath(content_xpath):
-            content = extract_text(result.xpath(content_xpath))
+        if eval_xpath(result, content_xpath):
+            content = extract_text(eval_xpath(result, content_xpath))
         else:
             content = ''

searx/engines/wikidata.py
@@ -16,7 +16,7 @@ from searx.poolrequests import get
 from searx.engines.xpath import extract_text
 from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
 from searx.url_utils import urlencode
-from searx.utils import match_language
+from searx.utils import match_language, eval_xpath

 from json import loads
 from lxml.html import fromstring
@@ -57,22 +57,6 @@ language_fallback_xpath = '//sup[contains(@class,"wb-language-fallback-indicator
 calendar_name_xpath = './/sup[contains(@class,"wb-calendar-name")]'
 media_xpath = value_xpath + '//div[contains(@class,"commons-media-caption")]//a'

-# xpath_cache
-xpath_cache = {}
-
-
-def get_xpath(xpath_str):
-    result = xpath_cache.get(xpath_str, None)
-    if not result:
-        result = etree.XPath(xpath_str)
-        xpath_cache[xpath_str] = result
-    return result
-
-
-def eval_xpath(element, xpath_str):
-    xpath = get_xpath(xpath_str)
-    return xpath(element)
-
-
 def get_id_cache(result):
     id_cache = {}

searx/engines/xpath.py
 from lxml import html
 from lxml.etree import _ElementStringResult, _ElementUnicodeResult
-from searx.utils import html_to_text
+from searx.utils import html_to_text, eval_xpath
 from searx.url_utils import unquote, urlencode, urljoin, urlparse

 search_url = None
@@ -104,15 +104,15 @@ def response(resp):
     results = []
     dom = html.fromstring(resp.text)

     if results_xpath:
-        for result in dom.xpath(results_xpath):
-            url = extract_url(result.xpath(url_xpath), search_url)
-            title = extract_text(result.xpath(title_xpath))
-            content = extract_text(result.xpath(content_xpath))
+        for result in eval_xpath(dom, results_xpath):
+            url = extract_url(eval_xpath(result, url_xpath), search_url)
+            title = extract_text(eval_xpath(result, title_xpath))
+            content = extract_text(eval_xpath(result, content_xpath))
             tmp_result = {'url': url, 'title': title, 'content': content}

             # add thumbnail if available
             if thumbnail_xpath:
-                thumbnail_xpath_result = result.xpath(thumbnail_xpath)
+                thumbnail_xpath_result = eval_xpath(result, thumbnail_xpath)
                 if len(thumbnail_xpath_result) > 0:
                     tmp_result['img_src'] = extract_url(thumbnail_xpath_result, search_url)
@@ -120,14 +120,14 @@ def response(resp):
     else:
         for url, title, content in zip(
             (extract_url(x, search_url) for
-                x in dom.xpath(url_xpath)),
-            map(extract_text, dom.xpath(title_xpath)),
-            map(extract_text, dom.xpath(content_xpath))
+                x in eval_xpath(dom, url_xpath)),
+            map(extract_text, eval_xpath(dom, title_xpath)),
+            map(extract_text, eval_xpath(dom, content_xpath))
         ):
             results.append({'url': url, 'title': title, 'content': content})

     if not suggestion_xpath:
         return results

-    for suggestion in dom.xpath(suggestion_xpath):
+    for suggestion in eval_xpath(dom, suggestion_xpath):
         results.append({'suggestion': extract_text(suggestion)})

     return results

searx/engines/yahoo.py
@@ -14,7 +14,7 @@
 from lxml import html
 from searx.engines.xpath import extract_text, extract_url
 from searx.url_utils import unquote, urlencode
-from searx.utils import match_language
+from searx.utils import match_language, eval_xpath

 # engine dependent config
 categories = ['general']
@@ -109,21 +109,21 @@ def response(resp):
     dom = html.fromstring(resp.text)

     try:
-        results_num = int(dom.xpath('//div[@class="compPagination"]/span[last()]/text()')[0]
+        results_num = int(eval_xpath(dom, '//div[@class="compPagination"]/span[last()]/text()')[0]
                           .split()[0].replace(',', ''))
         results.append({'number_of_results': results_num})
     except:
         pass

     # parse results
-    for result in dom.xpath(results_xpath):
+    for result in eval_xpath(dom, results_xpath):
         try:
-            url = parse_url(extract_url(result.xpath(url_xpath), search_url))
-            title = extract_text(result.xpath(title_xpath)[0])
+            url = parse_url(extract_url(eval_xpath(result, url_xpath), search_url))
+            title = extract_text(eval_xpath(result, title_xpath)[0])
         except:
             continue

-        content = extract_text(result.xpath(content_xpath)[0])
+        content = extract_text(eval_xpath(result, content_xpath)[0])

         # append result
         results.append({'url': url,
@@ -131,7 +131,7 @@ def response(resp):
                         'content': content})

     # if no suggestion found, return results
-    suggestions = dom.xpath(suggestion_xpath)
+    suggestions = eval_xpath(dom, suggestion_xpath)
     if not suggestions:
         return results
@@ -148,9 +148,9 @@ def response(resp):
 def _fetch_supported_languages(resp):
     supported_languages = []
     dom = html.fromstring(resp.text)
-    options = dom.xpath('//div[@id="yschlang"]/span/label/input')
+    options = eval_xpath(dom, '//div[@id="yschlang"]/span/label/input')
     for option in options:
-        code_parts = option.xpath('./@value')[0][5:].split('_')
+        code_parts = eval_xpath(option, './@value')[0][5:].split('_')
         if len(code_parts) == 2:
             code = code_parts[0] + '-' + code_parts[1].upper()
         else:

searx/results.py
@@ -67,8 +67,9 @@ def merge_two_infoboxes(infobox1, infobox2):
         for url2 in infobox2.get('urls', []):
             unique_url = True
-            for url1 in infobox1.get('urls', []):
-                if compare_urls(urlparse(url1.get('url', '')), urlparse(url2.get('url', ''))):
+            parsed_url2 = urlparse(url2.get('url', ''))
+            for url1 in urls1:
+                if compare_urls(urlparse(url1.get('url', '')), parsed_url2):
                     unique_url = False
                     break
             if unique_url:
@@ -188,8 +189,9 @@ class ResultContainer(object):
             add_infobox = True
             infobox_id = infobox.get('id', None)
             if infobox_id is not None:
+                parsed_url_infobox_id = urlparse(infobox_id)
                 for existingIndex in self.infoboxes:
-                    if compare_urls(urlparse(existingIndex.get('id', '')), urlparse(infobox_id)):
+                    if compare_urls(urlparse(existingIndex.get('id', '')), parsed_url_infobox_id):
                         merge_two_infoboxes(existingIndex, infobox)
                         add_infobox = False

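The searx/results.py hunks above are a different, smaller micro-optimization: urlparse(url2.get('url', '')) and urlparse(infobox_id) do not change inside the inner loops, so the commit computes them once before the loop instead of once per comparison. The same hoisting pattern in isolation (a minimal sketch with hypothetical names, not searx code):

from urllib.parse import urlparse

def is_duplicate(existing_urls, candidate_url):
    # parse the candidate once, outside the loop, instead of on every comparison
    parsed_candidate = urlparse(candidate_url)
    for url in existing_urls:
        if urlparse(url) == parsed_candidate:
            return True
    return False
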
searx/utils.py
@@ -13,6 +13,7 @@ from numbers import Number
 from os.path import splitext, join
 from io import open
 from random import choice
+from lxml.etree import XPath
 import sys
 import json
@@ -51,6 +52,7 @@ ecma_unescape2_re = re.compile(r'%([0-9a-fA-F]{2})', re.UNICODE)
 useragents = json.loads(open(os.path.dirname(os.path.realpath(__file__))
                              + "/data/useragents.json", 'r', encoding='utf-8').read())

+xpath_cache = dict()
 lang_to_lc_cache = dict()
@@ -450,3 +452,16 @@ def get_engine_from_settings(name):
         return engine

     return {}
+
+
+def get_xpath(xpath_str):
+    result = xpath_cache.get(xpath_str, None)
+    if result is None:
+        result = XPath(xpath_str)
+        xpath_cache[xpath_str] = result
+    return result
+
+
+def eval_xpath(element, xpath_str):
+    xpath = get_xpath(xpath_str)
+    return xpath(element)

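Not part of the commit, just an illustrative sketch of why the cache pays off: element.xpath(str) has to parse the expression string on every call, while a pre-compiled lxml.etree.XPath object is parsed once and can be evaluated many times, which is what eval_xpath provides via xpath_cache. A rough way to compare the two (hypothetical example; timings depend entirely on the machine and the document):

import timeit
from lxml import html
from lxml.etree import XPath

dom = html.fromstring('<div>' + '<p class="x">hit</p>' * 100 + '</div>')

# expression string parsed on every call, as the old engine code did
per_call = timeit.timeit(lambda: dom.xpath('//p[@class="x"]'), number=10000)

# expression compiled once, evaluated repeatedly, as eval_xpath now does
compiled = XPath('//p[@class="x"]')
cached = timeit.timeit(lambda: compiled(dom), number=10000)

print('per-call parse: %.3fs  cached: %.3fs' % (per_call, cached))
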
searx/webapp.py
@@ -157,20 +157,18 @@ outgoing_proxies = settings['outgoing'].get('proxies') or None
 @babel.localeselector
 def get_locale():
-    locale = request.accept_languages.best_match(settings['locales'].keys())
-
-    if request.preferences.get_value('locale') != '':
-        locale = request.preferences.get_value('locale')
+    if 'locale' in request.form\
+       and request.form['locale'] in settings['locales']:
+        return request.form['locale']

     if 'locale' in request.args\
        and request.args['locale'] in settings['locales']:
-        locale = request.args['locale']
+        return request.args['locale']

-    if 'locale' in request.form\
-       and request.form['locale'] in settings['locales']:
-        locale = request.form['locale']
+    if request.preferences.get_value('locale') != '':
+        return request.preferences.get_value('locale')

-    return locale
+    return request.accept_languages.best_match(settings['locales'].keys())


 # code-highlighter