Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
searx-engine
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
1
Merge Requests
1
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
searx-engine
Commits
052a71d0
Unverified
Commit
052a71d0
authored
Aug 21, 2018
by
Adam Tauber
Committed by
GitHub
Aug 21, 2018
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #1368 from aliceinwire/gentoo_engine
Add Gentoo wiki search engine
parents
8ec55112
d24c58cf
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
132 additions
and
0 deletions
+132
-0
gentoo.py
searx/engines/gentoo.py
+128
-0
settings.yml
searx/settings.yml
+4
-0
No files found.
searx/engines/gentoo.py
0 → 100644
View file @
052a71d0
# -*- coding: utf-8 -*-
"""
Gentoo Wiki
@website https://wiki.gentoo.org
@provide-api yes
@using-api no
@results HTML
@stable no (HTML can change)
@parse url, title
"""
from
lxml
import
html
from
searx.engines.xpath
import
extract_text
from
searx.url_utils
import
urlencode
,
urljoin
# engine dependent config
# NOTE(review): these module-level names are presumably read by the searx
# engine loader; their exact semantics are not visible in this file.
categories = ['it']
language_support = True
paging = True
base_url = 'https://wiki.gentoo.org'

# xpath queries
# One <li> per hit inside the search-results list.
xpath_results = '//ul[@class="mw-search-results"]/li'
# Heading link of a single hit, evaluated relative to that <li>.
xpath_link = './/div[@class="mw-search-result-heading"]/a'
# cut 'en' from 'en-US', 'de' from 'de-CH', and so on
def locale_to_lang_code(locale):
    """Return the part of *locale* that precedes its first '-'.

    A locale without a '-' is returned unchanged, e.g. 'en-US' -> 'en',
    'de-CH' -> 'de', 'en' -> 'en'.
    """
    lang_code, _, _ = locale.partition('-')
    return lang_code
# Request URLs per language group. Both groups currently point at the same
# base site; they differ only in the search path: 'others' additionally asks
# for the translation profile filtered by the requested language code.
lang_urls = {
    'en': {
        'base': 'https://wiki.gentoo.org',
        'search': '/index.php?title=Special:Search&offset={offset}&{query}'
    },
    'others': {
        'base': 'https://wiki.gentoo.org',
        # Adjacent string literals are joined at compile time. A backslash
        # continuation *inside* the quotes would make any indentation of the
        # continued line part of the URL, so it is avoided here.
        'search': ('/index.php?title=Special:Search&offset={offset}&{query}'
                   '&profile=translation&languagefilter={language}')
    }
}
# get base & search URLs for selected language
def get_lang_urls(language):
    """Return the ``lang_urls`` entry to use for *language*.

    'en' selects the 'en' entry; every other value selects 'others'.
    The returned dict has the keys 'base' and 'search'.
    """
    url_key = 'en' if language == 'en' else 'others'
    return lang_urls[url_key]
# Native language names, keyed by language code. For a request in one of
# these languages the name is appended to the search query in parentheses
# (see request()) to narrow the results to that language.
main_langs = {
    'ar': 'العربية',
    'bg': 'Български',
    'cs': 'Česky',
    'da': 'Dansk',
    'el': 'Ελληνικά',
    'es': 'Español',
    'he': 'עברית',
    'hr': 'Hrvatski',
    'hu': 'Magyar',
    'it': 'Italiano',
    'ko': '한국어',
    'lt': 'Lietuviškai',
    'nl': 'Nederlands',
    'pl': 'Polski',
    'pt': 'Português',
    'ru': 'Русский',
    'sl': 'Slovenský',
    'th': 'ไทย',
    'uk': 'Українська',
    'zh': '简体中文',
}
# All keys of lang_urls plus all codes of main_langs in one dict; if a key
# exists in both, the main_langs value replaces the lang_urls one.
supported_languages = dict(lang_urls)
supported_languages.update(main_langs)
# do search-request
def request(query, params):
    """Fill in ``params['url']`` for a Gentoo wiki search and return *params*.

    *query* is concatenated with byte strings, so it is expected to be a
    bytes object. *params* must provide 'language' (a locale such as
    'en-US') and 'pageno' (the page number; page 1 yields offset 0).
    """
    # reduce the locale (e.g. 'en-US') to its language code ('en')
    lang_code = locale_to_lang_code(params['language'])

    # for languages listed in main_langs, append the native language name
    # in parentheses so the results are narrowed to that language
    if lang_code in main_langs:
        native_name = main_langs[lang_code].encode('utf-8')
        query += b' (' + native_name + b')'

    # request parameters; each page advances the offset by 20 results
    encoded_query = urlencode({'search': query})
    result_offset = (params['pageno'] - 1) * 20

    # build the final URL from the template for this language
    site = get_lang_urls(lang_code)
    url_template = site['base'] + site['search']
    params['url'] = url_template.format(query=encoded_query,
                                        offset=result_offset,
                                        language=lang_code)

    return params
# get response from search-request
def response(resp):
    """Parse a search result page into a list of result dicts.

    *resp* must provide ``search_params['language']`` (the locale the
    request was made with) and ``text`` (the HTML body).

    Returns a list of ``{'url': ..., 'title': ...}`` dicts, one for each
    entry matched by ``xpath_results`` that contains a heading link.
    """
    # get the base URL for the language in which the request was made
    language = locale_to_lang_code(resp.search_params['language'])
    site_url = get_lang_urls(language)['base']

    results = []
    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath(xpath_results):
        links = result.xpath(xpath_link)
        if not links:
            # An entry without a heading link cannot produce a result; skip
            # it instead of failing the whole response with an IndexError.
            continue

        link = links[0]
        results.append({
            # hrefs on the page may be relative, so resolve against the base
            'url': urljoin(site_url, link.attrib.get('href')),
            'title': extract_text(link)
        })

    return results
searx/settings.yml
View file @
052a71d0
...
...
@@ -273,6 +273,10 @@ engines:
timeout
:
3.0
disabled
:
True
-
name
:
gentoo
engine
:
gentoo
shortcut
:
ge
-
name
:
gitlab
engine
:
json_engine
paging
:
True
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment