Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
searx-engine
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
1
Merge Requests
1
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
searx-engine
Commits
1057e42c
Commit
1057e42c
authored
Oct 16, 2019
by
Adam Tauber
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[fix] update digg engine
parent
72459b24
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
12 additions
and
24 deletions
+12
-24
digg.py
searx/engines/digg.py
+12
-24
No files found.
searx/engines/digg.py
View file @
1057e42c
...
...
@@ -15,7 +15,8 @@ import string
from
dateutil
import
parser
from
json
import
loads
from
lxml
import
html
from
searx.url_utils
import
quote_plus
from
searx.url_utils
import
urlencode
from
datetime
import
datetime
# engine dependent config
categories
=
[
'news'
,
'social media'
]
...
...
@@ -23,7 +24,7 @@ paging = True
# search-url
base_url
=
'https://digg.com/'
search_url
=
base_url
+
'api/search/
{query}.json?position={position}
&format=html'
search_url
=
base_url
+
'api/search/
?{query}&from={position}&size=20
&format=html'
# specific xpath variables
results_xpath
=
'//article'
...
...
@@ -38,9 +39,9 @@ digg_cookie_chars = string.ascii_uppercase + string.ascii_lowercase +\
# do search-request
def
request
(
query
,
params
):
offset
=
(
params
[
'pageno'
]
-
1
)
*
1
0
offset
=
(
params
[
'pageno'
]
-
1
)
*
2
0
params
[
'url'
]
=
search_url
.
format
(
position
=
offset
,
query
=
quote_plus
(
query
))
query
=
urlencode
({
'q'
:
query
}
))
params
[
'cookies'
][
'frontend.auid'
]
=
''
.
join
(
random
.
choice
(
digg_cookie_chars
)
for
_
in
range
(
22
))
return
params
...
...
@@ -52,30 +53,17 @@ def response(resp):
search_result
=
loads
(
resp
.
text
)
if
'html'
not
in
search_result
or
search_result
[
'html'
]
==
''
:
return
results
dom
=
html
.
fromstring
(
search_result
[
'html'
])
# parse results
for
result
in
dom
.
xpath
(
results_xpath
):
url
=
result
.
attrib
.
get
(
'data-contenturl'
)
thumbnail
=
result
.
xpath
(
'.//img'
)[
0
]
.
attrib
.
get
(
'src'
)
title
=
''
.
join
(
result
.
xpath
(
title_xpath
))
content
=
''
.
join
(
result
.
xpath
(
content_xpath
))
pubdate
=
result
.
xpath
(
pubdate_xpath
)[
0
]
.
attrib
.
get
(
'datetime'
)
publishedDate
=
parser
.
parse
(
pubdate
)
# http to https
thumbnail
=
thumbnail
.
replace
(
"http://static.digg.com"
,
"https://static.digg.com"
)
for
result
in
search_result
[
'mapped'
]:
published
=
datetime
.
strptime
(
result
[
'created'
][
'ISO'
],
"
%
Y-
%
m-
%
d
%
H:
%
M:
%
S"
)
# append result
results
.
append
({
'url'
:
url
,
'title'
:
title
,
'content'
:
content
,
results
.
append
({
'url'
:
result
[
'url'
]
,
'title'
:
result
[
'title'
]
,
'content'
:
result
[
'excerpt'
]
,
'template'
:
'videos.html'
,
'publishedDate'
:
published
Date
,
'thumbnail'
:
thumbnail
})
'publishedDate'
:
published
,
'thumbnail'
:
result
[
'images'
][
'thumbImage'
]
})
# return results
return
results
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment