Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
searx-engine
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
1
Merge Requests
1
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
searx-engine
Commits
abcbcec0
Unverified
Commit
abcbcec0
authored
Jan 05, 2019
by
Noémi Ványi
Committed by
GitHub
Jan 05, 2019
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #1444 from Venca24/devel_google_videos
[fix] google videos engine
parents
899ba5d6
2456b8f5
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
41 additions
and
10 deletions
+41
-10
google_videos.py
searx/engines/google_videos.py
+18
-6
test_google_videos.py
tests/unit/engines/test_google_videos.py
+23
-4
No files found.
searx/engines/google_videos.py
View file @
abcbcec0
...
@@ -7,7 +7,7 @@
...
@@ -7,7 +7,7 @@
@using-api no
@using-api no
@results HTML
@results HTML
@stable no
@stable no
@parse url, title, content
@parse url, title, content
, thumbnail
"""
"""
from
datetime
import
date
,
timedelta
from
datetime
import
date
,
timedelta
...
@@ -15,7 +15,7 @@ from json import loads
...
@@ -15,7 +15,7 @@ from json import loads
from
lxml
import
html
from
lxml
import
html
from
searx.engines.xpath
import
extract_text
from
searx.engines.xpath
import
extract_text
from
searx.url_utils
import
urlencode
from
searx.url_utils
import
urlencode
import
re
# engine dependent config
# engine dependent config
categories
=
[
'videos'
]
categories
=
[
'videos'
]
...
@@ -25,7 +25,7 @@ time_range_support = True
...
@@ -25,7 +25,7 @@ time_range_support = True
number_of_results
=
10
number_of_results
=
10
search_url
=
'https://www.google.com/search'
\
search_url
=
'https://www.google.com/search'
\
'?{query}'
\
'?
q=
{query}'
\
'&tbm=vid'
\
'&tbm=vid'
\
'&{search_options}'
'&{search_options}'
time_range_attr
=
"qdr:{range}"
time_range_attr
=
"qdr:{range}"
...
@@ -69,15 +69,27 @@ def response(resp):
...
@@ -69,15 +69,27 @@ def response(resp):
# parse results
# parse results
for
result
in
dom
.
xpath
(
'//div[@class="g"]'
):
for
result
in
dom
.
xpath
(
'//div[@class="g"]'
):
title
=
extract_text
(
result
.
xpath
(
'.//h3
/a
'
))
title
=
extract_text
(
result
.
xpath
(
'.//h3'
))
url
=
result
.
xpath
(
'.//
h3
/a/@href'
)[
0
]
url
=
result
.
xpath
(
'.//
div[@class="r"]
/a/@href'
)[
0
]
content
=
extract_text
(
result
.
xpath
(
'.//span[@class="st"]'
))
content
=
extract_text
(
result
.
xpath
(
'.//span[@class="st"]'
))
# get thumbnails
script
=
str
(
dom
.
xpath
(
'//script[contains(., "_setImagesSrc")]'
)[
0
]
.
text
)
id
=
result
.
xpath
(
'.//div[@class="s"]//img/@id'
)[
0
]
thumbnails_data
=
re
.
findall
(
's=
\'
(.*?)(?:
\\\\
[a-z,1-9,
\\\\
]+
\'
|
\'
)
\
;var ii=
\
[(?:|[
\'
vidthumb
\
d+
\'
,]+)
\'
'
+
id
,
script
)
tmp
=
[]
if
len
(
thumbnails_data
)
!=
0
:
tmp
=
re
.
findall
(
'(data:image/jpeg;base64,[a-z,A-Z,0-9,/,
\
+]+)'
,
thumbnails_data
[
0
])
thumbnail
=
''
if
len
(
tmp
)
!=
0
:
thumbnail
=
tmp
[
-
1
]
# append result
# append result
results
.
append
({
'url'
:
url
,
results
.
append
({
'url'
:
url
,
'title'
:
title
,
'title'
:
title
,
'content'
:
content
,
'content'
:
content
,
'thumbnail'
:
''
,
'thumbnail'
:
thumbnail
,
'template'
:
'videos.html'
})
'template'
:
'videos.html'
})
return
results
return
results
tests/unit/engines/test_google_videos.py
View file @
abcbcec0
...
@@ -30,16 +30,34 @@ class TestGoogleVideosEngine(SearxTestCase):
...
@@ -30,16 +30,34 @@ class TestGoogleVideosEngine(SearxTestCase):
<div>
<div>
<div>
<div>
<div class="g">
<div class="g">
<div>
<div class="r">
<h3><a href="url_1">Title 1</h3>
<a href="url_1"><h3>Title 1</h3></a>
</div>
<div class="s">
<div>
<a>
<g-img>
<img id="vidthumb1">
</g-img>
</a>
</div>
</div>
</div>
<div>
<div>
<span class="st">Content 1</span>
<span class="st">Content 1</span>
</div>
</div>
</div>
</div>
<div class="g">
<div class="g">
<div>
<div class="r">
<h3><a href="url_2">Title 2</h3>
<a href="url_2"><h3>Title 2</h3></a>
</div>
<div class="s">
<div>
<a>
<g-img>
<img id="vidthumb2">
</g-img>
</a>
</div>
</div>
</div>
<div>
<div>
<span class="st">Content 2</span>
<span class="st">Content 2</span>
...
@@ -47,6 +65,7 @@ class TestGoogleVideosEngine(SearxTestCase):
...
@@ -47,6 +65,7 @@ class TestGoogleVideosEngine(SearxTestCase):
</div>
</div>
</div>
</div>
</div>
</div>
<script>function _setImagesSrc(c,d,e){}</script>
"""
"""
response
=
mock
.
Mock
(
text
=
html
)
response
=
mock
.
Mock
(
text
=
html
)
results
=
google_videos
.
response
(
response
)
results
=
google_videos
.
response
(
response
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment