Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
5ffccab8
Commit
5ffccab8
authored
Sep 08, 2016
by
c24b
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FIX] results_nb
parent
d1f79fed
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
10 additions
and
6 deletions
+10
-6
CERN.py
gargantext/util/crawlers/CERN.py
+7
-3
cern.py
moissonneurs/cern.py
+3
-3
No files found.
gargantext/util/crawlers/CERN.py
View file @
5ffccab8
...
@@ -77,7 +77,7 @@ class CernCrawler(Crawler):
...
@@ -77,7 +77,7 @@ class CernCrawler(Crawler):
return
downloaded
return
downloaded
def
get_ids
(
self
,
query
):
def
get_ids
(
self
,
query
):
'''get results nb + individual ids of search query'''
'''get results nb + individual ids of search query
return every time 200 ids
'''
dict_q
=
uparse
.
parse_qs
(
query
)
dict_q
=
uparse
.
parse_qs
(
query
)
#parameters for a global request
#parameters for a global request
dict_q
[
"p"
]
=
query
dict_q
[
"p"
]
=
query
...
@@ -89,7 +89,11 @@ class CernCrawler(Crawler):
...
@@ -89,7 +89,11 @@ class CernCrawler(Crawler):
r
=
requests
.
get
(
signed_url
)
r
=
requests
.
get
(
signed_url
)
print
(
signed_url
)
print
(
signed_url
)
self
.
ids
=
r
.
json
()
self
.
ids
=
r
.
json
()
#self.results_nb = len(self.ids)
print
(
type
(
self
.
ids
),
len
(
self
.
ids
))
#self.ids = (",").split(j_ids.replace("[|]", ""))
self
.
results_nb
=
len
(
self
.
ids
)
print
(
self
.
results_nb
,
"res"
)
#self.generate_urls()
#self.generate_urls()
return
(
self
.
ids
)
return
(
self
.
ids
)
def
generate_urls
(
self
):
def
generate_urls
(
self
):
...
@@ -100,7 +104,7 @@ class CernCrawler(Crawler):
...
@@ -100,7 +104,7 @@ class CernCrawler(Crawler):
''' for NEXT time'''
''' for NEXT time'''
raise
NotImplementedError
raise
NotImplementedError
@
timing
def
scan_results
(
self
,
query
):
def
scan_results
(
self
,
query
):
'''[OLD]scanner le nombre de resultat en récupérant 1 seul résultat
'''[OLD]scanner le nombre de resultat en récupérant 1 seul résultat
qui affiche uniquement l'auteur de la page 1
qui affiche uniquement l'auteur de la page 1
...
...
moissonneurs/cern.py
View file @
5ffccab8
...
@@ -26,9 +26,9 @@ def query( request):
...
@@ -26,9 +26,9 @@ def query( request):
if
source
[
"crawler"
]
is
not
None
:
if
source
[
"crawler"
]
is
not
None
:
crawlerbot
=
load_crawler
(
source
)()
crawlerbot
=
load_crawler
(
source
)()
#old raw way to get results_nb
#old raw way to get results_nb
#
results = crawlerbot.scan_results(query)
results
=
crawlerbot
.
scan_results
(
query
)
ids
=
crawlerbot
.
get_ids
(
query
)
#
ids = crawlerbot.get_ids(query)
return
JsonHttpResponse
({
"results_nb"
:
int
(
len
(
ids
)),
"ids"
:
ids
})
return
JsonHttpResponse
({
"results_nb"
:
crawlerbot
.
results_nb
})
def
save
(
request
,
project_id
):
def
save
(
request
,
project_id
):
'''save'''
'''save'''
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment