Commit 7eadf91e authored by delanoe's avatar delanoe

[FEAT] REPEC via Multivac. Done. TODO: some dates still need fixing.

parent ba042fa0
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
from ._Crawler import * from ._Crawler import *
import json import json
from gargantext.settings import API_TOKENS from gargantext.settings import API_TOKENS
from math import trunc
class MultivacCrawler(Crawler): class MultivacCrawler(Crawler):
''' Multivac API CLIENT''' ''' Multivac API CLIENT'''
...@@ -24,6 +24,7 @@ class MultivacCrawler(Crawler): ...@@ -24,6 +24,7 @@ class MultivacCrawler(Crawler):
# Final EndPoints # Final EndPoints
# TODO : Change endpoint according type of database # TODO : Change endpoint according type of database
self.URL = self.BASE_URL + "/" + self.API_URL self.URL = self.BASE_URL + "/" + self.API_URL
self.status = []
def __format_query__(self, query=None): def __format_query__(self, query=None):
'''formating the query''' '''formating the query'''
...@@ -58,6 +59,7 @@ class MultivacCrawler(Crawler): ...@@ -58,6 +59,7 @@ class MultivacCrawler(Crawler):
, params = querystring , params = querystring
) )
print(querystring)
# Validation : 200 if ok else raise Value # Validation : 200 if ok else raise Value
if response.status_code == 200: if response.status_code == 200:
charset = response.headers["Content-Type"].split("; ")[1].split("=")[1] charset = response.headers["Content-Type"].split("; ")[1].split("=")[1]
...@@ -77,12 +79,13 @@ class MultivacCrawler(Crawler): ...@@ -77,12 +79,13 @@ class MultivacCrawler(Crawler):
return self.results_nb return self.results_nb
def download(self, query): def download(self, query):
self.path = "/tmp/MultivacResults.xml"
downloaded = False downloaded = False
self.status.append("fetching results") self.status.append("fetching results")
corpus = [] corpus = []
paging = 100 paging = 100
self.query_max = self.results_nb self.query_max = self.scan_results(query)
if self.query_max > QUERY_SIZE_N_MAX: if self.query_max > QUERY_SIZE_N_MAX:
msg = "Invalid sample size N = %i (max = %i)" % (self.query_max, QUERY_SIZE_N_MAX) msg = "Invalid sample size N = %i (max = %i)" % (self.query_max, QUERY_SIZE_N_MAX)
...@@ -91,10 +94,13 @@ class MultivacCrawler(Crawler): ...@@ -91,10 +94,13 @@ class MultivacCrawler(Crawler):
with open(self.path, 'wb') as f: with open(self.path, 'wb') as f:
for page in range(0, self.query_max, paging): #for page in range(1, self.query_max, paging):
corpus.append(self.get(self.query, fromPage=page, count=paging)["hits"]) for page in range(1, trunc(self.query_max / 100) + 1):
docs = self._get(query, fromPage=page, count=paging)["results"]["hits"]
f.write(str(corpus).encode("utf-8")) for doc in docs:
corpus.append(doc)
f.write(json.dumps(corpus).encode("utf-8"))
downloaded = True downloaded = True
return downloaded return downloaded
...@@ -45,7 +45,7 @@ def save(request, project_id): ...@@ -45,7 +45,7 @@ def save(request, project_id):
print(query, N) print(query, N)
#for next time #for next time
#ids = request.POST["ids"] #ids = request.POST["ids"]
source = get_resource(RESOURCE_TYPE_SCOAP) source = get_resource(RESOURCE_TYPE_MULTIVAC)
if N == 0: if N == 0:
raise Http404() raise Http404()
if N > QUERY_SIZE_N_MAX: if N > QUERY_SIZE_N_MAX:
......
...@@ -671,7 +671,7 @@ ...@@ -671,7 +671,7 @@
$("#submit_thing").prop('disabled' , false) $("#submit_thing").prop('disabled' , false)
//$("#submit_thing").attr('onclick', testCERN(query, N)); //$("#submit_thing").attr('onclick', testCERN(query, N));
$("#submit_thing").on("click", function(){ $("#submit_thing").on("click", function(){
testCERN(pubmedquery, N); saveMultivac(pubmedquery, N);
//$("#submit_thing").onclick() //$("#submit_thing").onclick()
})} })}
//(N > {{query_size}}) //(N > {{query_size}})
...@@ -680,7 +680,7 @@ ...@@ -680,7 +680,7 @@
$('#submit_thing').prop('disabled', false); $('#submit_thing').prop('disabled', false);
$("#submit_thing").html("Processing a sample file") $("#submit_thing").html("Processing a sample file")
$("#submit_thing").on("click", function(){ $("#submit_thing").on("click", function(){
testCERN(pubmedquery, N); saveMultivac(pubmedquery, N);
//$("#submit_thing").onclick() //$("#submit_thing").onclick()
})} })}
} }
...@@ -886,6 +886,60 @@ ...@@ -886,6 +886,60 @@
}); });
} }
/**
 * Submit a Multivac save request for the project shown in the current page.
 *
 * POSTs `query` and `N` to "/moissonneurs/multivac/save/<projectid>", where
 * the project id is whatever follows "projects" in the current URL with every
 * slash removed. After the response arrives, on success as well as on error,
 * the "#addcorpus" modal is hidden and the "#wait" modal is shown 600 ms later.
 *
 * @param query  query string sent as the "query" field; nothing is sent when
 *               it is empty or otherwise falsy
 * @param N      value sent unchanged as the "N" field
 */
function saveMultivac(query, N) {
    console.log("In Multivac")
    // Guard clause: no request without a query.
    if (!query || query == "") return;

    var payload = { "query": query, "N": N };
    // Everything after "projects" in the URL, with all slashes stripped.
    var projectid = window.location.href.split("projects")[1].replace(/\//g, '')
    console.log(payload)

    // Shared by both callbacks: swap the "add corpus" dialog for the waiting
    // dialog after a short delay.
    var swapToWaitModal = function() {
        setTimeout(function() {
            $('#addcorpus').modal('hide')
            $("#wait").modal("show");
        }, 600);
    };

    $.ajax({
        type: 'POST',
        url: window.location.origin + "/moissonneurs/multivac/save/" + projectid,
        data: payload,
        dataType: 'json',
        beforeSend: function(xhr) {
            // Forward the "csrftoken" cookie as the X-CSRFToken request header.
            xhr.setRequestHeader("X-CSRFToken", getCookie("csrftoken"));
        },
        success: function(response) {
            console.log("ajax_success: in saveMultivac()")
            console.log(response)
            alert("OK")
            swapToWaitModal();
        },
        error: function(failure) {
            console.log(failure)
            swapToWaitModal();
        }
    });
}
function deleteCorpus(e, corpusId) { function deleteCorpus(e, corpusId) {
// prevents scroll back to top of page // prevents scroll back to top of page
e.preventDefault() e.preventDefault()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment