Commit 7eadf91e authored by delanoe

[FEAT] REPEC via Multivac. Done. TODO: some date to fix.

parent ba042fa0
......@@ -9,7 +9,7 @@
from ._Crawler import *
import json
from gargantext.settings import API_TOKENS
from math import trunc
class MultivacCrawler(Crawler):
''' Multivac API CLIENT'''
......@@ -24,6 +24,7 @@ class MultivacCrawler(Crawler):
# Final EndPoints
# TODO : Change endpoint according type of database
self.URL = self.BASE_URL + "/" + self.API_URL
self.status = []
def __format_query__(self, query=None):
'''formating the query'''
......@@ -58,6 +59,7 @@ class MultivacCrawler(Crawler):
, params = querystring
)
print(querystring)
# Validation : 200 if ok else raise Value
if response.status_code == 200:
charset = response.headers["Content-Type"].split("; ")[1].split("=")[1]
......@@ -77,12 +79,13 @@ class MultivacCrawler(Crawler):
return self.results_nb
def download(self, query):
self.path = "/tmp/MultivacResults.xml"
downloaded = False
self.status.append("fetching results")
corpus = []
paging = 100
self.query_max = self.results_nb
self.query_max = self.scan_results(query)
if self.query_max > QUERY_SIZE_N_MAX:
msg = "Invalid sample size N = %i (max = %i)" % (self.query_max, QUERY_SIZE_N_MAX)
......@@ -91,10 +94,13 @@ class MultivacCrawler(Crawler):
with open(self.path, 'wb') as f:
for page in range(0, self.query_max, paging):
corpus.append(self.get(self.query, fromPage=page, count=paging)["hits"])
f.write(str(corpus).encode("utf-8"))
#for page in range(1, self.query_max, paging):
for page in range(1, trunc(self.query_max / 100) + 1):
docs = self._get(query, fromPage=page, count=paging)["results"]["hits"]
for doc in docs:
corpus.append(doc)
f.write(json.dumps(corpus).encode("utf-8"))
downloaded = True
return downloaded
......@@ -45,7 +45,7 @@ def save(request, project_id):
print(query, N)
#for next time
#ids = request.POST["ids"]
source = get_resource(RESOURCE_TYPE_SCOAP)
source = get_resource(RESOURCE_TYPE_MULTIVAC)
if N == 0:
raise Http404()
if N > QUERY_SIZE_N_MAX:
......
......@@ -671,7 +671,7 @@
$("#submit_thing").prop('disabled' , false)
//$("#submit_thing").attr('onclick', testCERN(query, N));
$("#submit_thing").on("click", function(){
testCERN(pubmedquery, N);
saveMultivac(pubmedquery, N);
//$("#submit_thing").onclick()
})}
//(N > {{query_size}})
......@@ -680,7 +680,7 @@
$('#submit_thing').prop('disabled', false);
$("#submit_thing").html("Processing a sample file")
$("#submit_thing").on("click", function(){
testCERN(pubmedquery, N);
saveMultivac(pubmedquery, N);
//$("#submit_thing").onclick()
})}
}
......@@ -886,6 +886,60 @@
});
}
/**
 * Submit a Multivac harvesting request for the current project.
 *
 * POSTs `query` and `N` as JSON-expected form data to
 * `/moissonneurs/multivac/save/<projectid>`, where the project id is read
 * from the path of the current page. Whether the request succeeds or fails,
 * the `#addcorpus` modal is hidden and the `#wait` modal is shown after a
 * 600 ms delay.
 *
 * @param {string} query - Search query to send; nothing is sent when it is
 *     empty or missing.
 * @param {number} N - Sent as "N" in the POST body (presumably the number of
 *     documents to fetch -- confirm against the server-side view).
 */
function saveMultivac(query, N){
    console.log("In Multivac")
    if (!query) return;

    // Derive the id from the path only: splitting the full href would also
    // match "projects" in the host name and would drag any query string or
    // "#fragment" into the id.
    var pathAfterProjects = window.location.pathname.split("projects")[1]
    if (pathAfterProjects === undefined) {
        // Not on a project page: previously this crashed with a TypeError.
        console.log("saveMultivac: no project id found in " + window.location.pathname)
        return;
    }
    var projectid = pathAfterProjects.replace(/\//g, '') // drop every slash

    var data = { "query" : query , "N": N };
    console.log(data)

    // Shared by both callbacks: swap the "add corpus" dialog for the
    // "please wait" dialog once the short delay has elapsed.
    function showWaitModal() {
        setTimeout(
            function() {
                $('#addcorpus').modal('hide')
                $("#wait").modal("show")
            }, 600);
    }

    $.ajax({
        dataType: 'json',
        url: window.location.origin+"/moissonneurs/multivac/save/"+projectid,
        data: data,
        type: 'POST',
        beforeSend: function(xhr) {
            // Django rejects unsafe requests that lack the CSRF token header.
            xhr.setRequestHeader("X-CSRFToken", getCookie("csrftoken"));
        },
        success: function(data) {
            console.log("ajax_success: in saveMultivac()")
            console.log(data)
            // NOTE(review): blocking confirmation kept as-is; looks like a
            // debugging leftover -- confirm before removing.
            alert("OK")
            showWaitModal()
        },
        error: function(data) {
            console.log(data)
            showWaitModal()
        },
    });
}
function deleteCorpus(e, corpusId) {
// prevents scroll back to top of page
e.preventDefault()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment