Commit c1a02b29 authored by PkSM3's avatar PkSM3

[UPDATE] merge conflict solved? (pubmed server down)

parents 92cad524 4e74b74e
......@@ -58,7 +58,7 @@ def apply_workflow(corpus_id):
# With Django ORM
corpus_django = models.Node.objects.get(id=corpus_id)
corpus_django.metadata['Processing'] = 0
corpus_django.metadata['Processing'] = "2"
corpus_django.save()
print("-" *60)
......@@ -73,5 +73,12 @@ def apply_workflow(corpus_id):
extract_ngrams(corpus, ['title'])
compute_tfidf(corpus)
try:
corpus_django.metadata['Processing'] = 0
corpus_django.save()
except Exception as error:
print(error)
......@@ -97,7 +97,7 @@ INSTALLED_APPS = (
'cte_tree',
'node',
'ngram',
'scrap_pubmed',
'scrappers.scrap_pubmed',
'djcelery',
'aldjemy',
'rest_framework',
......
......@@ -6,7 +6,7 @@ from django.contrib.auth.views import login
from gargantext_web import views, views_optimized
import gargantext_web.api
import scrap_pubmed.views as pubmedscrapper
import scrappers.scrap_pubmed.views as pubmedscrapper
admin.autodiscover()
......@@ -102,6 +102,7 @@ if settings.MAINTENANCE:
url(r'^$', views.home_view),
url(r'^about/', views.get_about),
url(r'^admin/', include(admin.site.urls)),
url(r'^.*', views.get_maintenance),
)
......
......@@ -39,7 +39,7 @@ from django.template import RequestContext
from django.contrib.auth.decorators import login_required
from django.contrib.auth import authenticate, login, logout
from scrap_pubmed.admin import Logger
from scrappers.scrap_pubmed.admin import Logger
from gargantext_web.db import *
......@@ -259,8 +259,8 @@ def corpus(request, project_id, corpus_id):
return redirect('/login/?next=%s' % request.path)
try:
offset = str(project_id)
offset = str(corpus_id)
offset = int(project_id)
offset = int(corpus_id)
except ValueError:
raise Http404()
......@@ -289,8 +289,10 @@ def corpus(request, project_id, corpus_id):
try:
processing = corpus.metadata['Processing']
except:
except Exception as error:
print(error)
processing = 0
print('processing', processing)
html = t.render(Context({\
'user': user,\
......
import os
from django.shortcuts import redirect
from django.shortcuts import render
from django.http import Http404, HttpResponse, HttpResponseRedirect, HttpResponseForbidden
......@@ -14,6 +17,7 @@ from node.admin import CustomForm
from gargantext_web.db import *
from gargantext_web.settings import DEBUG, MEDIA_ROOT
from gargantext_web.api import JsonHttpResponse
import json
import re
......@@ -134,7 +138,15 @@ def project(request, project_id):
)
session.add(corpus)
session.commit()
# save the uploaded file
# If the user is new, their folder does not exist yet, so create it first
dirpath = '%s/corpora/%s' % (MEDIA_ROOT, request.user.username)
if not os.path.exists(dirpath):
print("Creating folder %s" % dirpath)
os.makedirs(dirpath)
# Save the uploaded file
filepath = '%s/corpora/%s/%s' % (MEDIA_ROOT, request.user.username, thefile._name)
f = open(filepath, 'wb')
f.write(thefile.read())
......@@ -158,7 +170,7 @@ def project(request, project_id):
print(error)
# redirect to the main project page
# TODO need to wait before response (need corpus update)
sleep(1)
sleep(2)
return HttpResponseRedirect('/project/' + str(project_id))
else:
print('ERROR: BAD FORM')
......
......@@ -3,7 +3,7 @@ from django.template.loader import get_template
from django.template import Context
from django.contrib.auth.models import User, Group
from scrap_pubmed.MedlineFetcherDavid2015 import MedlineFetcher
from scrappers.scrap_pubmed.MedlineFetcherDavid2015 import MedlineFetcher
from urllib.request import urlopen, urlretrieve
import json
......@@ -184,31 +184,7 @@ def testISTEX(request , project_id):
print(request.method)
alist = ["bar","foo"]
# SQLAlchemy session
session = Session()
# do we have a valid project id?
try:
project_id = int(project_id)
except ValueError:
raise Http404()
# do we have a valid project?
project = (session
.query(Node)
.filter(Node.id == project_id)
.filter(Node.type_id == cache.NodeType['Project'].id)
).first()
if project is None:
raise Http404()
# do we have a valid user?
user = request.user
if not user.is_authenticated():
return redirect('/login/?next=%s' % request.path)
if project.user_id != user.id:
return HttpResponseForbidden()
if request.method == "POST":
# print(alist)
......@@ -217,73 +193,63 @@ def testISTEX(request , project_id):
N = 60
if "query" in request.POST: query = request.POST["query"]
if "string" in request.POST: query_string = request.POST["string"].replace(" ","+")
if "N" in request.POST: N = int(request.POST["N"])
# if "N" in request.POST: N = request.POST["N"]
print(query_string , query , N)
urlreqs = []
pagesize = 500
tasks = MedlineFetcher()
chunks = list(tasks.chunks(range(N), pagesize))
for k in chunks:
if (k[0]+pagesize)>N: pagesize = N-k[0]
urlreqs.append("http://api.istex.fr/document/?q="+query_string+"&output=*&"+"from="+str(k[0])+"&size="+str(pagesize))
print(urlreqs)
# urlreqs = []
# pagesize = 50
# tasks = MedlineFetcher()
# chunks = list(tasks.chunks(range(N), pagesize))
# for k in chunks:
# if (k[0]+pagesize)>N: pagesize = N-k[0]
# urlreqs.append("http://api.istex.fr/document/?q="+query_string+"&output=*&"+"from="+str(k[0])+"&size="+str(pagesize))
# print(urlreqs)
# urlreqs = ["http://localhost/374255" , "http://localhost/374278" ]
# print(urlreqs)
resourcetype = cache.ResourceType["istex"]
print(resourcetype)
# corpus node instantiation as a Django model
corpus = Node(
name = query,
user_id = request.user.id,
parent_id = project_id,
type_id = cache.NodeType['Corpus'].id,
language_id = None,
)
session.add(corpus)
session.commit()
tasks = MedlineFetcher()
for i in range(8):
t = threading.Thread(target=tasks.worker2) #thing to do
t.daemon = True # thread dies when main thread (only non-daemon thread) exits.
t.start()
for url in urlreqs:
filename = MEDIA_ROOT + '/corpora/%s/%s' % (request.user, str(datetime.datetime.now().isoformat()))
tasks.q.put( [url , filename]) # put a task in the queue
tasks.q.join() # wait until everything is finished
dwnldsOK = 0
for filename in tasks.firstResults:
if filename!=False:
print(filename)
# add the uploaded resource to the corpus
add_resource(corpus,
user_id = request.user.id,
type_id = resourcetype.id,
file = filename,
)
dwnldsOK+=1
if dwnldsOK == 0: return JsonHttpResponse(["fail"])
try:
def apply_workflow(corpus):
parse_resources(corpus)
extract_ngrams(corpus, ['title'])
compute_tfidf(corpus)
if DEBUG:
apply_workflow(corpus)
else:
thread = threading.Thread(target=apply_workflow, args=(corpus, ), daemon=True)
thread.start()
except Exception as error:
print('WORKFLOW ERROR')
print(error)
return HttpResponseRedirect('/project/' + str(project_id))
# resource_type = ResourceType.objects.get(name="istext" )
# parent = Node.objects.get(id=project_id)
# node_type = NodeType.objects.get(name='Corpus')
# type_id = NodeType.objects.get(name='Document').id
# user_id = User.objects.get( username=request.user ).id
# corpus = Node(
# user=request.user,
# parent=parent,
# type=node_type,
# name=query,
# )
# corpus.save()
# # configuring your queue with the event
# for i in range(8):
# t = threading.Thread(target=tasks.worker2) #thing to do
# t.daemon = True # thread dies when main thread (only non-daemon thread) exits.
# t.start()
# for url in urlreqs:
# filename = MEDIA_ROOT + '/corpora/%s/%s' % (request.user, str(datetime.now().microsecond))
# tasks.q.put( [url , filename]) # put a task in the queue
# tasks.q.join() # wait until everything is finished
# for filename in tasks.firstResults:
# corpus.add_resource( user=request.user, type=resource_type, file=filename )
# corpus.save()
# print("DEBUG:",DEBUG)
# # do the WorkFlow
# try:
# if DEBUG is True:
# corpus.workflow()
# else:
# corpus.workflow.apply_async((), countdown=3)
# return JsonHttpResponse(["workflow","finished"])
# except Exception as error:
# print(error)
data = [query_string,query,N]
return JsonHttpResponse(data)
......
......@@ -131,7 +131,7 @@
<div class="col-md-4">
<div class="jumbotron">
{% if processing == "1" %}
{% if processing >= "1" %}
<h3> <img width="20px" src="{% static "js/libs/img2/loading-bar.gif" %}"></img> Graph (later)</h3>
{% else %}
<h3><a href="/project/{{project.id}}/corpus/{{ corpus.id }}/explorer">Graph</a></h3>
......
......@@ -277,10 +277,10 @@
function bringDaNoise() {
var theresults = $("#theresults").html()
if( theresults && theresults.search("No results")==-1 ) {
var origQuery = $("#id_name").val()
console.log("we've in dynamic mode")
$("#simpleloader").html('<img width="30px" src="{% static "js/libs/img2/loading-bar.gif" %}"></img>')
$("#submit_thing").prop('onclick',null);
var theType = $("#id_type option:selected").html();
if(theType=="Pubmed (xml format)") doTheQuery();
if(theType=="istex") {
......@@ -301,13 +301,12 @@
$( "#id_form" ).submit();
}
}
}
}
function getGlobalResults(value){
console.log("in getGlobalResults()")
console.log("value:")
console.log(value)
// AJAX to django
var pubmedquery = $("#id_name").val()
var Npubs = $("#id_N").val();
......@@ -318,8 +317,6 @@
$("#"+value.id).prop('onclick',null);
var theType = $("#id_type option:selected").html();
console.log("theType:")
console.log(theType)
if(theType=="Pubmed (xml format)") {
$.ajax({
......@@ -357,7 +354,7 @@
});
}
if(theType=="istex") {
if(theType=="istext") {
console.log(window.location.origin+"tests/istextquery")
$.ajax({
// contentType: "application/json",
......@@ -484,7 +481,7 @@
var origQuery = query
var pubmedifiedQuery = { query : query , string: query , N:Npubs}
var pubmedifiedQuery = { query : query , string: query }
// console.log(pubmedifiedQuery)
var projectid = window.location.href.split("project")[1].replace(/\//g, '')//replace all the slashes
......@@ -500,7 +497,7 @@
success: function(data) {
console.log("ajax_success: in testISTEX()")
console.log(data)
location.reload();
// location.reload();
},
error: function(result) {
console.log("in testISTEX(). Data not found");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment