[UPDATE] Merge conflict resolved? (PubMed server down)

c1a02b29 · PkSM3 · 92cad524 · 4e74b74e · c1a02b29 · c1a02b29
Commit c1a02b29 authored Apr 17, 2015 by PkSM3
16 changed files
--- a/gargantext_web/celery.py
+++ b/gargantext_web/celery.py
@@ -58,7 +58,7 @@ def apply_workflow(corpus_id):
        # With Django ORM 
        corpus_django = models.Node.objects.get(id=corpus_id)
-        corpus_django.metadata['Processing'] = 0
+        corpus_django.metadata['Processing'] = "2"
        corpus_django.save()
        print("-" *60)
@@ -73,5 +73,12 @@ def apply_workflow(corpus_id):
    extract_ngrams(corpus, ['title'])
    compute_tfidf(corpus)
+    try:
+        corpus_django.metadata['Processing'] = 0
+        corpus_django.save()
+    except Exception as error:
+        print(error)
--- a/gargantext_web/settings.py
+++ b/gargantext_web/settings.py
@@ -97,7 +97,7 @@ INSTALLED_APPS = (
    'cte_tree',
    'node',
    'ngram',
-    'scrap_pubmed',
+    'scrappers.scrap_pubmed',
    'djcelery',
    'aldjemy',
    'rest_framework',

--- a/gargantext_web/urls.py
+++ b/gargantext_web/urls.py
@@ -6,7 +6,7 @@ from django.contrib.auth.views import login
 from gargantext_web import views, views_optimized
 import gargantext_web.api
-import scrap_pubmed.views as pubmedscrapper
+import scrappers.scrap_pubmed.views as pubmedscrapper
 admin.autodiscover()
@@ -102,6 +102,7 @@ if settings.MAINTENANCE:
    url(r'^$', views.home_view),
    url(r'^about/', views.get_about),
+    url(r'^admin/', include(admin.site.urls)),
    url(r'^.*', views.get_maintenance),
    )

--- a/gargantext_web/views.py
+++ b/gargantext_web/views.py
@@ -39,7 +39,7 @@ from django.template import RequestContext
 from django.contrib.auth.decorators import login_required
 from django.contrib.auth import authenticate, login, logout
-from scrap_pubmed.admin import Logger
+from scrappers.scrap_pubmed.admin import Logger
 from gargantext_web.db import *
@@ -259,8 +259,8 @@ def corpus(request, project_id, corpus_id):
        return redirect('/login/?next=%s' % request.path)
    try:
-        offset = str(project_id)
+        offset = int(project_id)
-        offset = str(corpus_id)
+        offset = int(corpus_id)
    except ValueError:
        raise Http404()
@@ -289,8 +289,10 @@ def corpus(request, project_id, corpus_id):
    try:
        processing = corpus.metadata['Processing']
-    except:
+    except Exception as error:
+        print(error)
        processing = 0
+    print('processing', processing)
    html = t.render(Context({\
            'user': user,\

--- a/gargantext_web/views_optimized.py
+++ b/gargantext_web/views_optimized.py
+import os
 from django.shortcuts import redirect
 from django.shortcuts import render
 from django.http import Http404, HttpResponse, HttpResponseRedirect, HttpResponseForbidden
@@ -14,6 +17,7 @@ from node.admin import CustomForm
 from gargantext_web.db import *
 from gargantext_web.settings import DEBUG, MEDIA_ROOT
 from gargantext_web.api import JsonHttpResponse
 import json
 import re
@@ -134,7 +138,15 @@ def project(request, project_id):
            )
            session.add(corpus)
            session.commit()
-            # save the uploaded file
+            # A new user has no upload folder yet, so create it before saving
+            dirpath = '%s/corpora/%s' % (MEDIA_ROOT, request.user.username)
+            if not os.path.exists(dirpath):
+                print("Creating folder %s" % dirpath)
+                os.makedirs(dirpath)
+            # Save the uploaded file
            filepath = '%s/corpora/%s/%s' % (MEDIA_ROOT, request.user.username, thefile._name)
            f = open(filepath, 'wb')
            f.write(thefile.read())
@@ -158,7 +170,7 @@ def project(request, project_id):
                print(error)
            # redirect to the main project page
            # TODO need to wait before response (need corpus update) 
-            sleep(1)
+            sleep(2)
            return HttpResponseRedirect('/project/' + str(project_id))
        else:
            print('ERROR: BAD FORM')

--- a/backupdb.py
+++ b/backupdb.py
--- a/init_accounts.py
+++ b/init_accounts.py
--- a/test_db.py
+++ b/test_db.py
--- a/scrap_pubmed/MedlineFetcherDavid2015.py
+++ b/scrap_pubmed/MedlineFetcherDavid2015.py
--- a/scrap_pubmed/__init__.py
+++ b/scrap_pubmed/__init__.py
--- a/scrap_pubmed/admin.py
+++ b/scrap_pubmed/admin.py
--- a/scrap_pubmed/models.py
+++ b/scrap_pubmed/models.py
--- a/scrap_pubmed/tests.py
+++ b/scrap_pubmed/tests.py
--- a/scrap_pubmed/views.py
+++ b/scrap_pubmed/views.py
@@ -3,7 +3,7 @@ from django.template.loader import get_template
 from django.template import Context
 from django.contrib.auth.models import User, Group
-from scrap_pubmed.MedlineFetcherDavid2015 import MedlineFetcher
+from scrappers.scrap_pubmed.MedlineFetcherDavid2015 import MedlineFetcher
 from urllib.request import urlopen, urlretrieve
 import json
@@ -184,31 +184,7 @@ def testISTEX(request , project_id):
 	print(request.method)
 	alist = ["bar","foo"]
-	# SQLAlchemy session
-	session = Session()
-	# do we have a valid project id?
-	try:
-		project_id = int(project_id)
-	except ValueError:
-		raise Http404()
-	# do we have a valid project?
-	project = (session
-		.query(Node)
-		.filter(Node.id == project_id)
-		.filter(Node.type_id == cache.NodeType['Project'].id)
-	).first()
-	if project is None:
-		raise Http404()
-	# do we have a valid user?
-	user = request.user
-	if not user.is_authenticated():
-		return redirect('/login/?next=%s' % request.path)
-	if project.user_id != user.id:
-		return HttpResponseForbidden()
 	if request.method == "POST":
 		# print(alist)
@@ -217,73 +193,63 @@ def testISTEX(request , project_id):
 		N = 60
 		if "query" in request.POST: query = request.POST["query"]
 		if "string" in request.POST: query_string = request.POST["string"].replace(" ","+")
-		if "N" in request.POST: N = int(request.POST["N"])
+		# if "N" in request.POST: N = request.POST["N"]
 		print(query_string , query , N)
-		urlreqs = []
+		# urlreqs = []
-		pagesize = 500
+		# pagesize = 50
-		tasks = MedlineFetcher()
+		# tasks = MedlineFetcher()
-		chunks = list(tasks.chunks(range(N), pagesize))
+		# chunks = list(tasks.chunks(range(N), pagesize))
-		for k in chunks:
+		# for k in chunks:
-			if (k[0]+pagesize)>N: pagesize = N-k[0]
+		# 	if (k[0]+pagesize)>N: pagesize = N-k[0]
-			urlreqs.append("http://api.istex.fr/document/?q="+query_string+"&output=*&"+"from="+str(k[0])+"&size="+str(pagesize))
+		# 	urlreqs.append("http://api.istex.fr/document/?q="+query_string+"&output=*&"+"from="+str(k[0])+"&size="+str(pagesize))
-		print(urlreqs)
+		# print(urlreqs)
 		# urlreqs = ["http://localhost/374255" , "http://localhost/374278" ]
 		# print(urlreqs)
-		resourcetype = cache.ResourceType["istex"]
+		# resource_type = ResourceType.objects.get(name="istext" )
-		print(resourcetype)
-		# corpus node instanciation as a Django model
+		# parent      = Node.objects.get(id=project_id)
-		corpus = Node(
+		# node_type   = NodeType.objects.get(name='Corpus')
-			name = query,
+		# type_id = NodeType.objects.get(name='Document').id
-			user_id = request.user.id,
+		# user_id = User.objects.get( username=request.user ).id
-			parent_id = project_id,
-			type_id = cache.NodeType['Corpus'].id,
+		# corpus = Node(
-			language_id = None,
+		# 	user=request.user,
-		)
+		# 	parent=parent,
-		session.add(corpus)
+		# 	type=node_type,
-		session.commit()
+		# 	name=query,
+		# )
-		tasks = MedlineFetcher()
-		for i in range(8):
+		# corpus.save()
-			t = threading.Thread(target=tasks.worker2) #thing to do
-			t.daemon = True  # thread dies when main thread (only non-daemon thread) exits.
+		# # configuring your queue with the event
-			t.start()
+		# for i in range(8):
-		for url in urlreqs:
+		# 	t = threading.Thread(target=tasks.worker2) #thing to do
-			filename = MEDIA_ROOT + '/corpora/%s/%s' % (request.user, str(datetime.datetime.now().isoformat()))
+		# 	t.daemon = True  # thread dies when main thread (only non-daemon thread) exits.
-			tasks.q.put( [url , filename]) #put a task in th queue
+		# 	t.start()
-		tasks.q.join() # wait until everything is finished
+		# for url in urlreqs:
+		# 	filename = MEDIA_ROOT + '/corpora/%s/%s' % (request.user, str(datetime.now().microsecond))
-		dwnldsOK = 0
+		# 	tasks.q.put( [url , filename]) #put a task in the queue
-		for filename in tasks.firstResults:
+		# tasks.q.join() # wait until everything is finished
-			if filename!=False:
+		# for filename in tasks.firstResults:
-				print(filename)
+		# 	corpus.add_resource( user=request.user, type=resource_type, file=filename )
-				# add the uploaded resource to the corpus
-				add_resource(corpus,
-					user_id = request.user.id,
+		# corpus.save()
-					type_id = resourcetype.id,
+		# print("DEBUG:",DEBUG)
-					file = filename,
+		# # do the WorkFlow
-				)
+		# try:
-				dwnldsOK+=1
+		# 	if DEBUG is True:
+		# 		corpus.workflow()
-		if dwnldsOK == 0: return JsonHttpResponse(["fail"])
+		# 	else:
+		# 		corpus.workflow.apply_async((), countdown=3)
-		try:
-			def apply_workflow(corpus):
+		# 	return JsonHttpResponse(["workflow","finished"])
-				parse_resources(corpus)
+		# except Exception as error:
-				extract_ngrams(corpus, ['title'])
+		# 	print(error)
-				compute_tfidf(corpus)
-			if DEBUG:
-				apply_workflow(corpus)
-			else:
-				thread = threading.Thread(target=apply_workflow, args=(corpus, ), daemon=True)
-				thread.start()
-		except Exception as error:
-			print('WORKFLOW ERROR')
-			print(error)
-		return HttpResponseRedirect('/project/' + str(project_id))
 	data = [query_string,query,N]
 	return JsonHttpResponse(data)

--- a/templates/corpus.html
+++ b/templates/corpus.html
@@ -131,7 +131,7 @@
 				<div class="col-md-4">
 						<div class="jumbotron">
-                {% if processing == "1" %}
+                {% if processing >= "1" %}
                <h3> <img width="20px" src="{% static "js/libs/img2/loading-bar.gif" %}"></img> Graph (later)</h3>
                {% else %}
 								<h3><a href="/project/{{project.id}}/corpus/{{ corpus.id }}/explorer">Graph</a></h3>

--- a/templates/project.html
+++ b/templates/project.html
@@ -277,10 +277,10 @@
 		function bringDaNoise() {
 			var theresults = $("#theresults").html()
 			if( theresults && theresults.search("No results")==-1 ) {
-				var origQuery = $("#id_name").val()
 				console.log("we've in dynamic mode")
 				$("#simpleloader").html('<img width="30px" src="{% static "js/libs/img2/loading-bar.gif" %}"></img>')
 				$("#submit_thing").prop('onclick',null);
 				var theType = $("#id_type option:selected").html();
 				if(theType=="Pubmed (xml format)") doTheQuery();
 				if(theType=="istex") {
@@ -301,13 +301,12 @@
 						$( "#id_form" ).submit();
 					}
 				}
 			}
 		}
 		function getGlobalResults(value){
 			console.log("in getGlobalResults()")
-			console.log("value:")
-			console.log(value)
 			// AJAX to django
 			var pubmedquery = $("#id_name").val()
 			var Npubs = $("#id_N").val();
@@ -318,8 +317,6 @@
 			$("#"+value.id).prop('onclick',null);
 			var theType = $("#id_type option:selected").html();
-			console.log("theType:")
-			console.log(theType)
 			if(theType=="Pubmed (xml format)") {
 			    $.ajax({
@@ -357,7 +354,7 @@
 			    });	
 			}
-			if(theType=="istex") {
+			if(theType=="istext") {
 				console.log(window.location.origin+"tests/istextquery")
 			    $.ajax({
 				  // contentType: "application/json",
@@ -484,7 +481,7 @@
 			var origQuery = query
-			var pubmedifiedQuery = { query : query , string: query , N:Npubs}
+			var pubmedifiedQuery = { query : query , string: query }
 			// console.log(pubmedifiedQuery)
 			var projectid = window.location.href.split("project")[1].replace(/\//g, '')//replace all the slashes
@@ -500,7 +497,7 @@
 		      success: function(data) {
 				console.log("ajax_success: in testISTEX()")
 		        console.log(data)
-		        location.reload();
+		        // location.reload();
 		      },
 		        error: function(result) {
 		            console.log("in testISTEX(). Data not found");