Commit 2ea48b86 authored by Administrator

Merge branch 'unstable' into testing

parents a8dff456 4c89ca94
...@@ -274,14 +274,19 @@ def do_tfidf(corpus, reset=True): ...@@ -274,14 +274,19 @@ def do_tfidf(corpus, reset=True):
NodeNodeNgram.objects.filter(nodex=corpus).delete() NodeNodeNgram.objects.filter(nodex=corpus).delete()
if isinstance(corpus, Node) and corpus.type.name == "Corpus": if isinstance(corpus, Node) and corpus.type.name == "Corpus":
# print("\n- - - - - - - - - - ")
# for i in Node.objects.filter(parent=corpus, type=NodeType.objects.get(name="Document")):
# print("^^^",i)
for document in Node.objects.filter(parent=corpus, type=NodeType.objects.get(name="Document")): for document in Node.objects.filter(parent=corpus, type=NodeType.objects.get(name="Document")):
for node_ngram in Node_Ngram.objects.filter(node=document): for node_ngram in Node_Ngram.objects.filter(node=document):
try: try:
# print("\t",node_ngram.ngram)
nnn = NodeNodeNgram.objects.get(nodex=corpus, nodey=document, ngram=node_ngram.ngram) nnn = NodeNodeNgram.objects.get(nodex=corpus, nodey=document, ngram=node_ngram.ngram)
except: except:
score = tfidf(corpus, document, node_ngram.ngram) score = tfidf(corpus, document, node_ngram.ngram)
nnn = NodeNodeNgram(nodex=corpus, nodey=node_ngram.node, ngram=node_ngram.ngram, score=score) nnn = NodeNodeNgram(nodex=corpus, nodey=node_ngram.node, ngram=node_ngram.ngram, score=score)
nnn.save() nnn.save()
# print("- - - - - - - - - - \n")
else: else:
print("Only corpus implemented yet, you put instead:", type(corpus)) print("Only corpus implemented yet, you put instead:", type(corpus))
......
from django.conf.urls import patterns, include, url
from django.contrib import admin
from django.contrib.auth.views import login
from gargantext_web import views
import gargantext_web.api
admin.autodiscover()
# URL routing table for the gargantext_web project.
# NOTE: pattern order matters — Django resolves against the first matching regex.
urlpatterns = patterns('',
    # Admin views
    url(r'^admin/', include(admin.site.urls)),
    # NOTE(review): ^login/ includes the *admin* URLconf — presumably this should
    # route to the imported django.contrib.auth.views.login instead; confirm.
    url(r'^login/', include(admin.site.urls)),
    url(r'^grappelli/', include('grappelli.urls')),
    # User views: project/corpus CRUD pages (positional ids captured by (\d+))
    url(r'^$', views.home),
    url(r'^projects/$', views.projects),
    url(r'^project/(\d+)/delete/$', views.delete_project),
    url(r'^project/(\d+)/$', views.project),
    url(r'^project/(\d+)/corpus/(\d+)/$', views.corpus),
    url(r'^project/(\d+)/corpus/(\d+)/delete/$', views.delete_corpus),
    # Visualizations
    url(r'^corpus/(\d+)/explorer$', views.explorer_graph),
    url(r'^corpus/(\d+)/matrix$', views.explorer_matrix),
    # Getting data: CSV/JSON endpoints consumed by the front-end charts
    url(r'^chart/corpus/(\d+)/data.csv$', views.send_csv),
    url(r'^corpus/(\d+)/node_link.json$', views.node_link),
    url(r'^corpus/(\d+)/adjacency.json$', views.adjacency),
    # REST-ish API (class-based views use .as_view(); others are plain callables)
    url(r'^api$', gargantext_web.api.Root),
    url(r'^api/nodes/(\d+)/children/metadata$', gargantext_web.api.NodesChildrenMetatadata.as_view()),
    url(r'^api/nodes/(\d+)/children/queries$', gargantext_web.api.NodesChildrenQueries.as_view()),
    #url(r'^api/nodes$', gargantext_web.api.NodesController.get),
    url(r'^api/nodes/(\d+)/ngrams$', gargantext_web.api.CorpusController.ngrams),
    url(r'^api/nodes/(\d+)/data$', gargantext_web.api.CorpusController.data),
    url(r'^graph-it$', views.graph_it),
    url(r'^ngrams$', views.ngrams),
)
from django.conf import settings
# Serve uploaded media and static assets through Django itself, but ONLY when
# DEBUG is on — in production these must be served by the web server instead
# (django.views.static.serve is not hardened or efficient enough).
if settings.DEBUG:
    urlpatterns += patterns('',
        url(r'^media/(?P<path>.*)$', 'django.views.static.serve', {
            'document_root': settings.MEDIA_ROOT,
        }),
        url(r'^static/(?P<path>.*)$', 'django.views.static.serve', {
            'document_root': settings.STATIC_ROOT,
        }),
    )
...@@ -34,6 +34,8 @@ from django.template import RequestContext ...@@ -34,6 +34,8 @@ from django.template import RequestContext
from django.contrib.auth.decorators import login_required from django.contrib.auth.decorators import login_required
from django.contrib.auth import authenticate, login, logout from django.contrib.auth import authenticate, login, logout
from scrap_pubmed.admin import Logger
def login_user(request): def login_user(request):
logout(request) logout(request)
username = password = '' username = password = ''
...@@ -194,6 +196,7 @@ def projects(request): ...@@ -194,6 +196,7 @@ def projects(request):
user = request.user user = request.user
date = datetime.datetime.now() date = datetime.datetime.now()
print(Logger.write("STATIC_ROOT"))
project_type = NodeType.objects.get(name='Project') project_type = NodeType.objects.get(name='Project')
projects = Node.objects.filter(user=user, type_id = project_type.id).order_by("-date") projects = Node.objects.filter(user=user, type_id = project_type.id).order_by("-date")
...@@ -299,6 +302,10 @@ def project(request, project_id): ...@@ -299,6 +302,10 @@ def project(request, project_id):
for key in donut_part.keys() ] for key in donut_part.keys() ]
dauser = User.objects.get( username=user )
groups = len(dauser.groups.filter(name="PubMed_0.1"))
print("*groupslen*:",groups)
if request.method == 'POST': if request.method == 'POST':
form = CustomForm(request.POST, request.FILES) form = CustomForm(request.POST, request.FILES)
...@@ -308,8 +315,6 @@ def project(request, project_id): ...@@ -308,8 +315,6 @@ def project(request, project_id):
name = form.cleaned_data['name'] name = form.cleaned_data['name']
thefile = form.cleaned_data['file'] thefile = form.cleaned_data['file']
print(request.POST['type'])
print(form.cleaned_data['type'])
resource_type = ResourceType.objects.get(name=str( form.cleaned_data['type'] )) resource_type = ResourceType.objects.get(name=str( form.cleaned_data['type'] ))
print("-------------") print("-------------")
...@@ -819,7 +824,7 @@ def node_link(request, corpus_id): ...@@ -819,7 +824,7 @@ def node_link(request, corpus_id):
start = time.time() start = time.time()
data = get_cooc(request=request, corpus_id=corpus_id, type="node_link") data = get_cooc(request=request, corpus_id=corpus_id, type="node_link")
end = time.time() end = time.time()
print ("LOG::TIME: get_cooc() [s]",(end - start)) print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" get_cooc() [s]",(end - start))
print("In node_link() END") print("In node_link() END")
return JsonHttpResponse(data) return JsonHttpResponse(data)
......
...@@ -217,10 +217,10 @@ corpus_pubmed.add_resource( ...@@ -217,10 +217,10 @@ corpus_pubmed.add_resource(
for resource in corpus_pubmed.get_resources(): for resource in corpus_pubmed.get_resources():
print('Resource #%d - %s - %s' % (resource.id, resource.digest, resource.file)) print('Resource #%d - %s - %s' % (resource.id, resource.digest, resource.file))
print('Parse corpus #%d...' % (corpus_pubmed.id, )) # print('Parse corpus #%d...' % (corpus_pubmed.id, ))
corpus_pubmed.parse_resources(verbose=True) # corpus_pubmed.parse_resources(verbose=True)
print('Extract corpus #%d...' % (corpus_pubmed.id, )) # print('Extract corpus #%d...' % (corpus_pubmed.id, ))
corpus_pubmed.children.all().extract_ngrams(['title',]) # corpus_pubmed.children.all().extract_ngrams(['title',])
print('Parsed corpus #%d.' % (corpus_pubmed.id, )) # print('Parsed corpus #%d.' % (corpus_pubmed.id, ))
exit() exit()
...@@ -11,6 +11,7 @@ from cte_tree.models import CTENode, CTENodeManager ...@@ -11,6 +11,7 @@ from cte_tree.models import CTENode, CTENodeManager
from parsing.Caches import LanguagesCache, NgramsExtractorsCache, NgramsCaches from parsing.Caches import LanguagesCache, NgramsExtractorsCache, NgramsCaches
from parsing.FileParsers import * from parsing.FileParsers import *
from time import time from time import time
import datetime
from collections import defaultdict from collections import defaultdict
import hashlib import hashlib
...@@ -160,6 +161,9 @@ class Node(CTENode): ...@@ -160,6 +161,9 @@ class Node(CTENode):
def parse_resources(self, verbose=False): def parse_resources(self, verbose=False):
# parse all resources into a list of metadata # parse all resources into a list of metadata
metadata_list = [] metadata_list = []
print("not parsed resources:")
print(self.node_resource.filter(parsed=False))
print("= = = = = = = = = = =\n")
for node_resource in self.node_resource.filter(parsed=False): for node_resource in self.node_resource.filter(parsed=False):
resource = node_resource.resource resource = node_resource.resource
parser = defaultdict(lambda:FileParser.FileParser, { parser = defaultdict(lambda:FileParser.FileParser, {
...@@ -173,7 +177,11 @@ class Node(CTENode): ...@@ -173,7 +177,11 @@ class Node(CTENode):
})[resource.type.name]() })[resource.type.name]()
metadata_list += parser.parse(str(resource.file)) metadata_list += parser.parse(str(resource.file))
# print(parser.parse(str(resource.file))) # print(parser.parse(str(resource.file)))
# retrieve info from the database # # retrieve info from the database
# print("\n - - -- - - - - - - - ")
# for i in metadata_list:
# print("***",i["title"])
# print("- - -- - - - - - - - \n")
type_id = NodeType.objects.get(name='Document').id type_id = NodeType.objects.get(name='Document').id
langages_cache = LanguagesCache() langages_cache = LanguagesCache()
user_id = self.user.id user_id = self.user.id
...@@ -228,6 +236,7 @@ class Node(CTENode): ...@@ -228,6 +236,7 @@ class Node(CTENode):
associations[terms] += 1 associations[terms] += 1
#print(associations) #print(associations)
# insert the occurrences in the database # insert the occurrences in the database
# print(associations.items())
Node_Ngram.objects.bulk_create([ Node_Ngram.objects.bulk_create([
Node_Ngram( Node_Ngram(
node = self, node = self,
...@@ -247,16 +256,18 @@ class Node(CTENode): ...@@ -247,16 +256,18 @@ class Node(CTENode):
self.save() self.save()
self.parse_resources() self.parse_resources()
end = time.time() end = time.time()
print ("LOG::TIME: parse_resources() [s]",(end - start)) print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" parse_resources() [s]",(end - start))
print("LOG::TIME: In workflow() / parse_resources()") print("LOG::TIME: In workflow() / parse_resources()")
start = time.time() start = time.time()
print("LOG::TIME: In workflow() extract_ngrams()") print("LOG::TIME: In workflow() extract_ngrams()")
print("\n- - - - - - - - - -")
type_document = NodeType.objects.get(name='Document') type_document = NodeType.objects.get(name='Document')
self.children.filter(type_id=type_document.pk).extract_ngrams(keys=['title',]) self.children.filter(type_id=type_document.pk).extract_ngrams(keys=['title',])
end = time.time() end = time.time()
print("- - - - - - - - - - \n")
print ("LOG::TIME: ",(end - start)) print ("LOG::TIME: ",(end - start))
print ("LOG::TIME: extract_ngrams() [s]",(end - start)) print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" extract_ngrams() [s]",(end - start))
print("LOG::TIME: In workflow() / extract_ngrams()") print("LOG::TIME: In workflow() / extract_ngrams()")
start = time.time() start = time.time()
...@@ -264,7 +275,7 @@ class Node(CTENode): ...@@ -264,7 +275,7 @@ class Node(CTENode):
from analysis.functions import do_tfidf from analysis.functions import do_tfidf
do_tfidf(self) do_tfidf(self)
end = time.time() end = time.time()
print ("LOG::TIME: do_tfidf() [s]",(end - start)) print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" do_tfidf() [s]",(end - start))
print("LOG::TIME: In workflow() / do_tfidf()") print("LOG::TIME: In workflow() / do_tfidf()")
print("In workflow() END") print("In workflow() END")
......
...@@ -10,12 +10,12 @@ import os ...@@ -10,12 +10,12 @@ import os
import time import time
# import libxml2 # import libxml2
from lxml import etree from lxml import etree
from datetime import datetime import datetime
from django.core.files import File from django.core.files import File
import threading import threading
from queue import Queue from queue import Queue
import time # import time
class MedlineFetcher: class MedlineFetcher:
...@@ -180,8 +180,9 @@ class MedlineFetcher: ...@@ -180,8 +180,9 @@ class MedlineFetcher:
for i,query in enumerate(thequeries): for i,query in enumerate(thequeries):
k = query["count"] k = query["count"]
percentage = k/float(N) proportion = k/float(N)
retmax_forthisyear = int(round(globalLimit*percentage)) retmax_forthisyear = int(round(globalLimit*proportion))
query["retmax"] = retmax_forthisyear query["retmax"] = retmax_forthisyear
if query["retmax"]==0: query["retmax"]+=1
return thequeries return thequeries
from django.contrib import admin from django.contrib import admin
from gargantext_web.settings import STATIC_ROOT
# Register your models here. # Register your models here.
import os
import datetime
class Logger():
    """Minimal append-only file logger.

    Messages are appended, one per line and prefixed with the time of day,
    to ``Logs/<YYYY-MM-DD>.log`` (one file per day). The ``Logs/`` directory
    is created on first use.
    """

    def write(msg):
        """Append *msg* with a timestamp to today's log file.

        :param msg: text to record (converted to ``str``).
        :returns: the log directory path (``"Logs/"``), as callers print it.
        """
        path = "Logs/"
        Logger.ensure_dir(path)
        nowfull = datetime.datetime.now().isoformat().split("T")
        date = nowfull[0]
        time = nowfull[1]
        # Fix: the original computed date/time but never used `msg` and never
        # wrote anything — the log entry is now actually persisted.
        with open(os.path.join(path, date + ".log"), "a") as logfile:
            logfile.write("%s %s\n" % (time, str(msg)))
        return path

    def ensure_dir(f):
        """Create the directory containing path *f* if it does not exist yet."""
        d = os.path.dirname(f)
        if not os.path.exists(d):
            os.makedirs(d)
...@@ -4,7 +4,7 @@ from django.shortcuts import render ...@@ -4,7 +4,7 @@ from django.shortcuts import render
from django.http import Http404, HttpResponse, HttpResponseRedirect from django.http import Http404, HttpResponse, HttpResponseRedirect
from django.template.loader import get_template from django.template.loader import get_template
from django.template import Context from django.template import Context
from django.contrib.auth.models import User from django.contrib.auth.models import User, Group
from scrap_pubmed.MedlineFetcherDavid2015 import MedlineFetcher from scrap_pubmed.MedlineFetcherDavid2015 import MedlineFetcher
...@@ -13,8 +13,9 @@ from urllib.request import urlopen, urlretrieve ...@@ -13,8 +13,9 @@ from urllib.request import urlopen, urlretrieve
import json import json
from gargantext_web.settings import MEDIA_ROOT from gargantext_web.settings import MEDIA_ROOT
from datetime import datetime # from datetime import datetime
import time import time
import datetime
import os import os
import threading import threading
from django.core.files import File from django.core.files import File
...@@ -30,12 +31,13 @@ def getGlobalStats(request ): ...@@ -30,12 +31,13 @@ def getGlobalStats(request ):
alist = ["bar","foo"] alist = ["bar","foo"]
if request.method == "POST": if request.method == "POST":
N = 100
query = request.POST["query"] query = request.POST["query"]
print ("LOG::TIME: query =", query ) print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" query =", query )
print ("LOG::TIME: N =", 300 ) print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" N =", N )
instancia = MedlineFetcher() instancia = MedlineFetcher()
# alist = instancia.serialFetcher( 5, query , int(request.POST["N"]) ) # alist = instancia.serialFetcher( 5, query , int(request.POST["N"]) )
alist = instancia.serialFetcher( 5, query , 300 ) alist = instancia.serialFetcher( 5, query , N )
data = alist data = alist
return JsonHttpResponse(data) return JsonHttpResponse(data)
...@@ -73,6 +75,7 @@ def doTheQuery(request , project_id): ...@@ -73,6 +75,7 @@ def doTheQuery(request , project_id):
type_id = NodeType.objects.get(name='Document').id type_id = NodeType.objects.get(name='Document').id
user_id = User.objects.get( username=request.user ).id user_id = User.objects.get( username=request.user ).id
corpus = Node( corpus = Node(
user=request.user, user=request.user,
parent=parent, parent=parent,
...@@ -91,7 +94,7 @@ def doTheQuery(request , project_id): ...@@ -91,7 +94,7 @@ def doTheQuery(request , project_id):
t.daemon = True # thread dies when main thread (only non-daemon thread) exits. t.daemon = True # thread dies when main thread (only non-daemon thread) exits.
t.start() t.start()
for url in urlreqs: for url in urlreqs:
filename = MEDIA_ROOT + '/corpora/%s/%s' % (request.user, str(datetime.now().microsecond)) filename = MEDIA_ROOT + '/corpora/%s/%s' % (request.user, str(datetime.datetime.now().isoformat()))
tasks.q.put( [url , filename]) #put a task in th queue tasks.q.put( [url , filename]) #put a task in th queue
tasks.q.join() # wait until everything is finished tasks.q.join() # wait until everything is finished
for filename in tasks.firstResults: for filename in tasks.firstResults:
......
...@@ -221,8 +221,6 @@ ...@@ -221,8 +221,6 @@
<button onclick='bringDaNoise();' id="submit_thing" disabled class="btn btn-primary" >Process this!</button><span id="simpleloader"></span> <button onclick='bringDaNoise();' id="submit_thing" disabled class="btn btn-primary" >Process this!</button><span id="simpleloader"></span>
</div> </div>
</div> </div>
</div><!-- /.modal-content --> </div><!-- /.modal-content -->
...@@ -246,8 +244,6 @@ ...@@ -246,8 +244,6 @@
return cookieValue; return cookieValue;
} }
var thequeries = [] var thequeries = []
function doTheQuery() { function doTheQuery() {
...@@ -331,6 +327,7 @@ ...@@ -331,6 +327,7 @@
console.log("enabling "+"#"+value.id) console.log("enabling "+"#"+value.id)
$("#"+value.id).attr('onclick','getGlobalResults(this);'); $("#"+value.id).attr('onclick','getGlobalResults(this);');
// $("#submit_thing").prop('disabled' , false) // $("#submit_thing").prop('disabled' , false)
$("#submit_thing").html("Process a 100 sample!")
thequeries = data thequeries = data
var N=0,k=0; var N=0,k=0;
...@@ -364,6 +361,7 @@ ...@@ -364,6 +361,7 @@
$("#theresults").html("") $("#theresults").html("")
$('#submit_thing').prop('disabled', false); $('#submit_thing').prop('disabled', false);
$( "#id_name" ).on('input',null); $( "#id_name" ).on('input',null);
$("#submit_thing").html("Process this!")
} }
// @dynamic-query events // @dynamic-query events
else { else {
...@@ -391,6 +389,7 @@ ...@@ -391,6 +389,7 @@
$("#pubmedcrawl").css("visibility", "visible"); $("#pubmedcrawl").css("visibility", "visible");
$("#pubmedcrawl").show(); $("#pubmedcrawl").show();
$("#file_yes").click(); $("#file_yes").click();
$("#submit_thing").html("Process this!")
} }
// hide Radio-Inputs and trigger @upload-file events // hide Radio-Inputs and trigger @upload-file events
else { else {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment