Commit 3629523d authored by PkSM3

[UPDATE] send * to stoplist, then show (mainlist - stoplist): OK

parent 08539c33
...@@ -6,4 +6,5 @@ urlpatterns = patterns('', ...@@ -6,4 +6,5 @@ urlpatterns = patterns('',
url(r'^document/(?P<doc_id>[0-9]+)$', views.Document.as_view()), # document view url(r'^document/(?P<doc_id>[0-9]+)$', views.Document.as_view()), # document view
url(r'^corpus/(?P<corpus_id>[0-9]+)/document/(?P<doc_id>[0-9]+)$', views.NgramList.as_view()), # the list associated with an ngram url(r'^corpus/(?P<corpus_id>[0-9]+)/document/(?P<doc_id>[0-9]+)$', views.NgramList.as_view()), # the list associated with an ngram
url(r'^lists/(?P<list_id>[0-9]+)/ngrams(?:/(?P<ngram_id>[0-9]+))?$', views.NgramEdit.as_view()), # url(r'^lists/(?P<list_id>[0-9]+)/ngrams(?:/(?P<ngram_id>[0-9]+))?$', views.NgramEdit.as_view()), #
url(r'^lists/(?P<list_id>[0-9]+)/multiple?$', views.deleteMultiple ), #
) )
...@@ -16,6 +16,8 @@ from rest_framework.authentication import SessionAuthentication, BasicAuthentica ...@@ -16,6 +16,8 @@ from rest_framework.authentication import SessionAuthentication, BasicAuthentica
from node.models import Node from node.models import Node
from gargantext_web.db import * from gargantext_web.db import *
from ngram.lists import listIds, listNgramIds, ngramList from ngram.lists import listIds, listNgramIds, ngramList
from gargantext_web.api import JsonHttpResponse
import json
@login_required @login_required
...@@ -109,6 +111,26 @@ class NgramEdit(APIView): ...@@ -109,6 +111,26 @@ class NgramEdit(APIView):
session.query(Node_Ngram).filter(Node_Ngram.node_id==list_id).filter(Node_Ngram.ngram_id==ngram_id).delete() session.query(Node_Ngram).filter(Node_Ngram.node_id==list_id).filter(Node_Ngram.ngram_id==ngram_id).delete()
return Response(None, 204) return Response(None, 204)
def deleteMultiple(request, list_id):
    """Add a batch of ngrams to the list ``list_id`` in one request.

    Despite its name, this view does not remove any row: for every ngram id
    it receives, it inserts a ``Node_Ngram`` row linking that ngram to
    ``list_id`` (weight 1.0), unless such a row already exists.

    The ids are read from the ``to_delete`` POST field, which must hold a
    JSON-encoded list of ngram ids. A request without POST data, or with a
    missing/empty ``to_delete`` field, changes nothing.

    Anonymous users are redirected to the login page. Any other request is
    answered with the placeholder JSON payload ``["hola", "mundo"]``.

    Raises:
        ValueError: if ``to_delete`` is present but is not valid JSON.
    """
    results = ["hola","mundo"]
    user = request.user
    if not user.is_authenticated():
        return redirect('/login/?next=%s' % request.path)
    if request.POST:
        # A missing field used to raise a key error (HTTP 500); an absent or
        # empty field is now simply an empty batch.
        todel_ids = json.loads(request.POST.get('to_delete') or '[]')
        for ngram_id in todel_ids:
            # add the ngram to the list if not already done
            node_ngram = (
                session.query(Node_Ngram)
                .filter(Node_Ngram.node_id==list_id)
                .filter(Node_Ngram.ngram_id==ngram_id)
                .first()
            )
            if node_ngram is None:
                node_ngram = Node_Ngram(node_id=list_id, ngram_id=ngram_id, weight=1.0)
                session.add(node_ngram)
        # One commit for the whole batch: either every new row is stored or none.
        session.commit()
    return JsonHttpResponse(results)
class Document(APIView): class Document(APIView):
""" """
......
...@@ -85,7 +85,8 @@ urlpatterns = patterns('', ...@@ -85,7 +85,8 @@ urlpatterns = patterns('',
url(r'^tests/paginator/corpus/(\d+)/$', views.newpaginatorJSON), url(r'^tests/paginator/corpus/(\d+)/$', views.newpaginatorJSON),
url(r'^tests/move2trash/$' , views.move_to_trash_multiple ), url(r'^tests/move2trash/$' , views.move_to_trash_multiple ),
url(r'^project/(\d+)/corpus/(\d+)/ngrams/ngrams.json$', samtest.test_ngrams), url(r'^project/(\d+)/corpus/(\d+)/ngrams/ngrams.json$', samtest.test_ngrams),
url(r'^project/(\d+)/corpus/(\d+)/ngrams$', samtest.get_ngrams) url(r'^project/(\d+)/corpus/(\d+)/ngrams$', samtest.get_ngrams),
url(r'^corpus/(\d+)/document/(\d+)/testpage$', samtest.test_test)
) )
......
...@@ -271,10 +271,8 @@ def testISTEX(request , project_id): ...@@ -271,10 +271,8 @@ def testISTEX(request , project_id):
) )
dwnldsOK+=1 dwnldsOK+=1
if dwnldsOK == 0: return JsonHttpResponse(["fail"]) if dwnldsOK == 0: return JsonHttpResponse(["fail"])
###########################
# print(urlreqs) ###########################
try: try:
if not DEBUG: if not DEBUG:
apply_workflow.apply_async((corpus.id,),) apply_workflow.apply_async((corpus.id,),)
......
...@@ -119,11 +119,15 @@ function Final_UpdateTable( action ) { ...@@ -119,11 +119,15 @@ function Final_UpdateTable( action ) {
var current_docs = {} var current_docs = {}
var BIS_dict = {} var BIS_dict = {}
var path = window.location.pathname.match(/\/project\/(.*)\/corpus\/(.*)\//); var url_elems = window.location.href.split("/")
var projectid = path[1] var url_mainIDs = {}
var corpusid = path[2] for(var i=0; i<url_elems.length; i++) {
// if this element is a number:
var theurl = "/api/nodes/"+corpusid+"/children/duplicates?keys=title&limit=9999" if(url_elems[i]!="" && !isNaN(Number(url_elems[i]))) {
url_mainIDs[url_elems[i-1]] = Number(url_elems[i]);
}
}
var theurl = "/api/nodes/"+url_mainIDs["corpus"]+"/children/duplicates?keys=title&limit=9999"
// $.ajax({ // $.ajax({
// url: theurl, // url: theurl,
// success: function(data) { // success: function(data) {
...@@ -204,6 +208,38 @@ function transformContent(rec_id , header , content) { ...@@ -204,6 +208,38 @@ function transformContent(rec_id , header , content) {
} else return content; } else return content;
} }
// "#move2trash" button: on click, POST the ids of every record whose index is
// a key of Garbage to /tests/move2trash/, then reload the page on success.
// The button is hidden as soon as the handler is attached.
$("#move2trash")
    .click(function () {
        // Translate the row indices stored in Garbage into record ids.
        var trashedIds = [];
        for (var rowIndex in Garbage) {
            trashedIds.push(AjaxRecords[rowIndex].id);
        }
        console.log("ids to the trash:");
        console.log(trashedIds);

        var request = {
            url: "/tests/move2trash/",
            type: 'POST',
            // the ids travel as a JSON array in the "nodeids" form field
            data: "nodeids=" + JSON.stringify(trashedIds),
            beforeSend: function (xhr) {
                // CSRF token read from the "csrftoken" cookie
                xhr.setRequestHeader("X-CSRFToken", getCookie("csrftoken"));
            },
            success: function (response) {
                console.log("in #move2trash");
                console.log(response);
                location.reload();
            },
            error: function (failure) {
                console.log("Data not found in #move2trash");
                console.log(failure);
            }
        };
        $.ajax(request);
    })
    .hide();
//generic enough //generic enough
function ulWriter(rowIndex, record, columns, cellWriter) { function ulWriter(rowIndex, record, columns, cellWriter) {
// pr("\tulWriter: "+record.id) // pr("\tulWriter: "+record.id)
...@@ -224,9 +260,30 @@ function ulWriter(rowIndex, record, columns, cellWriter) { ...@@ -224,9 +260,30 @@ function ulWriter(rowIndex, record, columns, cellWriter) {
return '<tr>' + tr + '</tr>'; return '<tr>' + tr + '</tr>';
} }
// var div__filter_for_search = ''
// div__filter_for_search += '<select data-width="100px" class="selectpicker" multiple data-max-options="1">';
// div__filter_for_search += ' <optgroup label="All" data-max-options="1" selected>';
// div__filter_for_search += ' <option>Title</option>';
// div__filter_for_search += ' <option>Date</option>';
// div__filter_for_search += ' </optgroup>';
// div__filter_for_search += ' <optgroup label="Category" data-max-options="1">';
// div__filter_for_search += ' <option>Title</option>';
// div__filter_for_search += ' <option>Date</option>';
// div__filter_for_search += ' </optgroup>';
// div__filter_for_search += ' <optgroup label="Duplicates" data-max-options="1">';
// div__filter_for_search += ' <option>by DOI</option>';
// div__filter_for_search += ' <option>by Title</option>';
// div__filter_for_search += ' </optgroup>';
// div__filter_for_search += '</select>';
// $("#supmofos").html(div__filter_for_search)
// (3) Get records and hyperdata for paginator // (3) Get records and hyperdata for paginator
$.ajax({ $.ajax({
url: '/tests/paginator/corpus/'+corpusid, url: '/tests/paginator/corpus/'+url_mainIDs["corpus"],
success: function(data){ success: function(data){
console.log(data) console.log(data)
...@@ -235,7 +292,7 @@ function ulWriter(rowIndex, record, columns, cellWriter) { ...@@ -235,7 +292,7 @@ function ulWriter(rowIndex, record, columns, cellWriter) {
var orig_id = parseInt(data.records[i].id) var orig_id = parseInt(data.records[i].id)
var arr_id = parseInt(i) var arr_id = parseInt(i)
RecDict[orig_id] = arr_id; RecDict[orig_id] = arr_id;
data.records[i]["name"] = '<a target="_blank" href="/project/'+projectid+'/corpus/'+ corpusid + '/document/'+orig_id+'">'+data.records[i]["name"]+'</a>' data.records[i]["name"] = '<a target="_blank" href="/project/'+url_mainIDs["project"]+'/corpus/'+ url_mainIDs["corpus"] + '/document/'+orig_id+'">'+data.records[i]["name"]+'</a>'
data.records[i]["del"] = false data.records[i]["del"] = false
var date = data.records[i]["date"]; var date = data.records[i]["date"];
...@@ -247,36 +304,6 @@ function ulWriter(rowIndex, record, columns, cellWriter) { ...@@ -247,36 +304,6 @@ function ulWriter(rowIndex, record, columns, cellWriter) {
// $("#move2trash").prop('disabled', true); // $("#move2trash").prop('disabled', true);
// Click handler for the "#move2trash" button; the button is hidden right
// after the handler is attached.
$("#move2trash")
.click(function(){
// Collect the id of every record whose index is a key of Garbage.
var ids2trash = []
for(var i in Garbage) {
ids2trash.push(AjaxRecords[i].id);
}
console.log("ids to the trash:")
console.log(ids2trash)
// POST the ids as a JSON array in the "nodeids" form field.
$.ajax({
url: "/tests/move2trash/",
data: "nodeids="+JSON.stringify(ids2trash),
type: 'POST',
beforeSend: function(xhr) {
// CSRF token taken from the "csrftoken" cookie
xhr.setRequestHeader("X-CSRFToken", getCookie("csrftoken"));
},
success: function(data) {
console.log("in #move2trash")
console.log(data)
// reload the page once the request has succeeded
location.reload();
},
error: function(result) {
console.log("Data not found in #move2trash");
console.log(result)
}
});
})
.hide();
var t0 = AjaxRecords[0].date.split("-").map(Number) var t0 = AjaxRecords[0].date.split("-").map(Number)
...@@ -429,6 +456,13 @@ function ulWriter(rowIndex, record, columns, cellWriter) { ...@@ -429,6 +456,13 @@ function ulWriter(rowIndex, record, columns, cellWriter) {
$('<br><br><div class="imadiv"></div>').insertAfter(".dynatable-per-page") $('<br><br><div class="imadiv"></div>').insertAfter(".dynatable-per-page")
$(".dynatable-record-count").insertAfter(".imadiv") $(".dynatable-record-count").insertAfter(".imadiv")
$(".dynatable-pagination-links").insertAfter(".imadiv") $(".dynatable-pagination-links").insertAfter(".imadiv")
// console.log(RecDict)
var the_content = $("#supmofos").html();
$(""+the_content).insertAfter("#dynatable-query-search-my-ajax-table")
$("#supmofos").remove()
// .insertAfter("#dynatable-query-search-my-ajax-table")
} }
}); });
...@@ -159,8 +159,16 @@ function Final_UpdateTable( action ) { ...@@ -159,8 +159,16 @@ function Final_UpdateTable( action ) {
// Get all the duplicates using the Django-Garg API // Get all the duplicates using the Django-Garg API
var current_docs = {} var current_docs = {}
var BIS_dict = {} var BIS_dict = {}
var corpusid = window.location.href.split("corpus")[1].replace(/\//g, '')//replace all the slashes
var theurl = "/api/nodes/"+corpusid+"/children/duplicates?keys=title&limit=9999" var url_elems = window.location.href.split("/")
var url_mainIDs = {}
for(var i=0; i<url_elems.length; i++) {
// if this element is a number:
if(url_elems[i]!="" && !isNaN(Number(url_elems[i]))) {
url_mainIDs[url_elems[i-1]] = Number(url_elems[i]);
}
}
var theurl = "/api/nodes/"+url_mainIDs["corpus"]+"/children/duplicates?keys=title&limit=9999"
// $.ajax({ // $.ajax({
// url: theurl, // url: theurl,
// success: function(data) { // success: function(data) {
...@@ -364,14 +372,46 @@ $("#Clean_All").click(function(){ ...@@ -364,14 +372,46 @@ $("#Clean_All").click(function(){
// "#Save_All" button: send the flagged records to the list given by the
// "#list_id" input.
//
// FlagsBuffer maps a group key to an object whose keys are indices into
// AjaxRecords. Empty groups are removed from FlagsBuffer; nothing is sent
// when no element is flagged at all.
$("#Save_All").click(function(){

    // Count the flagged elements of EVERY group and remember the empty groups.
    // The braces matter: without them only the `if` was looped, so the sum was
    // taken once, from the last key alone (and threw on an empty FlagsBuffer).
    var sum__selected_elems = 0;
    var poubelle = []
    for(var i in FlagsBuffer) {
        var nb_flagged = Object.keys(FlagsBuffer[i]).length;
        if (nb_flagged==0) poubelle.push(i)
        sum__selected_elems += nb_flagged;
    }

    // Drop the empty groups (plain index loop: poubelle is an array).
    for(var p=0; p<poubelle.length; p++)
        delete FlagsBuffer[poubelle[p]];

    if ( sum__selected_elems>0 ) {
        console.log("")
        console.log("Do the ajax conexion with API and send this array to be processed:")

        // Resolve row indices to real record ids into a separate object, so
        // that FlagsBuffer keeps its index maps and a second click still
        // resolves the right records.
        var ids_by_group = {}
        var real_ids = []
        for(var i in FlagsBuffer) {
            real_ids = []
            for (var j in FlagsBuffer[i])
                real_ids.push( AjaxRecords[j].id );
            ids_by_group[i] = real_ids
        }
        console.log(ids_by_group)

        var list_id = $("#list_id").val()
        // '/annotations/lists/'+list_id+'/ngrams/108642'
        var save_url = window.location.origin+'/annotations/lists/'+list_id+"/multiple"
        console.log(save_url)

        // NOTE(review): as before, only the ids of the last group iterated are
        // posted; confirm whether every group should be sent.
        console.log(real_ids)
        $.ajax({
            method: "POST",
            url: save_url,
            data: "to_delete="+JSON.stringify(real_ids),
            beforeSend: function(xhr) {
                // CSRF token taken from the "csrftoken" cookie
                xhr.setRequestHeader("X-CSRFToken", getCookie("csrftoken"));
            },
            success: function(data){
                console.log(data)
            },
            error: function(result) {
                console.log("Data not found in #Save_All");
                console.log(result)
            }
        });
        // console.log("")
    }
});
......
...@@ -5,6 +5,8 @@ ...@@ -5,6 +5,8 @@
{% load staticfiles %} {% load staticfiles %}
<link rel="stylesheet" type="text/css" href="{% static "css/bootstrap.css" %}"> <link rel="stylesheet" type="text/css" href="{% static "css/bootstrap.css" %}">
<link rel="stylesheet" type="text/css" href="{% static "js/bootstrap/bootstrap-select.min.css" %}">
<link rel="stylesheet" type="text/css" href="{% static "css/morris.css" %}"> <link rel="stylesheet" type="text/css" href="{% static "css/morris.css" %}">
<link rel="stylesheet" type="text/css" href="{% static "css/jquery.easy-pie-chart.css"%}"> <link rel="stylesheet" type="text/css" href="{% static "css/jquery.easy-pie-chart.css"%}">
...@@ -132,7 +134,6 @@ th a { ...@@ -132,7 +134,6 @@ th a {
</div> </div>
</div> </div>
<div class="container"> <div class="container">
<div class="row"> <div class="row">
<div class="col-md-4"> <div class="col-md-4">
...@@ -178,8 +179,25 @@ th a { ...@@ -178,8 +179,25 @@ th a {
</div> </div>
</div> </div>
<div id="supmofos">
<select data-width="100px" dir="ltr" class="selectpicker">
<option selected>All</option>
<option>Title</option>
<option>Date</option>
<optgroup label="Duplicates">
<option>by DOI</option>
<option>by Title</option>
</optgroup>
</select>
</div>
<script type="text/javascript" src="{% static "js/jquery/jquery.min.js" %}"></script> <script type="text/javascript" src="{% static "js/jquery/jquery.min.js" %}"></script>
<script src="{% static "js/charts/bootstrap.min.js" %}"></script> <script src="{% static "js/charts/bootstrap.min.js" %}"></script>
<script type="text/javascript" src="{% static "js/bootstrap/bootstrap-select.min.js" %}"></script>
<script type="text/javascript" src="{% static "js/jquery/jquery.dynatable.js" %}"></script> <script type="text/javascript" src="{% static "js/jquery/jquery.dynatable.js" %}"></script>
<!-- custom-lib for dynatable.js and dc.js --> <!-- custom-lib for dynatable.js and dc.js -->
......
...@@ -266,7 +266,7 @@ ...@@ -266,7 +266,7 @@
success: function(data) { success: function(data) {
console.log("in doTheQuery() Ajax.Success:") console.log("in doTheQuery() Ajax.Success:")
console.log(data) console.log(data)
//location.reload(); location.reload();
}, },
error: function(result) { error: function(result) {
console.log("in doTheQuery(). Data not found"); console.log("in doTheQuery(). Data not found");
...@@ -487,7 +487,6 @@ ...@@ -487,7 +487,6 @@
var pubmedifiedQuery = { query : query , string: query } var pubmedifiedQuery = { query : query , string: query }
// console.log(pubmedifiedQuery)
var projectid = window.location.href.split("project")[1].replace(/\//g, '')//replace all the slashes var projectid = window.location.href.split("project")[1].replace(/\//g, '')//replace all the slashes
...@@ -502,7 +501,7 @@ ...@@ -502,7 +501,7 @@
success: function(data) { success: function(data) {
console.log("ajax_success: in testISTEX()") console.log("ajax_success: in testISTEX()")
console.log(data) console.log(data)
//location.reload(); location.reload();
}, },
error: function(result) { error: function(result) {
console.log("in testISTEX(). Data not found"); console.log("in testISTEX(). Data not found");
......
...@@ -50,6 +50,46 @@ from gargantext_web import about ...@@ -50,6 +50,46 @@ from gargantext_web import about
from gargantext_web.api import JsonHttpResponse from gargantext_web.api import JsonHttpResponse
from ngram.lists import listIds, listNgramIds, ngramList , doList
def test_page(request , project_id , corpus_id):
    """Render the 'tests/test_select-boostrap.html' page for one corpus.

    Unauthenticated users are redirected to the login page. If either id
    cannot be converted to an integer, Http404 is raised.

    The template receives: the debug flag from the settings, the id of the
    current user, the current date, the project and corpus nodes,
    ``processing`` (``corpus.hyperdata['Processing']``, or 0 when it cannot
    be read) and ``number`` (count of nodes of type 'Document' whose parent
    is the corpus).
    """
    if not request.user.is_authenticated():
        return redirect('/login/?next=%s' % request.path)

    # Both ids have to be integers, otherwise the page does not exist.
    try:
        project_pk = int(project_id)
        corpus_pk = int(corpus_id)
    except ValueError:
        raise Http404()

    template = get_template('tests/test_select-boostrap.html')

    user_id = cache.User[request.user.username].id
    now = datetime.datetime.now()
    project_node = cache.Node[project_pk]
    corpus_node = cache.Node[corpus_pk]

    # Number of documents directly under the corpus.
    document_type_id = cache.NodeType['Document'].id
    count_query = session.query(func.count(Node.id)).filter(
        Node.parent_id==corpus_id,
        Node.type_id==document_type_id,
    )
    document_count = count_query.all()[0][0]

    # Best effort: a corpus without a readable 'Processing' entry counts as 0.
    try:
        processing_state = corpus_node.hyperdata['Processing']
    except Exception as error:
        print(error)
        processing_state = 0

    context = Context({
        'debug': settings.DEBUG,
        'user': user_id,
        'date': now,
        'project': project_node,
        'corpus' : corpus_node,
        'processing' : processing_state,
        'number' : document_count,
    })
    return HttpResponse(template.render(context))
def get_ngrams(request , project_id , corpus_id ): def get_ngrams(request , project_id , corpus_id ):
if not request.user.is_authenticated(): if not request.user.is_authenticated():
return redirect('/login/?next=%s' % request.path) return redirect('/login/?next=%s' % request.path)
...@@ -68,6 +108,13 @@ def get_ngrams(request , project_id , corpus_id ): ...@@ -68,6 +108,13 @@ def get_ngrams(request , project_id , corpus_id ):
corpus = cache.Node[int(corpus_id)] corpus = cache.Node[int(corpus_id)]
type_doc_id = cache.NodeType['Document'].id type_doc_id = cache.NodeType['Document'].id
number = session.query(func.count(Node.id)).filter(Node.parent_id==corpus_id, Node.type_id==type_doc_id).all()[0][0] number = session.query(func.count(Node.id)).filter(Node.parent_id==corpus_id, Node.type_id==type_doc_id).all()[0][0]
lists = dict()
for list_type in ['MiamList', 'StopList']:
list_id = list()
list_id = listIds(user_id=request.user.id, corpus_id=int(corpus_id), typeList=list_type)
lists["%s" % list_id[0][0]] = list_type
try: try:
processing = corpus.hyperdata['Processing'] processing = corpus.hyperdata['Processing']
except Exception as error: except Exception as error:
...@@ -82,11 +129,40 @@ def get_ngrams(request , project_id , corpus_id ): ...@@ -82,11 +129,40 @@ def get_ngrams(request , project_id , corpus_id ):
'corpus' : corpus, 'corpus' : corpus,
'processing' : processing, 'processing' : processing,
'number' : number, 'number' : number,
'list_id': list_id[0][0],
})) }))
return HttpResponse(html) return HttpResponse(html)
def test_test(request , corpus_id , doc_id):
    """Debug view: print what is in the corpus' StopList.

    Looks up the 'StopList' of ``corpus_id`` for the current user with
    ``listIds``, fetches its rows with ``listNgramIds`` and prints, on the
    server console, the list id and the first column of every row
    (presumably the ngram ids -- confirm against ``listNgramIds``).

    Anonymous users are redirected to the login page, like in the sibling
    views of this module, because ``request.user.id`` is needed for the
    lookups. Any other request is answered with the placeholder JSON payload
    ``["hola", "mundo"]``.

    Raises:
        ValueError: if ``corpus_id`` or ``doc_id`` is not an integer string.
        IndexError: if ``listIds`` returns no StopList for this corpus.
    """
    if not request.user.is_authenticated():
        return redirect('/login/?next=%s' % request.path)

    corpus_id = int(corpus_id)
    doc_id = int(doc_id)  # validated only; not used below (see NOTE)

    stop_lists = listIds(user_id=request.user.id, corpus_id=corpus_id, typeList='StopList')
    stop_list_id = stop_lists[0][0]
    print(stop_list_id)

    # ngrams of the stop list of corpus_id.
    # NOTE(review): the list id is also passed as doc_id, exactly as
    # test_ngrams does; confirm whether the doc_id from the URL was intended.
    doc_ngram_list = listNgramIds(corpus_id=corpus_id, list_id=stop_list_id, doc_id=stop_list_id, user_id=request.user.id)

    to_del = {n[0]: True for n in doc_ngram_list}
    print( to_del.keys() )

    results = [ "hola" , "mundo" ]
    return JsonHttpResponse(results)
def test_ngrams(request , project_id, corpus_id ): def test_ngrams(request , project_id, corpus_id ):
results = ["hola" , "mundo"] results = ["hola" , "mundo"]
...@@ -94,6 +170,17 @@ def test_ngrams(request , project_id, corpus_id ): ...@@ -94,6 +170,17 @@ def test_ngrams(request , project_id, corpus_id ):
whitelist_type_id = cache.NodeType['WhiteList'].id whitelist_type_id = cache.NodeType['WhiteList'].id
document_type_id = cache.NodeType['Document'].id document_type_id = cache.NodeType['Document'].id
corpus_id = int(corpus_id)
lists = dict()
for list_type in ['StopList']:
list_id = list()
list_id = listIds(user_id=request.user.id, corpus_id=int(corpus_id), typeList=list_type)
lists["%s" % list_id[0][0]] = list_type
doc_ngram_list = listNgramIds(corpus_id=corpus_id, list_id=list_id[0][0], doc_id=list_id[0][0], user_id=request.user.id)
StopList = {}
for n in doc_ngram_list:
StopList[ n[0] ] = True
# # 13099 clinical benefits # # 13099 clinical benefits
# # 7492 recent data # # 7492 recent data
# # 14279 brain development # # 14279 brain development
...@@ -145,15 +232,16 @@ def test_ngrams(request , project_id, corpus_id ): ...@@ -145,15 +232,16 @@ def test_ngrams(request , project_id, corpus_id ):
for doc in documents: for doc in documents:
NgramOccs = session.query(Node_Ngram).filter( Node_Ngram.node_id==doc.id).all() NgramOccs = session.query(Node_Ngram).filter( Node_Ngram.node_id==doc.id).all()
for ngram in NgramOccs: for ngram in NgramOccs:
if ngram.ngram_id not in Ngrams_Scores: if ngram.ngram_id not in StopList:
Ngrams_Scores[ngram.ngram_id] = {} if ngram.ngram_id not in Ngrams_Scores:
Ngrams_Scores[ngram.ngram_id]["scores"] = { Ngrams_Scores[ngram.ngram_id] = {}
"occ_sum": 0.0, Ngrams_Scores[ngram.ngram_id]["scores"] = {
"occ_uniq": 0.0, "occ_sum": 0.0,
"tfidf_sum": 0.0 "occ_uniq": 0.0,
} "tfidf_sum": 0.0
Ngrams_Scores[ngram.ngram_id]["scores"]["occ_sum"]+=ngram.weight }
Ngrams_Scores[ngram.ngram_id]["scores"]["occ_uniq"]+=1 Ngrams_Scores[ngram.ngram_id]["scores"]["occ_sum"]+=ngram.weight
Ngrams_Scores[ngram.ngram_id]["scores"]["occ_uniq"]+=1
# print("\t" , ngram.ngram_id , "\t" , ngram.weight ) # print("\t" , ngram.ngram_id , "\t" , ngram.weight )
## Getting the Effective nro de OCCS / >## ## Getting the Effective nro de OCCS / >##
...@@ -177,9 +265,10 @@ def test_ngrams(request , project_id, corpus_id ): ...@@ -177,9 +265,10 @@ def test_ngrams(request , project_id, corpus_id ):
Sum = 0 Sum = 0
NgramTFIDF = session.query(NodeNodeNgram).filter( NodeNodeNgram.nodex_id==corpus_id ).all() NgramTFIDF = session.query(NodeNodeNgram).filter( NodeNodeNgram.nodex_id==corpus_id ).all()
for ngram in NgramTFIDF: for ngram in NgramTFIDF:
Ngrams_Scores[ngram.ngram_id]["scores"]["tfidf_sum"] += ngram.score if ngram.ngram_id not in StopList:
Sum += Ngrams_Scores[ngram.ngram_id]["scores"]["occ_uniq"] Ngrams_Scores[ngram.ngram_id]["scores"]["tfidf_sum"] += ngram.score
# print( "docid:", ngram.nodey_id , ngram.ngram_id , ngram.score) Sum += Ngrams_Scores[ngram.ngram_id]["scores"]["occ_uniq"]
# print( "docid:", ngram.nodey_id , ngram.ngram_id , ngram.score)
# import pprint # import pprint
...@@ -210,13 +299,14 @@ def test_ngrams(request , project_id, corpus_id ): ...@@ -210,13 +299,14 @@ def test_ngrams(request , project_id, corpus_id ):
query = session.query(Ngram).filter(Ngram.id.in_( ngrams_ids )) query = session.query(Ngram).filter(Ngram.id.in_( ngrams_ids ))
ngrams_data = query.all() ngrams_data = query.all()
for ngram in ngrams_data: for ngram in ngrams_data:
occ_uniq = Ngrams_Scores[ngram.id]["scores"]["occ_uniq"] if ngram.id not in StopList:
if occ_uniq > occs_threshold: occ_uniq = Ngrams_Scores[ngram.id]["scores"]["occ_uniq"]
Ngrams_Scores[ngram.id]["name"] = ngram.terms if occ_uniq > occs_threshold:
Ngrams_Scores[ngram.id]["id"] = ngram.id Ngrams_Scores[ngram.id]["name"] = ngram.terms
Ngrams_Scores[ngram.id]["scores"]["tfidf"] = Ngrams_Scores[ngram.id]["scores"]["tfidf_sum"] / occ_uniq Ngrams_Scores[ngram.id]["id"] = ngram.id
del Ngrams_Scores[ngram.id]["scores"]["tfidf_sum"] Ngrams_Scores[ngram.id]["scores"]["tfidf"] = Ngrams_Scores[ngram.id]["scores"]["tfidf_sum"] / occ_uniq
Metrics["ngrams"].append( Ngrams_Scores[ngram.id] ) del Ngrams_Scores[ngram.id]["scores"]["tfidf_sum"]
Metrics["ngrams"].append( Ngrams_Scores[ngram.id] )
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment