Commit 9b2e7006 authored by PkSM3's avatar PkSM3

[UPDATE] GET stoplist|subforms with occs+tfidf

parent aa3fbe2c
...@@ -12,6 +12,8 @@ import datetime ...@@ -12,6 +12,8 @@ import datetime
import copy import copy
import json import json
from gargantext_web.db import cache
from gargantext_web.validation import validate, ValidationException from gargantext_web.validation import validate, ValidationException
from gargantext_web.db import session, Node, NodeNgram, NodeNgramNgram\ from gargantext_web.db import session, Node, NodeNgram, NodeNgramNgram\
...@@ -74,16 +76,94 @@ from rest_framework.decorators import api_view ...@@ -74,16 +76,94 @@ from rest_framework.decorators import api_view
# TODO how to secure REST ? # TODO how to secure REST ?
def get_occtfidf(ngrams, user_id, corpus_id, list_name):
    """Return occurrence and tf-idf information for a set of ngrams.

    Args:
        ngrams: ids of the ngrams to describe (used in an SQL ``IN`` filter).
        user_id: id of the user owning the list node used for the
            occurrence count.
        corpus_id: id of the corpus node; documents and lists are looked up
            as nodes whose ``parent_id`` is this id.
        list_name: node type name of the list (a key of ``cache.NodeType``,
            also passed to ``get_or_create_node``).

    Returns:
        A dict mapping each found ngram id to a dict with the keys:
          - "id": the ngram id
          - "name": the ngram terms
          - "occ_uniq": sum of ``NodeNgram.weight`` over the corpus nodes of
            type 'Document', for ngrams present in the user's list;
            defaults to 1 when no such sum is available
          - "tfidf": the score stored under the corpus' 'Tfidf (global)'
            node; defaults to 0.01 when no score is available
    """
    ngram_ids = {}
    corpus = session.query(Node).filter(Node.id == corpus_id).first()

    # Seed the result with one entry per requested ngram that exists.
    for ngram in session.query(Ngram).filter(Ngram.id.in_(ngrams)).all():
        ngram_ids[ngram.id] = {
            "id": ngram.id,
            "name": ngram.terms,
        }

    # [ Get Uniq_Occs ]
    user_list = (session.query(Node)
        .filter(Node.user_id == user_id,
                Node.parent_id == corpus_id,
                Node.type_id == cache.NodeType[list_name].id)
        .first()
    )
    ngrams_occs = []
    # The user may not own a list of this type yet: in that case there is
    # nothing to sum and every ngram falls back to the default below,
    # instead of failing on `None.id`.
    if user_list is not None:
        Miam = aliased(NodeNgram)
        ngrams_occs = (session.query(NodeNgram.ngram_id, func.sum(NodeNgram.weight))
            .join(Node, Node.id == NodeNgram.node_id)
            .join(Miam, Miam.ngram_id == NodeNgram.ngram_id)
            .filter(Node.parent_id == corpus_id, Node.type_id == cache.NodeType['Document'].id)
            .filter(Miam.node_id == user_list.id)
            .group_by(NodeNgram.ngram_id)
            .all()
        )
    # [ / Get Uniq_Occs ]

    # The query covers the whole list, so rows for ngrams that were not
    # requested are simply skipped.
    for ngram_id, occurrences in ngrams_occs:
        if ngram_id in ngram_ids:
            ngram_ids[ngram_id]["occ_uniq"] = occurrences
    for entry in ngram_ids.values():
        entry.setdefault("occ_uniq", 1)

    # [ Get Tfidf ]
    Tfidf = aliased(NodeNodeNgram)
    tfidf_id = get_or_create_node(nodetype='Tfidf (global)', corpus=corpus).id
    TheList = aliased(NodeNgram)
    list_id = get_or_create_node(nodetype=list_name, corpus=corpus).id
    ngrams_query = (session
        .query(Ngram.id, Ngram.terms)
        .join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
        .join(Node, Node.id == Node_Ngram.node_id)
        .add_column(Tfidf.score.label('tfidf'))
        .join(Tfidf, Tfidf.ngram_id == Ngram.id)
        .filter(Tfidf.nodex_id == tfidf_id)
        .filter(Node.parent_id == corpus_id)
        .group_by(Ngram.id, Ngram.terms, Tfidf.score)
        .join(TheList, TheList.ngram_id == Ngram.id)
        .filter(TheList.node_id == list_id)
    )
    # [ / Get Tfidf ]

    for ngram_id, _terms, tfidf in ngrams_query:
        if ngram_id in ngram_ids:
            ngram_ids[ngram_id]["tfidf"] = tfidf
    for entry in ngram_ids.values():
        entry.setdefault("tfidf", 0.01)

    return ngram_ids
class List(APIView): class List(APIView):
def get(self, request, corpus_id , list_name ): def get(self, request, corpus_id , list_name ):
corpus = session.query(Node).filter( Node.id==corpus_id ).first() corpus = session.query(Node).filter( Node.id==corpus_id ).first()
list_name = list_name.title()+"List" list_name = list_name.title()+"List"
node_mapList = get_or_create_node(nodetype=list_name, corpus=corpus ) node_list = get_or_create_node(nodetype=list_name, corpus=corpus )
nodes_in_map = session.query(NodeNgram).filter(NodeNgram.node_id==node_mapList.id ).all() nodes_ngrams = session.query(NodeNgram).filter(NodeNgram.node_id==node_list.id ).all()
results = {}
for node in nodes_in_map: ngram_ids = {}
results[node.ngram_id] = True for node in nodes_ngrams:
return JsonHttpResponse(results) ngram_ids[node.ngram_id] = True
ngrams = [int(i) for i in list(ngram_ids.keys())]
ngram_ids = get_occtfidf( ngrams , request.user.id , corpus_id , list_name)
return JsonHttpResponse(ngram_ids)
class Ngrams(APIView): class Ngrams(APIView):
...@@ -107,7 +187,24 @@ class Ngrams(APIView): ...@@ -107,7 +187,24 @@ class Ngrams(APIView):
the_score = "tfidf" the_score = "tfidf"
if request.GET.get('score', False) != False: if request.GET.get('score', False) != False:
the_score = request.GET['score'] the_score = request.GET['score']
# get the scores # # get the scores
# NOTE: could not fetch the ngram occurrences here using aliased() together with get_or_create_node
# if 'occs' in the_score:
# print("OOOOOOOCCCSSSS:")
# miamlist = session.query(Node).filter(Node.user_id == request.user.id , Node.parent_id==node_id , Node.type_id == cache.NodeType['MiamList'].id ).first()
# print( miamlist )
# Miam = aliased(NodeNgram)
# ngrams_query = ( session.query(NodeNgram.ngram_id, func.sum(NodeNgram.weight))
# .join(Node, Node.id == NodeNgram.node_id)
# .join(Miam, Miam.ngram_id == NodeNgram.ngram_id)
# .filter(Node.parent_id == node_id, Node.type_id==cache.NodeType['Document'].id)
# .filter(Miam.node_id==miamlist.id)
# .group_by(NodeNgram.ngram_id)
# .all()
# )
# for i in ngrams_query:
# print(i)
if 'tfidf' in the_score: if 'tfidf' in the_score:
Tfidf = aliased(NodeNodeNgram) Tfidf = aliased(NodeNodeNgram)
tfidf_id = get_or_create_node(nodetype='Tfidf (global)', corpus=corpus).id tfidf_id = get_or_create_node(nodetype='Tfidf (global)', corpus=corpus).id
...@@ -305,25 +402,15 @@ class Group(APIView): ...@@ -305,25 +402,15 @@ class Group(APIView):
mainNode_sinonims.append( node ) mainNode_sinonims.append( node )
groups["links"][ mainNode ] = mainNode_sinonims groups["links"][ mainNode ] = mainNode_sinonims
# for i in groups["nodes"]:
# print(i)
ngrams = [int(i) for i in list(groups["nodes"].keys())]
groups["nodes"] = get_occtfidf( ngrams , request.user.id , corpus_id , "Group")
return JsonHttpResponse(groups) return JsonHttpResponse(groups)
def post(self, request, node_id): def post(self, request, node_id):
# # input validation
# input = validate(request.DATA, {'data' : {'source': int, 'target': list}})
# group_id = get_group_id(node_id)
# for data in input['data']:
# if data['source'] > 0 and len(data['target']) > 0:
# for target_id in data['target']:
# if target_id > 0:
# session.add(NodeNgramNgram(node_id=group_id, \
# ngramx_id=output['source'], ngramy_id=target_id, score=1))
# session.commit()
# return JsonHttpResponse(True, 201)
# else:
# raise APIException('Missing parameter: "{\'data\' : [\'source\': Int, \'target\': [Int]}"', 400)
return JsonHttpResponse( ["hola" , "mundo"] ) return JsonHttpResponse( ["hola" , "mundo"] )
def delete(self, request, corpus_id): def delete(self, request, corpus_id):
......
...@@ -490,7 +490,7 @@ $("#Save_All").click(function(){ ...@@ -490,7 +490,7 @@ $("#Save_All").click(function(){
FlagsBuffer["inmap"] = {} FlagsBuffer["inmap"] = {}
for(var id in AjaxRecords) { for(var id in AjaxRecords) {
if( ngrams_map[ AjaxRecords[id]["id"] ] ) { if( NGrams["map"][ AjaxRecords[id]["id"] ] ) {
if(AjaxRecords[id]["state"]==0 || AjaxRecords[id]["state"]==2) { if(AjaxRecords[id]["state"]==0 || AjaxRecords[id]["state"]==2) {
FlagsBuffer["outmap"][ AjaxRecords[id].id ] = true FlagsBuffer["outmap"][ AjaxRecords[id].id ] = true
if(AjaxRecords[id]["state"]==2) { if(AjaxRecords[id]["state"]==2) {
...@@ -510,10 +510,10 @@ $("#Save_All").click(function(){ ...@@ -510,10 +510,10 @@ $("#Save_All").click(function(){
} }
} }
// [ = = = = For deleting subforms = = = = ] // [ = = = = For deleting subforms = = = = ]
for(var i in ngrams_groups.links) { for(var i in NGrams["group"].links) {
if(FlagsBuffer["delete"][i]) { if(FlagsBuffer["delete"][i]) {
for(var j in ngrams_groups.links[i] ) { for(var j in NGrams["group"].links[i] ) {
FlagsBuffer["delete"][ngrams_groups.links[i][j]] = true FlagsBuffer["delete"][NGrams["group"].links[i][j]] = true
} }
for(var j in FlagsBuffer["delete"][i] ) { for(var j in FlagsBuffer["delete"][i] ) {
FlagsBuffer["delete"][FlagsBuffer["delete"][i][j]] = true FlagsBuffer["delete"][FlagsBuffer["delete"][i][j]] = true
...@@ -860,7 +860,7 @@ function SearchFilters( elem ) { ...@@ -860,7 +860,7 @@ function SearchFilters( elem ) {
if( MODE == "filter_all") { if( MODE == "filter_all") {
console.clear() console.clear()
var result = Main_test( ngrams_data , ngrams_data.scores.initial , MODE) var result = Main_test( NGrams["main"] , NGrams["main"].scores.initial , MODE)
console.log( result ) console.log( result )
MyTable.data('dynatable').sorts.clear(); MyTable.data('dynatable').sorts.clear();
...@@ -871,19 +871,19 @@ function SearchFilters( elem ) { ...@@ -871,19 +871,19 @@ function SearchFilters( elem ) {
if( MODE == "filter_map-list") { if( MODE == "filter_map-list") {
console.clear() console.clear()
console.log("ngrams_map:") console.log("ngrams_map:")
console.log(ngrams_map) console.log(NGrams["map"])
var sub_ngrams_data = { var sub_ngrams_data = {
"ngrams":[], "ngrams":[],
"scores": $.extend({}, ngrams_data.scores) "scores": $.extend({}, NGrams["main"].scores)
} }
for(var r in ngrams_data.ngrams) { for(var r in NGrams["main"].ngrams) {
if ( ngrams_map[ngrams_data.ngrams[r].id] ) { if ( NGrams["map"][NGrams["main"].ngrams[r].id] ) {
sub_ngrams_data["ngrams"].push( ngrams_data.ngrams[r] ) sub_ngrams_data["ngrams"].push( NGrams["main"].ngrams[r] )
} }
} }
var result = Main_test(sub_ngrams_data , ngrams_data.scores.initial , MODE) var result = Main_test(sub_ngrams_data , NGrams["main"].scores.initial , MODE)
console.log( result ) console.log( result )
// MyTable.data('dynatable').sorts.clear(); // MyTable.data('dynatable').sorts.clear();
// MyTable.data('dynatable').sorts.add('score', 0) // 1=ASCENDING, // MyTable.data('dynatable').sorts.add('score', 0) // 1=ASCENDING,
...@@ -892,40 +892,14 @@ function SearchFilters( elem ) { ...@@ -892,40 +892,14 @@ function SearchFilters( elem ) {
if( MODE == "filter_stop-list") { if( MODE == "filter_stop-list") {
if(Object.keys(ngrams_stop).length<1) { if(Object.keys(NGrams["stop"]).length<1) {
var corpus_id = getIDFromURL( "corpus" )
var someurl = window.location.origin+"/api/node/"+corpus_id+"/ngrams/list/stop";
$.ajax({
type: "GET",
url: someurl,
dataType: "json",
success : function(data, textStatus, jqXHR) {
console.clear()
console.log("ngrams_stop:")
console.log( data )
ngrams_stop = data
var sub_ngrams_data = { var sub_ngrams_data = {
"ngrams":[], "ngrams":[],
"scores": $.extend({}, ngrams_data.scores) "scores": $.extend({}, NGrams["main"].scores)
} }
for(var r in ngrams_data.ngrams) { for(var r in NGrams["main"].ngrams) {
if ( ngrams_stop[ngrams_data.ngrams[r].id] ) { if ( NGrams["stop"][ NGrams["main"].ngrams[r].id ] ) {
sub_ngrams_data["ngrams"].push( ngrams_data.ngrams[r] ) sub_ngrams_data["ngrams"].push( NGrams["main"].ngrams[r] )
}
}
},
error: function(exception) {
console.log("second ajax, exception!: "+exception.status)
}
})
} else {
var sub_ngrams_data = {
"ngrams":[],
"scores": $.extend({}, ngrams_data.scores)
}
for(var r in ngrams_data.ngrams) {
if ( ngrams_stop[ngrams_data.ngrams[r].id] ) {
sub_ngrams_data["ngrams"].push( ngrams_data.ngrams[r] )
} }
} }
} }
...@@ -945,37 +919,35 @@ function getIDFromURL( item ) { ...@@ -945,37 +919,35 @@ function getIDFromURL( item ) {
return pageurl[cid+1]; return pageurl[cid+1];
} }
var StopList = {}
function test_getlist( list_name ) {
// node/(?P<corpus_id>[0-9]+)/ngrams/list/(?P<list_name>\w+)
var corpus_id = getIDFromURL( "corpus" )
var someurl = window.location.origin+"/api/node/"+corpus_id+"/ngrams/list/"+list_name;
$.ajax({
type: "GET",
url: someurl,
dataType: "json",
success : function(data, textStatus, jqXHR) {
console.log( data )
StopList = data
},
error: function(exception) {
console.log("second ajax, exception!: "+exception.status)
}
})
}
// [ = = = = = = = = = = INIT = = = = = = = = = = ] // [ = = = = = = = = = = INIT = = = = = = = = = = ]
var corpus_id = getIDFromURL( "corpus" ) var corpus_id = getIDFromURL( "corpus" )
var url1=window.location.origin+"/api/node/"+corpus_id+"/ngrams/group", var url0=window.location.origin+"/api/node/"+corpus_id+"/ngrams/list/stop",
url1=window.location.origin+"/api/node/"+corpus_id+"/ngrams/group",
url2=window.location.origin+"/api/node/"+corpus_id+"/ngrams/keep", url2=window.location.origin+"/api/node/"+corpus_id+"/ngrams/keep",
url3=window.location.href+"/ngrams.json"; url3=window.location.href+"/ngrams.json";
var ngrams_groups = {}, ngrams_map = {}, ngrams_stop = {} , ngrams_data = {}; var NGrams = {
"group" : {},
"stop" : {},
"miam" : {},
"map" : {},
"scores" : {}
}
$.when( $.when(
$.ajax({
type: "GET",
url: url0,
dataType: "json",
success : function(data, textStatus, jqXHR) { NGrams["stop"] = data },
error: function(exception) {
console.log("first ajax, exception!: "+exception.status)
}
}),
$.ajax({ $.ajax({
type: "GET", type: "GET",
url: url1, url: url1,
dataType: "json", dataType: "json",
success : function(data, textStatus, jqXHR) { ngrams_groups = data }, success : function(data, textStatus, jqXHR) { NGrams["group"] = data },
error: function(exception) { error: function(exception) {
console.log("first ajax, exception!: "+exception.status) console.log("first ajax, exception!: "+exception.status)
} }
...@@ -984,7 +956,7 @@ $.when( ...@@ -984,7 +956,7 @@ $.when(
type: "GET", type: "GET",
url: url2, url: url2,
dataType: "json", dataType: "json",
success : function(data, textStatus, jqXHR) { ngrams_map = data }, success : function(data, textStatus, jqXHR) { NGrams["map"] = data },
error: function(exception) { error: function(exception) {
console.log("first ajax, exception!: "+exception.status) console.log("first ajax, exception!: "+exception.status)
} }
...@@ -993,7 +965,7 @@ $.when( ...@@ -993,7 +965,7 @@ $.when(
type: "GET", type: "GET",
url: url3, url: url3,
dataType: "json", dataType: "json",
success : function(data, textStatus, jqXHR) { ngrams_data = data }, success : function(data, textStatus, jqXHR) { NGrams["main"] = data },
error: function(exception) { error: function(exception) {
console.log("second ajax, exception!: "+exception.status) console.log("second ajax, exception!: "+exception.status)
} }
...@@ -1001,40 +973,40 @@ $.when( ...@@ -1001,40 +973,40 @@ $.when(
).then(function() { ).then(function() {
// Deleting subforms from the ngrams-table, clean start baby! // Deleting subforms from the ngrams-table, clean start baby!
if( Object.keys(ngrams_groups.links).length>0 ) { if( Object.keys(NGrams["group"].links).length>0 ) {
var _forms = { "main":{} , "sub":{} } var _forms = { "main":{} , "sub":{} }
for(var i in ngrams_groups.links) { for(var i in NGrams["group"].links) {
_forms["main"][i] = true _forms["main"][i] = true
for(var j in ngrams_groups.links[i]) { for(var j in NGrams["group"].links[i]) {
_forms["sub"][ ngrams_groups.links[i][j] ] = true _forms["sub"][ NGrams["group"].links[i][j] ] = true
} }
} }
var ngrams_data_ = [] var ngrams_data_ = []
for(var i in ngrams_data.ngrams) { for(var i in NGrams["main"].ngrams) {
if(_forms["sub"][ngrams_data.ngrams[i].id]) { if(_forms["sub"][NGrams["main"].ngrams[i].id]) {
ngrams_groups["nodes"][ngrams_data.ngrams[i].id] = ngrams_data.ngrams[i] NGrams["group"]["nodes"][NGrams["main"].ngrams[i].id] = NGrams["main"].ngrams[i]
} else { } else {
if( _forms["main"][ ngrams_data.ngrams[i].id ] ) if( _forms["main"][ NGrams["main"].ngrams[i].id ] )
ngrams_data.ngrams[i].name = "*"+ngrams_data.ngrams[i].name NGrams["main"].ngrams[i].name = "*"+NGrams["main"].ngrams[i].name
ngrams_data_.push( ngrams_data.ngrams[i] ) ngrams_data_.push( NGrams["main"].ngrams[i] )
} }
} }
ngrams_data.ngrams = ngrams_data_; NGrams["main"].ngrams = ngrams_data_;
} }
if( Object.keys(ngrams_map).length>0 ) { if( Object.keys(NGrams["map"]).length>0 ) {
for(var i in ngrams_data.ngrams) { for(var i in NGrams["main"].ngrams) {
if(ngrams_map[ngrams_data.ngrams[i].id]) { if(NGrams["map"][NGrams["main"].ngrams[i].id]) {
ngrams_data.ngrams[i]["map"] = true NGrams["main"].ngrams[i]["map"] = true
} }
} }
} }
// Building the Score-Selector // Building the Score-Selector //NGrams["scores"]
var FirstScore = ngrams_data.scores.initial var FirstScore = NGrams["main"].scores.initial
var possible_scores = Object.keys( ngrams_data.ngrams[0].scores ); var possible_scores = Object.keys( NGrams["main"].ngrams[0].scores );
var scores_div = '<br><select style="font-size:25px;" class="span1" id="scores_selector">'+"\n"; var scores_div = '<br><select style="font-size:25px;" class="span1" id="scores_selector">'+"\n";
scores_div += "\t"+'<option value="'+FirstScore+'">'+FirstScore+'</option>'+"\n" scores_div += "\t"+'<option value="'+FirstScore+'">'+FirstScore+'</option>'+"\n"
for( var i in possible_scores ) { for( var i in possible_scores ) {
...@@ -1043,8 +1015,8 @@ $.when( ...@@ -1043,8 +1015,8 @@ $.when(
} }
} }
// Initializing the Charts and Table // Initializing the Charts and Table
console.log( ngrams_data ) console.log( NGrams["main"] )
var result = Main_test( ngrams_data , FirstScore , "filter_all") var result = Main_test( NGrams["main"] , FirstScore , "filter_all")
console.log( result ) console.log( result )
// Listener for onchange Score-Selector // Listener for onchange Score-Selector
...@@ -1052,7 +1024,7 @@ $.when( ...@@ -1052,7 +1024,7 @@ $.when(
$("#ScoresBox").html(scores_div) $("#ScoresBox").html(scores_div)
$("#scores_selector").on('change', function() { $("#scores_selector").on('change', function() {
console.log( this.value ) console.log( this.value )
var result = Main_test( ngrams_data , this.value , "filter_all") var result = Main_test( NGrams["main"] , this.value , "filter_all")
console.log( result ) console.log( result )
}); });
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment