Commit 9b2e7006 authored by PkSM3

[UPDATE] GET stoplist|subforms with occs+tfidf

parent aa3fbe2c
......@@ -12,6 +12,8 @@ import datetime
import copy
import json
from gargantext_web.db import cache
from gargantext_web.validation import validate, ValidationException
from gargantext_web.db import session, Node, NodeNgram, NodeNgramNgram\
......@@ -74,16 +76,94 @@ from rest_framework.decorators import api_view
# TODO how to secure REST ?
def get_occtfidf(ngrams, user_id, corpus_id, list_name):
    """Describe a set of ngrams with their occurrence count and tf-idf score.

    Args:
        ngrams: collection of ngram ids to describe.
        user_id: id of the user owning the list node that restricts the
            occurrence query.
        corpus_id: id of the corpus node; documents are its children.
        list_name: node-type name of the list, used both as a key of
            ``cache.NodeType`` and as ``nodetype`` for ``get_or_create_node``.

    Returns:
        A dict keyed by ngram id. Each value is a dict with the keys
        ``"id"``, ``"name"`` (the ngram terms), ``"occ_uniq"`` (summed weight
        over the corpus documents, 1 when unknown) and ``"tfidf"`` (global
        tf-idf score, 0.01 when unknown).
    """
    corpus = session.query(Node).filter(Node.id == corpus_id).first()

    ngram_ids = {
        ngram.id: {"id": ngram.id, "name": ngram.terms}
        for ngram in session.query(Ngram).filter(Ngram.id.in_(ngrams)).all()
    }

    # [ Get Uniq_Occs ]
    user_list = (session.query(Node)
        .filter(
            Node.user_id == user_id,
            Node.parent_id == corpus_id,
            Node.type_id == cache.NodeType[list_name].id,
        )
        .first()
    )
    # .first() yields None when this user has no such list under the corpus;
    # in that case every ngram simply keeps the default occurrence below.
    if user_list is not None:
        InList = aliased(NodeNgram)
        ngrams_occs = (session.query(NodeNgram.ngram_id, func.sum(NodeNgram.weight))
            .join(Node, Node.id == NodeNgram.node_id)
            .join(InList, InList.ngram_id == NodeNgram.ngram_id)
            .filter(Node.parent_id == corpus_id, Node.type_id == cache.NodeType['Document'].id)
            .filter(InList.node_id == user_list.id)
            .group_by(NodeNgram.ngram_id)
            .all()
        )
        for ngram_id, occurrences in ngrams_occs:
            # The query may return ngrams that were not requested: skip them.
            if ngram_id in ngram_ids:
                ngram_ids[ngram_id]["occ_uniq"] = occurrences
    # [ / Get Uniq_Occs ]

    for info in ngram_ids.values():
        info.setdefault("occ_uniq", 1)

    # [ Get Tfidf ]
    Tfidf = aliased(NodeNodeNgram)
    TheList = aliased(NodeNgram)
    tfidf_id = get_or_create_node(nodetype='Tfidf (global)', corpus=corpus).id
    list_id = get_or_create_node(nodetype=list_name, corpus=corpus).id
    ngrams_query = (session
        .query(Ngram.id, Ngram.terms)
        .join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
        .join(Node, Node.id == Node_Ngram.node_id)
        .add_column(Tfidf.score.label('tfidf'))
        .join(Tfidf, Tfidf.ngram_id == Ngram.id)
        .join(TheList, TheList.ngram_id == Ngram.id)
        .filter(Tfidf.nodex_id == tfidf_id)
        .filter(Node.parent_id == corpus_id)
        .filter(TheList.node_id == list_id)
        .group_by(Ngram.id, Ngram.terms, Tfidf.score)
    )
    for ngram_id, _terms, tfidf in ngrams_query:
        if ngram_id in ngram_ids:
            ngram_ids[ngram_id]["tfidf"] = tfidf
    # [ / Get Tfidf ]

    for info in ngram_ids.values():
        info.setdefault("tfidf", 0.01)

    return ngram_ids
class List(APIView):
    """REST view exposing the ngrams of one list of a corpus."""

    def get(self, request, corpus_id, list_name):
        """Return the ngrams of a list with their occurrence and tf-idf scores.

        Args:
            request: incoming request; ``request.user.id`` selects the user
                whose list is used for the occurrence counts.
            corpus_id: id of the corpus node.
            list_name: short list name from the URL; it is title-cased and
                suffixed with ``"List"`` (e.g. ``"stop"`` -> ``"StopList"``)
                to obtain the node-type name.

        Returns:
            A ``JsonHttpResponse`` wrapping the dict built by
            ``get_occtfidf``, keyed by ngram id.
        """
        corpus = session.query(Node).filter(Node.id == corpus_id).first()
        list_name = list_name.title() + "List"

        node_list = get_or_create_node(nodetype=list_name, corpus=corpus)
        nodes_ngrams = (session.query(NodeNgram)
            .filter(NodeNgram.node_id == node_list.id)
            .all()
        )

        # De-duplicate the ids: an ngram only needs to be described once.
        ngrams = sorted({int(node.ngram_id) for node in nodes_ngrams})

        ngram_ids = get_occtfidf(ngrams, request.user.id, corpus_id, list_name)
        return JsonHttpResponse(ngram_ids)
class Ngrams(APIView):
......@@ -107,7 +187,24 @@ class Ngrams(APIView):
the_score = "tfidf"
if request.GET.get('score', False) != False:
the_score = request.GET['score']
# get the scores
# # get the scores
# print( cannot get the ngram occs with aliased() and get_or_create_node )
# if 'occs' in the_score:
# print("OOOOOOOCCCSSSS:")
# miamlist = session.query(Node).filter(Node.user_id == request.user.id , Node.parent_id==node_id , Node.type_id == cache.NodeType['MiamList'].id ).first()
# print( miamlist )
# Miam = aliased(NodeNgram)
# ngrams_query = ( session.query(NodeNgram.ngram_id, func.sum(NodeNgram.weight))
# .join(Node, Node.id == NodeNgram.node_id)
# .join(Miam, Miam.ngram_id == NodeNgram.ngram_id)
# .filter(Node.parent_id == node_id, Node.type_id==cache.NodeType['Document'].id)
# .filter(Miam.node_id==miamlist.id)
# .group_by(NodeNgram.ngram_id)
# .all()
# )
# for i in ngrams_query:
# print(i)
if 'tfidf' in the_score:
Tfidf = aliased(NodeNodeNgram)
tfidf_id = get_or_create_node(nodetype='Tfidf (global)', corpus=corpus).id
......@@ -305,25 +402,15 @@ class Group(APIView):
mainNode_sinonims.append( node )
groups["links"][ mainNode ] = mainNode_sinonims
# for i in groups["nodes"]:
# print(i)
ngrams = [int(i) for i in list(groups["nodes"].keys())]
groups["nodes"] = get_occtfidf( ngrams , request.user.id , corpus_id , "Group")
return JsonHttpResponse(groups)
def post(self, request, node_id):
    """Placeholder handler for POST requests on ngram groups.

    Nothing is stored yet: the request body is ignored and a fixed JSON
    payload is returned.

    TODO: implement group creation. The earlier draft validated
    ``request.DATA`` against ``{'data': {'source': int, 'target': list}}``,
    added one ``NodeNgramNgram`` row (score 1) per positive source/target
    pair to the group node, committed, and answered 201; a missing
    parameter raised a 400 ``APIException``.
    """
    return JsonHttpResponse(["hola", "mundo"])
def delete(self, request, corpus_id):
......
......@@ -490,7 +490,7 @@ $("#Save_All").click(function(){
FlagsBuffer["inmap"] = {}
for(var id in AjaxRecords) {
if( ngrams_map[ AjaxRecords[id]["id"] ] ) {
if( NGrams["map"][ AjaxRecords[id]["id"] ] ) {
if(AjaxRecords[id]["state"]==0 || AjaxRecords[id]["state"]==2) {
FlagsBuffer["outmap"][ AjaxRecords[id].id ] = true
if(AjaxRecords[id]["state"]==2) {
......@@ -510,10 +510,10 @@ $("#Save_All").click(function(){
}
}
// [ = = = = For deleting subforms = = = = ]
for(var i in ngrams_groups.links) {
for(var i in NGrams["group"].links) {
if(FlagsBuffer["delete"][i]) {
for(var j in ngrams_groups.links[i] ) {
FlagsBuffer["delete"][ngrams_groups.links[i][j]] = true
for(var j in NGrams["group"].links[i] ) {
FlagsBuffer["delete"][NGrams["group"].links[i][j]] = true
}
for(var j in FlagsBuffer["delete"][i] ) {
FlagsBuffer["delete"][FlagsBuffer["delete"][i][j]] = true
......@@ -860,7 +860,7 @@ function SearchFilters( elem ) {
if( MODE == "filter_all") {
console.clear()
var result = Main_test( ngrams_data , ngrams_data.scores.initial , MODE)
var result = Main_test( NGrams["main"] , NGrams["main"].scores.initial , MODE)
console.log( result )
MyTable.data('dynatable').sorts.clear();
......@@ -871,19 +871,19 @@ function SearchFilters( elem ) {
if( MODE == "filter_map-list") {
console.clear()
console.log("ngrams_map:")
console.log(ngrams_map)
console.log(NGrams["map"])
var sub_ngrams_data = {
"ngrams":[],
"scores": $.extend({}, ngrams_data.scores)
"scores": $.extend({}, NGrams["main"].scores)
}
for(var r in ngrams_data.ngrams) {
if ( ngrams_map[ngrams_data.ngrams[r].id] ) {
sub_ngrams_data["ngrams"].push( ngrams_data.ngrams[r] )
for(var r in NGrams["main"].ngrams) {
if ( NGrams["map"][NGrams["main"].ngrams[r].id] ) {
sub_ngrams_data["ngrams"].push( NGrams["main"].ngrams[r] )
}
}
var result = Main_test(sub_ngrams_data , ngrams_data.scores.initial , MODE)
var result = Main_test(sub_ngrams_data , NGrams["main"].scores.initial , MODE)
console.log( result )
// MyTable.data('dynatable').sorts.clear();
// MyTable.data('dynatable').sorts.add('score', 0) // 1=ASCENDING,
......@@ -892,40 +892,14 @@ function SearchFilters( elem ) {
if( MODE == "filter_stop-list") {
if(Object.keys(ngrams_stop).length<1) {
var corpus_id = getIDFromURL( "corpus" )
var someurl = window.location.origin+"/api/node/"+corpus_id+"/ngrams/list/stop";
$.ajax({
type: "GET",
url: someurl,
dataType: "json",
success : function(data, textStatus, jqXHR) {
console.clear()
console.log("ngrams_stop:")
console.log( data )
ngrams_stop = data
if(Object.keys(NGrams["stop"]).length<1) {
var sub_ngrams_data = {
"ngrams":[],
"scores": $.extend({}, ngrams_data.scores)
"scores": $.extend({}, NGrams["main"].scores)
}
for(var r in ngrams_data.ngrams) {
if ( ngrams_stop[ngrams_data.ngrams[r].id] ) {
sub_ngrams_data["ngrams"].push( ngrams_data.ngrams[r] )
}
}
},
error: function(exception) {
console.log("second ajax, exception!: "+exception.status)
}
})
} else {
var sub_ngrams_data = {
"ngrams":[],
"scores": $.extend({}, ngrams_data.scores)
}
for(var r in ngrams_data.ngrams) {
if ( ngrams_stop[ngrams_data.ngrams[r].id] ) {
sub_ngrams_data["ngrams"].push( ngrams_data.ngrams[r] )
for(var r in NGrams["main"].ngrams) {
if ( NGrams["stop"][ NGrams["main"].ngrams[r].id ] ) {
sub_ngrams_data["ngrams"].push( NGrams["main"].ngrams[r] )
}
}
}
......@@ -945,37 +919,35 @@ function getIDFromURL( item ) {
return pageurl[cid+1];
}
var StopList = {}
/**
 * Fetch one ngram list of the current corpus and store it in the global
 * `StopList`.
 *
 * Route: node/(?P<corpus_id>[0-9]+)/ngrams/list/(?P<list_name>\w+)
 *
 * @param {string} list_name - short list name appended to the URL (e.g. "stop").
 * @returns {jqXHR} the pending request, so callers may chain on completion.
 */
function test_getlist( list_name ) {
    var corpus_id = getIDFromURL( "corpus" )
    // Escape the name so an unexpected character cannot alter the request path.
    var someurl = window.location.origin+"/api/node/"+corpus_id+"/ngrams/list/"+encodeURIComponent(list_name);
    return $.ajax({
        type: "GET",
        url: someurl,
        dataType: "json",
        success : function(data, textStatus, jqXHR) {
            console.log( data )
            StopList = data
        },
        // jQuery passes the jqXHR object first; its `status` is the HTTP code.
        error: function(jqXHR) {
            console.log("second ajax, exception!: "+jqXHR.status)
        }
    })
}
// [ = = = = = = = = = = INIT = = = = = = = = = = ]
var corpus_id = getIDFromURL( "corpus" )
var url1=window.location.origin+"/api/node/"+corpus_id+"/ngrams/group",
var url0=window.location.origin+"/api/node/"+corpus_id+"/ngrams/list/stop",
url1=window.location.origin+"/api/node/"+corpus_id+"/ngrams/group",
url2=window.location.origin+"/api/node/"+corpus_id+"/ngrams/keep",
url3=window.location.href+"/ngrams.json";
var ngrams_groups = {}, ngrams_map = {}, ngrams_stop = {} , ngrams_data = {};
var NGrams = {
"group" : {},
"stop" : {},
"miam" : {},
"map" : {},
"scores" : {}
}
$.when(
$.ajax({
type: "GET",
url: url0,
dataType: "json",
success : function(data, textStatus, jqXHR) { NGrams["stop"] = data },
error: function(exception) {
console.log("first ajax, exception!: "+exception.status)
}
}),
$.ajax({
type: "GET",
url: url1,
dataType: "json",
success : function(data, textStatus, jqXHR) { ngrams_groups = data },
success : function(data, textStatus, jqXHR) { NGrams["group"] = data },
error: function(exception) {
console.log("first ajax, exception!: "+exception.status)
}
......@@ -984,7 +956,7 @@ $.when(
type: "GET",
url: url2,
dataType: "json",
success : function(data, textStatus, jqXHR) { ngrams_map = data },
success : function(data, textStatus, jqXHR) { NGrams["map"] = data },
error: function(exception) {
console.log("first ajax, exception!: "+exception.status)
}
......@@ -993,7 +965,7 @@ $.when(
type: "GET",
url: url3,
dataType: "json",
success : function(data, textStatus, jqXHR) { ngrams_data = data },
success : function(data, textStatus, jqXHR) { NGrams["main"] = data },
error: function(exception) {
console.log("second ajax, exception!: "+exception.status)
}
......@@ -1001,40 +973,40 @@ $.when(
).then(function() {
// Deleting subforms from the ngrams-table, clean start baby!
if( Object.keys(ngrams_groups.links).length>0 ) {
if( Object.keys(NGrams["group"].links).length>0 ) {
var _forms = { "main":{} , "sub":{} }
for(var i in ngrams_groups.links) {
for(var i in NGrams["group"].links) {
_forms["main"][i] = true
for(var j in ngrams_groups.links[i]) {
_forms["sub"][ ngrams_groups.links[i][j] ] = true
for(var j in NGrams["group"].links[i]) {
_forms["sub"][ NGrams["group"].links[i][j] ] = true
}
}
var ngrams_data_ = []
for(var i in ngrams_data.ngrams) {
if(_forms["sub"][ngrams_data.ngrams[i].id]) {
ngrams_groups["nodes"][ngrams_data.ngrams[i].id] = ngrams_data.ngrams[i]
for(var i in NGrams["main"].ngrams) {
if(_forms["sub"][NGrams["main"].ngrams[i].id]) {
NGrams["group"]["nodes"][NGrams["main"].ngrams[i].id] = NGrams["main"].ngrams[i]
} else {
if( _forms["main"][ ngrams_data.ngrams[i].id ] )
ngrams_data.ngrams[i].name = "*"+ngrams_data.ngrams[i].name
ngrams_data_.push( ngrams_data.ngrams[i] )
if( _forms["main"][ NGrams["main"].ngrams[i].id ] )
NGrams["main"].ngrams[i].name = "*"+NGrams["main"].ngrams[i].name
ngrams_data_.push( NGrams["main"].ngrams[i] )
}
}
ngrams_data.ngrams = ngrams_data_;
NGrams["main"].ngrams = ngrams_data_;
}
if( Object.keys(ngrams_map).length>0 ) {
for(var i in ngrams_data.ngrams) {
if(ngrams_map[ngrams_data.ngrams[i].id]) {
ngrams_data.ngrams[i]["map"] = true
if( Object.keys(NGrams["map"]).length>0 ) {
for(var i in NGrams["main"].ngrams) {
if(NGrams["map"][NGrams["main"].ngrams[i].id]) {
NGrams["main"].ngrams[i]["map"] = true
}
}
}
// Building the Score-Selector
var FirstScore = ngrams_data.scores.initial
var possible_scores = Object.keys( ngrams_data.ngrams[0].scores );
// Building the Score-Selector //NGrams["scores"]
var FirstScore = NGrams["main"].scores.initial
var possible_scores = Object.keys( NGrams["main"].ngrams[0].scores );
var scores_div = '<br><select style="font-size:25px;" class="span1" id="scores_selector">'+"\n";
scores_div += "\t"+'<option value="'+FirstScore+'">'+FirstScore+'</option>'+"\n"
for( var i in possible_scores ) {
......@@ -1043,8 +1015,8 @@ $.when(
}
}
// Initializing the Charts and Table
console.log( ngrams_data )
var result = Main_test( ngrams_data , FirstScore , "filter_all")
console.log( NGrams["main"] )
var result = Main_test( NGrams["main"] , FirstScore , "filter_all")
console.log( result )
// Listener for onchange Score-Selector
......@@ -1052,7 +1024,7 @@ $.when(
$("#ScoresBox").html(scores_div)
$("#scores_selector").on('change', function() {
console.log( this.value )
var result = Main_test( ngrams_data , this.value , "filter_all")
var result = Main_test( NGrams["main"] , this.value , "filter_all")
console.log( result )
});
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment