Commit 16bac6f4 authored by delanoe

[FEAT] Intersection graph.

parent 03c5ffae
......@@ -15,9 +15,11 @@ import csv
_node_available_fields = ['id', 'parent_id', 'name', 'typename', 'hyperdata', 'ngrams']
_node_default_fields = ['id', 'parent_id', 'name', 'typename']
_node_available_formats = ['json', 'csv', 'bibex']
_node_available_types = NODETYPES
#_hyperdata_available_fields = ['title', 'resourcetype']
#_node_available_formats = ['json', 'csv', 'bibex']
def _query_nodes(request, node_id=None):
user = cache.User[request.user.id]
......@@ -25,6 +27,9 @@ def _query_nodes(request, node_id=None):
parameters = get_parameters(request)
parameters = validate(parameters, {'type': dict, 'items': {
'formated': {'type': str, 'required' : False, 'default': 'json'},
# 'hyperdata': {'type': list, 'default' : _hyperdata_available_fields, 'items': {
# 'type': str, 'range' : _node_available_fields,
# }},
'pagination_limit': {'type': int, 'default': 10},
'pagination_offset': {'type': int, 'default': 0},
'fields': {'type': list, 'default': _node_default_fields, 'items': {
......@@ -75,7 +80,7 @@ class NodeListResource(APIView):
'parameters': parameters,
'count': count,
'records': [
{field: getattr(node, field) for field in parameters['fields']}
{ field: getattr(node, field) for field in parameters['fields'] }
for node in query
]
})
......
......@@ -5,128 +5,72 @@ from gargantext.util.db import session, aliased, bulk_insert, func
from gargantext.util.lists import WeightedMatrix, UnweightedList, Translations
from gargantext.util.http import JsonHttpResponse
from sqlalchemy import desc, asc, or_, and_
from sqlalchemy import desc, asc, or_, and_, func
import datetime
import ast
import networkx as nx
def intersection(request , corpuses_ids, measure='cooc'):
FinalDict = False
'''
intersection :: (str(Int) + "a" str(Int)) -> Dict(Ngram.id :: Int, Score :: Int)
intersection = gives the intersection of two graphs
'''
if request.method == 'POST' and "nodeids" in request.POST and len(request.POST["nodeids"])>0 :
import ast
import networkx as nx
node_ids = [int(i) for i in (ast.literal_eval( request.POST["nodeids"] )) ]
# Here are the visible nodes of the initial semantic map.
corpuses_ids = corpuses_ids.split('a')
corpuses_ids = [int(i) for i in corpuses_ids]
print(corpuses_ids)
# corpus[1] will be the corpus to compare
def get_score(corpus_id):
cooc_ids = (session.query(Node.id)
.filter(Node.user_id == request.user.id
, Node.parent_id==corpus_id
, Node.typename == 'COOCCURRENCES' )
.first()
def representativity(corpus_id):
ngrams_data = ( session.query(Ngram.id, NodeNgram)
.join(Node, NodeNgram.node_id == Node.id)
.filter( Node.parent_id == corpus_id
, Node.typename== 'DOCUMENT')
.filter( NodeNgram.weight > 0 )
.filter( Ngram.id.in_(node_ids) )
.group_by(Ngram.id)
.count()
)
nodes_count = ( session.query(Node)
.filter( Node.parent_id == corpus_id
, Node.typename == 'DOCUMENT'
)
.count()
)[0]
result = dict()
for ngram_id, somme in ngrams_data:
result[ngram_id] = somme / nodes_count
if len(cooc_ids)==0:
return JsonHttpResponse(FinalDict)
# If corpus[1] has a cooccurrence.id then let's continue
Coocs = {}
return result
G = nx.Graph()
# undirected graph only
# because direction doesn't matter here
# coocs is a triangular matrix
data_0 = representativity(corpuses_ids[0])
data_1 = representativity(corpuses_ids[1])
queue = list()
ngrams_data = ( session.query(NodeNgramNgram)
.filter( NodeNgramNgram.node_id==cooc_ids[0]
, or_( NodeNgramNgram.ngram1_id.in_( node_ids )
, NodeNgramNgram.ngram2_id.in_( node_ids )
)
)
.group_by(NodeNgramNgram)
.all()
)
for ngram in ngrams_data :
# are there visible nodes in the X-axis of corpus to compare ?
G.add_edge( ngram.ngram1_id , ngram.ngram2_id , weight=ngram.weight)
print(corpus_id, ngram)
for e in G.edges_iter() :
n1 = e[0]
n2 = e[1]
# print( G[n1][n2]["weight"] , "\t", n1,",",n2 )
if n1 not in Coocs :
Coocs[n1] = 0
if n2 not in Coocs :
Coocs[n2] = 0
Coocs[n1] += G[n1][n2]["weight"]
Coocs[n2] += G[n1][n2]["weight"]
return(Coocs,G)
Coocs_0,G_0 = get_score( corpuses_ids[0] )
Coocs_1,G_1 = get_score( corpuses_ids[1] )
FinalDict = {}
if measure == 'jacquard':
for node in node_ids :
if node in G_1.nodes() and node in G_0.nodes():
neighbors_0 = set(G_0.neighbors(node))
neighbors_1 = set(G_1.neighbors(node))
jacquard = len(neighbors_0.intersection(neighbors_1)) / len(neighbors_0.union(neighbors_1))
FinalDict[node] = jacquard * 3
elif node in G_0.nodes() and node not in G_1.nodes() :
FinalDict[node] = 2
elif node not in G_0.nodes() and node in G_1.nodes() :
FinalDict[node] = 1
else:
FinalDict[node] = 0
elif measure == 'degree':
for node in node_ids :
if node in G_1.nodes() and node in G_0.nodes():
score_0 = Coocs_0[node] / G_0.degree(node)
score_1 = Coocs_1[node] / G_1.degree(node)
FinalDict[node] = 5 * score_0 / score_1
elif node in G_0.nodes() and node not in G_1.nodes() :
FinalDict[node] = 0.5
elif node not in G_0.nodes() and node in G_1.nodes() :
FinalDict[node] = 0.2
else:
FinalDict[node] = 0
elif measure == 'cooc':
for node in node_ids :
if node in G_1.nodes() and node in G_0.nodes():
#FinalDict[node] = Coocs_1[node] / Coocs_0[node]
FinalDict[node] = Coocs_0[node] / Coocs_1[node]
elif node in G_0.nodes() and node not in G_1.nodes() :
FinalDict[node] = 0.0
elif node not in G_0.nodes() and node in G_1.nodes() :
FinalDict[node] = 0.0
else:
FinalDict[node] = 0
for k in data_0.keys():
if data_1[k] == 0:
queue.append(k)
else:
FinalDict[k] = data_0[k] / data_1[k]
maximum = max([ FinalDict[k] for k in FinalDict.keys()])
for k in queue:
FinalDict[k] = maximum +1
print("-" * 100)
print(FinalDict)
#print(node,score)
# Getting AVG-COOC of each ngram that exists in the cooc-matrix of the compared-corpus.
return JsonHttpResponse(FinalDict)
return JsonHttpResponse(FinalDict)
......@@ -815,6 +815,7 @@ function GetUserPortfolio() {
return true;
var query_url = window.location.origin+'/api/nodes?types[]=PROJECT&types[]=CORPUS&pagination_limit=100'
// var query_url = window.location.origin+'/api/nodes?types[]=PROJECT&types[]=CORPUS&pagination_limit=100&fields[]=hyperdata'
$.ajax({
type: 'GET',
dataType : 'JSON',
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment