Commit b0d02616 authored by delanoe's avatar delanoe

[FEAT] Diff of graph implemented. TODO: change colors in Graph Explorer....

[FEAT] Diff of graph implemented. TODO: change colors in Graph Explorer: use blue -> red instead of yellow -> red.
parent 69006133
......@@ -11,66 +11,93 @@ import datetime
import ast
import networkx as nx
def doc_freq(corpus_id, node_ids):
    '''
    doc_freq :: Corpus_id -> [Ngram_id] -> [(Ngram_id, Int)]
    Given a corpus, compute the number of documents that have the ngram in it.

    corpus_id : id of the node whose children of typename 'DOCUMENT' are
                inspected.
    node_ids  : ngram ids the count is restricted to (they are matched
                against NodeNgram.ngram_id).

    Returns a list of (ngram_id, number of documents) rows. Only ngrams
    attached to at least one document with a strictly positive weight
    appear in the result.
    '''
    # An empty IN () clause can never match anything: skip the round trip
    # to the database and return the (identical) empty result directly.
    if not node_ids:
        return []

    return ( session.query(NodeNgram.ngram_id, func.count(NodeNgram.node_id))
                    .join(Node, NodeNgram.node_id == Node.id)
                    .filter( Node.parent_id == corpus_id
                           , Node.typename == 'DOCUMENT')
                    .filter( NodeNgram.weight > 0
                           , NodeNgram.ngram_id.in_(node_ids) )
                    .group_by(NodeNgram.ngram_id)
                    .all()
           )
def doc_ngram_representativity(corpus_id, node_ids):
    '''
    doc_ngram_representativity :: Corpus_ID -> [Ngram_id] -> Dict Ngram_id Float
    Given a corpus, compute for each ngram the part of the documents that
    have the ngram in it.

    corpus_id : id of the node whose children of typename 'DOCUMENT' are
                inspected.
    node_ids  : ngram ids the computation is restricted to.

    Returns a dict mapping each ngram id reported by doc_freq to its
    document frequency divided by the number of documents of the corpus.
    '''
    # Nothing to measure: avoid both queries.
    if not node_ids:
        return {}

    nodes_count = ( session.query(Node)
                           .filter( Node.parent_id == corpus_id
                                  , Node.typename == 'DOCUMENT'
                                  )
                           .count()
                  )

    # doc_freq only counts documents of this same corpus, so it yields no
    # row (and no division happens) when the corpus has no document.
    return { ngram_id: somme / nodes_count
             for ngram_id, somme in doc_freq(corpus_id, node_ids)
           }
def compare_corpora(Corpus_id_A, Corpus_id_B, node_ids):
    '''
    compare_corpora :: Corpus_id -> Corpus_id -> [Ngram_id] -> Dict Ngram_id Float
    Given two corpora :
    - if the corpora are the same, it returns :
      the document frequency of each ngram (dict with ngram id as key)
    - if the corpora are different, it returns :
      doc_ngram_representativity(Corpus_id_A) / doc_ngram_representativity(Corpus_id_B)
      (dict with ngram id as key)

    Ngrams found in corpus A but not in corpus B have no ratio: they are
    given the smallest ratio computed for the shared ngrams, or 0 when the
    two corpora share no ngram at all.

    Corpus_id_A, Corpus_id_B : corpus ids (anything int() accepts).
    node_ids                 : ngram ids the comparison is restricted to.
    '''
    # Same corpus on both sides: there is nothing to compare.
    if int(Corpus_id_A) == int(Corpus_id_B):
        return { ngram_id: somme
                 for ngram_id, somme in doc_freq(Corpus_id_A, node_ids)
               }

    data_A = doc_ngram_representativity(Corpus_id_A, node_ids)
    data_B = doc_ngram_representativity(Corpus_id_B, node_ids)

    result = dict()
    queue  = list()   # ngrams of A that are absent from B

    for k, value_A in data_A.items():
        if k in data_B:
            result[k] = value_A / data_B[k]
        else:
            queue.append(k)

    # `default` keeps min() from raising ValueError when no ngram is shared
    # (result is still empty here); 0 is an arbitrary floor in that case.
    minimum = min(result.values(), default=0)

    for k in queue:
        result[k] = minimum

    return result
def intersection(request , corpuses_ids, measure='cooc'):
    '''
    intersection :: (str(Int) + "a" + str(Int)) -> Dict(Ngram.id :: Int, Score :: Int)
    intersection = returns as Json Http Response the intersection of two graphs

    request      : the HTTP request; the comparison only runs for a POST
                   carrying a non-empty "nodeids" field, which holds the
                   literal of a sequence of ngram ids.
    corpuses_ids : two corpus ids joined by the letter "a" (e.g. "12a34");
                   the first corpus is compared to the second one.
    measure      : accepted but not used by this function.

    Returns JsonHttpResponse(compare_corpora(...)), or an empty JSON object
    when the request does not carry the expected data.
    '''
    # Guard clause: always hand back a response instead of falling through
    # and implicitly returning None.
    if request.method != 'POST' or not request.POST.get("nodeids"):
        return JsonHttpResponse({})

    # Here are the visible nodes of the initial semantic map.
    # NOTE(review): literal_eval only evaluates literals, but it still raises
    # on malformed input -- confirm that callers always send a valid list.
    node_ids = [int(i) for i in ast.literal_eval(request.POST["nodeids"])]

    # corpuses_ids[1] will be the corpus to compare
    corpuses_ids = [int(i) for i in corpuses_ids.split('a')]

    return JsonHttpResponse(
        compare_corpora(corpuses_ids[0], corpuses_ids[1], node_ids)
    )
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment