Commit 5d951915 authored by PkSM3's avatar PkSM3

[UPDATE] corpus-comparison: level up!

parent b1483b9a
......@@ -77,6 +77,8 @@ urlpatterns = patterns('',
url(r'^ngrams$', views.ngrams), # to be removed
url(r'^nodeinfo/(\d+)$', views.nodeinfo), # to be removed ?
url(r'^tfidf/(\d+)/(\w+)$', views_optimized.tfidf),
url(r'^api/corpusintersection/(\w+)$', views_optimized.getCorpusIntersection),
url(r'^api/userportfolio/project/(\d+)/corpuses$', views_optimized.getUserPortfolio),
url(r'^project/(\d+)/corpus/(\d+)/(\w+)/update$', views.update_nodes),
# TODO rest to update corpus and information for progress bar
......
......@@ -566,33 +566,14 @@ def graph(request, project_id, corpus_id, generic=100, specific=100):
project_type_id = cache.NodeType['Project'].id
corpus_type_id = cache.NodeType['Corpus'].id
results = {}
projs = session.query(Node).filter(Node.user_id == user_id,Node.type_id==project_type_id).all()
for i in projs:
# print(i.id , i.name)
if i.id not in results: results[i.id] = {}
results[i.id]["proj_name"] = i.name
results[i.id]["corpuses"] = []
corpuses = session.query(Node).filter(Node.parent_id==i.id , Node.type_id==corpus_type_id).all()
for j in corpuses:
if int(j.id)!=int(corpus_id):
info = { "id":j.id , "name":j.name }
results[i.id]["corpuses"].append(info)
# print("\t",j.id , j.name)
# import pprint
# pprint.pprint(results)
# if specific != None and generic != None :
graphurl = "corpus/"+str(corpus_id)+"/node_link.json"
html = t.render(Context({\
'debug': settings.DEBUG,
'user' : user,\
'date' : date,\
'corpus' : corpus,\
'project' : project,\
'corpusinfo' : results,\
'graphfile' : graphurl,\
}))
......
......@@ -195,7 +195,7 @@ def tfidf(request, corpus_id, ngram_ids):
"""Takes IDs of corpus and ngram and returns list of relevent documents in json format
according to TFIDF score (order is decreasing).
"""
limit=6
limit=5
nodes_list = []
# filter input
ngram_ids = ngram_ids.split('a')
......@@ -219,7 +219,7 @@ def tfidf(request, corpus_id, ngram_ids):
# print("\tcorpus_id:",corpus_id)
# convert query result to a list of dicts
for node, score in nodes_query:
print("\t corpus:",corpus_id,"\t",node.name)
# print("\t corpus:",corpus_id,"\t",node.name)
node_dict = {
'id': node.id,
'score': score,
......@@ -229,6 +229,84 @@ def tfidf(request, corpus_id, ngram_ids):
node_dict[key] = node.hyperdata[key]
nodes_list.append(node_dict)
# print("= = = = = = = = \n")
data = json.dumps(nodes_list)
return JsonHttpResponse(nodes_list)
def getCorpusIntersection(request , corpuses_ids):
FinalDict = False
if request.method == 'POST' and "nodeids" in request.POST and len(request.POST["nodeids"])>0:
import ast
node_ids = [int(i) for i in (ast.literal_eval( request.POST["nodeids"] )) ]
# Here are the visible nodes of the initial semantic map.
corpuses_ids = corpuses_ids.split('a')
corpuses_ids = [int(i) for i in corpuses_ids] # corpus[1] will be the corpus to compare
cooc_type_id = cache.NodeType['Cooccurrence'].id
cooc_ids = session.query(Node.id).filter(Node.user_id == request.user.id , Node.parent_id==corpuses_ids[1] , Node.type_id == cooc_type_id ).first()
if len(cooc_ids)==0:
return JsonHttpResponse(FinalDict)
# If corpus[1] has a coocurrence.id then lets continue
FinalDict = {}
import networkx as nx
G = nx.Graph() # I use an undirected graph, because direction doesnt matter here, coocs should be a triangular matrix, so...
ngrams_data1 = session.query(NodeNgramNgram).filter( NodeNgramNgram.node_id==cooc_ids[0], NodeNgramNgram.ngramx_id.in_( node_ids )).all()
for ngram in ngrams_data1: # are there visible nodes in the X-axis of corpus to compare ?
G.add_edge( ngram.ngramx_id , ngram.ngramy_id , weight=ngram.score)
ngrams_data2 = session.query(NodeNgramNgram).filter( NodeNgramNgram.node_id==cooc_ids[0], NodeNgramNgram.ngramy_id.in_( node_ids )).all()
for ngram in ngrams_data2: # are there visible nodes in the Y-axis of corpus to compare ?
if not G.has_edge(ngram.ngramx_id,ngram.ngramy_id):
G.add_edge( ngram.ngramx_id , ngram.ngramy_id , weight=ngram.score)
for e in G.edges_iter():
n1 = e[0]
n2 = e[1]
print( G[n1][n2]["weight"] , "\t", n1,",",n2 )
if n1 not in FinalDict:
FinalDict[n1]=0
if n2 not in FinalDict:
FinalDict[n2]=0
FinalDict[n1]+=G[n1][n2]["weight"]
FinalDict[n2]+=G[n1][n2]["weight"]
for node in FinalDict:
FinalDict[node] = FinalDict[node]/G.degree(node)
# Getting AVG-COOC of each ngram that exists in the cooc-matrix of the compared-corpus.
return JsonHttpResponse(FinalDict)
def getUserPortfolio(request , project_id):
user = request.user
user_id = cache.User[request.user.username].id
project_type_id = cache.NodeType['Project'].id
corpus_type_id = cache.NodeType['Corpus'].id
results = {}
projs = session.query(Node).filter(Node.user_id == user_id,Node.type_id==project_type_id ).all()
for i in projs:
# print (i.id,i.name)
if i.id not in results:
results[i.id] = {}
results[i.id]["proj_name"] = i.name
results[i.id]["corpuses"] = []
corpuses = session.query(Node).filter(Node.parent_id==i.id , Node.type_id==corpus_type_id).all()
for j in corpuses:
doc_count = session.query(func.count(Node.id)).filter(Node.parent_id==j.id).all()[0][0]
if doc_count >= 10:
# print(session.query(Node).filter(Node.id==j.id).first())
info = {
"id":j.id ,
"name":j.name ,
"c":doc_count
}
results[i.id]["corpuses"].append(info)
print("\t\t",j.id , j.name , doc_count)
if len(results[i.id]["corpuses"])==0:
del results[i.id]
return JsonHttpResponse( results )
......@@ -308,9 +308,9 @@
</div>
<div id="topPapers"></div>
<!--
<div id="tab-container-top" class='tab-container'>
<!-- <div id="topPapers"></div> -->
<div id="tab-container-top" class='tab-container' style="display: none;">
<ul class='etabs'>
<li id="tabmed" class='tab active'><a href="#tabs3">Medline Pubs</a></li>
......@@ -326,7 +326,7 @@
</div>
</div>
</div>
-->
......@@ -409,55 +409,22 @@
<div class="modal-header">
<button type="button" class="close" data-dismiss="modal" aria-hidden="true">×</button>
<h4 class="modal-title">Corpus Comparison</h4>
<h3 class="modal-title">Corpus Comparison Tool</h3>
</div>
<div class="modal-body form-horizontal">
Chose another corpus to compare with:
<div class="form-group">
<label class="col-lg-2 control-label"> </label>
<div class="col-lg-10">
<form id="corpuses_form" role="form">
<ul>
{% if corpusinfo %}
{% for k1, v1 in corpusinfo.items %}
{% if v1.corpuses|length > 0 %}
<br><li><a href="/project/{{k1}}/">{{v1.proj_name}}</a><br>
<ul style="list-style-type: none;">
{% for c in v1.corpuses %}
<li>
<div class="radio">
<label><input type="radio" id="{{c.id}}" name="optradio">
<a href="/project/{{k1}}/corpus/{{c.id}}/">{{c.name}}</a>
</label>
</div>
</li>
{% endfor %}
</ul>
</li>
{% endif %}
{% endfor %}
{% endif %}
</ul>
</form>
<h4>Choose one corpus:</h4>
<div style="color:red;" id="selected_corpus"></div>
<div id="user_portfolio">
</div>
</div>
</div>
<div class="modal-footer">
<button id="closecorpuses" type="button" class="btn btn-default" data-dismiss="modal">Close</button>
<button type="button" class="btn btn-primary" onclick='printCorpuses();'>Add Tab</button>
<button id="add_corpus_tab" type="button" class="btn btn-primary" disabled onclick='printCorpuses();'>Add Tab</button>
</div>
</div>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment