Commit 5d951915 authored by PkSM3

[UPDATE] corpus-comparison: level up!

parent b1483b9a
...@@ -77,6 +77,8 @@ urlpatterns = patterns('', ...@@ -77,6 +77,8 @@ urlpatterns = patterns('',
url(r'^ngrams$', views.ngrams), # to be removed url(r'^ngrams$', views.ngrams), # to be removed
url(r'^nodeinfo/(\d+)$', views.nodeinfo), # to be removed ? url(r'^nodeinfo/(\d+)$', views.nodeinfo), # to be removed ?
url(r'^tfidf/(\d+)/(\w+)$', views_optimized.tfidf), url(r'^tfidf/(\d+)/(\w+)$', views_optimized.tfidf),
url(r'^api/corpusintersection/(\w+)$', views_optimized.getCorpusIntersection),
url(r'^api/userportfolio/project/(\d+)/corpuses$', views_optimized.getUserPortfolio),
url(r'^project/(\d+)/corpus/(\d+)/(\w+)/update$', views.update_nodes), url(r'^project/(\d+)/corpus/(\d+)/(\w+)/update$', views.update_nodes),
# TODO rest to update corpus and information for progress bar # TODO rest to update corpus and information for progress bar
......
...@@ -566,33 +566,14 @@ def graph(request, project_id, corpus_id, generic=100, specific=100): ...@@ -566,33 +566,14 @@ def graph(request, project_id, corpus_id, generic=100, specific=100):
project_type_id = cache.NodeType['Project'].id project_type_id = cache.NodeType['Project'].id
corpus_type_id = cache.NodeType['Corpus'].id corpus_type_id = cache.NodeType['Corpus'].id
results = {}
projs = session.query(Node).filter(Node.user_id == user_id,Node.type_id==project_type_id).all()
for i in projs:
# print(i.id , i.name)
if i.id not in results: results[i.id] = {}
results[i.id]["proj_name"] = i.name
results[i.id]["corpuses"] = []
corpuses = session.query(Node).filter(Node.parent_id==i.id , Node.type_id==corpus_type_id).all()
for j in corpuses:
if int(j.id)!=int(corpus_id):
info = { "id":j.id , "name":j.name }
results[i.id]["corpuses"].append(info)
# print("\t",j.id , j.name)
# import pprint
# pprint.pprint(results)
# if specific != None and generic != None :
graphurl = "corpus/"+str(corpus_id)+"/node_link.json" graphurl = "corpus/"+str(corpus_id)+"/node_link.json"
html = t.render(Context({\ html = t.render(Context({\
'debug': settings.DEBUG, 'debug': settings.DEBUG,
'user' : user,\ 'user' : user,\
'date' : date,\ 'date' : date,\
'corpus' : corpus,\ 'corpus' : corpus,\
'project' : project,\ 'project' : project,\
'corpusinfo' : results,\
'graphfile' : graphurl,\ 'graphfile' : graphurl,\
})) }))
......
...@@ -195,7 +195,7 @@ def tfidf(request, corpus_id, ngram_ids): ...@@ -195,7 +195,7 @@ def tfidf(request, corpus_id, ngram_ids):
"""Takes IDs of corpus and ngram and returns list of relevent documents in json format """Takes IDs of corpus and ngram and returns list of relevent documents in json format
according to TFIDF score (order is decreasing). according to TFIDF score (order is decreasing).
""" """
limit=6 limit=5
nodes_list = [] nodes_list = []
# filter input # filter input
ngram_ids = ngram_ids.split('a') ngram_ids = ngram_ids.split('a')
...@@ -219,7 +219,7 @@ def tfidf(request, corpus_id, ngram_ids): ...@@ -219,7 +219,7 @@ def tfidf(request, corpus_id, ngram_ids):
# print("\tcorpus_id:",corpus_id) # print("\tcorpus_id:",corpus_id)
# convert query result to a list of dicts # convert query result to a list of dicts
for node, score in nodes_query: for node, score in nodes_query:
print("\t corpus:",corpus_id,"\t",node.name) # print("\t corpus:",corpus_id,"\t",node.name)
node_dict = { node_dict = {
'id': node.id, 'id': node.id,
'score': score, 'score': score,
...@@ -229,6 +229,84 @@ def tfidf(request, corpus_id, ngram_ids): ...@@ -229,6 +229,84 @@ def tfidf(request, corpus_id, ngram_ids):
node_dict[key] = node.hyperdata[key] node_dict[key] = node.hyperdata[key]
nodes_list.append(node_dict) nodes_list.append(node_dict)
# print("= = = = = = = = \n")
data = json.dumps(nodes_list)
return JsonHttpResponse(nodes_list) return JsonHttpResponse(nodes_list)
def getCorpusIntersection(request , corpuses_ids):
    """Return, for a second corpus, the average cooccurrence score of given ngrams.

    The ngram ids are read from the POST field "nodeids" (a Python-literal
    sequence of ids; per the original author's note these are the nodes
    visible on the initial semantic map).

    Parameters:
        request: the incoming HTTP request; only POST requests carrying a
            non-empty "nodeids" field are processed.
        corpuses_ids: string of integer corpus ids separated by the letter
            'a'. The id at index 1 is the corpus to compare with, so at least
            two ids are expected.

    Returns:
        JsonHttpResponse(False) when the request is not a usable POST or when
        the compared corpus has no Cooccurrence node owned by the requesting
        user; otherwise a JsonHttpResponse wrapping a dict that maps each
        ngram id found in the cooccurrence data to the sum of the scores of
        its edges divided by its degree in the graph built below.
    """
    if (request.method != 'POST' or "nodeids" not in request.POST
            or len(request.POST["nodeids"]) == 0):
        return JsonHttpResponse(False)

    import ast
    # literal_eval only evaluates Python literals, never arbitrary expressions.
    node_ids = [int(i) for i in ast.literal_eval(request.POST["nodeids"])]

    # corpuses_ids[1] is the corpus to compare against.
    corpuses_ids = [int(i) for i in corpuses_ids.split('a')]

    cooc_type_id = cache.NodeType['Cooccurrence'].id
    cooc_row = session.query(Node.id).filter(
        Node.user_id == request.user.id,
        Node.parent_id == corpuses_ids[1],
        Node.type_id == cooc_type_id
    ).first()
    # Query.first() yields None (not an empty sequence) when nothing matches,
    # so the absence of a cooccurrence node must be tested with `is None`.
    if cooc_row is None:
        return JsonHttpResponse(False)
    cooc_id = cooc_row[0]

    import networkx as nx
    # Undirected graph: the direction of a cooccurrence does not matter here.
    G = nx.Graph()

    # Cooccurrences whose X-axis ngram is one of the requested ngrams.
    x_matches = session.query(NodeNgramNgram).filter(
        NodeNgramNgram.node_id == cooc_id,
        NodeNgramNgram.ngramx_id.in_(node_ids)
    ).all()
    for ngram in x_matches:
        G.add_edge(ngram.ngramx_id, ngram.ngramy_id, weight=ngram.score)

    # Cooccurrences whose Y-axis ngram is one of the requested ngrams; an edge
    # already added from the X-axis pass keeps its original weight.
    y_matches = session.query(NodeNgramNgram).filter(
        NodeNgramNgram.node_id == cooc_id,
        NodeNgramNgram.ngramy_id.in_(node_ids)
    ).all()
    for ngram in y_matches:
        if not G.has_edge(ngram.ngramx_id, ngram.ngramy_id):
            G.add_edge(ngram.ngramx_id, ngram.ngramy_id, weight=ngram.score)

    # Sum the weights of the edges touching each ngram.
    totals = {}
    for n1, n2, attrs in G.edges(data=True):
        weight = attrs["weight"]
        print( weight , "\t", n1,",",n2 )
        totals[n1] = totals.get(n1, 0) + weight
        totals[n2] = totals.get(n2, 0) + weight

    # Average cooccurrence score of each ngram present in the compared
    # corpus' cooccurrence data.
    averages = {node: total / G.degree(node) for node, total in totals.items()}
    return JsonHttpResponse(averages)
def getUserPortfolio(request , project_id):
    """List the requesting user's projects together with their larger corpora.

    For every Project node owned by the user, the Corpus nodes directly under
    it are collected, keeping only corpora that have at least 10 child nodes.
    Projects left without any such corpus are omitted.

    Parameters:
        request: the incoming HTTP request; its user identifies the owner.
        project_id: captured from the URL; not used by this function.

    Returns:
        A JsonHttpResponse wrapping a dict keyed by project id, each value
        being {"proj_name": <name>, "corpuses": [{"id", "name", "c"}, ...]}
        where "c" is the child-node count of the corpus.
    """
    current_user = request.user
    user_id = cache.User[current_user.username].id
    project_type = cache.NodeType['Project'].id
    corpus_type = cache.NodeType['Corpus'].id

    portfolio = {}
    owned_projects = session.query(Node).filter(
        Node.user_id == user_id,
        Node.type_id == project_type
    ).all()
    for project in owned_projects:
        kept = []
        project_corpora = session.query(Node).filter(
            Node.parent_id == project.id,
            Node.type_id == corpus_type
        ).all()
        for corpus in project_corpora:
            count_rows = session.query(func.count(Node.id)).filter(
                Node.parent_id == corpus.id
            ).all()
            doc_count = count_rows[0][0]
            if doc_count < 10:
                # Corpora with fewer than 10 children are not offered.
                continue
            kept.append({
                "id": corpus.id,
                "name": corpus.name,
                "c": doc_count
            })
            print("\t\t",corpus.id , corpus.name , doc_count)
        # Only projects that still have at least one corpus are reported.
        if kept:
            portfolio[project.id] = {"proj_name": project.name, "corpuses": kept}
    return JsonHttpResponse( portfolio )
...@@ -308,9 +308,9 @@ ...@@ -308,9 +308,9 @@
</div> </div>
<div id="topPapers"></div> <!-- <div id="topPapers"></div> -->
<!--
<div id="tab-container-top" class='tab-container'> <div id="tab-container-top" class='tab-container' style="display: none;">
<ul class='etabs'> <ul class='etabs'>
<li id="tabmed" class='tab active'><a href="#tabs3">Medline Pubs</a></li> <li id="tabmed" class='tab active'><a href="#tabs3">Medline Pubs</a></li>
...@@ -326,7 +326,7 @@ ...@@ -326,7 +326,7 @@
</div> </div>
</div> </div>
</div> </div>
-->
...@@ -409,55 +409,22 @@ ...@@ -409,55 +409,22 @@
<div class="modal-header"> <div class="modal-header">
<button type="button" class="close" data-dismiss="modal" aria-hidden="true">×</button> <button type="button" class="close" data-dismiss="modal" aria-hidden="true">×</button>
<h4 class="modal-title">Corpus Comparison</h4> <h3 class="modal-title">Corpus Comparison Tool</h3>
</div> </div>
<div class="modal-body form-horizontal"> <div class="modal-body form-horizontal">
Chose another corpus to compare with: <h4>Choose one corpus:</h4>
<div class="form-group"> <div style="color:red;" id="selected_corpus"></div>
<label class="col-lg-2 control-label"> </label> <div id="user_portfolio">
<div class="col-lg-10">
<form id="corpuses_form" role="form">
<ul>
{% if corpusinfo %}
{% for k1, v1 in corpusinfo.items %}
{% if v1.corpuses|length > 0 %}
<br><li><a href="/project/{{k1}}/">{{v1.proj_name}}</a><br>
<ul style="list-style-type: none;">
{% for c in v1.corpuses %}
<li>
<div class="radio">
<label><input type="radio" id="{{c.id}}" name="optradio">
<a href="/project/{{k1}}/corpus/{{c.id}}/">{{c.name}}</a>
</label>
</div>
</li>
{% endfor %}
</ul>
</li>
{% endif %}
{% endfor %}
{% endif %}
</ul>
</form>
</div>
</div> </div>
</div>
<div class="modal-footer"> <div class="modal-footer">
<button id="closecorpuses" type="button" class="btn btn-default" data-dismiss="modal">Close</button> <button id="closecorpuses" type="button" class="btn btn-default" data-dismiss="modal">Close</button>
<button type="button" class="btn btn-primary" onclick='printCorpuses();'>Add Tab</button> <button id="add_corpus_tab" type="button" class="btn btn-primary" disabled onclick='printCorpuses();'>Add Tab</button>
</div> </div>
</div> </div>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment