Commit 4f8fdd86 authored by PkSM3

[UPDATE] tfidf ok

parent bfd825b4
...@@ -197,11 +197,11 @@ def get_cooc(request=None, corpus_id=None, cooc_id=None, type='node_link', n=150 ...@@ -197,11 +197,11 @@ def get_cooc(request=None, corpus_id=None, cooc_id=None, type='node_link', n=150
partition = best_partition(G) partition = best_partition(G)
if type == "node_link": if type == "node_link":
for node in G.nodes(): for node in G.nodes():
try: try:
#node,type(labels[node]) #node,type(labels[node])
G.node[node]['id'] = ids[node] G.node[node]['pk'] = ids[node]
G.node[node]['label'] = node G.node[node]['label'] = node
# G.node[node]['pk'] = ids[str(node)] # G.node[node]['pk'] = ids[str(node)]
G.node[node]['size'] = weight[ids[node]] G.node[node]['size'] = weight[ids[node]]
...@@ -209,8 +209,21 @@ def get_cooc(request=None, corpus_id=None, cooc_id=None, type='node_link', n=150 ...@@ -209,8 +209,21 @@ def get_cooc(request=None, corpus_id=None, cooc_id=None, type='node_link', n=150
# G.add_edge(node, "cluster " + str(partition[node]), weight=3) # G.add_edge(node, "cluster " + str(partition[node]), weight=3)
except Exception as error: except Exception as error:
print("error01: ",error) print("error01: ",error)
data = json_graph.node_link_data(G) data = json_graph.node_link_data(G)
links = []
i=1
for e in G.edges_iter():
s = e[0]
t = e[1]
info = { "id":i , "source":ids[s] , "target":ids[t]}
# print(info)
links.append(info)
i+=1
# print(data)
data["links"] = []
data["links"] = links
elif type == "adjacency": elif type == "adjacency":
for node in G.nodes(): for node in G.nodes():
......
...@@ -52,7 +52,7 @@ urlpatterns = patterns('', ...@@ -52,7 +52,7 @@ urlpatterns = patterns('',
url(r'^corpus/(\d+)/node_link.json$', views.node_link), # => api.analysis('type': 'node_link', 'format' : 'json') url(r'^corpus/(\d+)/node_link.json$', views.node_link), # => api.analysis('type': 'node_link', 'format' : 'json')
url(r'^corpus/(\d+)/adjacency.json$', views.adjacency), # => api.analysis('type': 'adjacency', 'format' : 'json') url(r'^corpus/(\d+)/adjacency.json$', views.adjacency), # => api.analysis('type': 'adjacency', 'format' : 'json')
url(r'^api/tfidf/(\d+)/(\d+(?:,\d+)+)$', views_optimized.tfidf), url(r'^api/tfidf/(\d+)/(\w+)$', views_optimized.tfidf),
# url(r'^api/tfidf/(\d+)/(\w+)$', views.tfidf), # url(r'^api/tfidf/(\d+)/(\w+)$', views.tfidf),
url(r'^api/tfidf2/(\d+)/(\w+)$', views.tfidf2), url(r'^api/tfidf2/(\d+)/(\w+)$', views.tfidf2),
......
...@@ -13,6 +13,7 @@ from node.admin import CustomForm ...@@ -13,6 +13,7 @@ from node.admin import CustomForm
from gargantext_web.db import * from gargantext_web.db import *
from gargantext_web.settings import DEBUG, MEDIA_ROOT from gargantext_web.settings import DEBUG, MEDIA_ROOT
from gargantext_web.api import JsonHttpResponse from gargantext_web.api import JsonHttpResponse
import json
from parsing.corpustools import add_resource, parse_resources, extract_ngrams, compute_tfidf from parsing.corpustools import add_resource, parse_resources, extract_ngrams, compute_tfidf
...@@ -163,12 +164,15 @@ def project(request, project_id): ...@@ -163,12 +164,15 @@ def project(request, project_id):
'number' : corpora_count, 'number' : corpora_count,
}) })
def tfidf(request, corpus_id, ngram_ids, limit=6): def tfidf(request, corpus_id, ngram_ids):
"""Takes IDs of corpus and ngram and returns list of relevent documents in json format """Takes IDs of corpus and ngram and returns list of relevent documents in json format
according to TFIDF score (order is decreasing). according to TFIDF score (order is decreasing).
""" """
limit=6
nodes_list = []
# filter input # filter input
ngram_ids = ngram_ids.split(',') ngram_ids = ngram_ids.split('a')
ngram_ids = [int(i) for i in ngram_ids]
# request data # request data
nodes_query = (session nodes_query = (session
.query(Node, func.sum(NodeNodeNgram.score)) .query(Node, func.sum(NodeNodeNgram.score))
...@@ -180,7 +184,6 @@ def tfidf(request, corpus_id, ngram_ids, limit=6): ...@@ -180,7 +184,6 @@ def tfidf(request, corpus_id, ngram_ids, limit=6):
.limit(limit) .limit(limit)
) )
# convert query result to a list of dicts # convert query result to a list of dicts
nodes_list = []
for node, score in nodes_query: for node, score in nodes_query:
node_dict = { node_dict = {
'id': node.id, 'id': node.id,
...@@ -190,5 +193,6 @@ def tfidf(request, corpus_id, ngram_ids, limit=6): ...@@ -190,5 +193,6 @@ def tfidf(request, corpus_id, ngram_ids, limit=6):
if key in node.metadata: if key in node.metadata:
node_dict[key] = node.metadata[key] node_dict[key] = node.metadata[key]
nodes_list.append(node_dict) nodes_list.append(node_dict)
# return the result
return JsonHttpResponse(nodes_list) data = json.dumps(nodes_list)
return JsonHttpResponse(data)
...@@ -40,7 +40,7 @@ def getGlobalStats(request ): ...@@ -40,7 +40,7 @@ def getGlobalStats(request ):
alist = ["bar","foo"] alist = ["bar","foo"]
if request.method == "POST": if request.method == "POST":
N = 1000 N = 10
query = request.POST["query"] query = request.POST["query"]
print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" query =", query ) print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" query =", query )
print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" N =", N ) print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" N =", N )
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment