Commit 333e9e3d authored by delanoe

[FIX] Some bugs with new functions for cooc.

parent f65279d2
...@@ -55,6 +55,7 @@ def cooc(corpus=None ...@@ -55,6 +55,7 @@ def cooc(corpus=None
cooc_query = (session.query(NodeNgramX.ngram_id, NodeNgramY.ngram_id, func.count()) cooc_query = (session.query(NodeNgramX.ngram_id, NodeNgramY.ngram_id, func.count())
.join(Node, Node.id == NodeNgramX.node_id) .join(Node, Node.id == NodeNgramX.node_id)
.join(NodeNgramY, NodeNgramY.node_id == Node.id) .join(NodeNgramY, NodeNgramY.node_id == Node.id)
.filter(Node.parent_id==corpus.id, Node.type_id==doc_id)
) )
...@@ -81,14 +82,14 @@ def cooc(corpus=None ...@@ -81,14 +82,14 @@ def cooc(corpus=None
cooc_query = (cooc_query.filter(Node.parent_id == corpus.id, Node.type_id == doc_id) cooc_query = (cooc_query.filter(Node.parent_id == corpus.id, Node.type_id == doc_id)
.filter(NodeNgramX.ngram_id < NodeNgramY.ngram_id) .filter(NodeNgramX.ngram_id < NodeNgramY.ngram_id)
.group_by(NodeNgramX.ngram_id, NodeNgramY.ngram_id) .group_by(Node.id, NodeNgramX.ngram_id, NodeNgramY.ngram_id)
.order_by(func.count()) .order_by(func.count())
.limit(limit) .limit(limit)
) )
matrix = WeightedMatrix(cooc_query) matrix = WeightedMatrix(cooc_query)
print(matrix) #print(matrix)
if cvalue_id is not None : if cvalue_id is not None :
#miam = get_or_create_node(nodetype='Cvalue', corpus=corpus) #miam = get_or_create_node(nodetype='Cvalue', corpus=corpus)
......
...@@ -181,17 +181,18 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz ...@@ -181,17 +181,18 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz
miam_id = get_or_create_node(nodetype='MiamList', corpus=corpus).id miam_id = get_or_create_node(nodetype='MiamList', corpus=corpus).id
stop_id = get_or_create_node(nodetype='StopList', corpus=corpus).id stop_id = get_or_create_node(nodetype='StopList', corpus=corpus).id
group_id = get_or_create_node(nodetype='Group', corpus=corpus).id group_id = get_or_create_node(nodetype='Group', corpus=corpus).id
cooc_id = get_or_create_node(nodetype='Cooccurrence', corpus=corpus).id
session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==cooc_id).delete()
#cooc_id = cooc(corpus=corpus, miam_id=miam_id, stop_id=stop_id, limit=size) #cooc_id = cooc(corpus=corpus, miam_id=miam_id, stop_id=stop_id, limit=size)
cooc_id = cooc(corpus=corpus, miam_id=miam_id, group_id=group_id, stop_id=stop_id, limit=size) cooc_id = cooc(corpus=corpus, miam_id=miam_id, group_id=group_id, stop_id=stop_id, limit=size)
#cooc_id = cooc(corpus=corpus, miam_id=miam_id, limit=size) #cooc_id = cooc(corpus=corpus, miam_id=miam_id, limit=size)
print([n for n in session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==cooc_id).all()]) #print([n for n in session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==cooc_id).all()])
for cooccurrence in session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==cooc_id).all(): for cooccurrence in session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==cooc_id).all():
#print(cooccurrence) #print(cooccurrence)
# print(cooccurrence.ngramx.terms," <=> ",cooccurrence.ngramy.terms,"\t",cooccurrence.score) # print(cooccurrence.ngramx.terms," <=> ",cooccurrence.ngramy.terms,"\t",cooccurrence.score)
labels[cooccurrence.ngramx_id] = session.query(Ngram.terms).filter(Ngram.id == cooccurrence.ngramx_id).first()[0] labels[cooccurrence.ngramx_id] = session.query(Ngram.id).filter(Ngram.id == cooccurrence.ngramx_id).first()[0]
labels[cooccurrence.ngramy_id] = session.query(Ngram.terms).filter(Ngram.id == cooccurrence.ngramy_id).first()[0] labels[cooccurrence.ngramy_id] = session.query(Ngram.id).filter(Ngram.id == cooccurrence.ngramy_id).first()[0]
matrix[cooccurrence.ngramx_id][cooccurrence.ngramy_id] = cooccurrence.score matrix[cooccurrence.ngramx_id][cooccurrence.ngramy_id] = cooccurrence.score
matrix[cooccurrence.ngramy_id][cooccurrence.ngramx_id] = cooccurrence.score matrix[cooccurrence.ngramy_id][cooccurrence.ngramx_id] = cooccurrence.score
......
...@@ -76,6 +76,7 @@ urlpatterns = patterns('', ...@@ -76,6 +76,7 @@ urlpatterns = patterns('',
url(r'^ngrams$', views.ngrams), # to be removed url(r'^ngrams$', views.ngrams), # to be removed
url(r'^nodeinfo/(\d+)$', views.nodeinfo), # to be removed ? url(r'^nodeinfo/(\d+)$', views.nodeinfo), # to be removed ?
url(r'^tfidf/(\d+)/(\w+)$', views_optimized.tfidf),
url(r'^project/(\d+)/corpus/(\d+)/(\w+)/update$', views.update_nodes), url(r'^project/(\d+)/corpus/(\d+)/(\w+)/update$', views.update_nodes),
# TODO rest to update corpus and information for progress bar # TODO rest to update corpus and information for progress bar
......
...@@ -15,6 +15,7 @@ from threading import Thread ...@@ -15,6 +15,7 @@ from threading import Thread
from node.admin import CustomForm from node.admin import CustomForm
from gargantext_web.db import * from gargantext_web.db import *
from gargantext_web.db import get_or_create_node
from gargantext_web.settings import DEBUG, MEDIA_ROOT from gargantext_web.settings import DEBUG, MEDIA_ROOT
from rest_v1_0.api import JsonHttpResponse from rest_v1_0.api import JsonHttpResponse
...@@ -28,7 +29,6 @@ from gargantext_web.celery import apply_workflow ...@@ -28,7 +29,6 @@ from gargantext_web.celery import apply_workflow
from admin.utils import ensure_dir from admin.utils import ensure_dir
def project(request, project_id): def project(request, project_id):
# do we have a valid project id? # do we have a valid project id?
...@@ -200,11 +200,15 @@ def tfidf(request, corpus_id, ngram_ids): ...@@ -200,11 +200,15 @@ def tfidf(request, corpus_id, ngram_ids):
# filter input # filter input
ngram_ids = ngram_ids.split('a') ngram_ids = ngram_ids.split('a')
ngram_ids = [int(i) for i in ngram_ids] ngram_ids = [int(i) for i in ngram_ids]
corpus = session.query(Node).filter(Node.id==corpus_id).first()
tfidf_id = get_or_create_node(corpus=corpus, nodetype='Tfidf').id
print(tfidf_id)
# request data # request data
nodes_query = (session nodes_query = (session
.query(Node, func.sum(NodeNodeNgram.score)) .query(Node, func.sum(NodeNodeNgram.score))
.join(NodeNodeNgram, NodeNodeNgram.nodey_id == Node.id) .join(NodeNodeNgram, NodeNodeNgram.nodey_id == Node.id)
.filter(NodeNodeNgram.nodex_id == corpus_id) .filter(NodeNodeNgram.nodex_id == tfidf_id)
.filter(NodeNodeNgram.ngram_id.in_(ngram_ids)) .filter(NodeNodeNgram.ngram_id.in_(ngram_ids))
.group_by(Node) .group_by(Node)
.order_by(func.sum(NodeNodeNgram.score).desc()) .order_by(func.sum(NodeNodeNgram.score).desc())
......
from django.conf.urls import patterns, url from django.conf.urls import patterns, url
from gargantext_web import views_optimized
from rest_v1_0 import api, ngrams from rest_v1_0 import api, ngrams
urlpatterns = patterns('', urlpatterns = patterns('',
...@@ -32,6 +34,6 @@ urlpatterns = patterns('', ...@@ -32,6 +34,6 @@ urlpatterns = patterns('',
url(r'nodes/(\d+)/ngrams$', api.CorpusController.ngrams), url(r'nodes/(\d+)/ngrams$', api.CorpusController.ngrams),
url(r'nodes/(\d+)/ngrams$', api.CorpusController.ngrams), url(r'nodes/(\d+)/ngrams$', api.CorpusController.ngrams),
#url(r'tfidf/(\d+)/(\w+)$', views_optimized.tfidf), url(r'tfidf/(\d+)/(\w+)$', views_optimized.tfidf),
) )
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment