Commit cf569279 authored by delanoe's avatar delanoe

[FEAT] API to get ids with search in ngrams.

parent 09b31df8
...@@ -212,7 +212,8 @@ def tfidf(request, corpus_id, ngram_ids): ...@@ -212,7 +212,8 @@ def tfidf(request, corpus_id, ngram_ids):
.query(Node, func.sum(NodeNodeNgram.score)) .query(Node, func.sum(NodeNodeNgram.score))
.join(NodeNodeNgram, NodeNodeNgram.nodey_id == Node.id) .join(NodeNodeNgram, NodeNodeNgram.nodey_id == Node.id)
.filter(NodeNodeNgram.nodex_id == tfidf_id) .filter(NodeNodeNgram.nodex_id == tfidf_id)
.filter(NodeNodeNgram.ngram_id.in_(ngram_ids)) .filter(Node.type_id == cache.NodeType['Document'].id)
.filter(or_(*[NodeNodeNgram.ngram_id==ngram_id for ngram_id in ngram_ids]))
.group_by(Node) .group_by(Node)
.order_by(func.sum(NodeNodeNgram.score).desc()) .order_by(func.sum(NodeNodeNgram.score).desc())
.limit(limit) .limit(limit)
...@@ -221,8 +222,21 @@ def tfidf(request, corpus_id, ngram_ids): ...@@ -221,8 +222,21 @@ def tfidf(request, corpus_id, ngram_ids):
# print("in TFIDF:") # print("in TFIDF:")
# print("\tcorpus_id:",corpus_id) # print("\tcorpus_id:",corpus_id)
# convert query result to a list of dicts # convert query result to a list of dicts
if nodes_query is None:
print("TFIDF error, juste take sums")
nodes_query = (session
.query(Node, func.sum(NodeNgram.weight))
.join(NodeNgram, NodeNgram.node_id == Node.id)
.filter(Node.parent_id == corpus_id)
.filter(Node.type_id == cache.NodeType['Document'].id)
.filter(or_(*[NodeNgram.ngram_id==ngram_id for ngram_id in ngram_ids]))
.group_by(Node)
.order_by(func.sum(NodeNgram.weight).desc())
.limit(limit)
)
for node, score in nodes_query: for node, score in nodes_query:
# print("\t corpus:",corpus_id,"\t",node.name) print("\t corpus:",corpus_id,"\t",node.name)
node_dict = { node_dict = {
'id': node.id, 'id': node.id,
'score': score, 'score': score,
......
...@@ -50,23 +50,4 @@ def ngram_workflow(corpus, n=5000): ...@@ -50,23 +50,4 @@ def ngram_workflow(corpus, n=5000):
# update_state.processing_(corpus, "OCCS local score") # update_state.processing_(corpus, "OCCS local score")
# compute_occs(corpus) # compute_occs(corpus)
#corpus=session.query(Node).filter(Node.id==540420).first()
#corpus=session.query(Node).filter(Node.id==559637).first()
#update_stateprocessing(corpus, 0)
check_stop = False
if check_stop:
stop = get_or_create_node(corpus=corpus,nodetype='StopList')
#session.query(NodeNgram).filter(NodeNgram.node_id==stop.id).delete()
#session.commit()
stop_ngrams = (session.query(Ngram)
.join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
.filter(NodeNgram.node_id==stop.id)
.all()
)
print([n for n in stop_ngrams])
...@@ -11,7 +11,7 @@ import datetime ...@@ -11,7 +11,7 @@ import datetime
import copy import copy
from gargantext_web.views import move_to_trash from gargantext_web.views import move_to_trash
from gargantext_web.db import session, Node, NodeNgram, NodeNgramNgram, NodeNodeNgram, Ngram, Hyperdata, Node_Ngram\ from gargantext_web.db import session, cache, Node, NodeNgram, NodeNgramNgram, NodeNodeNgram, Ngram, Hyperdata, Node_Ngram\
, NodeType, Node_Hyperdata , NodeType, Node_Hyperdata
from gargantext_web.validation import validate, ValidationException from gargantext_web.validation import validate, ValidationException
from node import models from node import models
...@@ -139,6 +139,50 @@ class NodesChildrenNgrams(APIView): ...@@ -139,6 +139,50 @@ class NodesChildrenNgrams(APIView):
], ],
}) })
class NodesChildrenNgramsIds(APIView):
    """Return the ids of a node's Document children, ranked by ngram weight.

    Each result row pairs a child node id with the sum of the
    ``Node_Ngram.weight`` values of the ngrams attached to that child.
    Optional query-string filters restrict which ngrams take part in the sum.
    """

    def get(self, request, node_id):
        """Handle GET: list child document ids with their summed ngram weight.

        Query-string parameters read from ``request.GET``:
            startwith: keep only ngrams whose terms start with this string.
            contain:   keep only ngrams whose terms contain this string.
            offset:    index of the first row to return (default 0).
            limit:     maximum number of rows to return (default 20).

        Returns a ``JsonHttpResponse`` with a ``pagination`` object
        (``offset``, ``limit``, ``total``) and a ``data`` list of
        ``{'id': ..., 'count': ...}`` entries sorted by descending count.

        Raises ``ValueError`` if ``offset`` or ``limit`` is not an integer.
        """
        # One row per child node of type 'Document', with the summed weight
        # of its ngrams; heaviest documents first.
        ngrams_query = (session
            .query(Node.id, func.sum(Node_Ngram.weight).label('count'))
            .join(Node_Ngram, Node_Ngram.node_id == Node.id)
            .join(Ngram, Ngram.id == Node_Ngram.ngram_id)
            .filter(Node.parent_id == node_id)
            .filter(Node.type_id == cache.NodeType['Document'].id)
            .group_by(Node.id)
            .order_by(func.sum(Node_Ngram.weight).desc())
        )

        # Optional filters on the ngram terms.
        if 'startwith' in request.GET:
            ngrams_query = ngrams_query.filter(
                Ngram.terms.startswith(request.GET['startwith'])
            )
        if 'contain' in request.GET:
            ngrams_query = ngrams_query.filter(
                Ngram.terms.contains(request.GET['contain'])
            )

        # Pagination. Negative values are clamped to zero so that they are
        # never passed on to the query slice below.
        offset = max(0, int(request.GET.get('offset', 0)))
        limit = max(0, int(request.GET.get('limit', 20)))
        total = ngrams_query.count()

        # Return the formatted result.
        return JsonHttpResponse({
            'pagination': {
                'offset': offset,
                'limit': limit,
                'total': total,
            },
            'data': [
                {
                    'id': child_id,
                    'count': count,
                }
                for child_id, count in ngrams_query[offset : offset + limit]
            ],
        })
from gargantext_web.db import get_or_create_node from gargantext_web.db import get_or_create_node
class Ngrams(APIView): class Ngrams(APIView):
......
...@@ -17,6 +17,7 @@ urlpatterns = patterns('', ...@@ -17,6 +17,7 @@ urlpatterns = patterns('',
url(r'nodes$', api.NodesList.as_view()), url(r'nodes$', api.NodesList.as_view()),
url(r'nodes/(\d+)$', api.Nodes.as_view()), url(r'nodes/(\d+)$', api.Nodes.as_view()),
url(r'nodes/(\d+)/children/ngrams$', api.NodesChildrenNgrams.as_view()), # => repeated children ? url(r'nodes/(\d+)/children/ngrams$', api.NodesChildrenNgrams.as_view()), # => repeated children ?
url(r'nodes/(\d+)/children/ids$', api.NodesChildrenNgramsIds.as_view()), # => repeated children ?
# NGRAMS table & annotations # NGRAMS table & annotations
url(r'node/(\d+)/ngrams$' , ngrams.Ngrams.as_view()), url(r'node/(\d+)/ngrams$' , ngrams.Ngrams.as_view()),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment