Commit 311d73ec authored by Romain Loth's avatar Romain Loth

nodes api: merge 31de1848 (status) and bf3ce4bd (publications)

parent 389fb0c4
from gargantext.models import Node, Ngram, NodeNgram, NodeNodeNgram, NodeNode from gargantext.models import Node, Ngram, NodeNgram, NodeNodeNgram, NodeNode
from gargantext.constants import NODETYPES from gargantext.constants import NODETYPES, DEFAULT_N_DOCS_HAVING_NGRAM
from gargantext.util.db import session, delete, func, bulk_insert from gargantext.util.db import session, delete, func, bulk_insert
from gargantext.util.db_cache import cache, or_ from gargantext.util.db_cache import cache, or_
from gargantext.util.validation import validate from gargantext.util.validation import validate
...@@ -192,6 +192,8 @@ class NodeListHaving(APIView): ...@@ -192,6 +192,8 @@ class NodeListHaving(APIView):
Simple implementation: Simple implementation:
Takes IDs of corpus and ngram and returns list of relevent documents in json format Takes IDs of corpus and ngram and returns list of relevent documents in json format
according to TFIDF score (order is decreasing). according to TFIDF score (order is decreasing).
2016-09: add total counts to output json
''' '''
def get(self, request, corpus_id): def get(self, request, corpus_id):
parameters = get_parameters(request) parameters = get_parameters(request)
...@@ -202,7 +204,7 @@ class NodeListHaving(APIView): ...@@ -202,7 +204,7 @@ class NodeListHaving(APIView):
except : except :
raise ValidationException('"ngram_ids" needs integers separated by comma.') raise ValidationException('"ngram_ids" needs integers separated by comma.')
limit=5 limit = DEFAULT_N_DOCS_HAVING_NGRAM
nodes_list = [] nodes_list = []
corpus = session.query(Node).filter(Node.id==corpus_id).first() corpus = session.query(Node).filter(Node.id==corpus_id).first()
...@@ -225,26 +227,18 @@ class NodeListHaving(APIView): ...@@ -225,26 +227,18 @@ class NodeListHaving(APIView):
.filter(Node.typename == 'DOCUMENT', Node.parent_id== corpus.id) .filter(Node.typename == 'DOCUMENT', Node.parent_id== corpus.id)
.filter(or_(*[NodeNodeNgram.ngram_id==ngram_id for ngram_id in ngram_ids])) .filter(or_(*[NodeNodeNgram.ngram_id==ngram_id for ngram_id in ngram_ids]))
.group_by(Node) .group_by(Node)
.order_by(func.sum(NodeNodeNgram.score).desc())
.limit(limit)
) )
# print("\n")
# print("in TFIDF:") # get the total count before applying limit
# print("\tcorpus_id:",corpus_id) nodes_count = nodes_query.count()
# convert query result to a list of dicts
# if nodes_query is None: # now the query with the limit
# print("TFIDF error, juste take sums") nodes_results_query = (nodes_query
# nodes_query = (session .order_by(func.sum(NodeNodeNgram.score).desc())
# .query(Node, func.sum(NodeNgram.weight)) .limit(limit)
# .join(NodeNgram, NodeNgram.node_id == Node.id) )
# .filter(Node.parent_id == corpus_id)
# .filter(Node.typename == 'DOCUMENT') for node, score in nodes_results_query:
# .filter(or_(*[NodeNgram.ngram_id==ngram_id for ngram_id in ngram_ids]))
# .group_by(Node)
# .order_by(func.sum(NodeNgram.weight).desc())
# .limit(limit)
# )
for node, score in nodes_query:
print(node,score) print(node,score)
print("\t corpus:",corpus_id,"\t",node.name) print("\t corpus:",corpus_id,"\t",node.name)
node_dict = { node_dict = {
...@@ -256,7 +250,10 @@ class NodeListHaving(APIView): ...@@ -256,7 +250,10 @@ class NodeListHaving(APIView):
node_dict[key] = node.hyperdata[key] node_dict[key] = node.hyperdata[key]
nodes_list.append(node_dict) nodes_list.append(node_dict)
return JsonHttpResponse(nodes_list) return JsonHttpResponse({
'count': nodes_count,
'records': nodes_list
})
...@@ -531,7 +528,8 @@ class CorpusFacet(APIView): ...@@ -531,7 +528,8 @@ class CorpusFacet(APIView):
# check that the hyperfield parameter makes sense # check that the hyperfield parameter makes sense
_facet_available_subfields = [ _facet_available_subfields = [
'journal', 'publication_year', 'rubrique', 'journal', 'publication_year', 'rubrique',
'language_iso2', 'language_iso3', 'language_name' 'language_iso2', 'language_iso3', 'language_name',
'authors'
] ]
parameters = get_parameters(request) parameters = get_parameters(request)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment