Commit 9ca95e04 authored by delanoe's avatar delanoe

[FEAT] Adding cooc in the rest api to get ngrams.

parent ea5d2621
......@@ -76,76 +76,6 @@ from rest_framework.decorators import api_view
# TODO: how should the REST API be secured?
def get_occtfidf(ngrams, user_id, corpus_id, list_name):
    """
    Collect the "occ_uniq" and "tfidf" scores of a set of ngrams in a corpus.

    Parameters:
        ngrams    -- collection of Ngram ids to score
        user_id   -- user owning the list node used to restrict the
                     occurrence count
        corpus_id -- id of the corpus Node (parent of the documents and
                     of the list node)
        list_name -- node type name of the list; must be a key of
                     cache.NodeType

    Returns a dict keyed by ngram id:
        {ngram_id: {"id": ngram_id,
                    "name": <ngram terms>,
                    "scores": {"occ_uniq": ..., "tfidf": ...}}}

    Ngrams for which the queries return no row get the default scores
    occ_uniq = 1 and tfidf = 0.01.
    """
    corpus = session.query(Node).filter(Node.id == corpus_id).first()

    # One entry per requested ngram that actually exists in the database.
    ngram_ids = {
        ngram.id: {"id": ngram.id, "name": ngram.terms, "scores": {}}
        for ngram in session.query(Ngram).filter(Ngram.id.in_(ngrams)).all()
    }

    # [ = = = = = = Get Uniq_Occs = = = = = = ]
    myamlist = (session.query(Node)
        .filter(Node.user_id == user_id,
                Node.parent_id == corpus_id,
                Node.type_id == cache.NodeType[list_name].id)
        .first()
    )
    # .first() returns None when the user has no such list under this
    # corpus; in that case no occurrence can be counted and every ngram
    # simply falls back to the default below.
    if myamlist is not None:
        Miam = aliased(NodeNgram)
        ngrams_occs = (session
            .query(NodeNgram.ngram_id, func.sum(NodeNgram.weight))
            .join(Node, Node.id == NodeNgram.node_id)
            .join(Miam, Miam.ngram_id == NodeNgram.ngram_id)
            .filter(Node.parent_id == corpus_id,
                    Node.type_id == cache.NodeType['Document'].id)
            .filter(Miam.node_id == myamlist.id)
            .group_by(NodeNgram.ngram_id)
            .all()
        )
        for ngram_id, occurrences in ngrams_occs:
            # The query covers the whole list; keep only requested ngrams.
            if ngram_id in ngram_ids:
                ngram_ids[ngram_id]["scores"]["occ_uniq"] = occurrences

    for info in ngram_ids.values():
        info["scores"].setdefault("occ_uniq", 1)
    # [ = = = = = = / Get Uniq_Occs = = = = = = ]

    # [ = = = = = = Get Tfidf = = = = = = ]
    ngrams_query = (session
        .query(Ngram.id, Ngram.terms)
        .join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
        .join(Node, Node.id == Node_Ngram.node_id)
    )

    Tfidf = aliased(NodeNodeNgram)
    tfidf_id = get_or_create_node(nodetype='Tfidf (global)', corpus=corpus).id
    ngrams_query = (ngrams_query.add_column(Tfidf.score.label('tfidf'))
        .join(Tfidf, Tfidf.ngram_id == Ngram.id)
        .filter(Tfidf.nodex_id == tfidf_id)
    )

    ngrams_query = (ngrams_query.filter(Node.parent_id == corpus_id)
        .group_by(Ngram.id, Ngram.terms, Tfidf.score)
    )

    # Restrict the result to the ngrams that belong to the requested list.
    TheList = aliased(NodeNgram)
    list_id = get_or_create_node(nodetype=list_name, corpus=corpus).id
    ngrams_query = (ngrams_query.join(TheList, TheList.ngram_id == Ngram.id)
        .filter(TheList.node_id == list_id)
    )

    # Rows are (ngram id, terms, tfidf score).
    for ngram_id, _terms, tfidf in ngrams_query:
        if ngram_id in ngram_ids:
            ngram_ids[ngram_id]["scores"]["tfidf"] = tfidf

    for info in ngram_ids.values():
        info["scores"].setdefault("tfidf", 0.01)
    # [ = = = = = = / Get Tfidf = = = = = = ]

    return ngram_ids
class List(APIView):
......@@ -168,7 +98,8 @@ class List(APIView):
class Ngrams(APIView):
'''
REST application to manage ngrams
Example :
http://localhost:8000/api/node/1444485/ngrams?format=json&score=tfidf,occurrences
'''
def get(self, request, node_id):
# query ngrams
......@@ -203,6 +134,10 @@ class Ngrams(APIView):
# )
# for i in ngrams_query:
# print(i)
if 'occurrences' in the_score:
    # Add the summed ngram weight to the query as an 'occurrences' column.
    occurrences = func.sum(Node_Ngram.weight).label('occurrences')
    ngrams_query = (ngrams_query.add_column(occurrences))
    # The result name must match the column label above.
    results.append('occurrences')
if 'tfidf' in the_score:
Tfidf = aliased(NodeNodeNgram)
......@@ -235,7 +170,9 @@ class Ngrams(APIView):
results.append('specificity')
order_query = request.GET.get('order', False)
if order_query == 'cvalue':
if order_query == 'occurrences':
ngrams_query = ngrams_query.order_by(desc(occurrences))
elif order_query == 'cvalue':
ngrams_query = ngrams_query.order_by(desc(Cvalue.score))
elif order_query == 'tfidf':
ngrams_query = ngrams_query.order_by(desc(Tfidf.score))
......@@ -309,6 +246,8 @@ class Ngrams(APIView):
except: pass
try: info["terms"] = ngram.terms
except: pass
try: info["occurrences"] = ngram.occurrences
except: pass
try: info["tfidf"] = ngram.tfidf
except: pass
try: info["cvalue"] = ngram.cvalue
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment