Commit 017614ef authored by PkSM3's avatar PkSM3

Merge branch 'unstable' of ssh://delanoe.org:1979/gargantext into samuel

parents 6d312af8 4cf882c2
@@ -212,7 +212,8 @@ def tfidf(request, corpus_id, ngram_ids):
         .query(Node, func.sum(NodeNodeNgram.score))
         .join(NodeNodeNgram, NodeNodeNgram.nodey_id == Node.id)
         .filter(NodeNodeNgram.nodex_id == tfidf_id)
-        .filter(NodeNodeNgram.ngram_id.in_(ngram_ids))
+        .filter(Node.type_id == cache.NodeType['Document'].id)
+        .filter(or_(*[NodeNodeNgram.ngram_id==ngram_id for ngram_id in ngram_ids]))
         .group_by(Node)
         .order_by(func.sum(NodeNodeNgram.score).desc())
         .limit(limit)
@@ -221,8 +222,21 @@ def tfidf(request, corpus_id, ngram_ids):
# print("in TFIDF:") # print("in TFIDF:")
# print("\tcorpus_id:",corpus_id) # print("\tcorpus_id:",corpus_id)
# convert query result to a list of dicts # convert query result to a list of dicts
if nodes_query is None:
print("TFIDF error, juste take sums")
nodes_query = (session
.query(Node, func.sum(NodeNgram.weight))
.join(NodeNgram, NodeNgram.node_id == Node.id)
.filter(Node.parent_id == corpus_id)
.filter(Node.type_id == cache.NodeType['Document'].id)
.filter(or_(*[NodeNgram.ngram_id==ngram_id for ngram_id in ngram_ids]))
.group_by(Node)
.order_by(func.sum(NodeNgram.weight).desc())
.limit(limit)
)
for node, score in nodes_query: for node, score in nodes_query:
# print("\t corpus:",corpus_id,"\t",node.name) print("\t corpus:",corpus_id,"\t",node.name)
node_dict = { node_dict = {
'id': node.id, 'id': node.id,
'score': score, 'score': score,
...
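Side note on the filter rewrite above: in SQLAlchemy, `or_(*[col == v for v in vals])` and `col.in_(vals)` select the same rows; the former compiles to a chain of ORs, the latter to a single IN clause. A minimal, self-contained sketch of the equivalence (SQLAlchemy 1.4+; the toy table is illustrative, not part of this codebase):

    # Minimal sketch: or_(...) vs .in_(...) produce equivalent filters.
    from sqlalchemy import Column, Integer, create_engine, or_
    from sqlalchemy.orm import declarative_base, sessionmaker

    Base = declarative_base()

    class Score(Base):                      # toy stand-in for NodeNodeNgram
        __tablename__ = 'score'
        id = Column(Integer, primary_key=True)
        ngram_id = Column(Integer)

    engine = create_engine('sqlite://')     # throwaway in-memory database
    Base.metadata.create_all(engine)
    session = sessionmaker(bind=engine)()

    ngram_ids = [1, 2, 3]
    q_or = session.query(Score).filter(or_(*[Score.ngram_id == n for n in ngram_ids]))
    q_in = session.query(Score).filter(Score.ngram_id.in_(ngram_ids))
    print(q_or)  # ... WHERE score.ngram_id = ? OR score.ngram_id = ? OR ...
    print(q_in)  # ... WHERE score.ngram_id IN (?, ?, ?)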
from gargantext_web.db import session, cache, get_cursor
from gargantext_web.db import Node, NodeNgram, NodeNodeNgram
from gargantext_web.db import get_or_create_node
from admin.utils import DebugTime

def compute_occs(corpus):
    dbg = DebugTime('Corpus #%d - OCCURRENCES' % corpus.id)
    dbg.show('Calculate occurrences')
    occs_node = get_or_create_node(nodetype='Occurrences', corpus=corpus)
    #print(occs_node.id)
    (session.query(NodeNodeNgram)
        .filter(NodeNodeNgram.nodex_id==occs_node.id).delete()
    )
    session.commit()
    db, cursor = get_cursor()
    cursor.execute('''
        INSERT INTO
            %s (nodex_id, nodey_id, ngram_id, score)
        SELECT
            %d AS nodex_id,
            %d AS nodey_id,
            nodengram.ngram_id AS ngram_id,
            SUM(nodengram.weight) AS score
        FROM
            %s AS nodengram
        INNER JOIN
            %s AS node ON nodengram.node_id = node.id
        WHERE
            node.parent_id = %d
        AND
            node.type_id = %d
        GROUP BY
            nodengram.ngram_id
        ''' % ( NodeNodeNgram.__table__.name
              , occs_node.id, corpus.id
              , NodeNgram.__table__.name
              , Node.__table__.name
              , corpus.id
              , cache.NodeType['Document'].id
              )
    )
    db.commit()
    #data = session.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==occs_node.id).all()
    #print([n for n in data])
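The commented-out readback above can be spelled out as a small helper to sanity-check what compute_occs wrote; a sketch reusing the imports at the top of this file (print_occs itself is hypothetical):

    # Sketch: read back the top occurrence scores attached to the corpus.
    def print_occs(corpus, limit=10):
        occs_node = get_or_create_node(nodetype='Occurrences', corpus=corpus)
        rows = (session.query(NodeNodeNgram)
            .filter(NodeNodeNgram.nodex_id == occs_node.id)
            .order_by(NodeNodeNgram.score.desc())
            .limit(limit)
            .all()
        )
        for row in rows:
            print(row.ngram_id, row.score)  # columns match the INSERT above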
 from gargantext_web.db import session
-from gargantext_web.db import Ngram, NodeNgramNgram
-from gargantext_web.db import get_cursor, bulk_insert
+from gargantext_web.db import Ngram, NodeNgram, NodeNgramNgram
+from gargantext_web.db import get_cursor, bulk_insert, get_or_create_node
+
+def insert_ngrams_to_list(list_of_ngrams, corpus, list_type='MapList', erase=True):
+    '''
+    Works only for Stop and Map lists.
+    '''
+    list_node = get_or_create_node(corpus=corpus, nodetype=list_type)
+    group_node = get_or_create_node(corpus=corpus, nodetype='GroupList')
+    # grouped ngrams are skipped below; keep their ids as a flat set
+    group_list = set(ngramy_id for (ngramy_id,) in
+        session.query(NodeNgramNgram.ngramy_id)
+        .filter(NodeNgramNgram.node_id==group_node.id)
+        .all()
+    )
+    #print(list_node)
+    if erase == True:
+        session.query(NodeNgram).filter(NodeNgram.node_id==list_node.id).delete()
+        session.commit()
+    def get_id(ngram):
+        query = session.query(Ngram.id).filter(Ngram.terms==ngram).first()
+        return(query)
+    list_to_insert = list()
+    for ngram in list_of_ngrams:
+        ngram_candidate = get_id(ngram)
+        if ngram_candidate is not None:
+            ngram_id = ngram_candidate[0]
+            if ngram_id is not None and ngram_id not in group_list:
+                list_to_insert.append((list_node.id, ngram_id, 1))
+    #print(list_to_insert)
+    db, cursor = get_cursor()
+    bulk_insert(NodeNgram, ['node_id', 'ngram_id', 'weight'], [n for n in list_to_insert])
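Usage would look roughly like this; a sketch, assuming `Node` is importable from `gargantext_web.db` as elsewhere in this commit (the corpus id is borrowed from the commented-out examples further down):

    # Sketch: push a hand-made stop list into a corpus.
    from gargantext_web.db import Node

    corpus = session.query(Node).filter(Node.id == 540420).first()
    if corpus is not None:
        insert_ngrams_to_list(['of the', 'in the'], corpus, list_type='StopList', erase=True)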
 def insert_ngrams(ngrams,get='terms-id'):
     '''
@@ -111,28 +142,3 @@ def insert_nodengramngram(nodengramngram):
     db.commit()
-#def queryNodeNodeNgram(nodeMeasure_id=None, corpus_id=None, limit=None):
-#    '''
-#    queryNodeNodeNgram :: Int -> Int -> Int -> (Int, String, Float)
-#    Get list of ngrams according to a measure related to the corpus: maybe tfidf
-#    cvalue.
-#    '''
-#    query = (session.query(Ngram.id, Ngram.terms, NodeNodeNgram.score)
-#        .join(NodeNodeNgram, NodeNodeNgram.ngram_id == Ngram.id)
-#        .join(Node, Node.id == NodeNodeNgram.nodex_id)
-#        .filter(NodeNodeNgram.nodex_id == nodeMeasure_id)
-#        .filter(NodeNodeNgram.nodey_id == corpus_id)
-#        .group_by(Ngram.id, Ngram.terms, NodeNodeNgram.score)
-#        .order_by(desc(NodeNodeNgram.score))
-#    )
-#
-#    if limit is None:
-#        query = query.count()
-#    elif limit == 0 :
-#        query = query.all()
-#    else:
-#        query = query.limit(limit)
-#
-#    return(query)
-#
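Even with the helper deleted, the underlying lookup is easy to reconstruct. A hedged sketch of ranking a corpus's ngrams by a measure node, given a corpus node; the 'Tfidf' nodetype name and the nodex/nodey orientation are assumptions taken from the deleted comments, with `desc` from `sqlalchemy`:

    # Sketch: top ngrams for a corpus, ranked by a measure node.
    from sqlalchemy import desc

    tfidf_node = get_or_create_node(corpus=corpus, nodetype='Tfidf')  # nodetype name is an assumption
    top = (session.query(Ngram.terms, NodeNodeNgram.score)
        .join(NodeNodeNgram, NodeNodeNgram.ngram_id == Ngram.id)
        .filter(NodeNodeNgram.nodex_id == tfidf_node.id)
        .filter(NodeNodeNgram.nodey_id == corpus.id)
        .order_by(desc(NodeNodeNgram.score))
        .limit(10)
        .all()
    )
    print(top)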
@@ -50,23 +50,4 @@ def ngram_workflow(corpus, n=5000):
# update_state.processing_(corpus, "OCCS local score")
# compute_occs(corpus)
#corpus=session.query(Node).filter(Node.id==540420).first()
#corpus=session.query(Node).filter(Node.id==559637).first()
#update_state.processing_(corpus, 0)
check_stop = False
if check_stop:
    stop = get_or_create_node(corpus=corpus, nodetype='StopList')
    #session.query(NodeNgram).filter(NodeNgram.node_id==stop.id).delete()
    #session.commit()
    stop_ngrams = (session.query(Ngram)
        .join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
        .filter(NodeNgram.node_id==stop.id)
        .all()
    )
    print([n for n in stop_ngrams])
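If the point of this debug block is to eyeball the stop words, querying `Ngram.terms` directly prints readable strings instead of object reprs; a small variant, as a sketch:

    # Sketch: print stop-list terms rather than Ngram object reprs.
    stop = get_or_create_node(corpus=corpus, nodetype='StopList')
    stop_terms = (session.query(Ngram.terms)
        .join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
        .filter(NodeNgram.node_id == stop.id)
        .all()
    )
    print([terms for (terms,) in stop_terms])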
@@ -11,7 +11,7 @@ import datetime
 import copy
 from gargantext_web.views import move_to_trash
-from gargantext_web.db import session, Node, NodeNgram, NodeNgramNgram, NodeNodeNgram, Ngram, Hyperdata, Node_Ngram\
+from gargantext_web.db import session, cache, Node, NodeNgram, NodeNgramNgram, NodeNodeNgram, Ngram, Hyperdata, Node_Ngram\
     , NodeType, Node_Hyperdata
 from gargantext_web.validation import validate, ValidationException
 from node import models
@@ -139,6 +139,50 @@ class NodesChildrenNgrams(APIView):
            ],
        })

class NodesChildrenNgramsIds(APIView):

    def get(self, request, node_id):
        # query ngrams
        ParentNode = aliased(Node)
        ngrams_query = (session
            .query(Node.id, func.sum(Node_Ngram.weight).label('count'))
            .join(Node_Ngram, Node_Ngram.node_id == Node.id)
            .join(Ngram, Ngram.id == Node_Ngram.ngram_id)
            .filter(Node.parent_id == node_id)
            .filter(Node.type_id == cache.NodeType['Document'].id)
            .group_by(Node.id)
            # .group_by(Ngram)
            .order_by(func.sum(Node_Ngram.weight).desc())
        )
        # filters
        if 'startwith' in request.GET:
            ngrams_query = ngrams_query.filter(Ngram.terms.startswith(request.GET['startwith']))
        if 'contain' in request.GET:
            ngrams_query = ngrams_query.filter(Ngram.terms.contains(request.GET['contain']))
        #if 'doesnotcontain' in request.GET:
        #    ngrams_query = ngrams_query.filter(not_(Ngram.terms.contains(request.GET['doesnotcontain'])))
        # pagination
        offset = int(request.GET.get('offset', 0))
        limit = int(request.GET.get('limit', 20))
        total = ngrams_query.count()
        # return formatted result
        return JsonHttpResponse({
            'pagination': {
                'offset': offset,
                'limit': limit,
                'total': total,
            },
            'data': [
                {
                    'id': node,
                    'count': count
                }
                for node, count in ngrams_query[offset : offset + limit]
            ],
        })
from gargantext_web.db import get_or_create_node

class Ngrams(APIView):
...
@@ -17,6 +17,7 @@ urlpatterns = patterns('',
     url(r'nodes$', api.NodesList.as_view()),
     url(r'nodes/(\d+)$', api.Nodes.as_view()),
     url(r'nodes/(\d+)/children/ngrams$', api.NodesChildrenNgrams.as_view()), # => repeated children ?
+    url(r'nodes/(\d+)/children/ids$', api.NodesChildrenNgramsIds.as_view()), # => repeated children ?
     # NGRAMS table & annotations
     url(r'node/(\d+)/ngrams$' , ngrams.Ngrams.as_view()),
...
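To exercise the new children/ids route end to end, a request sketch; the host, the api/ prefix, and the node id are illustrative assumptions, while the offset/limit/startwith parameters and the response shape come from the view above:

    # Sketch: call the new endpoint with pagination and a prefix filter.
    import requests

    resp = requests.get(
        'http://localhost:8000/api/nodes/540420/children/ids',  # host and prefix assumed
        params={'offset': 0, 'limit': 20, 'startwith': 'bio'},
    )
    for row in resp.json()['data']:
        print(row['id'], row['count'])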
#!/bin/bash
FILE=$(date +%Y%m%d-%H:%M:%S.log)
source /srv/gargantext_env/bin/activate
touch /var/log/gargantext/celery/$FILE && uwsgi gargantext.ini >> /var/log/gargantext/celery/$FILE
#!/bin/bash
FILE=$(date +%Y%m%d-%H:%M:%S.log)
touch /var/log/gargantext/uwsgi/$FILE && uwsgi gargantext.ini >> /var/log/gargantext/uwsgi/$FILE