Commit 9d7f655e authored by delanoe

[FIX] fix todos, clean code and tests of ngram filters by document

	modifié :         ngram/lists.py
	modifié :         test-list-management.py
parent cef06119
...@@ -60,6 +60,7 @@ def listIds(typeList=None, user_id=None, corpus_id=None): ...@@ -60,6 +60,7 @@ def listIds(typeList=None, user_id=None, corpus_id=None):
# Some functions to manage ngrams according to the lists # Some functions to manage ngrams according to the lists
def listNgramIds(list_id=None, typeList=None, def listNgramIds(list_id=None, typeList=None,
corpus_id=None, doc_id=None, user_id=None): corpus_id=None, doc_id=None, user_id=None):
''' '''
...@@ -137,34 +138,28 @@ def ngramList(do, list_id, ngram_ids=None) : ...@@ -137,34 +138,28 @@ def ngramList(do, list_id, ngram_ids=None) :
language='en') language='en')
ngram_ids += [ngram.id] ngram_ids += [ngram.id]
# TODO there should not be a try/except here, let the code crash as soon as possible for ngram_id in ngram_ids:
try: # Fetch the ngram from database
for ngram_id in ngram_ids: ngram = session.query(Ngram.id, Ngram.terms, func.count()).filter(Ngram.id == ngram_id).first()
# Fetch the ngram from database # Need to be optimized with list of ids
ngram = session.query(Ngram.id, Ngram.terms, func.count()).filter(Ngram.id == ngram_id).first() node_ngram = (session.query(NodeNgram)
# Need to be optimized with list of ids .filter(NodeNgram.ngram_id == ngram_id)
node_ngram = (session.query(NodeNgram) .filter(NodeNgram.node_id == list_id)
.filter(NodeNgram.ngram_id == ngram_id) .first()
.filter(NodeNgram.node_id == list_id) )
.first() # create NodeNgram if does not exists
) if node_ngram is None :
# create NodeNgram if does not exists node_ngram = NodeNgram(node_id = list_id, ngram_id=ngram_id,
if node_ngram is None : weight=1)
node_ngram = NodeNgram(node_id = list_id, ngram_id=ngram_id, if do == 'add' :
weight=1) session.add(node_ngram)
if do == 'add' : results += [ngram]
session.add(node_ngram)
results += [ngram]
elif do == 'del' : elif do == 'del' :
session.delete(node_ngram) session.delete(node_ngram)
session.commit() session.commit()
return(results) return(results)
except Exception as exc:
PrintException()
raise exc
# Some functions to manage automatically the lists # Some functions to manage automatically the lists
...@@ -341,3 +336,6 @@ def doList( ...@@ -341,3 +336,6 @@ def doList(
bulk_insert(NodeNgram, ['node_id', 'ngram_id', 'weight'], query) bulk_insert(NodeNgram, ['node_id', 'ngram_id', 'weight'], query)
return(list_dict[type_list]['id']) return(list_dict[type_list]['id'])
...@@ -4,11 +4,6 @@ from admin.env import * ...@@ -4,11 +4,6 @@ from admin.env import *
from ngram.stemLem import * from ngram.stemLem import *
from ngram.lists import * from ngram.lists import *
#from cooccurrences import *
from gargantext_web.views import empty_trash
empty_trash()
#user = session.query(User).all()[0] #user = session.query(User).all()[0]
user = session.query(User).filter(User.username=='alexandre').first() user = session.query(User).filter(User.username=='alexandre').first()
...@@ -59,7 +54,7 @@ if corpus is None: ...@@ -59,7 +54,7 @@ if corpus is None:
compute_tfidf(corpus) compute_tfidf(corpus)
doc_id = session.query(Node.id).filter(Node.parent_id == corpus.id, doc_id = session.query(Node.id).filter(Node.parent_id == corpus.id,
Node.type_id == cache.NodeType['Document'].id).all()[1] Node.type_id == cache.NodeType['Document'].id).all()[2]
print('Miam list', listIds(typeList='MiamList', corpus_id=corpus.id, user_id=user.id)[0][0]) print('Miam list', listIds(typeList='MiamList', corpus_id=corpus.id, user_id=user.id)[0][0])
...@@ -93,26 +88,13 @@ print('MiamList', miam_list_id) ...@@ -93,26 +88,13 @@ print('MiamList', miam_list_id)
print(session.query(Node.id).filter(Node.parent_id==corpus.id, Node.type_id==cache.NodeType['WhiteList'].id).first()) print(session.query(Node.id).filter(Node.parent_id==corpus.id, Node.type_id==cache.NodeType['WhiteList'].id).first())
#ngrams2miam(user_id=user.id, corpus_id=corpus.id) #ngrams2miam(user_id=user.id, corpus_id=corpus.id)
#print(listNgramIds(list_id=stop_list_id, user_id=user.id, corpus_id=corpus.id))
doc_ngram_list = listNgramIds(corpus_id=corpus.id, doc_id=doc_id, user_id=user.id)
print(doc_ngram_list)
#print(listNgramIds(list_id=stop_list_id, user_id=user.id, corpus_id=corpus.id))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment