Commit da2b3252 authored by delanoe

[FIX] Remove the session with session.remove() when each function finishes.

parent ea335122
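
The pattern applied throughout this commit: each function acquires its own session with get_session() and releases it with session.remove() once it is done, instead of relying on a module-level or class-level session. A minimal sketch of that intent, assuming get_session() returns a SQLAlchemy scoped_session (so remove() disposes the current thread-local session); the import path and the try/finally placement are illustrative, not part of the diff below:

    from gargantext_web.db import get_session, Node   # import path assumed

    def example_view(node_id):
        session = get_session()    # acquire the session inside the function, not at module level
        try:
            node = session.query(Node).filter(Node.id == node_id).first()
            return node.name if node is not None else None
        finally:
            session.remove()       # release the scoped session when the function is over
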
......@@ -200,3 +200,4 @@ def do_cooc(corpus=None
cooc = matrix
cooc.save(node_cooc.id)
return(node_cooc.id)
session.remove()
......@@ -9,7 +9,6 @@ import numpy as np
import collections
session = get_session()
def result2dict(query):
results = dict()
......@@ -27,6 +26,7 @@ def diachronic_specificity(corpus_id, terms, order=True):
Values are measure to indicate diachronic specificity.
Nowadays, the measure is rather simple: distance of frequency of period from mean of frequency of all corpus.
'''
session = get_session()
ngram_frequency_query = (session
.query(Node.hyperdata['publication_year'], func.count('*'))
.join(NodeNgram, Node.id == NodeNgram.node_id)
......@@ -64,6 +64,7 @@ def diachronic_specificity(corpus_id, terms, order=True):
else:
return relative_terms_count
session.remove()
# For tests
# diachronic_specificity(102750, "bayer", order=True)
......
......@@ -76,6 +76,7 @@ class Translations(BaseClass):
self.groups = defaultdict(set)
for key, value in self.items.items():
self.groups[value].add(key)
session.remove()
elif isinstance(other, Translations):
self.items = other.items.copy()
self.groups = other.groups.copy()
......@@ -128,6 +129,7 @@ class Translations(BaseClass):
('node_id', 'ngramy_id', 'ngramx_id', 'score'),
((node_id, key, value, 1.0) for key, value in self.items.items())
)
session.remove()
class WeightedMatrix(BaseClass):
......@@ -144,6 +146,7 @@ class WeightedMatrix(BaseClass):
self.items = defaultdict(lambda: defaultdict(float))
for key1, key2, value in self.items.items():
self.items[key1][key2] = value
session.remove()
elif isinstance(other, WeightedMatrix):
self.items = defaultdict(lambda: defaultdict(float))
for key1, key2, value in other:
......@@ -171,6 +174,7 @@ class WeightedMatrix(BaseClass):
('node_id', 'ngramx_id', 'ngramy_id', 'score'),
((node_id, key1, key2, value) for key1, key2, value in self)
)
session.remove()
def __radd__(self, other):
result = NotImplemented
......@@ -253,6 +257,7 @@ class UnweightedList(BaseClass):
.filter(NodeNgram.node_id == other)
)
self.items = {row[0] for row in query}
session.remove()
elif isinstance(other, WeightedList):
self.items = set(other.items.keys())
elif isinstance(other, UnweightedList):
......@@ -337,6 +342,7 @@ class UnweightedList(BaseClass):
('node_id', 'ngram_id', 'weight'),
((node_id, key, 1.0) for key in self.items)
)
session.remove()
class WeightedList(BaseClass):
......@@ -351,6 +357,7 @@ class WeightedList(BaseClass):
.filter(NodeNgram.node_id == other)
)
self.items = defaultdict(float, query)
session.remove()
elif isinstance(other, WeightedList):
self.items = other.items.copy()
elif isinstance(other, UnweightedList):
......@@ -451,6 +458,7 @@ class WeightedList(BaseClass):
('node_id', 'ngram_id', 'weight'),
((node_id, key, value) for key, value in self.items.items())
)
session.remove()
def test():
......
......@@ -63,12 +63,12 @@ class NgramEdit(APIView):
"""
renderer_classes = (JSONRenderer,)
authentication_classes = (SessionAuthentication, BasicAuthentication)
session = get_session()
def post(self, request, list_id, ngram_ids):
"""
Edit an existing NGram in a given list
"""
session = get_session()
list_id = int(list_id)
list_node = session.query(Node).filter(Node.id==list_id).first()
# TODO add 1 for MapList social score ?
......@@ -90,6 +90,8 @@ class NgramEdit(APIView):
'uuid': ngram_id,
'list_id': list_id,
} for ngram_id in ngram_ids)
session.remove()
def put(self, request, list_id, ngram_ids):
return Response(None, 204)
......@@ -98,6 +100,7 @@ class NgramEdit(APIView):
"""
Delete a ngram from a list
"""
session = get_session()
print("to del",ngram_ids)
for ngram_id in ngram_ids.split('+'):
print('ngram_id', ngram_id)
......@@ -128,6 +131,7 @@ class NgramEdit(APIView):
# [ = = = = / del from map-list = = = = ]
return Response(None, 204)
session.remove()
class NgramCreate(APIView):
"""
......@@ -135,7 +139,6 @@ class NgramCreate(APIView):
"""
renderer_classes = (JSONRenderer,)
authentication_classes = (SessionAuthentication, BasicAuthentication)
session = get_session()
def post(self, request, list_id):
"""
......@@ -143,6 +146,7 @@ class NgramCreate(APIView):
example: request.data = {'text': 'phylogeny'}
"""
session = get_session()
list_id = int(list_id)
# format the ngram's text
ngram_text = request.data.get('text', None)
......@@ -177,6 +181,7 @@ class NgramCreate(APIView):
'list_id': list_id,
})
session.remove()
class Document(APIView):
"""
......@@ -186,6 +191,7 @@ class Document(APIView):
def get(self, request, doc_id):
"""Document by ID"""
session = get_session()
node = session.query(Node).filter(Node.id == doc_id).first()
if node is None:
raise APIException('This node does not exist', 404)
......@@ -207,5 +213,5 @@ class Document(APIView):
'id': node.id
}
return Response(data)
session.remove()
......@@ -20,7 +20,7 @@ def apply_sum(x, y):
print(x+y)
session = get_session()
print(session.query(Node.name).first())
session.remove()
from parsing.corpustools import parse_resources, extract_ngrams #add_resource,
from ngram.lists import ngrams2miam
......@@ -52,7 +52,8 @@ def apply_workflow(corpus_id):
print("End of the Workflow for corpus %d" % (corpus_id))
update_state.processing_(corpus, "0")
session.remove()
@shared_task
def empty_trash(corpus_id):
......
......@@ -168,6 +168,7 @@ class ModelCache(dict):
raise KeyError
self[key] = element
return element
session.remove()
def preload(self):
self.clear()
......@@ -176,6 +177,7 @@ class ModelCache(dict):
for column_name in self._columns_names:
key = getattr(element, column_name)
self[key] = element
session.remove()
class Cache():
......@@ -243,8 +245,11 @@ def get_or_create_node(nodetype=None,corpus=None,corpus_id=None,name_str=None,hy
name_str :: String
hyperdata :: Dict
'''
sessionToRemove = False
if session is None:
session = get_session()
sessionToRemove = True
if nodetype is None:
print("Need to give a type node")
......@@ -285,3 +290,7 @@ def get_or_create_node(nodetype=None,corpus=None,corpus_id=None,name_str=None,hy
session.commit()
#print(parent_id, n.parent_id, n.id, n.name)
return(node)
if sessionToRemove:
session.remove()
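
The get_or_create_node hunk above also introduces a session-ownership flag: the function creates a session only when the caller did not pass one, and removes it only in that case (callers elsewhere in this commit now pass session=session explicitly). A simplified sketch of that ownership pattern, reusing the get_session and Node assumptions from the sketch above; the lookup/creation body and the try/finally are illustrative only:

    def get_or_create_node_sketch(name_str, session=None):
        sessionToRemove = False
        if session is None:                 # create a session only when none was passed in
            session = get_session()
            sessionToRemove = True
        try:
            node = session.query(Node).filter(Node.name == name_str).first()
            if node is None:
                node = Node(name=name_str)  # hypothetical creation; the real function also sets type, parent, hyperdata
                session.add(node)
                session.commit()
            return node
        finally:
            if sessionToRemove:             # remove the session only if this function created it
                session.remove()
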
......@@ -220,6 +220,7 @@ def projects(request):
Each project is described with hyperdata that are updateded on each following view.
To each project, we can link a resource that can be an image.
'''
session = get_session()
if not request.user.is_authenticated():
return redirect('/auth/')
......@@ -230,7 +231,6 @@ def projects(request):
date = datetime.datetime.now()
# print(Logger.write("STATIC_ROOT"))
session = get_session()
projects = session.query(Node).filter(Node.user_id == user_id, Node.type_id == project_type_id).order_by(Node.date).all()
number = len(projects)
......@@ -288,7 +288,7 @@ def projects(request):
'common_projects':common_projects,
'common_users':common_users,
})
session.remove()
def update_nodes(request, project_id, corpus_id, view=None):
'''
......@@ -297,10 +297,11 @@ def update_nodes(request, project_id, corpus_id, view=None):
- permanent deletion of Trash
'''
session = get_session()
if not request.user.is_authenticated():
return redirect('/login/?next=%s' % request.path)
session = get_session()
try:
offset = int(project_id)
offset = int(corpus_id)
......@@ -358,8 +359,12 @@ def update_nodes(request, project_id, corpus_id, view=None):
# context_instance=RequestContext(request)
# )
#
session.remove()
def corpus(request, project_id, corpus_id):
session = get_session()
if not request.user.is_authenticated():
return redirect('/login/?next=%s' % request.path)
......@@ -378,7 +383,6 @@ def corpus(request, project_id, corpus_id):
type_doc_id = cache.NodeType['Document'].id
session = get_session()
number = session.query(func.count(Node.id)).filter(Node.parent_id==corpus_id, Node.type_id==type_doc_id).all()[0][0]
......@@ -405,15 +409,15 @@ def corpus(request, project_id, corpus_id):
}))
return HttpResponse(html)
session.remove()
def newpaginatorJSON(request , corpus_id):
results = ["hola" , "mundo"]
session = get_session()
# t = get_template('tests/newpag/thetable.html')
# project = session.query(Node).filter(Node.id==project_id).first()
session = get_session()
corpus = session.query(Node).filter(Node.id==corpus_id).first()
type_document_id = cache.NodeType['Document'].id
user_id = request.user.id
......@@ -464,11 +468,11 @@ def newpaginatorJSON(request , corpus_id):
"totalRecordCount":len(results)
}
return JsonHttpResponse(finaldict)
session.remove()
def move_to_trash(node_id):
session = get_session()
try:
session = get_session()
node = session.query(Node).filter(Node.id == node_id).first()
previous_type_id = node.type_id
......@@ -486,9 +490,14 @@ def move_to_trash(node_id):
#return(previous_type_id)
except Exception as error:
print("can not move to trash Node" + str(node_id) + ":" + str(error))
session.remove()
def move_to_trash_multiple(request):
session = get_session()
user = request.user
if not user.is_authenticated():
return redirect('/login/?next=%s' % request.path)
......@@ -498,7 +507,6 @@ def move_to_trash_multiple(request):
nodes2trash = json.loads(request.POST["nodeids"])
print("nodes to the trash:")
print(nodes2trash)
session = get_session()
nodes = session.query(Node).filter(Node.id.in_(nodes2trash)).all()
for node in nodes:
node.type_id = cache.NodeType['Trash'].id
......@@ -509,13 +517,15 @@ def move_to_trash_multiple(request):
results = ["tudo","fixe"]
return JsonHttpResponse(results)
session.remove()
def delete_node(request, node_id):
session = get_session()
# do we have a valid user?
user = request.user
session = get_session()
node = session.query(Node).filter(Node.id == node_id).first()
if not user.is_authenticated():
......@@ -531,7 +541,8 @@ def delete_node(request, node_id):
return HttpResponseRedirect('/project/' + str(node_parent_id))
else:
return HttpResponseRedirect('/projects/')
session.remove()
def delete_corpus(request, project_id, node_id):
# ORM Django
......@@ -553,11 +564,12 @@ def delete_corpus(request, project_id, node_id):
def chart(request, project_id, corpus_id):
''' Charts to compare, filter, count'''
session = get_session()
t = get_template('chart.html')
user = request.user
date = datetime.datetime.now()
session = get_session()
project = session.query(Node).filter(Node.id==project_id).first()
corpus = session.query(Node).filter(Node.id==corpus_id).first()
......@@ -569,13 +581,15 @@ def chart(request, project_id, corpus_id):
'corpus' : corpus,
}))
return HttpResponse(html)
session.remove()
def sankey(request, corpus_id):
session = get_session()
t = get_template('sankey.html')
user = request.user
date = datetime.datetime.now()
session = get_session()
corpus = session.query(Node).filter(Node.id==corpus_id).first()
html = t.render(Context({\
......@@ -586,15 +600,15 @@ def sankey(request, corpus_id):
}))
return HttpResponse(html)
session.remove()
def matrix(request, project_id, corpus_id):
session = get_session()
t = get_template('matrix.html')
user = request.user
date = datetime.datetime.now()
session = get_session()
project = session.query(Node).filter(Node.id==project_id).first()
corpus = session.query(Node).filter(Node.id==corpus_id).first()
......@@ -607,13 +621,15 @@ def matrix(request, project_id, corpus_id):
}))
return HttpResponse(html)
session.remove()
def graph(request, project_id, corpus_id, generic=100, specific=100):
session = get_session()
t = get_template('explorer.html')
user = request.user
date = datetime.datetime.now()
session = get_session()
project = session.query(Node).filter(Node.id==project_id).first()
corpus = session.query(Node).filter(Node.id==corpus_id).first()
......@@ -638,6 +654,7 @@ def graph(request, project_id, corpus_id, generic=100, specific=100):
}))
return HttpResponse(html)
session.remove()
def exploration(request):
t = get_template('exploration.html')
......@@ -672,12 +689,13 @@ def corpus_csv(request, project_id, corpus_id):
'''
Create the HttpResponse object with the appropriate CSV header.
'''
session = get_session()
response = HttpResponse(content_type='text/csv')
response['Content-Disposition'] = 'attachment; filename="corpus.csv"'
writer = csv.writer(response)
session = get_session()
corpus_id = session.query(Node.id).filter(Node.id==corpus_id).first()
type_document_id = cache.NodeType['Document'].id
documents = session.query(Node).filter(Node.parent_id==corpus_id, Node.type_id==type_document_id).all()
......@@ -700,6 +718,7 @@ def corpus_csv(request, project_id, corpus_id):
return response
session.remove()
def send_csv(request, corpus_id):
'''
......@@ -748,17 +767,17 @@ def node_link(request, corpus_id):
'''
Create the HttpResponse object with the node_link dataset.
'''
data = []
session = get_session()
data = []
corpus = session.query(Node).filter(Node.id==corpus_id).first()
data = get_cooc(request=request, corpus=corpus, type="node_link")
return JsonHttpResponse(data)
session.remove()
def sankey_csv(request, corpus_id):
data = []
session = get_session()
data = []
corpus = session.query(Node).filter(Node.id==corpus_id).first()
data = [
["source", "target", "value"]
......@@ -775,6 +794,7 @@ def sankey_csv(request, corpus_id):
, ["Theme_3", "Reco_par_5", 1]
]
return(CsvHttpResponse(data))
session.remove()
def adjacency(request, corpus_id):
'''
......
......@@ -199,6 +199,8 @@ def project(request, project_id):
'number' : corpora_count,
})
session.remove()
def tfidf(request, corpus_id, ngram_ids):
"""Takes IDs of corpus and ngram and returns list of relevent documents in json format
according to TFIDF score (order is decreasing).
......@@ -254,11 +256,13 @@ def tfidf(request, corpus_id, ngram_ids):
nodes_list.append(node_dict)
return JsonHttpResponse(nodes_list)
session.remove()
def getCorpusIntersection(request , corpuses_ids):
FinalDict = False
session = get_session()
FinalDict = False
if request.method == 'POST' and "nodeids" in request.POST and len(request.POST["nodeids"])>0:
import ast
......@@ -303,15 +307,16 @@ def getCorpusIntersection(request , corpuses_ids):
# Getting AVG-COOC of each ngram that exists in the cooc-matrix of the compared-corpus.
return JsonHttpResponse(FinalDict)
session.remove()
def getUserPortfolio(request , project_id):
session = get_session()
user = request.user
user_id = cache.User[request.user.username].id
project_type_id = cache.NodeType['Project'].id
corpus_type_id = cache.NodeType['Corpus'].id
results = {}
session = get_session()
projs = session.query(Node).filter(Node.user_id == user_id,Node.type_id==project_type_id ).all()
......@@ -349,3 +354,4 @@ def getUserPortfolio(request , project_id):
return JsonHttpResponse( results )
session.remove()
......@@ -41,10 +41,11 @@ def getNgrams(corpus=None, limit=1000):
'''
getNgrams :: Corpus -> [(Int, String, String, Float)]
'''
session = get_session()
terms = dict()
tfidf_node = get_or_create_node(nodetype='Tfidf (global)'
, corpus=corpus)
session = get_session()
#print(corpus.name)
ngrams = (session.query(Ngram.id, Ngram.terms, func.sum(NodeNgram.weight), NodeNodeNgram.score)
.join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
......@@ -64,6 +65,7 @@ def getNgrams(corpus=None, limit=1000):
except:
PrintException()
return(terms)
session.remove()
def compute_cvalue(corpus=None, limit=1000):
'''
......@@ -130,7 +132,7 @@ def compute_cvalue(corpus=None, limit=1000):
#bulk_insert(NodeNodeNgram, ['nodex_id', 'nodey_id', 'ngram_id', 'score'], [n for n in islice(result,0,100)])
bulk_insert(NodeNodeNgram, ['nodex_id', 'nodey_id', 'ngram_id', 'score'], [n for n in result])
session.remove()
# test
#corpus=session.query(Node).filter(Node.id==244250).first()
#computeCvalue(corpus)
......@@ -52,10 +52,11 @@ def compute_groups(corpus, limit_inf=None, limit_sup=None, how='Stem'):
'''
group ngrams according to a function (stemming or lemming)
'''
session = get_session()
dbg = DebugTime('Corpus #%d - group' % corpus.id)
dbg.show('Group')
session = get_session()
#spec,cvalue = getNgrams(corpus, limit_inf=limit_inf, limit_sup=limit_sup)
#list_to_check=cvalue.union(spec)
......@@ -138,3 +139,5 @@ def compute_groups(corpus, limit_inf=None, limit_sup=None, how='Stem'):
, [data for data in group_to_insert])
bulk_insert(NodeNgram, ('node_id', 'ngram_id', 'weight'), [data for data in list(miam_to_insert)])
session.remove()
......@@ -131,7 +131,7 @@ def exportNgramLists(node,filename,delimiter="\t"):
# csv_rows = [[ligne1_a, ligne1_b..],[ligne2_a, ligne2_b..],..]
return csv_rows
session.remove()
# on applique notre fonction ng_to_csv sur chaque liste
# ------------------------------------------------------
......@@ -380,9 +380,7 @@ def importNgramLists(node,filename,delimiter="\t", del_lists=[]):
print("INFO: added %i elements in the lists indices" % added_nd_ng)
print("INFO: added %i new ngrams in the lexicon" % added_ng)
session.remove()
# à chronométrer:
......
......@@ -59,6 +59,9 @@ def listIds(typeList=None, user_id=None, corpus_id=None):
else:
raise Exception("Usage (Warning): Need corpus_id and user_id")
session.remove()
# Some functions to manage ngrams according to the lists
......@@ -118,6 +121,8 @@ def listNgramIds(list_id=None, typeList=None,
)
return(query.all())
session.remove()
def ngramList(do, list_id, ngram_ids=None) :
'''
......@@ -129,8 +134,9 @@ def ngramList(do, list_id, ngram_ids=None) :
ngram_id = [Int] : list of Ngrams id (Ngrams.id)
list_id = Int : list id (Node.id)
'''
results = []
session = get_session()
results = []
if do == 'create':
terms = copy(ngram_ids)
......@@ -163,6 +169,7 @@ def ngramList(do, list_id, ngram_ids=None) :
session.commit()
return(results)
session.remove()
# Some functions to manage automatically the lists
def doStopList(user_id=None, corpus_id=None, stop_id=None, reset=False, limit=None):
......@@ -202,6 +209,7 @@ def ngrams2miam(user_id=None, corpus_id=None):
.all()
)
bulk_insert(NodeNgram, ['node_id', 'ngram_id', 'weight'], query)
session.remove()
from gargantext_web.db import get_or_create_node
from analysis.lists import Translations, UnweightedList
......@@ -232,6 +240,7 @@ def ngrams2miamBis(corpus):
.all()
)
bulk_insert(NodeNgram, ['node_id', 'ngram_id', 'weight'], query)
session.remove()
def doList(
type_list='MiamList',
......@@ -365,6 +374,6 @@ def doList(
bulk_insert(NodeNgram, ['node_id', 'ngram_id', 'weight'], query)
return(list_dict[type_list]['id'])
session.remove()
......@@ -87,10 +87,12 @@ def compute_mapList(corpus,limit=500,n=1):
bulk_insert(NodeNgram, ['node_id', 'ngram_id', 'weight'], [d for d in data])
dbg.show('MapList computed')
session.remove()
def insert_miam(corpus, ngrams=None, path_file_csv=None):
dbg = DebugTime('Corpus #%d - computing Miam' % corpus.id)
session = get_session()
dbg = DebugTime('Corpus #%d - computing Miam' % corpus.id)
node_miam = get_or_create_node(nodetype='MiamList', corpus=corpus)
session.query(NodeNgram).filter(NodeNgram.node_id==node_miam.id).delete()
session.commit()
......@@ -122,8 +124,6 @@ def insert_miam(corpus, ngrams=None, path_file_csv=None):
bulk_insert(NodeNgram, ['node_id', 'ngram_id', 'weight'], [d for d in data])
file_csv.close()
dbg.show('Miam computed')
session.remove()
#corpus = session.query(Node).filter(Node.id==540420).first()
#compute_mapList(corpus)
#insert_miam(corpus=corpus, path_file_csv="Thesaurus_tag.csv")
......@@ -5,11 +5,12 @@ from gargantext_web.db import get_or_create_node
from admin.utils import DebugTime
def compute_occs(corpus):
session = get_session()
dbg = DebugTime('Corpus #%d - OCCURRENCES' % corpus.id)
dbg.show('Calculate occurrences')
occs_node = get_or_create_node(nodetype='Occurrences', corpus=corpus)
session = get_session()
#print(occs_node.id)
(session.query(NodeNodeNgram)
......@@ -47,5 +48,8 @@ def compute_occs(corpus):
)
)
db.commit()
session.remove()
#data = session.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==occs_node.id).all()
#print([n for n in data])
......@@ -20,6 +20,7 @@ def specificity(cooc_id=None, corpus=None, limit=100):
Compute the specificity, simple calculus.
'''
session = get_session()
cooccurrences = (session.query(NodeNgramNgram)
.filter(NodeNgramNgram.node_id==cooc_id)
.order_by(NodeNgramNgram.score)
......@@ -54,6 +55,7 @@ def specificity(cooc_id=None, corpus=None, limit=100):
bulk_insert(NodeNodeNgram, ['nodex_id', 'nodey_id', 'ngram_id', 'score'], [d for d in data])
return(node.id)
session.remove()
def compute_specificity(corpus,limit=100):
'''
......@@ -62,15 +64,16 @@ def compute_specificity(corpus,limit=100):
1) Compute the cooc matrix
2) Compute the specificity score, saving it in database, return its Node
'''
session = get_session()
dbg = DebugTime('Corpus #%d - specificity' % corpus.id)
session = get_session()
list_cvalue = get_or_create_node(nodetype='Cvalue', corpus=corpus, session=session)
cooc_id = do_cooc(corpus=corpus, cvalue_id=list_cvalue.id,limit=limit)
specificity(cooc_id=cooc_id,corpus=corpus,limit=limit)
dbg.show('specificity')
session.remove()
#corpus=session.query(Node).filter(Node.id==244250).first()
#compute_specificity(corpus)
......
......@@ -11,6 +11,7 @@ def get_ngramogram(corpus, limit=None):
Ngram is a composition of ograms (ogram = 1gram)
"""
session = get_session()
try:
query = (session
.query(Ngram.id, Ngram.terms)
......@@ -34,6 +35,8 @@ def get_ngramogram(corpus, limit=None):
except Exception as error:
PrintException()
session.remove()
def split_ngram(ngram):
if isinstance(ngram, str):
......@@ -329,6 +332,7 @@ def stem_corpus(corpus_id=None):
PrintException()
else:
print('Usage: stem_corpus(corpus_id=corpus.id)')
session.remove()
......@@ -36,6 +36,7 @@ def importStopList(node,filename,language='fr'):
)
bulk_insert(NodeNgram, ['node_id', 'ngram_id', 'weight'], [d for d in data])
session.remove()
def isStopWord(ngram, stop_words=None):
'''
......@@ -78,10 +79,11 @@ def compute_stop(corpus,limit=2000,debug=False):
'''
do some statitics on all stop lists of database of the same type
'''
session = get_session()
stop_node_id = get_or_create_node(nodetype='StopList', corpus=corpus).id
# TODO do a function to get all stop words with social scores
session = get_session()
root = session.query(Node).filter(Node.type_id == cache.NodeType['Root'].id).first()
root_stop_id = get_or_create_node(nodetype='StopList', corpus=root).id
......@@ -112,4 +114,5 @@ def compute_stop(corpus,limit=2000,debug=False):
stop = WeightedList({ n[0] : -1 for n in ngrams_to_stop})
stop.save(stop_node_id)
session.remove()
......@@ -7,9 +7,11 @@ from admin.utils import DebugTime
def compute_tfidf(corpus):
# compute terms frequency sum
session = get_session()
dbg = DebugTime('Corpus #%d - TFIDF' % corpus.id)
dbg.show('calculate terms frequencies sums')
tfidf_node = get_or_create_node(nodetype='Tfidf', corpus=corpus)
tfidf_node = get_or_create_node(nodetype='Tfidf', corpus=corpus, session=session)
db, cursor = get_cursor()
cursor.execute('''
......@@ -119,16 +121,20 @@ def compute_tfidf(corpus):
# the end!
db.commit()
session.remove()
def compute_tfidf_global(corpus):
'''
Maybe improve this with:
#http://stackoverflow.com/questions/8674718/best-way-to-select-random-rows-postgresql
'''
session = get_session()
dbg = DebugTime('Corpus #%d - tfidf global' % corpus.id)
dbg.show('calculate terms frequencies sums')
tfidf_node = get_or_create_node(nodetype='Tfidf (global)', corpus=corpus)
tfidf_node = get_or_create_node(nodetype='Tfidf (global)', corpus=corpus, session=session)
session = get_session()
# update would be better
session.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==tfidf_node.id).delete()
session.commit()
......@@ -258,6 +264,8 @@ def compute_tfidf_global(corpus):
db.commit()
dbg.show('insert tfidf')
session.remove()
#corpus=session.query(Node).filter(Node.id==244250).first()
#compute_tfidf_global(corpus)
......@@ -8,8 +8,8 @@ def insert_ngrams_to_list(list_of_ngrams, corpus, list_type='MapList', erase=Tru
'''
session = get_session()
list_node = get_or_create_node(corpus=corpus, nodetype=list_type)
group_node = get_or_create_node(corpus=corpus, nodetype='GroupList')
list_node = get_or_create_node(corpus=corpus, nodetype=list_type, session=session)
group_node = get_or_create_node(corpus=corpus, nodetype='GroupList', session=session)
group_list = (session.query(NodeNgramNgram.ngramy_id)
.filter(NodeNgramNgram.id==group_node.id)
.all()
......@@ -35,6 +35,7 @@ def insert_ngrams_to_list(list_of_ngrams, corpus, list_type='MapList', erase=Tru
#print(list_to_insert)
db, cursor = get_cursor()
bulk_insert(NodeNgram, ['node_id', 'ngram_id', 'weight'], [n for n in list_to_insert])
session.remove()
def insert_ngrams(ngrams,get='terms-id'):
'''
......
......@@ -30,8 +30,9 @@ parsers = Parsers()
# resources management
def add_resource(corpus, **kwargs):
# only for tests
session = get_session()
# only for tests
resource = Resource(guid=str(random()), **kwargs )
# User
if 'user_id' not in kwargs:
......@@ -64,11 +65,12 @@ def add_resource(corpus, **kwargs):
session.commit()
# return result
return resource
session.remove()
def parse_resources(corpus, user=None, user_id=None):
dbg = DebugTime('Corpus #%d - parsing' % corpus.id)
session = get_session()
dbg = DebugTime('Corpus #%d - parsing' % corpus.id)
corpus_id = corpus.id
type_id = cache.NodeType['Document'].id
......@@ -176,7 +178,7 @@ def parse_resources(corpus, user=None, user_id=None):
# mark the corpus as parsed
corpus.parsed = True
session.remove()
# ngrams extraction
from .NgramsExtractors import EnglishNgramsExtractor, FrenchNgramsExtractor, NgramsExtractor
......@@ -207,8 +209,9 @@ class NgramsExtractors(defaultdict):
ngramsextractors = NgramsExtractors()
def extract_ngrams(corpus, keys, nlp=True):
dbg = DebugTime('Corpus #%d - ngrams' % corpus.id)
session = get_session()
dbg = DebugTime('Corpus #%d - ngrams' % corpus.id)
default_language_iso2 = None if corpus.language_id is None else cache.Language[corpus.language_id].iso2
# query the hyperdata associated with the given keys
columns = [Node.id, Node.language_id] + [Node.hyperdata[key] for key in keys]
......@@ -289,4 +292,4 @@ def extract_ngrams(corpus, keys, nlp=True):
dbg.message = 'insert %d associations' % len(node_ngram_data)
# commit to database
db.commit()
session.remove()
......@@ -97,13 +97,16 @@ def Root(request, format=None):
'snippets': reverse('snippet-list', request=request, format=format)
})
session.remove()
class NodesChildrenNgrams(APIView):
def get(self, request, node_id):
session = get_session()
# query ngrams
ParentNode = aliased(Node)
session = get_session()
ngrams_query = (session
.query(Ngram.terms, func.sum(Node_Ngram.weight).label('count'))
.join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
......@@ -140,13 +143,16 @@ class NodesChildrenNgrams(APIView):
for ngram in ngrams_query[offset : offset+limit]
],
})
session.remove()
class NodesChildrenNgramsIds(APIView):
def get(self, request, node_id):
session = get_session()
# query ngrams
ParentNode = aliased(Node)
session = get_session()
ngrams_query = (session
.query(Node.id, func.sum(Node_Ngram.weight).label('count'))
.join(Node_Ngram, Node_Ngram.node_id == Node.id)
......@@ -183,16 +189,18 @@ class NodesChildrenNgramsIds(APIView):
for node, count in ngrams_query[offset : offset+limit]
],
})
session.remove()
from gargantext_web.db import get_or_create_node
class Ngrams(APIView):
def get(self, request, node_id):
session = get_session()
# query ngrams
ParentNode = aliased(Node)
session = get_session()
corpus = session.query(Node).filter(Node.id==node_id).first()
group_by = []
results = ['id', 'terms']
......@@ -307,11 +315,13 @@ class Ngrams(APIView):
],
})
session.remove()
class NodesChildrenDuplicates(APIView):
def _fetch_duplicates(self, request, node_id, extra_columns=None, min_count=1):
session = get_session()
# input validation
if extra_columns is None:
extra_columns = []
......@@ -319,7 +329,6 @@ class NodesChildrenDuplicates(APIView):
raise APIException('Missing GET parameter: "keys"', 400)
keys = request.GET['keys'].split(',')
# hyperdata retrieval
session = get_session()
hyperdata_query = (session
.query(Hyperdata)
.filter(Hyperdata.name.in_(keys))
......@@ -351,6 +360,8 @@ class NodesChildrenDuplicates(APIView):
duplicates_query = duplicates_query.having(func.count() > min_count)
# and now, return it
return duplicates_query
session.remove()
def get(self, request, node_id):
# data to be returned
......@@ -400,10 +411,11 @@ class NodesChildrenDuplicates(APIView):
# retrieve metadata from a given list of parent node
def get_metadata(corpus_id_list):
session = get_session()
# query hyperdata keys
ParentNode = aliased(Node)
session = get_session()
hyperdata_query = (session
.query(Hyperdata)
.join(Node_Hyperdata, Node_Hyperdata.hyperdata_id == Hyperdata.id)
......@@ -455,6 +467,7 @@ def get_metadata(corpus_id_list):
# give the result back
return collection
session.remove()
class ApiHyperdata(APIView):
......@@ -520,6 +533,7 @@ class ApiNgrams(APIView):
class NodesChildrenQueries(APIView):
def _sql(self, input, node_id):
session = get_session()
fields = dict()
tables = set('nodes')
hyperdata_aliases = dict()
......@@ -602,6 +616,7 @@ class NodesChildrenQueries(APIView):
else query[input['pagination']['offset']:]
)
return output
session.remove()
def _haskell(self, input, node_id):
output = copy.deepcopy(input)
......@@ -702,8 +717,9 @@ class NodesList(APIView):
authentication_classes = (SessionAuthentication, BasicAuthentication)
def get(self, request):
print("user id : " + str(request.user))
session = get_session()
print("user id : " + str(request.user))
query = (session
.query(Node.id, Node.name, NodeType.name.label('type'))
.filter(Node.user_id == int(request.user.id))
......@@ -718,10 +734,13 @@ class NodesList(APIView):
node._asdict()
for node in query.all()
]})
session.remove()
class Nodes(APIView):
def get(self, request, node_id):
session = get_session()
node = session.query(Node).filter(Node.id == node_id).first()
if node is None:
raise APIException('This node does not exist', 404)
......@@ -734,6 +753,8 @@ class Nodes(APIView):
#'hyperdata': dict(node.hyperdata),
'hyperdata': node.hyperdata,
})
session.remove()
# deleting node by id
# currently, very dangerous.
......@@ -741,8 +762,9 @@ class Nodes(APIView):
# for better constistency...
def delete(self, request, node_id):
user = request.user
session = get_session()
user = request.user
node = session.query(Node).filter(Node.id == node_id).first()
msgres = str()
......@@ -754,6 +776,8 @@ class Nodes(APIView):
except Exception as error:
msgres ="error deleting : " + node_id + str(error)
session.remove()
class CorpusController:
@classmethod
......@@ -774,7 +798,7 @@ class CorpusController:
# if corpus.user != request.user:
# raise Http403("Unauthorized access.")
return corpus
session.remove()
@classmethod
def ngrams(cls, request, node_id):
......@@ -785,6 +809,7 @@ class CorpusController:
# build query
ParentNode = aliased(Node)
session = get_session()
query = (session
.query(Ngram.terms, func.count('*'))
.join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
......@@ -811,3 +836,5 @@ class CorpusController:
)
else:
raise ValidationError('Unrecognized "format=%s", should be "csv" or "json"' % (format, ))
session.remove()
......@@ -6,7 +6,6 @@ from analysis.functions import get_cooc
class Graph(APIView):
authentication_classes = (SessionAuthentication, BasicAuthentication)
session = get_session()
def get(self, request, corpus_id):
'''
......@@ -15,6 +14,8 @@ class Graph(APIView):
graph?field1=ngrams&field2=ngrams&
graph?field1=ngrams&field2=ngrams&start=''&end=''
'''
session = get_session()
field1 = request.GET.get('field1', 'ngrams')
field2 = request.GET.get('field2', 'ngrams')
......@@ -52,3 +53,5 @@ class Graph(APIView):
, 'field2' : accepted_field2
, 'options': options
})
session.remove()
......@@ -82,9 +82,10 @@ class List(APIView):
def get_metadata ( self , ngram_ids , parent_id ):
session = get_session()
start_ = time.time()
session = get_session()
nodes_ngrams = session.query(Ngram.id , Ngram.terms).filter( Ngram.id.in_( list(ngram_ids.keys()))).all()
for node in nodes_ngrams:
if node.id in ngram_ids:
......@@ -120,7 +121,7 @@ class List(APIView):
end_ = time.time()
return { "data":ngram_ids , "secs":(end_ - start_) }
session.remove()
def get(self, request, corpus_id , list_name ):
......@@ -154,6 +155,8 @@ class List(APIView):
measurements["tfidf"] = { "s" : ngrams_meta["secs"], "n": len(ngrams_meta["data"].keys()) }
return JsonHttpResponse( {"data":ngram_ids , "time":measurements } )
session.remove()
class Ngrams(APIView):
'''
......@@ -323,6 +326,8 @@ class Ngrams(APIView):
'data': output,
"time" : measurements
})
session.remove()
def post(self , request , node_id ):
return JsonHttpResponse(["POST","ok"])
......@@ -343,11 +348,15 @@ class Group(APIView):
'''
def get_group_id(self , node_id , user_id):
node_id = int(node_id)
session = get_session()
corpus = session.query(Node).filter( Node.id==node_id).first()
if corpus==None: return None
group = get_or_create_node(corpus=corpus, nodetype='Group')
return(group.id)
session.remove()
def get(self, request, corpus_id):
if not request.user.is_authenticated():
......@@ -376,6 +385,7 @@ class Group(APIView):
DG = nx.DiGraph()
session = get_session()
ngrams_ngrams = (session
.query(NodeNgramNgram)
.filter(NodeNgramNgram.node_id==group_id)
......@@ -415,7 +425,8 @@ class Group(APIView):
# groups["nodes"] = get_occtfidf( ngrams , request.user.id , corpus_id , "Group")
return JsonHttpResponse( { "data" : groups } )
session.remove()
def post(self, request, node_id):
return JsonHttpResponse( ["hola" , "mundo"] )
......@@ -440,6 +451,8 @@ class Group(APIView):
return JsonHttpResponse(True, 201)
else:
raise APIException('Missing parameter: "{\'data\' : [\'source\': Int, \'target\': [Int]}"', 400)
session.remove()
def put(self , request , corpus_id ):
session = get_session()
......@@ -523,6 +536,7 @@ class Group(APIView):
nodengramngram = NodeNgramNgram(node_id=existing_group_id, ngramx_id=n1 , ngramy_id=n2, score=1.0)
session.add(nodengramngram)
session.commit()
# [ - - - / doing links of new clique and adding to DB - - - ] #
......@@ -573,6 +587,7 @@ class Group(APIView):
return JsonHttpResponse(True, 201)
session.remove()
class Keep(APIView):
"""
......@@ -580,9 +595,9 @@ class Keep(APIView):
"""
renderer_classes = (JSONRenderer,)
authentication_classes = (SessionAuthentication, BasicAuthentication)
session = get_session()
def get (self, request, corpus_id):
session = get_session()
# list_id = session.query(Node).filter(Node.id==list_id).first()
corpus = session.query(Node).filter( Node.id==corpus_id ).first()
node_mapList = get_or_create_node(nodetype='MapList', corpus=corpus )
......@@ -591,11 +606,13 @@ class Keep(APIView):
for node in nodes_in_map:
results[node.ngram_id] = True
return JsonHttpResponse(results)
session.remove()
def put (self, request, corpus_id):
"""
Add ngrams to map list
"""
session = get_session()
group_rawreq = dict(request.data)
ngram_2add = [int(i) for i in list(group_rawreq.keys())]
corpus = session.query(Node).filter( Node.id==corpus_id ).first()
......@@ -605,11 +622,14 @@ class Keep(APIView):
session.add(map_node)
session.commit()
return JsonHttpResponse(True, 201)
session.remove()
def delete (self, request, corpus_id):
"""
Delete ngrams from the map list
"""
session = get_session()
group_rawreq = dict(request.data)
# print("group_rawreq:")
# print(group_rawreq)
......@@ -627,5 +647,5 @@ class Keep(APIView):
return JsonHttpResponse(True, 201)
session.remove()
......@@ -84,15 +84,12 @@ def getGlobalStatsISTEXT(request ):
def doTheQuery(request , project_id):
alist = ["hola","mundo"]
makeSession = get_sessionmaker()
session = makeSession() # get_session()
session = get_session()
# do we have a valid project id?
try:
project_id = int(project_id)
except ValueError:
raise Http404()
# do we have a valid project?
project = (session
.query(Node)
......@@ -184,14 +181,14 @@ def doTheQuery(request , project_id):
data = alist
return JsonHttpResponse(data)
session.remove()
def testISTEX(request , project_id):
print("testISTEX:")
print(request.method)
alist = ["bar","foo"]
sessionMaker = get_sessionmaker() # get_session()
session = sessionMaker()
session = get_session()
# do we have a valid project id?
try:
project_id = int(project_id)
......@@ -292,4 +289,4 @@ def testISTEX(request , project_id):
data = [query_string,query,N]
return JsonHttpResponse(data)
session.remove()