Commit 3476d4a2 authored by delanoe

[FIX SESSIONS] Local sessions for the workflow and get_or_create_node are renamed to mysession.

parent c54058e2
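This commit threads one explicit SQLAlchemy session through the whole workflow: instead of each helper opening its own scoped session via get_session() and tracking a sessionToRemove flag, the caller creates the session once and passes it down as mysession; helpers still callable without one fall back to the module-level session. A minimal sketch of the pattern, assuming the gargantext_web.db module of this repository (do_work is a hypothetical stand-in for helpers such as compute_stop or compute_tfidf):

    def do_work(corpus, mysession=None):
        # fallback for callers that do not manage a session themselves
        if mysession is None:
            from gargantext_web.db import session
            mysession = session
        # ... every query and commit goes through mysession ...
        mysession.commit()

    def apply_workflow(corpus_id):
        from gargantext_web.db import get_session, Node
        mysession = get_session()   # the caller owns the scoped session...
        try:
            corpus = mysession.query(Node).filter(Node.id == corpus_id).first()
            do_work(corpus, mysession=mysession)
        finally:
            mysession.remove()      # ...and is the only one to remove it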
@@ -47,13 +47,12 @@ def PrintException():
 class WorkflowTracking:
     def __init__( self ):
         self.hola = "mundo"
-    def processing_(self , corpus , step):
+    def processing_(self , corpus_id , step):
         try:
-            the_query = """ UPDATE node_node SET hyperdata=\'{ \"%s\" : \"%s\"}\' WHERE id=%d """ % ( "Processing", step , corpus.id )
+            the_query = """ UPDATE node_node SET hyperdata=\'{ \"%s\" : \"%s\"}\' WHERE id=%d """ % ( "Processing", step , corpus_id )
             cursor = connection.cursor()
             try:
                 cursor.execute(the_query)
@@ -61,4 +60,4 @@ class WorkflowTracking:
             finally:
                 connection.close()
         except :
             PrintException()
\ No newline at end of file
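processing_ now takes the raw corpus id instead of a session-bound Node, so progress updates no longer depend on any ORM session. A minimal usage sketch, mirroring the apply_workflow callers further down in this diff (int() guards against Celery delivering the id as a string):

    update_state = WorkflowTracking()
    update_state.processing_(int(corpus_id), "Parsing")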
@@ -18,7 +18,8 @@ def do_cooc(corpus=None
               , start=None, end=None
               , limit=1000
               , isMonopartite=True
-              , hapax = 3):
+              , hapax = 3
+              , mysession=None):
     '''
     Compute the cooccurrence matrix and save it, returning NodeNgramNgram.node_id
     For the moment lists of parameters are not supported because lists need to
@@ -40,13 +41,16 @@ def do_cooc(corpus=None
     # Security test
     field1,field2 = str(field1), str(field2)
-    session = get_session()
+    if mysession is None:
+        from gargantext_web.db import session
+        mysession = session
     # Get node
     node_cooc = get_or_create_node(nodetype='Cooccurrence', corpus=corpus
                                   , name_str="Cooccurrences corpus " \
                                             + str(corpus.id) + "list_id: " + str(miam_id)
                                   #, hyperdata={'field1': field1, 'field2':field2}
-                                  , session=session)
+                                  , mysession=mysession)
     # BEGIN
@@ -60,12 +64,12 @@ def do_cooc(corpus=None
     #
     # node_cooc.hyperdata = hyperdata
     #
-    session.add(node_cooc)
-    session.commit()
+    mysession.add(node_cooc)
+    mysession.commit()
     # END
-    session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==node_cooc.id).delete()
-    session.commit()
+    mysession.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==node_cooc.id).delete()
+    mysession.commit()
     doc_id = cache.NodeType['Document'].id
@@ -77,7 +81,7 @@ def do_cooc(corpus=None
     if isMonopartite :
         NodeNgramY = aliased(NodeNgram)
-        cooc_query = (session.query(NodeNgramX.ngram_id, NodeNgramY.ngram_id, cooc_score)
+        cooc_query = (mysession.query(NodeNgramX.ngram_id, NodeNgramY.ngram_id, cooc_score)
                       .join(Node, Node.id == NodeNgramX.node_id)
                       .join(NodeNgramY, NodeNgramY.node_id == Node.id)
                       .filter(Node.parent_id==corpus.id, Node.type_id==doc_id)
@@ -85,7 +89,7 @@ def do_cooc(corpus=None
     else :
         NodeNgramY = aliased(NodeNgram)
-        cooc_query = (session.query(NodeHyperdataNgram.ngram_id, NodeNgramY.ngram_id, cooc_score)
+        cooc_query = (mysession.query(NodeHyperdataNgram.ngram_id, NodeNgramY.ngram_id, cooc_score)
                       .join(Node, Node.id == NodeHyperdataNgram.node_id)
                       .join(NodeNgramY, NodeNgramY.node_id == Node.id)
                       .join(Hyperdata, Hyperdata.id == NodeHyperdataNgram.hyperdata_id)
@@ -169,7 +173,7 @@ def do_cooc(corpus=None
     # Select according some scores
     if cvalue_id is not None :
         #miam = get_or_create_node(nodetype='Cvalue', corpus=corpus)
-        cvalue_list = UnweightedList(session.query(NodeNodeNgram.ngram_id)
+        cvalue_list = UnweightedList(mysession.query(NodeNodeNgram.ngram_id)
                                      .filter(NodeNodeNgram.nodex_id == cvalue_id).all()
                                      )
@@ -200,4 +204,3 @@ def do_cooc(corpus=None
     cooc = matrix
     cooc.save(node_cooc.id)
     return(node_cooc.id)
-    session.remove()
@@ -44,9 +44,9 @@ def get_cooc(request=None, corpus=None
     data = {}
     #if session.query(Node).filter(Node.type_id==type_cooc_id, Node.parent_id==corpus_id).first() is None:
     print("Cooccurrences do not exist yet, creating it.")
-    miam_id = get_or_create_node(nodetype='MapList', corpus=corpus, session=session).id
-    stop_id = get_or_create_node(nodetype='StopList', corpus=corpus, session=session).id
-    group_id = get_or_create_node(nodetype='Group', corpus=corpus, session=session).id
+    miam_id = get_or_create_node(nodetype='MapList', corpus=corpus, mysession=session).id
+    stop_id = get_or_create_node(nodetype='StopList', corpus=corpus, mysession=session).id
+    group_id = get_or_create_node(nodetype='Group', corpus=corpus, mysession=session).id
     SamuelFlag = False
     # if field1 == field2 == 'ngrams' :
...
@@ -51,7 +51,7 @@ def periods(corpus, start=None, end=None):
     if duration.days > 365 * 3 :
         print("OK")
-        miam_id = get_or_create_node(nodetype='MiamList', corpus=corpus).id
+        miam_id = get_or_create_node(nodetype='MiamList', corpus=corpus, mysession=session).id
     result_list = list()
     for t in times:
@@ -86,7 +86,7 @@ def jacquard(period1, period2):
 def get_partition(corpus, start=None, end=None, distance=distance):
     session = get_session()
-    miam_id = get_or_create_node(corpus=corpus, nodetype='MapList', session=session).id
+    miam_id = get_or_create_node(corpus=corpus, nodetype='MapList', mysession=session).id
     print("get Partition %s - %s" % (start, end))
     cooc_id = do_cooc(corpus=corpus
                      , start=start
...
@@ -3,7 +3,7 @@
 from celery import shared_task
 from node import models
 from django.db import transaction
-from admin.utils import DebugTime
+from admin.utils import DebugTime, PrintException
 import cProfile
 #@app.task(bind=True)
@@ -15,15 +15,8 @@ from gargantext_web.db import get_session, cache, Node
 from ngram.workflow import ngram_workflow
-@shared_task
-def apply_sum(x, y):
-    print(x+y)
-    session = get_session()
-    print(session.query(Node.name).first())
-    session.remove()
 from parsing.corpustools import parse_resources, extract_ngrams #add_resource,
-from ngram.lists import ngrams2miam
+#from ngram.lists import ngrams2miam
 from admin.utils import WorkflowTracking
@@ -36,28 +29,29 @@ def apply_workflow(corpus_id):
     update_state = WorkflowTracking()
     try :
-        session = get_session()
-        corpus = session.query(Node).filter(Node.id==corpus_id).first()
-        update_state.processing_(corpus, "Parsing")
+        mysession = get_session()
+        corpus = mysession.query(Node).filter(Node.id==corpus_id).first()
+        update_state.processing_(int(corpus_id), "Parsing")
         #cProfile.runctx('parse_resources(corpus)', global,locals)
-        parse_resources(corpus, session=session)
-        update_state.processing_(corpus, "Terms extraction")
-        extract_ngrams(corpus, ['title', 'abstract'], nlp=True, session=session)
+        parse_resources(corpus, mysession=mysession)
+        update_state.processing_(int(corpus_id), "Terms extraction")
+        extract_ngrams(corpus, ['title', 'abstract'], nlp=True, mysession=mysession)
         # update_state.processing_(corpus, "")
-        ngram_workflow(corpus, session=session)
+        ngram_workflow(corpus, mysession=mysession)
         #ngrams2miam(user_id=corpus.user_id, corpus_id=corpus_id)
         print("End of the Workflow for corpus %d" % (corpus_id))
-        update_state.processing_(corpus, "0")
-        session.remove()
+        update_state.processing_(int(corpus_id), "0")
+        mysession.remove()
     except Exception as error:
         print(error)
-        session.remove()
+        PrintException()
+        mysession.remove()
 @shared_task
 def empty_trash(corpus_id):
...
@@ -242,17 +242,16 @@ class bulk_insert:
     readline = read
-def get_or_create_node(nodetype=None,corpus=None,corpus_id=None,name_str=None,hyperdata=None, session=None):
+def get_or_create_node(nodetype=None,corpus=None,corpus_id=None,name_str=None,hyperdata=None, mysession=None):
     '''
     Should be a method of the object. __get_or_create__ ?
     name_str :: String
     hyperdata :: Dict
     '''
-    sessionToRemove = False
-    if session is None:
-        session = get_session()
-        sessionToRemove = True
+    if mysession is None:
+        from gargantext_web.db import session
+        mysession = session
     if nodetype is None:
         print("Need to give a type node")
@@ -262,13 +261,13 @@ def get_or_create_node(nodetype=None,corpus=None,corpus_id=None,name_str=None,hy
     except KeyError:
         ntype = cache.NodeType[nodetype] = NodeType()
         ntype.name = nodetype
-        session.add(ntype)
-        session.commit()
+        mysession.add(ntype)
+        mysession.commit()
     if corpus_id is not None and corpus is None:
-        corpus = session.query(Node).filter(Node.id==corpus_id).first()
+        corpus = mysession.query(Node).filter(Node.id==corpus_id).first()
-    node = (session.query(Node).filter(Node.type_id == ntype.id
+    node = (mysession.query(Node).filter(Node.type_id == ntype.id
                                       , Node.parent_id == corpus.id
                                       , Node.user_id == corpus.user_id
                                       )
@@ -289,11 +288,9 @@ def get_or_create_node(nodetype=None,corpus=None,corpus_id=None,name_str=None,hy
             node.name=name_str
         else:
             node.name=ntype.name
-        session.add(node)
-        session.commit()
+        mysession.add(node)
+        mysession.commit()
     #print(parent_id, n.parent_id, n.id, n.name)
     return(node)
-    if sessionToRemove:
-        session.remove()
@@ -67,7 +67,7 @@ def getNgrams(corpus=None, limit=1000):
     return(terms)
     session.remove()
-def compute_cvalue(corpus=None, limit=1000):
+def compute_cvalue(corpus=None, limit=1000, mysession=None):
     '''
     computeCvalue :: Corpus
     frequency :: String -> Int -> Int
@@ -126,13 +126,11 @@ def compute_cvalue(corpus=None, limit=1000, mysession=None):
     result = cvalueAll()
     #print([n for n in result])
-    session = get_session()
-    session.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==cvalue_node.id).delete()
-    session.commit()
+    mysession.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==cvalue_node.id).delete()
+    mysession.commit()
     #bulk_insert(NodeNodeNgram, ['nodex_id', 'nodey_id', 'ngram_id', 'score'], [n for n in islice(result,0,100)])
     bulk_insert(NodeNodeNgram, ['nodex_id', 'nodey_id', 'ngram_id', 'score'], [n for n in result])
-    session.remove()
 # test
 #corpus=session.query(Node).filter(Node.id==244250).first()
 #computeCvalue(corpus)
@@ -47,14 +47,10 @@ def getStemmer(corpus):
     return(stemIt)
-def compute_groups(corpus, limit_inf=None, limit_sup=None, how='Stem', session=None):
+def compute_groups(corpus, limit_inf=None, limit_sup=None, how='Stem', mysession=None):
     '''
     group ngrams according to a function (stemming or lemming)
     '''
-    sessionToRemove = False
-    if session is None:
-        session = get_session()
-        sessionToRemove = True
     dbg = DebugTime('Corpus #%d - group' % corpus.id)
     dbg.show('Group')
@@ -66,19 +62,19 @@ def compute_groups(corpus, limit_inf=None, limit_sup=None, how='Stem', session=N
     stemIt = getStemmer(corpus)
     group_to_insert = set()
-    node_group = get_or_create_node(nodetype='Group', corpus=corpus, session=session)
+    node_group = get_or_create_node(nodetype='Group', corpus=corpus, mysession=mysession)
     miam_to_insert = set()
-    miam_node = get_or_create_node(nodetype='MiamList', corpus=corpus, session=session)
+    miam_node = get_or_create_node(nodetype='MiamList', corpus=corpus, mysession=mysession)
-    stop_node = get_or_create_node(nodetype='StopList', corpus=corpus, session=session)
+    stop_node = get_or_create_node(nodetype='StopList', corpus=corpus, mysession=mysession)
     #stop_list = UnweightedList(stop_node.id)
     Stop = aliased(NodeNgram)
     frequency = sa.func.count(NodeNgram.weight)
-    ngrams = (session.query(Ngram.id, Ngram.terms, frequency )
+    ngrams = (mysession.query(Ngram.id, Ngram.terms, frequency )
              .join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
             .join(Node, Node.id == NodeNgram.node_id)
            #.outerjoin(Stop, Stop.ngram_id == Ngram.id)
@@ -90,7 +86,7 @@ def compute_groups(corpus, limit_inf=None, limit_sup=None, how='Stem', session=N
             .limit(limit_sup)
             )
-    stops = (session.query(Ngram.id, Ngram.terms, frequency)
+    stops = (mysession.query(Ngram.id, Ngram.terms, frequency)
            .join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
            .join(Node, Node.id == NodeNgram.node_id)
            .join(Stop, Stop.ngram_id == Ngram.id)
@@ -131,10 +127,10 @@ def compute_groups(corpus, limit_inf=None, limit_sup=None, how='Stem', session=N
             miam_to_insert.add((miam_node.id, group[key]['mainForm'], 1))
     # # Deleting previous groups
-    session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id == node_group.id).delete()
+    mysession.query(NodeNgramNgram).filter(NodeNgramNgram.node_id == node_group.id).delete()
     # # Deleting previous ngrams miam list
-    session.query(NodeNgram).filter(NodeNgram.node_id == miam_node.id).delete()
-    session.commit()
+    mysession.query(NodeNgram).filter(NodeNgram.node_id == miam_node.id).delete()
+    mysession.commit()
     bulk_insert(NodeNgramNgram
                , ('node_id', 'ngramx_id', 'ngramy_id', 'score')
@@ -142,4 +138,3 @@ def compute_groups(corpus, limit_inf=None, limit_sup=None, how='Stem', session=N
     bulk_insert(NodeNgram, ('node_id', 'ngram_id', 'weight'), [data for data in list(miam_to_insert)])
-    if sessionToRemove: session.remove()
@@ -15,15 +15,11 @@ from sqlalchemy.orm import aliased
 from ngram.tools import insert_ngrams
 import csv
-def compute_mapList(corpus,limit=500,n=1, session=None):
+def compute_mapList(corpus,limit=500,n=1, mysession=None):
     '''
     According to Specificities and stoplist,
     '''
-    sessionToRemove = False
-    if session is None:
-        session = get_session()
-        sessionToRemove = True
     monograms_part = 0.005
     monograms_limit = round(limit * monograms_part)
@@ -31,11 +27,11 @@ def compute_mapList(corpus,limit=500,n=1, mysession=None):
     dbg = DebugTime('Corpus #%d - computing Miam' % corpus.id)
-    node_miam = get_or_create_node(nodetype='MiamList', corpus=corpus)
-    node_stop = get_or_create_node(nodetype='StopList', corpus=corpus)
-    node_group = get_or_create_node(nodetype='Group', corpus=corpus)
-    node_spec = get_or_create_node(nodetype='Specificity', corpus=corpus)
+    node_miam = get_or_create_node(nodetype='MiamList', corpus=corpus, mysession=mysession)
+    node_stop = get_or_create_node(nodetype='StopList', corpus=corpus, mysession=mysession)
+    node_group = get_or_create_node(nodetype='Group', corpus=corpus, mysession=mysession)
+    node_spec = get_or_create_node(nodetype='Specificity', corpus=corpus, mysession=mysession)
     Miam=aliased(NodeNgram)
     Stop=aliased(NodeNgram)
@@ -43,7 +39,7 @@ def compute_mapList(corpus,limit=500,n=1, mysession=None):
     Spec=aliased(NodeNodeNgram)
-    query = (session.query(Spec.ngram_id, Spec.score)
+    query = (mysession.query(Spec.ngram_id, Spec.score)
            .join(Miam, Spec.ngram_id == Miam.ngram_id)
            .join(Ngram, Ngram.id == Spec.ngram_id)
            #.outerjoin(Group, Group.ngramy_id == Spec.ngram_id)
@@ -66,19 +62,19 @@ def compute_mapList(corpus,limit=500,n=1, mysession=None):
             .limit(multigrams_limit)
             )
-    stop_ngrams = (session.query(NodeNgram.ngram_id)
+    stop_ngrams = (mysession.query(NodeNgram.ngram_id)
                   .filter(NodeNgram.node_id == node_stop.id)
                   .all()
                   )
-    grouped_ngrams = (session.query(NodeNgramNgram.ngramy_id)
+    grouped_ngrams = (mysession.query(NodeNgramNgram.ngramy_id)
                      .filter(NodeNgramNgram.node_id == node_group.id)
                      .all()
                      )
-    node_mapList = get_or_create_node(nodetype='MapList', corpus=corpus)
-    session.query(NodeNgram).filter(NodeNgram.node_id==node_mapList.id).delete()
-    session.commit()
+    node_mapList = get_or_create_node(nodetype='MapList', corpus=corpus, mysession=mysession)
+    mysession.query(NodeNgram).filter(NodeNgram.node_id==node_mapList.id).delete()
+    mysession.commit()
     data = zip(
         [node_mapList.id for i in range(1,limit)]
@@ -91,20 +87,14 @@ def compute_mapList(corpus,limit=500,n=1, mysession=None):
     bulk_insert(NodeNgram, ['node_id', 'ngram_id', 'weight'], [d for d in data])
     dbg.show('MapList computed')
-    if sessionToRemove: session.remove()
-def insert_miam(corpus, ngrams=None, path_file_csv=None):
-    sessionToRemove = False
-    if session is None:
-        session = get_session()
-        sessionToRemove = True
+def insert_miam(corpus, ngrams=None, path_file_csv=None, mysession=None):
     dbg = DebugTime('Corpus #%d - computing Miam' % corpus.id)
-    node_miam = get_or_create_node(nodetype='MiamList', corpus=corpus)
+    node_miam = get_or_create_node(nodetype='MiamList', corpus=corpus, mysession=mysession)
-    session.query(NodeNgram).filter(NodeNgram.node_id==node_miam.id).delete()
-    session.commit()
+    mysession.query(NodeNgram).filter(NodeNgram.node_id==node_miam.id).delete()
+    mysession.commit()
     stop_words = set()
     miam_words = set()
@@ -133,6 +123,5 @@ def insert_miam(corpus, ngrams=None, path_file_csv=None, mysession=None):
     bulk_insert(NodeNgram, ['node_id', 'ngram_id', 'weight'], [d for d in data])
     file_csv.close()
     dbg.show('Miam computed')
-    if sessionToRemove: session.remove()
@@ -4,26 +4,22 @@ from gargantext_web.db import Node, NodeNgram, NodeNodeNgram
 from gargantext_web.db import get_or_create_node
 from admin.utils import DebugTime
-def compute_occs(corpus, session=None):
+def compute_occs(corpus, mysession=None):
     '''
     compute_occs :: Corpus -> IO ()
     '''
-    sessionToRemove = False
-    if session is None:
-        session = get_session()
-        sessionToRemove = True
     dbg = DebugTime('Corpus #%d - OCCURRENCES' % corpus.id)
     dbg.show('Calculate occurrences')
-    occs_node = get_or_create_node(nodetype='Occurrences', corpus=corpus)
+    occs_node = get_or_create_node(nodetype='Occurrences', corpus=corpus, mysession=mysession)
     #print(occs_node.id)
-    (session.query(NodeNodeNgram)
+    (mysession.query(NodeNodeNgram)
      .filter(NodeNodeNgram.nodex_id==occs_node.id).delete()
      )
-    session.commit()
+    mysession.commit()
     db, cursor = get_cursor()
     cursor.execute('''
@@ -55,7 +51,6 @@ def compute_occs(corpus, mysession=None):
                 )
             )
     db.commit()
-    if sessionToRemove: session.remove()
     #data = session.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==occs_node.id).all()
...
@@ -15,16 +15,12 @@ from gargantext_web.db import NodeNgramNgram, NodeNodeNgram
 from sqlalchemy import desc, asc, or_, and_, Date, cast, select
-def specificity(cooc_id=None, corpus=None, limit=100, session=None):
+def specificity(cooc_id=None, corpus=None, limit=100, mysession=None):
     '''
     Compute the specificity, simple calculus.
     '''
-    sessionToRemove = False
-    if session is None:
-        session = get_session()
-        sessionToRemove = True
-    cooccurrences = (session.query(NodeNgramNgram)
+    cooccurrences = (mysession.query(NodeNgramNgram)
                     .filter(NodeNgramNgram.node_id==cooc_id)
                     .order_by(NodeNgramNgram.score)
                     .limit(limit)
@@ -45,23 +41,22 @@ def specificity(cooc_id=None, corpus=None, limit=100, mysession=None):
     m = ( xs - ys) / (2 * (x.shape[0] - 1))
     m = m.sort(inplace=False)
-    node = get_or_create_node(nodetype='Specificity',corpus=corpus,session=session)
+    node = get_or_create_node(nodetype='Specificity',corpus=corpus, mysession=mysession)
     data = zip( [node.id for i in range(1,m.shape[0])]
               , [corpus.id for i in range(1,m.shape[0])]
               , m.index.tolist()
               , m.values.tolist()
               )
-    session.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==node.id).delete()
-    session.commit()
+    mysession.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==node.id).delete()
+    mysession.commit()
     bulk_insert(NodeNodeNgram, ['nodex_id', 'nodey_id', 'ngram_id', 'score'], [d for d in data])
     return(node.id)
-    if sessionToRemove: session.remove()
-def compute_specificity(corpus,limit=100, session=None):
+def compute_specificity(corpus,limit=100, mysession=None):
     '''
     Computing specificities as NodeNodeNgram.
     All workflow is the following:
@@ -69,19 +64,13 @@ def compute_specificity(corpus,limit=100, mysession=None):
     2) Compute the specificity score, saving it in database, return its Node
     '''
-    sessionToRemove = False
-    if session is None:
-        session = get_session()
-        sessionToRemove = True
     dbg = DebugTime('Corpus #%d - specificity' % corpus.id)
-    list_cvalue = get_or_create_node(nodetype='Cvalue', corpus=corpus, session=session)
+    list_cvalue = get_or_create_node(nodetype='Cvalue', corpus=corpus, mysession=mysession)
-    cooc_id = do_cooc(corpus=corpus, cvalue_id=list_cvalue.id,limit=limit)
+    cooc_id = do_cooc(corpus=corpus, cvalue_id=list_cvalue.id,limit=limit, mysession=mysession)
-    specificity(cooc_id=cooc_id,corpus=corpus,limit=limit)
+    specificity(cooc_id=cooc_id,corpus=corpus,limit=limit,mysession=mysession)
     dbg.show('specificity')
-    if sessionToRemove: session.remove()
 #corpus=session.query(Node).filter(Node.id==244250).first()
 #compute_specificity(corpus)
...
@@ -75,22 +75,18 @@ def isStopWord(ngram, stop_words=None):
         if test_match(word, regex) is True :
             return(True)
-def compute_stop(corpus,limit=2000,debug=False, session=None):
+def compute_stop(corpus,limit=2000,debug=False, mysession=None):
     '''
     do some statistics on all stop lists of database of the same type
     '''
-    sessionToRemove = False
-    if session is None:
-        session = get_session()
-        sessionToRemove = True
-    stop_node_id = get_or_create_node(nodetype='StopList', corpus=corpus).id
+    stop_node_id = get_or_create_node(nodetype='StopList', corpus=corpus, mysession=mysession).id
     # TODO do a function to get all stop words with social scores
-    root = session.query(Node).filter(Node.type_id == cache.NodeType['Root'].id).first()
-    root_stop_id = get_or_create_node(nodetype='StopList', corpus=root).id
+    root = mysession.query(Node).filter(Node.type_id == cache.NodeType['Root'].id).first()
+    root_stop_id = get_or_create_node(nodetype='StopList', corpus=root, mysession=mysession).id
-    stop_words = (session.query(Ngram.terms)
+    stop_words = (mysession.query(Ngram.terms)
                  .join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
                  .filter(NodeNgram.node_id == root_stop_id)
                  .all()
@@ -99,7 +95,7 @@ def compute_stop(corpus,limit=2000,debug=False, mysession=None):
     #print([n for n in stop_words])
     frequency = sa.func.count( NodeNgram.weight )
-    ngrams = ( session.query( Ngram.id, Ngram.terms, frequency )
+    ngrams = ( mysession.query( Ngram.id, Ngram.terms, frequency )
              .join( NodeNgram, NodeNgram.ngram_id == Ngram.id )
              .join( Node, Node.id == NodeNgram.node_id )
             .filter( Node.parent_id == corpus.id,
@@ -118,4 +114,3 @@ def compute_stop(corpus,limit=2000,debug=False, mysession=None):
     stop = WeightedList({ n[0] : -1 for n in ngrams_to_stop})
     stop.save(stop_node_id)
-    if sessionToRemove: session.remove()
@@ -5,17 +5,12 @@ from gargantext_web.db import get_session, get_or_create_node
 from admin.utils import DebugTime
-def compute_tfidf(corpus, session=None):
+def compute_tfidf(corpus, mysession=None):
     # compute terms frequency sum
-    sessionToRemove = False
-    if session is None:
-        session = get_session()
-        sessionToRemove = True
     dbg = DebugTime('Corpus #%d - TFIDF' % corpus.id)
     dbg.show('calculate terms frequencies sums')
-    tfidf_node = get_or_create_node(nodetype='Tfidf', corpus=corpus, session=session)
+    tfidf_node = get_or_create_node(nodetype='Tfidf', corpus=corpus, mysession=mysession)
     db, cursor = get_cursor()
     cursor.execute('''
@@ -125,26 +120,20 @@ def compute_tfidf(corpus, mysession=None):
     # the end!
     db.commit()
-    if sessionToRemove: session.remove()
-def compute_tfidf_global(corpus, session=None):
+def compute_tfidf_global(corpus, mysession=None):
     '''
     Maybe improve this with:
     #http://stackoverflow.com/questions/8674718/best-way-to-select-random-rows-postgresql
     '''
-    sessionToRemove = False
-    if session is None:
-        session = get_session()
-        sessionToRemove = True
     dbg = DebugTime('Corpus #%d - tfidf global' % corpus.id)
     dbg.show('calculate terms frequencies sums')
-    tfidf_node = get_or_create_node(nodetype='Tfidf (global)', corpus=corpus, session=session)
+    tfidf_node = get_or_create_node(nodetype='Tfidf (global)', corpus=corpus, mysession=mysession)
     # update would be better
-    session.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==tfidf_node.id).delete()
-    session.commit()
+    mysession.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==tfidf_node.id).delete()
+    mysession.commit()
     # compute terms frequency sum
     db, cursor = get_cursor()
@@ -271,8 +260,3 @@ def compute_tfidf_global(corpus, mysession=None):
     db.commit()
     dbg.show('insert tfidf')
-    if sessionToRemove: session.remove()
-#corpus=session.query(Node).filter(Node.id==244250).first()
-#compute_tfidf_global(corpus)
@@ -8,8 +8,8 @@ def insert_ngrams_to_list(list_of_ngrams, corpus, list_type='MapList', erase=Tru
     '''
     session = get_session()
-    list_node = get_or_create_node(corpus=corpus, nodetype=list_type, session=session)
-    group_node = get_or_create_node(corpus=corpus, nodetype='GroupList', session=session)
+    list_node = get_or_create_node(corpus=corpus, nodetype=list_type, mysession=session)
+    group_node = get_or_create_node(corpus=corpus, nodetype='GroupList', mysession=session)
     group_list = (session.query(NodeNgramNgram.ngramy_id)
                  .filter(NodeNgramNgram.id==group_node.id)
                  .all()
...
@@ -12,17 +12,17 @@ from gargantext_web.db import Node , NodeNgram
 from admin.utils import WorkflowTracking
-def ngram_workflow(corpus, n=5000, session=None):
+def ngram_workflow(corpus, n=5000, mysession=None):
     '''
     All the workflow to filter the ngrams.
     '''
     update_state = WorkflowTracking()
-    update_state.processing_(corpus, "Stop words")
-    compute_stop(corpus, session=session)
+    update_state.processing_(corpus.id, "Stop words")
+    compute_stop(corpus, mysession=mysession)
-    update_state.processing_(corpus, "TF-IDF global score")
-    compute_tfidf_global(corpus, session=session)
+    update_state.processing_(corpus.id, "TF-IDF global score")
+    compute_tfidf_global(corpus, mysession=mysession)
     part = round(n * 0.9)
@@ -31,28 +31,28 @@ def ngram_workflow(corpus, n=5000, mysession=None):
     # part = round(part * 0.8)
     #print('spec part:', part)
-    update_state.processing_(corpus, "Specificity score")
-    compute_specificity(corpus,limit=part, session=session)
+    update_state.processing_(corpus.id, "Specificity score")
+    compute_specificity(corpus,limit=part, mysession=mysession)
     part = round(part * 0.8)
     limit_inf = round(part * 1)
     limit_sup = round(part * 5)
     #print(limit_inf,limit_sup)
-    update_state.processing_(corpus, "Synonyms")
+    update_state.processing_(corpus.id, "Synonyms")
     try:
-        compute_groups(corpus,limit_inf=limit_inf, limit_sup=limit_sup, session=session)
+        compute_groups(corpus,limit_inf=limit_inf, limit_sup=limit_sup, mysession=mysession)
     except Exception as error:
         print("Workflow Ngram Group error", error)
         pass
-    update_state.processing_(corpus, "Map list terms")
-    compute_mapList(corpus,limit=1000, session=session) # size
+    update_state.processing_(corpus.id, "Map list terms")
+    compute_mapList(corpus,limit=1000, mysession=mysession) # size
-    update_state.processing_(corpus, "TF-IDF local score")
-    compute_tfidf(corpus, session=session)
+    update_state.processing_(corpus.id, "TF-IDF local score")
+    compute_tfidf(corpus, mysession=mysession)
-    update_state.processing_(corpus, "Occurrences")
-    compute_occs(corpus, session=session)
+    update_state.processing_(corpus.id, "Occurrences")
+    compute_occs(corpus, mysession=mysession)
@@ -31,12 +31,11 @@ parsers = Parsers()
 # resources management
-def add_resource(corpus, session=None, **kwargs):
+def add_resource(corpus, mysession=None, **kwargs):
-    sessionToRemove = False
-    if session is None:
-        session = get_session()
-        sessionToRemove = True
+    if mysession is None:
+        from gargantext_web.db import session
+        mysession = session
     # only for tests
     resource = Resource(guid=str(random()), **kwargs )
@@ -50,7 +49,7 @@ def add_resource(corpus, mysession=None, **kwargs):
     f.close()
     resource.digest = h.hexdigest()
     # check if a resource on this node already has this hash
-    tmp_resource = (session
+    tmp_resource = (mysession
         .query(Resource)
         .join(Node_Resource, Node_Resource.resource_id == Resource.id)
        .filter(Resource.digest == resource.digest)
@@ -59,28 +58,24 @@ def add_resource(corpus, mysession=None, **kwargs):
     if tmp_resource is not None:
         return tmp_resource
     else:
-        session.add(resource)
-        session.commit()
+        mysession.add(resource)
+        mysession.commit()
     # link with the resource
     node_resource = Node_Resource(
         node_id = corpus.id,
         resource_id = resource.id,
         parsed = False,
     )
-    session.add(node_resource)
-    session.commit()
+    mysession.add(node_resource)
+    mysession.commit()
-    # return result
     return resource
-    if sessionToRemove:
-        session.remove()
-def parse_resources(corpus, user=None, user_id=None, session=None):
+def parse_resources(corpus, user=None, user_id=None, mysession=None):
-    sessionToRemove = False
-    if session is None:
-        session = get_session()
-        sessionToRemove = True
+    if mysession is None:
+        from gargantext_web.db import session
+        mysession = session
     dbg = DebugTime('Corpus #%d - parsing' % corpus.id)
@@ -91,7 +86,7 @@ def parse_resources(corpus, user=None, user_id=None, mysession=None):
     else:
         user_id = corpus.user_id
     # find resource of the corpus
-    resources_query = (session
+    resources_query = (mysession
         .query(Resource, ResourceType)
        .join(ResourceType, ResourceType.id == Resource.type_id)
        .join(Node_Resource, Node_Resource.resource_id == Resource.id)
@@ -134,14 +129,14 @@ def parse_resources(corpus, user=None, user_id=None, mysession=None):
     # TODO: mark node-resources associations as parsed
     #
     dbg.show('insert %d documents' % len(nodes))
-    session.add_all(nodes)
-    session.commit()
+    mysession.add_all(nodes)
+    mysession.commit()
     # now, index the hyperdata
     dbg.show('insert hyperdata')
     node_hyperdata_lists = defaultdict(list)
     hyperdata_types = {
         hyperdata.name: hyperdata
-        for hyperdata in session.query(Hyperdata)
+        for hyperdata in mysession.query(Hyperdata)
     }
     #print('hyperdata_types', hyperdata_types)
     for node in nodes:
@@ -166,7 +161,7 @@ def parse_resources(corpus, user=None, user_id=None, mysession=None):
         node_hyperdata_ngrams = set()
         #for field in ['source', 'authors', 'journal']:
         for field in ['journal', 'authors']:
-            hyperdata_set.add(session.query(Hyperdata.id).filter(Hyperdata.name==field).first()[0])
+            hyperdata_set.add(mysession.query(Hyperdata.id).filter(Hyperdata.name==field).first()[0])
         #print("hyperdata_set", hyperdata_set)
@@ -191,9 +186,6 @@ def parse_resources(corpus, user=None, user_id=None, mysession=None):
     # mark the corpus as parsed
     corpus.parsed = True
-    if sessionToRemove:
-        session.remove()
 # ngrams extraction
 from .NgramsExtractors import EnglishNgramsExtractor, FrenchNgramsExtractor, NgramsExtractor
 from nltk.tokenize import word_tokenize, wordpunct_tokenize, sent_tokenize
@@ -222,18 +214,17 @@ class NgramsExtractors(defaultdict):
 ngramsextractors = NgramsExtractors()
-def extract_ngrams(corpus, keys, nlp=True, session=None):
+def extract_ngrams(corpus, keys, nlp=True, mysession=None):
-    sessionToRemove = False
-    if session is None:
-        session = get_session()
-        sessionToRemove = True
+    if mysession is None:
+        from gargantext_web.db import session
+        mysession = session
     dbg = DebugTime('Corpus #%d - ngrams' % corpus.id)
     default_language_iso2 = None if corpus.language_id is None else cache.Language[corpus.language_id].iso2
     # query the hyperdata associated with the given keys
     columns = [Node.id, Node.language_id] + [Node.hyperdata[key] for key in keys]
-    hyperdata_query = (session
+    hyperdata_query = (mysession
         .query(*columns)
        .filter(Node.parent_id == corpus.id)
        .filter(Node.type_id == cache.NodeType['Document'].id)
@@ -242,7 +233,7 @@ def extract_ngrams(corpus, keys, nlp=True, mysession=None):
     dbg.show('find ngrams')
     languages_by_id = {
         language.id: language.iso2
-        for language in session.query(Language)
+        for language in mysession.query(Language)
     }
     ngrams_data = set()
@@ -321,9 +312,6 @@ def extract_ngrams(corpus, keys, nlp=True, mysession=None):
     # commit to database
     db.commit()
-    if sessionToRemove:
-        session.remove()
 def text_prepa(my_str):
     """
...