Commit 34d5edce authored by delanoe

To improve that part, I need to see the map terms in the table of terms.

parent 342aa4d3
...@@ -16,7 +16,8 @@ def do_cooc(corpus=None ...@@ -16,7 +16,8 @@ def do_cooc(corpus=None
, start=None, end=None , start=None, end=None
, n_min=1, n_max=None , limit=1000 , n_min=1, n_max=None , limit=1000
, isMonopartite=True , isMonopartite=True
, threshold = 3): , threshold = 3
, reset=True):
''' '''
Compute the cooccurrence matrix and save it, returning NodeNgramNgram.node_id Compute the cooccurrence matrix and save it, returning NodeNgramNgram.node_id
For the moment list of parameters are not supported because, lists need to For the moment list of parameters are not supported because, lists need to
...@@ -57,32 +58,18 @@ def do_cooc(corpus=None ...@@ -57,32 +58,18 @@ def do_cooc(corpus=None
coocNode_id = coocNode.id coocNode_id = coocNode.id
else : else :
coocNode_id = coocNode_id[0] coocNode_id = coocNode_id[0]
# node_cooc = get_or_create_node(nodetype='Cooccurrence', corpus=corpus
# , name_str="Cooccurrences corpus " \
# + str(corpus.id) + "list_id: " + str(mainList_id)
# #, hyperdata={'field1': field1, 'field2':field2}
# , session=session)
# BEGIN if reset == True :
# Saving the parameters of the analysis in the Node JSONB hyperdata field session.query( NodeNgramNgram ).filter( NodeNgramNgram.node_id == coocNode_id ).delete()
# ok args, _, _, parameters = inspect.getargvalues(inspect.currentframe()) session.commit()
# hyperdata = dict()
#
# for parameter in parameters.keys():
# if parameter != 'corpus' and parameter != 'node_cooc':
# hyperdata[parameter] = parameters[parameter]
#
# node_cooc.hyperdata = hyperdata
#
# For tests only : delete previous cooccurrences
session.query( NodeNgramNgram ).filter( NodeNgramNgram.node_id == coocNode_id ).delete()
session.commit()
NodeNgramX = aliased(NodeNgram) NodeNgramX = aliased(NodeNgram)
# Simple Cooccurrences
cooc_score = func.count(NodeNgramX.node_id).label('cooc_score') cooc_score = func.count(NodeNgramX.node_id).label('cooc_score')
# A kind of Euclidean distance cooccurrences
#cooc_score = func.sqrt(func.sum(NodeNgramX.weight * NodeNgramY.weight)).label('cooc_score') #cooc_score = func.sqrt(func.sum(NodeNgramX.weight * NodeNgramY.weight)).label('cooc_score')
if isMonopartite : if isMonopartite :
...@@ -131,7 +118,7 @@ def do_cooc(corpus=None ...@@ -131,7 +118,7 @@ def do_cooc(corpus=None
cooc_query = cooc_query.join ( NgramX cooc_query = cooc_query.join ( NgramX
, NgramX.id == NodeNgramX.ngram_id , NgramX.id == NodeNgramX.ngram_id
) )
NgramY = aliased(Ngram) NgramY = aliased(Ngram)
cooc_query = cooc_query.join ( NgramY cooc_query = cooc_query.join ( NgramY
, NgramY.id == NodeNgramY.ngram_id , NgramY.id == NodeNgramY.ngram_id
...@@ -160,10 +147,14 @@ def do_cooc(corpus=None ...@@ -160,10 +147,14 @@ def do_cooc(corpus=None
Start=aliased(NodeHyperdata) Start=aliased(NodeHyperdata)
StartFormat = aliased(Hyperdata) StartFormat = aliased(Hyperdata)
cooc_query = (cooc_query.join(Start, Start.node_id == Node.id) cooc_query = (cooc_query.join( Start
.join(StartFormat, StartFormat.id == Start.hyperdata_id) , Start.node_id == Node.id
.filter(StartFormat.name == 'publication_date') )
.filter(Start.value_datetime >= date_start_utc) .join( StartFormat
, StartFormat.id == Start.hyperdata_id
)
.filter( StartFormat.name == 'publication_date')
.filter( Start.value_datetime >= date_start_utc)
) )
...@@ -174,10 +165,14 @@ def do_cooc(corpus=None ...@@ -174,10 +165,14 @@ def do_cooc(corpus=None
End=aliased(NodeHyperdata) End=aliased(NodeHyperdata)
EndFormat = aliased(Hyperdata) EndFormat = aliased(Hyperdata)
cooc_query = (cooc_query.join(End, End.node_id == Node.id) cooc_query = (cooc_query.join( End
.join(EndFormat, EndFormat.id == End.hyperdata_id) , End.node_id == Node.id
.filter(EndFormat.name == 'publication_date') )
.filter(End.value_datetime <= date_end_utc) .join( EndFormat
, EndFormat.id == End.hyperdata_id
)
.filter( EndFormat.name == 'publication_date' )
.filter( End.value_datetime <= date_end_utc )
) )
...@@ -192,12 +187,10 @@ def do_cooc(corpus=None ...@@ -192,12 +187,10 @@ def do_cooc(corpus=None
else: else:
cooc_query = cooc_query.group_by(NodeHyperdataNgram.ngram_id, NodeNgramY.ngram_id) cooc_query = cooc_query.group_by(NodeHyperdataNgram.ngram_id, NodeNgramY.ngram_id)
# Order according some scores
cooc_query = cooc_query.order_by(desc('cooc_score')) cooc_query = cooc_query.order_by(desc('cooc_score'))
matrix = WeightedMatrix(cooc_query) matrix = WeightedMatrix(cooc_query)
# Select according some scores
mainList = UnweightedList( mainList_id ) mainList = UnweightedList( mainList_id )
group_list = Translations ( groupList_id ) group_list = Translations ( groupList_id )
cooc = matrix & (mainList * group_list) cooc = matrix & (mainList * group_list)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment