Commit 5ca9085b authored by delanoe's avatar delanoe

[RENAME] cooc function > do_cooc.

parent db229347
......@@ -11,7 +11,7 @@ import inspect
# keep list
def cooc(corpus=None
def do_cooc(corpus=None
, field_X=None, field_Y=None
, miam_id=None, stop_id=None, group_id=None
, cvalue_id=None
......@@ -110,7 +110,7 @@ def cooc(corpus=None
# Cooc is symmetric: take only the main cooccurrences and cut at the limit
cooc_query = (cooc_query
.filter(NodeNgramX.ngram_id < NodeNgramY.ngram_id)
.having(cooc_score > 2)
.having(cooc_score > 4)
#.having(cooc_score > 1)
.group_by(NodeNgramX.ngram_id, NodeNgramY.ngram_id)
......@@ -130,11 +130,9 @@ def cooc(corpus=None
)
if miam_id is not None :
#miam = get_or_create_node(nodetype='Cvalue', corpus=corpus)
miam_list = UnweightedList(miam_id)
if stop_id is not None :
#stop = get_or_create_node(nodetype='StopList', corpus=corpus)
stop_list = UnweightedList(stop_id)
if group_id is not None :
......@@ -145,7 +143,9 @@ def cooc(corpus=None
elif miam_id is not None and stop_id is not None and group_id is None :
cooc = matrix & (miam_list - stop_list)
elif miam_id is not None and stop_id is not None and group_id is not None :
cooc = matrix & (miam_list * group_list - stop_list)
print("miam_id is not None and stop_id is not None and group_id is not None")
#cooc = matrix & (miam_list * group_list - stop_list)
cooc = matrix & (miam_list - stop_list)
elif miam_id is not None and stop_id is None and group_id is not None :
cooc = matrix & (miam_list * group_list)
else :
......
......@@ -11,7 +11,7 @@ import scipy
from gargantext_web.db import get_or_create_node
from analysis.cooccurrences import cooc
from analysis.cooccurrences import do_cooc
import pandas as pd
from copy import copy
......@@ -29,7 +29,6 @@ from ngram.lists import listIds
def diag_null(x):
    """Return ``x`` with its main diagonal set to zero.

    The result is computed as ``x - x * I``, where ``I`` is the identity
    matrix whose side is ``x.shape[0]``. The multiplication is elementwise,
    so only the diagonal entries of ``x`` are subtracted from themselves.
    ``x`` itself is not modified; a new object is returned.

    Args:
        x: a square, two-dimensional array-like exposing ``.shape`` and
            supporting elementwise ``*`` and ``-`` with a NumPy array.

    Returns:
        An object of the type produced by ``x - x * ndarray``, holding the
        off-diagonal values of ``x`` and zeros on the diagonal.
    """
    # ``scipy.eye`` was only a re-export of ``numpy.eye`` in SciPy's root
    # namespace; that alias is deprecated, so call NumPy directly.
    # Imported locally so this function does not depend on the file's
    # top-level import list.
    import numpy as np

    return x - x * np.eye(x.shape[0])
# Module-level default size. The (truncated) get_cooc signature that follows
# appears to use it as the default for its `size` parameter -- TODO confirm.
size = 1000
......@@ -51,24 +50,21 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz
# data deleted each time
session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==cooc_id).delete()
cooc_id = cooc(corpus=corpus, miam_id=miam_id, group_id=group_id, stop_id=stop_id, limit=size)
cooc_id = do_cooc(corpus=corpus, miam_id=miam_id, group_id=group_id, stop_id=stop_id, limit=size)
#print([n for n in session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==cooc_id).all()])
for cooccurrence in session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==cooc_id).all():
#print(cooccurrence)
# print(cooccurrence.ngramx.terms," <=> ",cooccurrence.ngramy.terms,"\t",cooccurrence.score)
# TODO: clean up this part; it is not useful
labels[cooccurrence.ngramx_id] = cooccurrence.ngramx_id #session.query(Ngram.id).filter(Ngram.id == cooccurrence.ngramx_id).first()[0]
labels[cooccurrence.ngramy_id] = cooccurrence.ngramy_id #session.query(Ngram.id).filter(Ngram.id == cooccurrence.ngramy_id).first()[0]
for cooc in session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==cooc_id).all():
labels[cooc.ngramx_id] = cooc.ngramx_id
labels[cooc.ngramy_id] = cooc.ngramy_id
matrix[cooccurrence.ngramx_id][cooccurrence.ngramy_id] = cooccurrence.score
matrix[cooccurrence.ngramy_id][cooccurrence.ngramx_id] = cooccurrence.score
matrix[cooc.ngramx_id][cooc.ngramy_id] = cooc.score
matrix[cooc.ngramy_id][cooc.ngramx_id] = cooc.score
ids[labels[cooccurrence.ngramx_id]] = cooccurrence.ngramx_id
ids[labels[cooccurrence.ngramy_id]] = cooccurrence.ngramy_id
ids[labels[cooc.ngramx_id]] = cooc.ngramx_id
ids[labels[cooc.ngramy_id]] = cooc.ngramy_id
weight[cooccurrence.ngramx_id] = weight.get(cooccurrence.ngramx_id, 0) + cooccurrence.score
weight[cooccurrence.ngramy_id] = weight.get(cooccurrence.ngramy_id, 0) + cooccurrence.score
weight[cooc.ngramx_id] = weight.get(cooc.ngramx_id, 0) + cooc.score
weight[cooc.ngramy_id] = weight.get(cooc.ngramy_id, 0) + cooc.score
x = pd.DataFrame(matrix).fillna(0)
y = pd.DataFrame(matrix).fillna(0)
......
......@@ -9,7 +9,7 @@ from collections import defaultdict
import numpy as np
import pandas as pd
from analysis.cooccurrences import cooc
from analysis.cooccurrences import do_cooc
from gargantext_web.db import session, cache, get_or_create_node, bulk_insert
from gargantext_web.db import NodeNgramNgram, NodeNodeNgram
......@@ -65,7 +65,7 @@ def compute_specificity(corpus,limit=100):
dbg = DebugTime('Corpus #%d - specificity' % corpus.id)
list_cvalue = get_or_create_node(nodetype='Cvalue', corpus=corpus)
cooc_id = cooc(corpus=corpus, cvalue_id=list_cvalue.id,limit=limit)
cooc_id = do_cooc(corpus=corpus, cvalue_id=list_cvalue.id,limit=limit)
specificity(cooc_id=cooc_id,corpus=corpus,limit=limit)
dbg.show('specificity')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment