Commit 5ca9085b authored by delanoe

[RENAME] cooc function > do_cooc.

parent db229347
...@@ -11,7 +11,7 @@ import inspect ...@@ -11,7 +11,7 @@ import inspect
# keep list # keep list
def cooc(corpus=None def do_cooc(corpus=None
, field_X=None, field_Y=None , field_X=None, field_Y=None
, miam_id=None, stop_id=None, group_id=None , miam_id=None, stop_id=None, group_id=None
, cvalue_id=None , cvalue_id=None
...@@ -110,7 +110,7 @@ def cooc(corpus=None ...@@ -110,7 +110,7 @@ def cooc(corpus=None
# Cooc is symmetric, take only the main cooccurrences and cut at the limit # Cooc is symmetric, take only the main cooccurrences and cut at the limit
cooc_query = (cooc_query cooc_query = (cooc_query
.filter(NodeNgramX.ngram_id < NodeNgramY.ngram_id) .filter(NodeNgramX.ngram_id < NodeNgramY.ngram_id)
.having(cooc_score > 2) .having(cooc_score > 4)
#.having(cooc_score > 1) #.having(cooc_score > 1)
.group_by(NodeNgramX.ngram_id, NodeNgramY.ngram_id) .group_by(NodeNgramX.ngram_id, NodeNgramY.ngram_id)
...@@ -130,11 +130,9 @@ def cooc(corpus=None ...@@ -130,11 +130,9 @@ def cooc(corpus=None
) )
if miam_id is not None : if miam_id is not None :
#miam = get_or_create_node(nodetype='Cvalue', corpus=corpus)
miam_list = UnweightedList(miam_id) miam_list = UnweightedList(miam_id)
if stop_id is not None : if stop_id is not None :
#stop = get_or_create_node(nodetype='StopList', corpus=corpus)
stop_list = UnweightedList(stop_id) stop_list = UnweightedList(stop_id)
if group_id is not None : if group_id is not None :
...@@ -145,7 +143,9 @@ def cooc(corpus=None ...@@ -145,7 +143,9 @@ def cooc(corpus=None
elif miam_id is not None and stop_id is not None and group_id is None : elif miam_id is not None and stop_id is not None and group_id is None :
cooc = matrix & (miam_list - stop_list) cooc = matrix & (miam_list - stop_list)
elif miam_id is not None and stop_id is not None and group_id is not None : elif miam_id is not None and stop_id is not None and group_id is not None :
cooc = matrix & (miam_list * group_list - stop_list) print("miam_id is not None and stop_id is not None and group_id is not None")
#cooc = matrix & (miam_list * group_list - stop_list)
cooc = matrix & (miam_list - stop_list)
elif miam_id is not None and stop_id is None and group_id is not None : elif miam_id is not None and stop_id is None and group_id is not None :
cooc = matrix & (miam_list * group_list) cooc = matrix & (miam_list * group_list)
else : else :
......
...@@ -11,7 +11,7 @@ import scipy ...@@ -11,7 +11,7 @@ import scipy
from gargantext_web.db import get_or_create_node from gargantext_web.db import get_or_create_node
from analysis.cooccurrences import cooc from analysis.cooccurrences import do_cooc
import pandas as pd import pandas as pd
from copy import copy from copy import copy
...@@ -29,7 +29,6 @@ from ngram.lists import listIds ...@@ -29,7 +29,6 @@ from ngram.lists import listIds
def diag_null(x): def diag_null(x):
return x - x * scipy.eye(x.shape[0]) return x - x * scipy.eye(x.shape[0])
size = 1000 size = 1000
...@@ -51,24 +50,21 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz ...@@ -51,24 +50,21 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz
# data deleted each time # data deleted each time
session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==cooc_id).delete() session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==cooc_id).delete()
cooc_id = cooc(corpus=corpus, miam_id=miam_id, group_id=group_id, stop_id=stop_id, limit=size) cooc_id = do_cooc(corpus=corpus, miam_id=miam_id, group_id=group_id, stop_id=stop_id, limit=size)
#print([n for n in session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==cooc_id).all()]) #print([n for n in session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==cooc_id).all()])
for cooccurrence in session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==cooc_id).all(): for cooc in session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==cooc_id).all():
#print(cooccurrence) labels[cooc.ngramx_id] = cooc.ngramx_id
# print(cooccurrence.ngramx.terms," <=> ",cooccurrence.ngramy.terms,"\t",cooccurrence.score) labels[cooc.ngramy_id] = cooc.ngramy_id
# TODO clean this part, unuseful
labels[cooccurrence.ngramx_id] = cooccurrence.ngramx_id #session.query(Ngram.id).filter(Ngram.id == cooccurrence.ngramx_id).first()[0]
labels[cooccurrence.ngramy_id] = cooccurrence.ngramy_id #session.query(Ngram.id).filter(Ngram.id == cooccurrence.ngramy_id).first()[0]
matrix[cooccurrence.ngramx_id][cooccurrence.ngramy_id] = cooccurrence.score matrix[cooc.ngramx_id][cooc.ngramy_id] = cooc.score
matrix[cooccurrence.ngramy_id][cooccurrence.ngramx_id] = cooccurrence.score matrix[cooc.ngramy_id][cooc.ngramx_id] = cooc.score
ids[labels[cooccurrence.ngramx_id]] = cooccurrence.ngramx_id ids[labels[cooc.ngramx_id]] = cooc.ngramx_id
ids[labels[cooccurrence.ngramy_id]] = cooccurrence.ngramy_id ids[labels[cooc.ngramy_id]] = cooc.ngramy_id
weight[cooccurrence.ngramx_id] = weight.get(cooccurrence.ngramx_id, 0) + cooccurrence.score weight[cooc.ngramx_id] = weight.get(cooc.ngramx_id, 0) + cooc.score
weight[cooccurrence.ngramy_id] = weight.get(cooccurrence.ngramy_id, 0) + cooccurrence.score weight[cooc.ngramy_id] = weight.get(cooc.ngramy_id, 0) + cooc.score
x = pd.DataFrame(matrix).fillna(0) x = pd.DataFrame(matrix).fillna(0)
y = pd.DataFrame(matrix).fillna(0) y = pd.DataFrame(matrix).fillna(0)
......
...@@ -9,7 +9,7 @@ from collections import defaultdict ...@@ -9,7 +9,7 @@ from collections import defaultdict
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from analysis.cooccurrences import cooc from analysis.cooccurrences import do_cooc
from gargantext_web.db import session, cache, get_or_create_node, bulk_insert from gargantext_web.db import session, cache, get_or_create_node, bulk_insert
from gargantext_web.db import NodeNgramNgram, NodeNodeNgram from gargantext_web.db import NodeNgramNgram, NodeNodeNgram
...@@ -65,7 +65,7 @@ def compute_specificity(corpus,limit=100): ...@@ -65,7 +65,7 @@ def compute_specificity(corpus,limit=100):
dbg = DebugTime('Corpus #%d - specificity' % corpus.id) dbg = DebugTime('Corpus #%d - specificity' % corpus.id)
list_cvalue = get_or_create_node(nodetype='Cvalue', corpus=corpus) list_cvalue = get_or_create_node(nodetype='Cvalue', corpus=corpus)
cooc_id = cooc(corpus=corpus, cvalue_id=list_cvalue.id,limit=limit) cooc_id = do_cooc(corpus=corpus, cvalue_id=list_cvalue.id,limit=limit)
specificity(cooc_id=cooc_id,corpus=corpus,limit=limit) specificity(cooc_id=cooc_id,corpus=corpus,limit=limit)
dbg.show('specificity') dbg.show('specificity')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment