Commit 9fabf7ac authored by delanoe

[FEAT] Computing Coocc for corpus as NodeNodeNgram as n where n.nodex_id is...

[FEAT] Compute Coocc for a corpus, stored as NodeNodeNgram rows n where n.nodex_id is the id of the Node of type Occurrences, n.nodey_id is the id of the corpus, and n.ngram_id is the id of the ngram whose score is the sum of all its occurrences in the corpus.
parent 4e4f273d
......@@ -93,7 +93,7 @@ node_types = [
'Project', 'Corpus', 'Document',
'MiamList', 'StopList', 'MainList', 'MapList', # TODO MiamList -> MainList
'Stem', 'Lem', 'Group', 'Tfidf', 'Tfidf (global)', 'Cvalue', 'Specificity'
, 'Cooccurrence',
, 'Cooccurrence', 'Occurrences',
]
for node_type in node_types:
......
......@@ -239,7 +239,6 @@ def compute_tfidf_global(corpus):
lnD = log(D)
cursor.execute('UPDATE tmp__idf SET idf = idf + %f' % (lnD, ))
# show off
dbg.show('insert tfidf')
cursor.execute('''
INSERT INTO
%s (nodex_id, nodey_id, ngram_id, score)
......@@ -255,6 +254,7 @@ def compute_tfidf_global(corpus):
''' % (NodeNodeNgram.__table__.name, tfidf_node.id, corpus.id, ))
db.commit()
dbg.show('insert tfidf')
#corpus=session.query(Node).filter(Node.id==244250).first()
#compute_tfidf_global(corpus)
......@@ -6,6 +6,7 @@ from ngram.stop import compute_stop
from ngram.group import compute_groups
from gargantext_web.db import get_or_create_node
from ngram.mapList import compute_mapList
from ngram.occurrences import compute_occs
from gargantext_web.db import session , Node , NodeNgram
from admin.utils import WorkflowTracking
......@@ -46,6 +47,8 @@ def ngram_workflow(corpus, n=5000):
update_state.processing_(corpus, "TF-IDF local score")
compute_tfidf(corpus)
update_state.processing_(corpus, "OCCS local score")
compute_occs(corpus)
#corpus=session.query(Node).filter(Node.id==540420).first()
......
......@@ -165,9 +165,14 @@ class Ngrams(APIView):
the_score = request.GET['score']
if 'occs' in the_score:
occs = func.sum(Node_Ngram.weight).label('occs')
ngrams_query = (ngrams_query.add_column(occs))
results.append('occurences')
Occs = NodeNodeNgram
occs_id = get_or_create_node(nodetype='Occurrences', corpus=corpus).id
ngrams_query = (ngrams_query.add_column(Occs.score.label('occs'))
.join(Occs, Occs.ngram_id == Ngram.id)
.filter(Occs.nodex_id==occs_id)
)
group_by.append(Occs.score)
results.append('occs')
if 'tfidf' in the_score:
Tfidf = aliased(NodeNodeNgram)
......@@ -272,7 +277,7 @@ class Ngrams(APIView):
except: pass
try: info["name"] = ngram.terms
except: pass
try: info["scores"]["occ_uniq"] = ngram.occs
try: info["scores"]["occs"] = ngram.occs
except: pass
try: info["scores"]["tfidf"] = ngram.tfidf
except: pass
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment