Commit d9c0428f authored by delanoe

[SOME FIX]

parent 6845ac3e
@@ -91,7 +91,7 @@ def do_cooc(corpus=None
             .filter(Node.parent_id == corpus.id, Node.type_id == doc_id)
             .filter(Hyperdata.name == field1)
             )
-    print(cooc_query)
+    #print(cooc_query)
     # Size of the ngrams between n_min and n_max
     if n_min is not None or n_max is not None:
...
@@ -112,7 +112,7 @@ def do_distance(cooc_id, field1=None, field2=None, isMonopartite=True):
         return item[1]
     node_degree = sorted(G.degree().items(), key=getWeight, reverse=True)
-    print(node_degree)
+    #print(node_degree)
     nodes_too_connected = [n[0] for n in node_degree[0:(round(len(node_degree)/5))]]
     for n in nodes_too_connected:
...
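Both hunks make the same kind of change: a debugging print call is silenced by commenting it out. As a sketch of an alternative (not part of this commit; the logger name and the query placeholder are illustrative), the standard logging module lets the same diagnostics be toggled by log level instead of by editing the source:

import logging

logging.basicConfig(level=logging.INFO)            # application-wide choice
logger = logging.getLogger("graph.cooccurrences")  # illustrative name

cooc_query = "SELECT ..."  # stand-in for the real SQLAlchemy query object

# Silent at INFO level; switch basicConfig to logging.DEBUG to see it
# again without touching this line.
logger.debug("cooc query: %s", cooc_query)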
from admin.env import *   # expected to provide session, Node and Ngram
from ngram.tfidf import compute_tfidf, compute_tfidf_global
from ngram.cvalue import compute_cvalue
from ngram.specificity import compute_specificity
from ngram.stop import compute_stop
from ngram.group import compute_groups
from ngram.mapList import compute_mapList
from gargantext_web.db import get_or_create_node, NodeNgram
#from gargantext_web.celery import update_processing

def ngram_workflow(corpus, n=5000):
    '''
    Whole workflow that filters the ngrams of a corpus:
    stop list, global TF-IDF, C-value, specificity, grouping,
    map list, then local TF-IDF.
    '''
    compute_stop(corpus)
    compute_tfidf_global(corpus)

    # Shrink the candidate list step by step: keep 90% of n here,
    # then 80% of the remainder at each following step.
    part = round(n * 0.9)

    compute_cvalue(corpus, limit=1000)  # size

    part = round(part * 0.8)
    print('spec part:', part)
    compute_specificity(corpus, limit=part)

    part = round(part * 0.8)
    limit_inf = round(part * 1)
    limit_sup = round(part * 5)
    print(limit_inf, limit_sup)
    compute_groups(corpus, limit_inf=limit_inf, limit_sup=limit_sup)

    compute_mapList(corpus, limit=1000)  # size
    compute_tfidf(corpus)

# Run the workflow on one hard-coded corpus node.
node_id = 1427298
#corpus = session.query(Node).filter(Node.id == 540420).first()
corpus = session.query(Node).filter(Node.id == node_id).first()

ngram_workflow(corpus)
#update_processing(corpus, 0)

# Optional sanity check: list the ngrams attached to the StopList node.
check_stop = False
if check_stop:
    stop = get_or_create_node(corpus=corpus, nodetype='StopList')
    #session.query(NodeNgram).filter(NodeNgram.node_id == stop.id).delete()
    #session.commit()
    stop_ngrams = (session.query(Ngram)
                   .join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
                   .filter(NodeNgram.node_id == stop.id)
                   .all()
                   )
    print([n for n in stop_ngrams])
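For the default n=5000, the cascading limits in ngram_workflow work out as follows: part = round(5000 * 0.9) = 4500 after the global TF-IDF step; part = round(4500 * 0.8) = 3600, which is the limit passed to compute_specificity; then part = round(3600 * 0.8) = 2880, giving limit_inf = 2880 and limit_sup = 5 * 2880 = 14400 for the grouping step. The C-value and map-list steps use a fixed limit of 1000 regardless of n.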