Commit 38fcc5d6 authored by PkSM3

Merge branch 'unstable' of ssh://delanoe.org:1979/gargantext into samuel

parents 20affcd1 a3d93529
@@ -2,11 +2,12 @@
 from ngram.tfidf import compute_tfidf, compute_tfidf_global
 from ngram.cvalue import compute_cvalue
 from ngram.specificity import compute_specificity
-#from ngram.stop import compute_stop
+from ngram.stop import compute_stop
 from ngram.group import compute_groups
 from gargantext_web.db import get_or_create_node
 from ngram.mapList import compute_mapList
+from gargantext_web.db import NodeNgram
 #from gargantext_web.celery import update_processing
@@ -14,35 +15,49 @@ def ngram_workflow(corpus, n=5000):
     '''
     All the workflow to filter the ngrams.
     '''
-    #compute_tfidf_global(corpus)
+    compute_stop(corpus)
+    compute_tfidf_global(corpus)
     part = round(n * 0.9)
-    #compute_cvalue(corpus,limit=part) # size
+    compute_cvalue(corpus,limit=part) # size
     part = round(part * 0.8)
     print('spec part:', part)
-    #compute_specificity(corpus,limit=part)
+    compute_specificity(corpus,limit=part)
     part = round(part * 0.8)
-    # compute_stop(corpus)
     limit_inf = round(part * 1)
     limit_sup = round(part * 5)
     print(limit_inf,limit_sup)
-    #compute_groups(corpus,limit_inf=limit_inf, limit_sup=limit_sup)
+    compute_groups(corpus,limit_inf=limit_inf, limit_sup=limit_sup)
     compute_mapList(corpus,limit=part) # size
-    #compute_tfidf(corpus)
+    compute_tfidf(corpus)
 #corpus=session.query(Node).filter(Node.id==540420).first()
 #corpus=session.query(Node).filter(Node.id==559637).first()
+#ngram_workflow(corpus)
 #update_processing(corpus, 0)
-#cvalue = get_or_create_node(corpus=corpus,nodetype='Cvalue')
-#print(session.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==cvalue.id).count())
+check_stop = False
+if check_stop:
+    stop = get_or_create_node(corpus=corpus,nodetype='StopList')
+    #session.query(NodeNgram).filter(NodeNgram.node_id==stop.id).delete()
+    #session.commit()
+    stop_ngrams = (session.query(Ngram)
+                   .join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
+                   .filter(NodeNgram.node_id==stop.id)
+                   .all()
+                   )
+    print([n for n in stop_ngrams])
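
For reference, the sketch below is not part of the commit: it traces the size limits the re-enabled calls receive with the default n=5000, and it mirrors the commented-out manual invocation at the end of the file. The session/Node import shown in the trailing comments is an assumption, since this diff only shows the NodeNgram and get_or_create_node imports.

# Standalone sketch (not from the commit): the limit cascade in ngram_workflow
# for the default n=5000.
n = 5000
part = round(n * 0.9)        # 4500  -> compute_cvalue(corpus, limit=part)
part = round(part * 0.8)     # 3600  -> compute_specificity(corpus, limit=part)
part = round(part * 0.8)     # 2880  -> compute_mapList(corpus, limit=part)
limit_inf = round(part * 1)  # 2880  -> compute_groups lower bound
limit_sup = round(part * 5)  # 14400 -> compute_groups upper bound
print(part, limit_inf, limit_sup)  # 2880 2880 14400

# Manual run, mirroring the commented-out lines at the end of the file.
# Assumes session and Node are importable from gargantext_web.db (not shown in this diff).
# from gargantext_web.db import session, Node
# corpus = session.query(Node).filter(Node.id == 540420).first()
# ngram_workflow(corpus)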