# workflow.py
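
# NOTE: the wildcard import below is assumed to provide `session`, `Node`
# and `Ngram`, which the script section at the bottom of this file relies on.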
from admin.env import *
from ngram.tfidf import compute_tfidf, compute_tfidf_global
from ngram.cvalue import compute_cvalue
from ngram.specificity import compute_specificity
from ngram.stop import compute_stop
from ngram.group import compute_groups
from gargantext_web.db import get_or_create_node, NodeNgram
from ngram.mapList import compute_mapList

#from gargantext_web.celery import update_processing


def ngram_workflow(corpus, n=5000):
    '''
    Full ngram-filtering workflow for a corpus:
    stoplist -> global tf-idf -> c-value -> specificity -> grouping
    -> map list -> tf-idf. `n` is the initial ngram budget.
    '''
    
    # Build the stoplist for this corpus.
    compute_stop(corpus)
    
    # Global tf-idf scores for the candidate ngrams.
    compute_tfidf_global(corpus)
    
    # Shrink the candidate budget step by step: keep 90% of n here,
    # then 80% of the remainder before each of the next two steps.
    part = round(n * 0.9)
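    # With the default n=5000: part = 4500 here, 3600 before specificity,
    # 2880 before grouping (group limits 2880 and 14400).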

    compute_cvalue(corpus, limit=1000)  # hardcoded size, independent of n
    
    part = round(part * 0.8)
    print('spec part:', part)

    # Rank by specificity, keeping at most `part` ngrams.
    compute_specificity(corpus, limit=part)
    
    part = round(part * 0.8)

    # Grouping bounds: between 1x and 5x the remaining budget.
    limit_inf = part
    limit_sup = part * 5
    print('group limits:', limit_inf, limit_sup)
    compute_groups(corpus, limit_inf=limit_inf, limit_sup=limit_sup)
    
    compute_mapList(corpus, limit=1000)  # hardcoded size
    
    # Final tf-idf pass on the filtered ngrams.
    compute_tfidf(corpus)


# Script section: run the workflow on one corpus, fetched by node id.
node_id = 1427298
#corpus=session.query(Node).filter(Node.id==540420).first()
corpus = session.query(Node).filter(Node.id == node_id).first()
ngram_workflow(corpus)
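
# Sketch (not in the original script): the same workflow could be run over
# several corpora in a row; `corpus_ids` is a hypothetical list of node ids.
#for corpus_id in corpus_ids:
#    corpus = session.query(Node).filter(Node.id == corpus_id).first()
#    ngram_workflow(corpus)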


#update_processing(corpus, 0)

# Optional check: list the ngrams attached to the corpus StopList node.
check_stop = False

if check_stop:
    stop = get_or_create_node(corpus=corpus, nodetype='StopList')
    # Uncomment to empty the stoplist instead:
    #session.query(NodeNgram).filter(NodeNgram.node_id==stop.id).delete()
    #session.commit()
    stop_ngrams = (session.query(Ngram)
                          .join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
                          .filter(NodeNgram.node_id == stop.id)
                          .all())

    print(stop_ngrams)