Commit c7aa5060 authored by delanoe

Merge branch 'samuel' into unstable

parents 9ca95e04 73e3d6d5
...@@ -3,6 +3,7 @@ import linecache ...@@ -3,6 +3,7 @@ import linecache
from time import time from time import time
from gargantext_web.settings import MEDIA_ROOT from gargantext_web.settings import MEDIA_ROOT
from django.db import connection
class DebugTime: class DebugTime:
def __init__(self, prefix): def __init__(self, prefix):
...@@ -19,7 +20,6 @@ class DebugTime: ...@@ -19,7 +20,6 @@ class DebugTime:
self.message = message self.message = message
self.time = time() self.time = time()
def ensure_dir(user): def ensure_dir(user):
''' '''
If user is new, folder does not exist yet, create it then If user is new, folder does not exist yet, create it then
...@@ -46,3 +46,21 @@ def PrintException(): ...@@ -46,3 +46,21 @@ def PrintException():
print('EXCEPTION IN ({}, LINE {} "{}"): {}'.format(filename, lineno, line.strip(), exc_obj)) print('EXCEPTION IN ({}, LINE {} "{}"): {}'.format(filename, lineno, line.strip(), exc_obj))
class WorkflowTracking:
    """Record the workflow step a corpus is currently going through.

    The step is stored by overwriting the ``hyperdata`` column of the
    corpus row in the ``node_node`` table with a one-key JSON object of
    the form ``{step: 1}``.
    """

    def __init__(self):
        # Placeholder attribute kept as-is so existing readers of it still work.
        self.hola = "mundo"

    def processing_(self, corpus, step):
        """Mark ``step`` as the step in progress for ``corpus``.

        Args:
            corpus: object whose ``id`` attribute identifies the
                ``node_node`` row to update.
            step: label of the step; it becomes the single key of the
                JSON document written to ``hyperdata``.

        Returns:
            None. Errors are reported through ``PrintException`` and are
            not re-raised, so a tracking failure never aborts the
            workflow that called this method.
        """
        # Function-scope import: only this method needs it.
        import json

        try:
            # Serialise with json.dumps and bind both values as query
            # parameters, so a quote or backslash in ``step`` cannot break
            # (or inject into) the SQL statement.
            params = [json.dumps({step: 1}), corpus.id]
            cursor = connection.cursor()
            try:
                cursor.execute(
                    "UPDATE node_node SET hyperdata=%s WHERE id=%s",
                    params,
                )
            finally:
                # Kept from the original: the connection is closed after
                # every update, whether or not the statement succeeded.
                connection.close()
        except Exception:
            # Narrower than a bare ``except`` so KeyboardInterrupt and
            # SystemExit still propagate.
            PrintException()
\ No newline at end of file
...@@ -23,32 +23,27 @@ def apply_sum(x, y): ...@@ -23,32 +23,27 @@ def apply_sum(x, y):
from parsing.corpustools import parse_resources, extract_ngrams #add_resource, from parsing.corpustools import parse_resources, extract_ngrams #add_resource,
from ngram.lists import ngrams2miam from ngram.lists import ngrams2miam
from admin.utils import PrintException from admin.utils import WorkflowTracking
def update_processing(corpus, step=0):
    """Store ``step`` under the ``'Processing'`` key of the corpus hyperdata.

    The corpus' in-memory ``hyperdata`` mapping is updated first, then the
    whole mapping is written to the matching ``Node`` row and committed.

    Args:
        corpus: object exposing ``id`` and a dict-like ``hyperdata``.
        step: value to record under ``'Processing'``; defaults to 0.

    Returns:
        None. Errors are reported through ``PrintException`` and are not
        re-raised.
    """
    try:
        corpus.hyperdata.update({'Processing': step})
        session.query(Node).filter(Node.id == corpus.id).update(
            {'hyperdata': corpus.hyperdata}
        )
        session.commit()
    except Exception:
        # Narrower than a bare ``except`` so KeyboardInterrupt and
        # SystemExit still propagate out of the worker.
        PrintException()
@shared_task @shared_task
def apply_workflow(corpus_id): def apply_workflow(corpus_id):
update_state = WorkflowTracking()
corpus = session.query(Node).filter(Node.id==corpus_id).first() corpus = session.query(Node).filter(Node.id==corpus_id).first()
update_processing(corpus, 1) update_state.processing_(corpus, "Parsing")
#cProfile.runctx('parse_resources(corpus)', global,locals) #cProfile.runctx('parse_resources(corpus)', global,locals)
parse_resources(corpus) parse_resources(corpus)
update_processing(corpus, 2) update_state.processing_(corpus, "Terms extraction")
extract_ngrams(corpus, ['title', 'abstract'], nlp=True) extract_ngrams(corpus, ['title', 'abstract'], nlp=True)
update_processing(corpus, 3) # update_state.processing_(corpus, "")
ngram_workflow(corpus) ngram_workflow(corpus)
#ngrams2miam(user_id=corpus.user_id, corpus_id=corpus_id) #ngrams2miam(user_id=corpus.user_id, corpus_id=corpus_id)
update_processing(corpus, 0) update_state.processing_(corpus, "OK")
@shared_task @shared_task
def empty_trash(corpus_id): def empty_trash(corpus_id):
......
...@@ -7,17 +7,20 @@ from ngram.group import compute_groups ...@@ -7,17 +7,20 @@ from ngram.group import compute_groups
from gargantext_web.db import get_or_create_node from gargantext_web.db import get_or_create_node
from ngram.mapList import compute_mapList from ngram.mapList import compute_mapList
from gargantext_web.db import NodeNgram from gargantext_web.db import session , Node , NodeNgram
#from gargantext_web.celery import update_processing from admin.utils import WorkflowTracking
def ngram_workflow(corpus, n=5000): def ngram_workflow(corpus, n=5000):
''' '''
All the workflow to filter the ngrams. All the workflow to filter the ngrams.
''' '''
update_state = WorkflowTracking()
update_state.processing_(corpus, "Stop words")
compute_stop(corpus) compute_stop(corpus)
update_state.processing_(corpus, "TF-IDF global score")
compute_tfidf_global(corpus) compute_tfidf_global(corpus)
part = round(n * 0.9) part = round(n * 0.9)
...@@ -27,6 +30,7 @@ def ngram_workflow(corpus, n=5000): ...@@ -27,6 +30,7 @@ def ngram_workflow(corpus, n=5000):
# part = round(part * 0.8) # part = round(part * 0.8)
#print('spec part:', part) #print('spec part:', part)
update_state.processing_(corpus, "Specificity score")
compute_specificity(corpus,limit=part) compute_specificity(corpus,limit=part)
part = round(part * 0.8) part = round(part * 0.8)
...@@ -34,10 +38,13 @@ def ngram_workflow(corpus, n=5000): ...@@ -34,10 +38,13 @@ def ngram_workflow(corpus, n=5000):
limit_inf = round(part * 1) limit_inf = round(part * 1)
limit_sup = round(part * 5) limit_sup = round(part * 5)
#print(limit_inf,limit_sup) #print(limit_inf,limit_sup)
update_state.processing_(corpus, "Sinonims")
compute_groups(corpus,limit_inf=limit_inf, limit_sup=limit_sup) compute_groups(corpus,limit_inf=limit_inf, limit_sup=limit_sup)
update_state.processing_(corpus, "Map list")
compute_mapList(corpus,limit=1000) # size compute_mapList(corpus,limit=1000) # size
update_state.processing_(corpus, "TF-IDF local score")
compute_tfidf(corpus) compute_tfidf(corpus)
...@@ -46,7 +53,7 @@ def ngram_workflow(corpus, n=5000): ...@@ -46,7 +53,7 @@ def ngram_workflow(corpus, n=5000):
#update_processing(corpus, 0) #update_stateprocessing(corpus, 0)
check_stop = False check_stop = False
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment