Merge branch 'samuel' into unstable

c7aa5060 · delanoe · 9ca95e04 · 73e3d6d5 · c7aa5060 · c7aa5060
Commit c7aa5060 authored Nov 17, 2015 by delanoe
Hide whitespace changes
Inline Side-by-side

Showing with 38 additions and 18 deletions

utils.py admin/utils.py +19 -1

celery.py gargantext_web/celery.py +8 -13

workflow.py ngram/workflow.py +11 -4

No files found.
--- a/admin/utils.py
+++ b/admin/utils.py
@@ -3,6 +3,7 @@ import linecache
 from time import time

 from gargantext_web.settings import MEDIA_ROOT
+from django.db import connection

 class DebugTime:
    def __init__(self, prefix):
@@ -19,7 +20,6 @@ class DebugTime:
        self.message = message
        self.time = time()

-
 def ensure_dir(user):
    '''
    If user is new, folder does not exist yet, create it then
@@ -46,3 +46,21 @@ def PrintException():
    print('EXCEPTION IN ({}, LINE {} "{}"): {}'.format(filename, lineno, line.strip(), exc_obj))


+class WorkflowTracking:
+    '''Records the current workflow step of a corpus in node_node.hyperdata.'''
+    def __init__( self ):
+        self.hola = "mundo"  # placeholder attribute; nothing in this class reads it
+
+    def processing_(self , corpus , step):
+        '''Replace hyperdata of node corpus.id with { "<step>" : 1}; failures are printed, not raised.'''
+        import json  # local import, used only to quote `step` as a JSON string
+        try:
+            the_query = """ UPDATE node_node SET hyperdata=%s WHERE id=%s """  # bound params: quotes in step cannot alter the SQL
+            hyperdata = '{ %s : 1}' % json.dumps(str(step), ensure_ascii=False)
+            cursor = connection.cursor()
+            try:
+                cursor.execute(the_query, [hyperdata, corpus.id])
+            finally:
+                connection.close()  # as before: closes the DB connection itself, not just the cursor
+        except Exception:  # best-effort, but no longer swallows SystemExit/KeyboardInterrupt
+            PrintException()
\ No newline at end of file
--- a/gargantext_web/celery.py
+++ b/gargantext_web/celery.py
@@ -23,32 +23,27 @@ def apply_sum(x, y):
 from parsing.corpustools import  parse_resources, extract_ngrams #add_resource,
 from ngram.lists import ngrams2miam

-from admin.utils import PrintException
-
-def update_processing(corpus, step=0):
-    try:
-        corpus.hyperdata.update({'Processing' : step})
-        session.query(Node).filter(Node.id==corpus.id).update({'hyperdata' : corpus.hyperdata})
-        session.commit()
-    except :
-        PrintException()
+from admin.utils import WorkflowTracking

 @shared_task
 def apply_workflow(corpus_id):
+
+    update_state = WorkflowTracking()
+
    corpus = session.query(Node).filter(Node.id==corpus_id).first()

-    update_processing(corpus, 1)
+    update_state.processing_(corpus, "Parsing")
    #cProfile.runctx('parse_resources(corpus)', global,locals)
    parse_resources(corpus)

-    update_processing(corpus, 2)
+    update_state.processing_(corpus, "Terms extraction")
    extract_ngrams(corpus, ['title', 'abstract'], nlp=True)

-    update_processing(corpus, 3)
+    # update_state.processing_(corpus, "")
    ngram_workflow(corpus)

    #ngrams2miam(user_id=corpus.user_id, corpus_id=corpus_id)
-    update_processing(corpus, 0)
+    update_state.processing_(corpus, "OK")

 @shared_task
 def empty_trash(corpus_id):

--- a/ngram/workflow.py
+++ b/ngram/workflow.py
@@ -7,17 +7,20 @@ from ngram.group import compute_groups
 from gargantext_web.db import get_or_create_node
 from ngram.mapList import compute_mapList

-from gargantext_web.db import NodeNgram
-#from gargantext_web.celery import update_processing
+from gargantext_web.db import session , Node , NodeNgram
+from admin.utils import WorkflowTracking


 def ngram_workflow(corpus, n=5000):
    '''
    All the workflow to filter the ngrams.
    '''
-    
+    update_state = WorkflowTracking()
+
+    update_state.processing_(corpus, "Stop words")
    compute_stop(corpus)
    
+    update_state.processing_(corpus, "TF-IDF global score")
    compute_tfidf_global(corpus)
    
    part = round(n * 0.9)
@@ -27,6 +30,7 @@ def ngram_workflow(corpus, n=5000):
 #    part = round(part * 0.8)
    #print('spec part:', part)

+    update_state.processing_(corpus, "Specificity score")
    compute_specificity(corpus,limit=part)
    
    part = round(part * 0.8)
@@ -34,10 +38,13 @@ def ngram_workflow(corpus, n=5000):
    limit_inf = round(part * 1)
    limit_sup = round(part * 5)
    #print(limit_inf,limit_sup)
+    update_state.processing_(corpus, "Sinonims")
    compute_groups(corpus,limit_inf=limit_inf, limit_sup=limit_sup)
    
+    update_state.processing_(corpus, "Map list")
    compute_mapList(corpus,limit=1000) # size
    
+    update_state.processing_(corpus, "TF-IDF local score")
    compute_tfidf(corpus)
    

@@ -46,7 +53,7 @@ def ngram_workflow(corpus, n=5000):



-#update_processing(corpus, 0)
+#update_stateprocessing(corpus, 0)

 check_stop = False