Commit d071736a authored by delanoe

[FEAT] Async graph working (still needs to know the parameters in myGraph + needs refactoring)

parent 3ce5f459
@@ -9,6 +9,7 @@ from sqlalchemy import desc, asc, or_, and_
 #import inspect
 import datetime
+from celery import shared_task

 def filterMatrix(matrix, mapList_id, groupList_id):
     mapList = UnweightedList( mapList_id )
@@ -17,7 +18,8 @@ def filterMatrix(matrix, mapList_id, groupList_id):
     return cooc

-def countCooccurrences( corpus=None    , test= False
+@shared_task
+def countCooccurrences( corpus_id=None , test= False
                       , field1='ngrams' , field2='ngrams'
                       , start=None , end=None
                       , mapList_id=None , groupList_id=None
@@ -47,8 +49,12 @@ def countCooccurrences( corpus=None , test= False
     # Security test
     field1,field2 = str(field1), str(field2)

+    # Get corpus as Python object
+    corpus = session.query(Node).filter(Node.id==corpus_id).first()
+
     # Get node
     if not coocNode_id:
         coocNode_id0 = ( session.query( Node.id )
                         .filter( Node.typename == "COOCCURRENCES"
                                , Node.name     == "GRAPH EXPLORER"
...
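Side note on the new decorator: marking countCooccurrences with Celery's @shared_task is what makes it callable either inline or on a worker. Below is a minimal, hedged sketch of that pattern with illustrative names (it is not the project's code; gargantext itself dispatches through gargantext.util.scheduling.scheduled, and the asynchronous call assumes a configured broker and a running worker):

from celery import shared_task

@shared_task
def heavy_count(corpus_id, threshold=2):
    # stand-in for the expensive cooccurrence computation
    return {"corpus_id": corpus_id, "threshold": threshold}

# synchronous call: runs in the current process and returns the result directly
result = heavy_count(42)

# asynchronous call: the task is queued and executed by a Celery worker
# (requires a configured broker and a running worker)
async_result = heavy_count.delay(42)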
 # Gargantext lib
-from gargantext.util.db import session
+from gargantext.util.db import session, aliased
 from gargantext.util.lists import WeightedMatrix, UnweightedList, Translations
 from gargantext.util.http import JsonHttpResponse
-from gargantext.models import Node, Ngram, NodeNgram, NodeNgramNgram
+from gargantext.models import Node, Ngram, NodeNgram, NodeNgramNgram, NodeHyperdata

 #from gargantext.util.toolchain.ngram_coocs import compute_coocs
 from graph.cooccurrences import countCooccurrences, filterMatrix
 from graph.distances import clusterByDistances
 from graph.bridgeness import filterByBridgeness

+from gargantext.util.scheduling import scheduled
+from datetime import datetime
+
 def get_graph( request=None , corpus=None
-             , test= False
              , field1='ngrams' , field2='ngrams'
              , mapList_id = None , groupList_id = None
              , cooc_id=None , type='node_link'
@@ -37,18 +40,46 @@ def get_graph( request=None , corpus=None
     '''
-    from datetime import datetime
     before_cooc = datetime.now()

+    # TODO change test here (always true)
+    # to something like "if cooc.status threshold == required_threshold
+    # and group.creation_time < cooc.creation_time"
+    # if False => read and give to clusterByDistances
+    # if True  => compute and give to clusterByDistances <==
     if cooc_id == None:
-        if test = True:
-            cooc_matrix = countCooccurrences( corpus=corpus, test=test
+        # case of Cooccurrences have not been computed already
+        corpus_size_query = (session.query(Node)
+                                    .filter(Node.typename=="DOCUMENT")
+                                    .filter(Node.parent_id == corpus.id)
+                            )
+
+        if start is not None:
+            #date_start = datetime.datetime.strptime ("2001-2-3 10:11:12", "%Y-%m-%d %H:%M:%S")
+            date_start = datetime.strptime (str(start), "%Y-%m-%d")
+            date_start_utc = date_start.strftime("%Y-%m-%d %H:%M:%S")
+            Start = aliased(NodeHyperdata)
+            corpus_size_query = (corpus_size_query.join( Start
+                                                       , Start.node_id == Node.id
+                                                       )
+                                                  .filter( Start.key == 'publication_date')
+                                                  .filter( Start.value_utc >= date_start_utc)
+                                )
+
+        if end is not None:
+            date_end = datetime.strptime (str(end), "%Y-%m-%d")
+            date_end_utc = date_end.strftime("%Y-%m-%d %H:%M:%S")
+            End = aliased(NodeHyperdata)
+            corpus_size_query = (corpus_size_query.join( End
+                                                       , End.node_id == Node.id
+                                                       )
+                                                  .filter( End.key == 'publication_date')
+                                                  .filter( End.value_utc <= date_end_utc )
+                                )
+
+        if corpus_size_query.count() > 400:
+            scheduled(countCooccurrences)( corpus_id=corpus.id
                                     #, field1="ngrams", field2="ngrams"
                                          , start=start , end =end
                                          , mapList_id=mapList_id , groupList_id=groupList_id
@@ -56,20 +87,21 @@ def get_graph( request=None , corpus=None
                                          , save_on_db = True
                                     #, limit=size
                                          )
+            return {'nodes':[], 'links':[1]} # Dic trick to inform user that graph is computed asynchronously
         else:
-            cooc_matrix = countCooccurrences( corpus=corpus, test=test
+            cooc_matrix = countCooccurrences( corpus_id=corpus.id
                                     #, field1="ngrams", field2="ngrams"
                                             , start=start , end =end
                                             , mapList_id=mapList_id , groupList_id=groupList_id
                                             , isMonopartite=True , threshold = threshold
-                                            , save_on_db = True
+                                            , save_on_db = False
                                     #, limit=size
                                             )
     else:
         print("Getting data for matrix %d", int(cooc_id))
         matrix = WeightedMatrix(int(cooc_id))
-        print(matrix)
+        #print(matrix)
         cooc_matrix = filterMatrix(matrix, mapList_id, groupList_id)
...
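The start/end handling above joins NodeHyperdata twice against the same Node rows, which is why aliased() is imported: each date bound gets its own alias of the hyperdata table. A condensed sketch of that pattern, assuming the project's session, Node and NodeHyperdata (the helper name documents_in_range is illustrative, not part of the codebase):

from datetime import datetime
from gargantext.util.db import session, aliased
from gargantext.models import Node, NodeHyperdata

def documents_in_range(corpus_id, start=None, end=None):
    # count DOCUMENT nodes of a corpus, optionally bounded by publication_date
    query = (session.query(Node)
                    .filter(Node.typename == "DOCUMENT")
                    .filter(Node.parent_id == corpus_id))
    if start is not None:
        Start = aliased(NodeHyperdata)          # first join on the hyperdata table
        start_utc = datetime.strptime(str(start), "%Y-%m-%d").strftime("%Y-%m-%d %H:%M:%S")
        query = (query.join(Start, Start.node_id == Node.id)
                      .filter(Start.key == 'publication_date')
                      .filter(Start.value_utc >= start_utc))
    if end is not None:
        End = aliased(NodeHyperdata)            # second, independent join on the same table
        end_utc = datetime.strptime(str(end), "%Y-%m-%d").strftime("%Y-%m-%d %H:%M:%S")
        query = (query.join(End, End.node_id == Node.id)
                      .filter(End.key == 'publication_date')
                      .filter(End.value_utc <= end_utc))
    return query.count()

It is this count that the diff compares to 400 to decide between computing the matrix inline and scheduling it.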
@@ -139,8 +139,10 @@ class Graph(APIView):
                              , distance   = distance
                              , bridgeness = bridgeness
                              )

             # Test data length
-            if len(data['nodes']) > 0 and len(data['links']) > 0:
+            if len(data['nodes']) > 1 and len(data['links']) > 1 :
                 # normal case --------------------------------
                 if format_ == 'json':
                     return JsonHttpResponse(
@@ -148,6 +150,15 @@ class Graph(APIView):
                         status=200
                         )
                 # --------------------------------------------
+            elif len(data['nodes']) == 0 and len(data['links']) == 1 :
+                # async data case
+                return JsonHttpResponse({
+                    'msg': '''Async graph generation
+                              Wait a while and discover your graph
+                              http://%sgargantext.org/projects/%d/corpora/%d/myGraph
+                           ''' % ("dev.", corpus.parent_id, corpus.id),
+                    }, status=400)
             else:
                 # empty data case
                 return JsonHttpResponse({
@@ -155,6 +166,7 @@ class Graph(APIView):
                     No cooccurences found in this corpus for the words of this maplist
                     (maybe add more terms to the maplist?)''',
                     }, status=400)

         else:
             # parameters error case
             return JsonHttpResponse({
@@ -166,7 +178,8 @@ class Graph(APIView):
                 }, status=400)

         # for any other errors that we forgot to test
-        except Exception as e:
+        except Exception as error:
+            print(error)
             return JsonHttpResponse({
-                'msg' : 'Unknown error (showing the trace):\n%s' % "\n".join(format_tb(e.__traceback__))
+                'msg' : 'Unknown error (showing the trace):\n%s' % "\n".join(format_tb(error.__traceback__))
                 }, status=400)
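Taken together, the two diffs implement a small contract between get_graph and the REST view: for a large corpus the cooccurrence count is scheduled on a worker and get_graph returns the sentinel {'nodes': [], 'links': [1]}, which the view recognises and turns into an "async graph generation" message instead of graph data. A stripped-down sketch of that round trip, using plain functions with illustrative names (no Django or Celery involved, values taken from the diff above):

ASYNC_SENTINEL  = {'nodes': [], 'links': [1]}   # the "dic trick" returned by get_graph
ASYNC_THRESHOLD = 400                           # document count above which work is deferred

def build_graph(corpus_size, compute_now, schedule_later):
    if corpus_size > ASYNC_THRESHOLD:
        schedule_later()                        # e.g. scheduled(countCooccurrences)(...)
        return ASYNC_SENTINEL                   # tells the caller the graph is not ready yet
    return compute_now()                        # small corpus: compute inline

def respond(data):
    if len(data['nodes']) > 1 and len(data['links']) > 1:
        return ('graph', data)                  # normal case: real graph data
    if len(data['nodes']) == 0 and len(data['links']) == 1:
        return ('async', 'graph is being computed, come back later')
    return ('empty', 'no cooccurrences found (or bad parameters)')

# example: a large corpus triggers the asynchronous path
print(respond(build_graph(1200,
                          compute_now=lambda: {'nodes': [], 'links': []},
                          schedule_later=lambda: None)))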