Commit d071736a authored by delanoe

[FEAT] Async graph working (still needs to know the parameters in myGraph + needs refactoring)

parent 3ce5f459
@@ -9,6 +9,7 @@ from sqlalchemy import desc, asc, or_, and_
 #import inspect
 import datetime
+from celery import shared_task

 def filterMatrix(matrix, mapList_id, groupList_id):
     mapList = UnweightedList( mapList_id )
@@ -17,7 +18,8 @@ def filterMatrix(matrix, mapList_id, groupList_id):
     return cooc

-def countCooccurrences( corpus=None    , test= False
+@shared_task
+def countCooccurrences( corpus_id=None , test= False
                       , field1='ngrams' , field2='ngrams'
                       , start=None , end=None
                       , mapList_id=None , groupList_id=None
@@ -47,8 +49,12 @@ def countCooccurrences( corpus=None , test= False
     # Security test
     field1,field2 = str(field1), str(field2)

+    # Get corpus as Python object
+    corpus = session.query(Node).filter(Node.id==corpus_id).first()
+
     # Get node
     if not coocNode_id:
         coocNode_id0 = ( session.query( Node.id )
                         .filter( Node.typename == "COOCCURRENCES"
                                , Node.name     == "GRAPH EXPLORER"
...
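Side note on the new decorator: marking countCooccurrences with Celery's @shared_task is what makes it callable either inline or on a worker. Below is a minimal, hedged sketch of that pattern with illustrative names (it is not the project's code; gargantext itself dispatches through gargantext.util.scheduling.scheduled, and the asynchronous call assumes a configured broker and a running worker):

from celery import shared_task

@shared_task
def heavy_count(corpus_id, threshold=2):
    # stand-in for the expensive cooccurrence computation
    return {"corpus_id": corpus_id, "threshold": threshold}

# synchronous call: runs in the current process and returns the result directly
result = heavy_count(42)

# asynchronous call: the task is queued and executed by a Celery worker
# (requires a configured broker and a running worker)
async_result = heavy_count.delay(42)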
 # Gargantext lib
-from gargantext.util.db import session
+from gargantext.util.db import session, aliased
 from gargantext.util.lists import WeightedMatrix, UnweightedList, Translations
 from gargantext.util.http import JsonHttpResponse
-from gargantext.models import Node, Ngram, NodeNgram, NodeNgramNgram
+from gargantext.models import Node, Ngram, NodeNgram, NodeNgramNgram, NodeHyperdata

 #from gargantext.util.toolchain.ngram_coocs import compute_coocs
 from graph.cooccurrences import countCooccurrences, filterMatrix
 from graph.distances import clusterByDistances
 from graph.bridgeness import filterByBridgeness

+from gargantext.util.scheduling import scheduled
+from datetime import datetime
+
 def get_graph( request=None , corpus=None
-             , test= False
              , field1='ngrams' , field2='ngrams'
              , mapList_id = None , groupList_id = None
              , cooc_id=None , type='node_link'
@@ -37,18 +40,46 @@ def get_graph( request=None , corpus=None
     '''
-    from datetime import datetime
     before_cooc = datetime.now()

+    # TODO change test here (always true)
+    # to something like "if cooc.status threshold == required_threshold
+    # and group.creation_time < cooc.creation_time"
+    # if False => read and give to clusterByDistances
+    # if True  => compute and give to clusterByDistances <==
     if cooc_id == None:
-        if test = True:
-            cooc_matrix = countCooccurrences( corpus=corpus, test=test
+        # case of Cooccurrences have not been computed already
+        corpus_size_query = (session.query(Node)
+                                    .filter(Node.typename=="DOCUMENT")
+                                    .filter(Node.parent_id == corpus.id)
+                            )
+
+        if start is not None:
+            #date_start = datetime.datetime.strptime ("2001-2-3 10:11:12", "%Y-%m-%d %H:%M:%S")
+            date_start = datetime.strptime (str(start), "%Y-%m-%d")
+            date_start_utc = date_start.strftime("%Y-%m-%d %H:%M:%S")
+            Start = aliased(NodeHyperdata)
+            corpus_size_query = (corpus_size_query.join( Start
+                                                       , Start.node_id == Node.id
+                                                       )
+                                                  .filter( Start.key == 'publication_date')
+                                                  .filter( Start.value_utc >= date_start_utc)
+                                )
+
+        if end is not None:
+            date_end = datetime.strptime (str(end), "%Y-%m-%d")
+            date_end_utc = date_end.strftime("%Y-%m-%d %H:%M:%S")
+            End = aliased(NodeHyperdata)
+            corpus_size_query = (corpus_size_query.join( End
+                                                       , End.node_id == Node.id
+                                                       )
+                                                  .filter( End.key == 'publication_date')
+                                                  .filter( End.value_utc <= date_end_utc )
+                                )
+
+        if corpus_size_query.count() > 400:
+            scheduled(countCooccurrences)( corpus_id=corpus.id
                                     #, field1="ngrams", field2="ngrams"
                                          , start=start , end =end
                                          , mapList_id=mapList_id , groupList_id=groupList_id
@@ -56,20 +87,21 @@ def get_graph( request=None , corpus=None
                                          , save_on_db = True
                                     #, limit=size
                                          )
+            return {'nodes':[], 'links':[1]} # Dic trick to inform user that graph is computed asynchronously
         else:
-            cooc_matrix = countCooccurrences( corpus=corpus, test=test
+            cooc_matrix = countCooccurrences( corpus_id=corpus.id
                                     #, field1="ngrams", field2="ngrams"
                                             , start=start , end =end
                                             , mapList_id=mapList_id , groupList_id=groupList_id
                                             , isMonopartite=True , threshold = threshold
-                                            , save_on_db = True
+                                            , save_on_db = False
                                     #, limit=size
                                             )
     else:
         print("Getting data for matrix %d", int(cooc_id))
         matrix = WeightedMatrix(int(cooc_id))
-        print(matrix)
+        #print(matrix)
         cooc_matrix = filterMatrix(matrix, mapList_id, groupList_id)
...
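The start/end handling above joins NodeHyperdata twice against the same Node rows, which is why aliased() is imported: each date bound gets its own alias of the hyperdata table. A condensed sketch of that pattern, assuming the project's session, Node and NodeHyperdata (the helper name documents_in_range is illustrative, not part of the codebase):

from datetime import datetime
from gargantext.util.db import session, aliased
from gargantext.models import Node, NodeHyperdata

def documents_in_range(corpus_id, start=None, end=None):
    # count DOCUMENT nodes of a corpus, optionally bounded by publication_date
    query = (session.query(Node)
                    .filter(Node.typename == "DOCUMENT")
                    .filter(Node.parent_id == corpus_id))
    if start is not None:
        Start = aliased(NodeHyperdata)          # first join on the hyperdata table
        start_utc = datetime.strptime(str(start), "%Y-%m-%d").strftime("%Y-%m-%d %H:%M:%S")
        query = (query.join(Start, Start.node_id == Node.id)
                      .filter(Start.key == 'publication_date')
                      .filter(Start.value_utc >= start_utc))
    if end is not None:
        End = aliased(NodeHyperdata)            # second, independent join on the same table
        end_utc = datetime.strptime(str(end), "%Y-%m-%d").strftime("%Y-%m-%d %H:%M:%S")
        query = (query.join(End, End.node_id == Node.id)
                      .filter(End.key == 'publication_date')
                      .filter(End.value_utc <= end_utc))
    return query.count()

It is this count that the diff compares to 400 to decide between computing the matrix inline and scheduling it.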
@@ -139,8 +139,10 @@ class Graph(APIView):
                              , distance   = distance
                              , bridgeness = bridgeness
                              )

             # Test data length
-            if len(data['nodes']) > 0 and len(data['links']) > 0:
+            if len(data['nodes']) > 1 and len(data['links']) > 1 :
                 # normal case --------------------------------
                 if format_ == 'json':
                     return JsonHttpResponse(
@@ -148,6 +150,15 @@ class Graph(APIView):
                         status=200
                         )
                 # --------------------------------------------
+            elif len(data['nodes']) == 0 and len(data['links']) == 1 :
+                # async data case
+                return JsonHttpResponse({
+                    'msg': '''Async graph generation
+                              Wait a while and discover your graph
+                              http://%sgargantext.org/projects/%d/corpora/%d/myGraph
+                           ''' % ("dev.", corpus.parent_id, corpus.id),
+                    }, status=400)
             else:
                 # empty data case
                 return JsonHttpResponse({
@@ -155,6 +166,7 @@ class Graph(APIView):
                     No cooccurences found in this corpus for the words of this maplist
                     (maybe add more terms to the maplist?)''',
                     }, status=400)

         else:
             # parameters error case
             return JsonHttpResponse({
@@ -166,7 +178,8 @@ class Graph(APIView):
                 }, status=400)

         # for any other errors that we forgot to test
-        except Exception as e:
+        except Exception as error:
+            print(error)
             return JsonHttpResponse({
-                'msg' : 'Unknown error (showing the trace):\n%s' % "\n".join(format_tb(e.__traceback__))
+                'msg' : 'Unknown error (showing the trace):\n%s' % "\n".join(format_tb(error.__traceback__))
                 }, status=400)
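Taken together, the two diffs implement a small contract between get_graph and the REST view: for a large corpus the cooccurrence count is scheduled on a worker and get_graph returns the sentinel {'nodes': [], 'links': [1]}, which the view recognises and turns into an "async graph generation" message instead of graph data. A stripped-down sketch of that round trip, using plain functions with illustrative names (no Django or Celery involved, values taken from the diff above):

ASYNC_SENTINEL  = {'nodes': [], 'links': [1]}   # the "dic trick" returned by get_graph
ASYNC_THRESHOLD = 400                           # document count above which work is deferred

def build_graph(corpus_size, compute_now, schedule_later):
    if corpus_size > ASYNC_THRESHOLD:
        schedule_later()                        # e.g. scheduled(countCooccurrences)(...)
        return ASYNC_SENTINEL                   # tells the caller the graph is not ready yet
    return compute_now()                        # small corpus: compute inline

def respond(data):
    if len(data['nodes']) > 1 and len(data['links']) > 1:
        return ('graph', data)                  # normal case: real graph data
    if len(data['nodes']) == 0 and len(data['links']) == 1:
        return ('async', 'graph is being computed, come back later')
    return ('empty', 'no cooccurrences found (or bad parameters)')

# example: a large corpus triggers the asynchronous path
print(respond(build_graph(1200,
                          compute_now=lambda: {'nodes': [], 'links': []},
                          schedule_later=lambda: None)))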