Commit d071736a authored by delanoe's avatar delanoe

[FEAT] Async graph working (still needs to know the parameters in myGraph; needs refactoring)

parent 3ce5f459
......@@ -9,6 +9,7 @@ from sqlalchemy import desc, asc, or_, and_
#import inspect
import datetime
from celery import shared_task
def filterMatrix(matrix, mapList_id, groupList_id):
mapList = UnweightedList( mapList_id )
......@@ -17,7 +18,8 @@ def filterMatrix(matrix, mapList_id, groupList_id):
return cooc
def countCooccurrences( corpus=None , test= False
@shared_task
def countCooccurrences( corpus_id=None , test= False
, field1='ngrams' , field2='ngrams'
, start=None , end=None
, mapList_id=None , groupList_id=None
......@@ -47,8 +49,12 @@ def countCooccurrences( corpus=None , test= False
# Security test
field1,field2 = str(field1), str(field2)
# Get corpus as Python object
corpus = session.query(Node).filter(Node.id==corpus_id).first()
# Get node
if not coocNode_id:
coocNode_id0 = ( session.query( Node.id )
.filter( Node.typename == "COOCCURRENCES"
, Node.name == "GRAPH EXPLORER"
......
# Gargantext lib
from gargantext.util.db import session
from gargantext.util.db import session, aliased
from gargantext.util.lists import WeightedMatrix, UnweightedList, Translations
from gargantext.util.http import JsonHttpResponse
from gargantext.models import Node, Ngram, NodeNgram, NodeNgramNgram
from gargantext.models import Node, Ngram, NodeNgram, NodeNgramNgram, NodeHyperdata
#from gargantext.util.toolchain.ngram_coocs import compute_coocs
from graph.cooccurrences import countCooccurrences, filterMatrix
from graph.distances import clusterByDistances
from graph.bridgeness import filterByBridgeness
from gargantext.util.scheduling import scheduled
from datetime import datetime
def get_graph( request=None , corpus=None
, test= False
, field1='ngrams' , field2='ngrams'
, mapList_id = None , groupList_id = None
, cooc_id=None , type='node_link'
......@@ -37,18 +40,46 @@ def get_graph( request=None , corpus=None
'''
from datetime import datetime
before_cooc = datetime.now()
# TODO: change the test here (it is currently always true)
# to something like "if cooc.status threshold == required_threshold
# and group.creation_time < cooc.creation_time":
# if False => read and give to clusterByDistances
# if True => compute and give to clusterByDistances <==
if cooc_id == None:
if test = True:
cooc_matrix = countCooccurrences( corpus=corpus, test=test
# case where the cooccurrences have not been computed yet
corpus_size_query = (session.query(Node)
.filter(Node.typename=="DOCUMENT")
.filter(Node.parent_id == corpus.id)
)
if start is not None:
#date_start = datetime.datetime.strptime ("2001-2-3 10:11:12", "%Y-%m-%d %H:%M:%S")
date_start = datetime.strptime (str(start), "%Y-%m-%d")
date_start_utc = date_start.strftime("%Y-%m-%d %H:%M:%S")
Start=aliased(NodeHyperdata)
corpus_size_query = (corpus_size_query.join( Start
, Start.node_id == Node.id
)
.filter( Start.key == 'publication_date')
.filter( Start.value_utc >= date_start_utc)
)
if end is not None:
date_end = datetime.strptime (str(end), "%Y-%m-%d")
date_end_utc = date_end.strftime("%Y-%m-%d %H:%M:%S")
End=aliased(NodeHyperdata)
corpus_size_query = (corpus_size_query.join( End
, End.node_id == Node.id
)
.filter( End.key == 'publication_date')
.filter( End.value_utc <= date_end_utc )
)
if corpus_size_query.count() > 400:
scheduled(countCooccurrences)( corpus_id=corpus.id
#, field1="ngrams", field2="ngrams"
, start=start , end =end
, mapList_id=mapList_id , groupList_id=groupList_id
......@@ -56,20 +87,21 @@ def get_graph( request=None , corpus=None
, save_on_db = True
#, limit=size
)
return {'nodes':[], 'links':[1]} # Dic trick to inform user that graph is computed asynchronously
else:
cooc_matrix = countCooccurrences( corpus=corpus, test=test
cooc_matrix = countCooccurrences( corpus_id=corpus.id
#, field1="ngrams", field2="ngrams"
, start=start , end =end
, mapList_id=mapList_id , groupList_id=groupList_id
, isMonopartite=True , threshold = threshold
, save_on_db = True
, save_on_db = False
#, limit=size
)
else:
print("Getting data for matrix %d", int(cooc_id))
matrix = WeightedMatrix(int(cooc_id))
print(matrix)
#print(matrix)
cooc_matrix = filterMatrix(matrix, mapList_id, groupList_id)
......
......@@ -139,8 +139,10 @@ class Graph(APIView):
, distance = distance
, bridgeness = bridgeness
)
# Test data length
if len(data['nodes']) > 0 and len(data['links']) > 0:
if len(data['nodes']) > 1 and len(data['links']) > 1 :
# normal case --------------------------------
if format_ == 'json':
return JsonHttpResponse(
......@@ -148,6 +150,15 @@ class Graph(APIView):
status=200
)
# --------------------------------------------
elif len(data['nodes']) == 0 and len(data['links']) == 1 :
# async data case
return JsonHttpResponse({
'msg': '''Async graph generation
Wait a while and discover your graph
http://%sgargantext.org/projects/%d/corpora/%d/myGraph
''' % ("dev.", corpus.parent_id, corpus.id),
}, status=400)
else:
# empty data case
return JsonHttpResponse({
......@@ -155,6 +166,7 @@ class Graph(APIView):
No cooccurences found in this corpus for the words of this maplist
(maybe add more terms to the maplist?)''',
}, status=400)
else:
# parameters error case
return JsonHttpResponse({
......@@ -166,7 +178,8 @@ class Graph(APIView):
}, status=400)
# for any other errors that we forgot to test
except Exception as e:
except Exception as error:
print(error)
return JsonHttpResponse({
'msg' : 'Unknown error (showing the trace):\n%s' % "\n".join(format_tb(e.__traceback__))
'msg' : 'Unknown error (showing the trace):\n%s' % "\n".join(format_tb(error.__traceback__))
}, status=400)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment