Commit bb1e553b authored by delanoe's avatar delanoe

[GRAPH] Now graph as json is saved according to its distance as key (needs to...

[GRAPH] The graph JSON is now saved under its distance as key (the bridgeness parameter still needs to be added as a key).
parent 24f4d6a3
...@@ -392,7 +392,7 @@ DEFAULT_N_DOCS_HAVING_NGRAM = 5 ...@@ -392,7 +392,7 @@ DEFAULT_N_DOCS_HAVING_NGRAM = 5
# Graph constraints to compute the graph: # Graph constraints to compute the graph:
# Modes: live graph generation, graph asynchronously computed or errors detected # Modes: live graph generation, graph asynchronously computed or errors detected
# here are the maximum size of corpus and maplist required to compute the graph # here are the maximum size of corpus and maplist required to compute the graph
graph_constraints = {'corpusMax' : 100 graph_constraints = {'corpusMax' : 500
,'corpusMin' : 40 ,'corpusMin' : 40
,'mapList' : 50 ,'mapList' : 50
} }
...@@ -19,9 +19,13 @@ def filterMatrix(matrix, mapList_id, groupList_id): ...@@ -19,9 +19,13 @@ def filterMatrix(matrix, mapList_id, groupList_id):
cooc = matrix & (mapList * group_list) cooc = matrix & (mapList * group_list)
return cooc return cooc
# computeGraph
def cooc2graph( cooc_id, cooc_matrix, field1="ngrams", field2="ngrams", distance=None, bridgeness=None): def cooc2graph( cooc_id, cooc_matrix, field1="ngrams", field2="ngrams", distance=None, bridgeness=None):
print("GRAPH#%d ... Computing cooccurrences." % (cooc_id))
# Check if already computed cooc
# (cooc_id, cooc) = count(countCooccurrences)
print("GRAPH#%d ... Clustering with distance %s ." % (cooc_id,distance)) print("GRAPH#%d ... Clustering with distance %s ." % (cooc_id,distance))
G, partition, ids, weight = clusterByDistances ( cooc_matrix G, partition, ids, weight = clusterByDistances ( cooc_matrix
, field1="ngrams", field2="ngrams" , field1="ngrams", field2="ngrams"
...@@ -36,13 +40,16 @@ def cooc2graph( cooc_id, cooc_matrix, field1="ngrams", field2="ngrams", distance ...@@ -36,13 +40,16 @@ def cooc2graph( cooc_id, cooc_matrix, field1="ngrams", field2="ngrams", distance
node.hyperdata[distance] = dict() node.hyperdata[distance] = dict()
node.hyperdata[distance]["data"] = data node.hyperdata[distance]["data"] = data
node.save_hyperdata() node.save_hyperdata()
session.commit() session.commit()
print("GRAPH#%d ... Returning data as json." % cooc_id)
return data return data
@shared_task @shared_task
def countCooccurrences( corpus_id=None , test= False def countCooccurrences( corpus_id=None , test= False
, field1='ngrams' , field2='ngrams' , field1='ngrams' , field2='ngrams'
, start=None , end=None , start=None , end=None
, mapList_id=None , groupList_id=None , mapList_id=None , groupList_id=None
...@@ -70,9 +77,6 @@ def countCooccurrences( corpus_id=None , test= False ...@@ -70,9 +77,6 @@ def countCooccurrences( corpus_id=None , test= False
''' '''
# TODO : add hyperdata here # TODO : add hyperdata here
# Security test
field1,field2 = str(field1), str(field2)
# Parameters to save in hyperdata of the Node Cooc # Parameters to save in hyperdata of the Node Cooc
parameters = dict() parameters = dict()
parameters['field1'] = field1 parameters['field1'] = field1
...@@ -81,10 +85,10 @@ def countCooccurrences( corpus_id=None , test= False ...@@ -81,10 +85,10 @@ def countCooccurrences( corpus_id=None , test= False
# Get corpus as Python object # Get corpus as Python object
corpus = session.query(Node).filter(Node.id==corpus_id).first() corpus = session.query(Node).filter(Node.id==corpus_id).first()
# Get node # Get node of the Graph
if not coocNode_id: if not coocNode_id:
coocNode_id0 = ( session.query( Node.id ) coocNode_id = ( session.query( Node.id )
.filter( Node.typename == "COOCCURRENCES" .filter( Node.typename == "COOCCURRENCES"
, Node.name == "GRAPH EXPLORER" , Node.name == "GRAPH EXPLORER"
, Node.parent_id == corpus.id , Node.parent_id == corpus.id
...@@ -101,8 +105,8 @@ def countCooccurrences( corpus_id=None , test= False ...@@ -101,8 +105,8 @@ def countCooccurrences( corpus_id=None , test= False
session.commit() session.commit()
coocNode_id = coocNode.id coocNode_id = coocNode.id
else : else :
coocNode_id = coocNode_id[0] coocNode_id = int(coocNode_id[0])
if reset == True : if reset == True :
session.query( NodeNgramNgram ).filter( NodeNgramNgram.node_id == coocNode_id ).delete() session.query( NodeNgramNgram ).filter( NodeNgramNgram.node_id == coocNode_id ).delete()
session.commit() session.commit()
...@@ -233,18 +237,19 @@ def countCooccurrences( corpus_id=None , test= False ...@@ -233,18 +237,19 @@ def countCooccurrences( corpus_id=None , test= False
matrix = WeightedMatrix(cooc_query) matrix = WeightedMatrix(cooc_query)
print("Node #%d Filtering the matrix with Map and Group Lists." % coocNode_id) print("GRAPH #%s Filtering the matrix with Map and Group Lists." % coocNode_id)
cooc = filterMatrix(matrix, mapList_id, groupList_id) cooc = filterMatrix(matrix, mapList_id, groupList_id)
parameters['MapList_id'] = str(mapList_id) parameters['MapList_id'] = str(mapList_id)
parameters['GroupList_id'] = str(groupList_id) parameters['GroupList_id'] = str(groupList_id)
if save_on_db: if save_on_db:
# Saving the cooccurrences
cooc.save(coocNode_id) cooc.save(coocNode_id)
print("Node Cooccurrence Matrix saved") print("GRAPH#%s ... Node Cooccurrence Matrix saved" % coocNode_id)
# Saving the parameters # Saving the parameters
print("Saving parameters in Node %d" % coocNode_id) print("GRAPH#%s ... Parameters saved in Node." % coocNode_id)
coocNode = session.query(Node).filter(Node.id==coocNode_id).first() coocNode = session.query(Node).filter(Node.id==coocNode_id).first()
coocNode.hyperdata[distance] = dict() coocNode.hyperdata[distance] = dict()
coocNode.hyperdata[distance]["parameters"] = parameters coocNode.hyperdata[distance]["parameters"] = parameters
...@@ -252,8 +257,7 @@ def countCooccurrences( corpus_id=None , test= False ...@@ -252,8 +257,7 @@ def countCooccurrences( corpus_id=None , test= False
session.commit() session.commit()
data = cooc2graph(coocNode.id, cooc, distance=distance, bridgeness=bridgeness) data = cooc2graph(coocNode.id, cooc, distance=distance, bridgeness=bridgeness)
print(data) return data
# Log message
else: else:
data = cooc2graph(coocNode_id, cooc, distance=distance) data = cooc2graph(coocNode_id, cooc, distance=distance)
......
This diff is collapsed.
...@@ -3,59 +3,13 @@ ...@@ -3,59 +3,13 @@
from gargantext.util.db import session from gargantext.util.db import session
from gargantext.models.nodes import Node from gargantext.models.nodes import Node
from graph.graph import get_graph from graph.graph import get_graph
from graph.utils import compress_graph, format_html
from gargantext.util.http import APIView, APIException\ from gargantext.util.http import APIView, APIException\
, JsonHttpResponse, requires_auth , JsonHttpResponse, requires_auth
from gargantext.constants import graph_constraints from gargantext.constants import graph_constraints
from traceback import format_tb from traceback import format_tb
def compress_graph(graphdata):
    """
    Shrink a node/link graph dict, in place, before sending it over the net.

    graph data is usually a dict with 2 slots:
      "nodes": [{"id":4103, "type":"terms", "attributes":{"clust_default": 0}, "size":29, "label":"regard"},...]
      "links": [{"t": 998,"s": 768,"w": 0.0425531914893617},...]

    The size is reduced by:
      - formatting each link's weight with 3 decimals (rounded; the value
        becomes a string, e.g. 0.0425531914893617 -> "0.043")
      - using shorter names for node properties (eg: s/clust_default/cl/)
      - dropping the node type when it is "terms" (the default type for
        our format); any other type is kept under the short key "t"

    result format:
      "nodes": [{"id":4103, "at":{"cl": 0}, "s":29, "lb":"regard"},...]
      "links": [{"t": 998,"s": 768,"w": "0.043"},...]

    The input dict is mutated and also returned. Keys that are absent
    (or already shortened by a previous call) are left alone, so calling
    this twice on the same data is harmless.

    Raises KeyError if graphdata has no 'links' or 'nodes' slot.
    """
    for link in graphdata['links']:
        weight = link.get('w')
        # a str weight was already formatted by a previous pass:
        # format(str, '.3f') would raise ValueError
        if weight is not None and not isinstance(weight, str):
            link['w'] = format(weight, '.3f')   # keep only 3 decimals

    short_names = (('label', 'lb'), ('attributes', 'at'), ('size', 's'))

    for node in graphdata['nodes']:
        for long_name, short_name in short_names:
            if long_name in node:
                node[short_name] = node.pop(long_name)

        attributes = node.get('at')
        if isinstance(attributes, dict) and 'clust_default' in attributes:
            attributes['cl'] = attributes.pop('clust_default')

        # "terms" is the default type for our format: so we don't need it
        node_type = node.pop('type', "terms")
        if node_type != "terms":
            node['t'] = node_type

    return graphdata
def format_html(link):
    """
    Build an html link adapted to our json message format.

    The link is used both as the href target and as the visible text.
    It is HTML-escaped first (quotes included) because it lands inside a
    single-quoted attribute: a raw "'", "&" or "<" would otherwise break
    the markup.

    Returns the "<a class='msglink' ...>" element as a string.
    """
    # local import: keeps this helper self-contained
    from html import escape

    safe_link = escape(str(link), quote=True)
    return "<a class='msglink' href='%s'>%s</a>" % (safe_link, safe_link)
# TODO check authentication
class Graph(APIView): class Graph(APIView):
''' '''
REST part for graphs. REST part for graphs.
...@@ -94,8 +48,7 @@ class Graph(APIView): ...@@ -94,8 +48,7 @@ class Graph(APIView):
type_ = str(request.GET.get ('type' , 'node_link' )) type_ = str(request.GET.get ('type' , 'node_link' ))
distance = str(request.GET.get ('distance' , 'conditional')) distance = str(request.GET.get ('distance' , 'conditional'))
# Get default value if no map list # Get default map List of corpus
if mapList_id == 0 : if mapList_id == 0 :
mapList_id = ( session.query ( Node.id ) mapList_id = ( session.query ( Node.id )
.filter( Node.typename == "MAPLIST" .filter( Node.typename == "MAPLIST"
...@@ -107,7 +60,6 @@ class Graph(APIView): ...@@ -107,7 +60,6 @@ class Graph(APIView):
mapList_id = mapList_id[0] mapList_id = mapList_id[0]
if mapList_id == None : if mapList_id == None :
# todo add as an error msg ?
raise ValueError("MAPLIST node needed for cooccurrences") raise ValueError("MAPLIST node needed for cooccurrences")
...@@ -135,23 +87,13 @@ class Graph(APIView): ...@@ -135,23 +87,13 @@ class Graph(APIView):
try: try:
# Check if parameters are accepted # Check if parameters are accepted
if (field1 in accepted_field1) and (field2 in accepted_field2): if (field1 in accepted_field1) and (field2 in accepted_field2):
if start is not None and end is not None : data = get_graph( corpus=corpus, cooc_id = cooc_id
data = get_graph( corpus=corpus, cooc_id = cooc_id , field1=field1 , field2=field2
#, field1=field1 , field2=field2 , mapList_id = mapList_id , groupList_id = groupList_id
, mapList_id = mapList_id , groupList_id = groupList_id , start=start , end=end
, start=start , end=end , threshold =threshold , distance=distance
, threshold =threshold , distance=distance , saveOnly=saveOnly
, saveOnly=saveOnly )
)
else:
data = get_graph( corpus = corpus, cooc_id = cooc_id
#, field1=field1, field2=field2
, mapList_id = mapList_id , groupList_id = groupList_id
, threshold = threshold
, distance = distance
, bridgeness = bridgeness
, saveOnly=saveOnly
)
# data :: Either (Dic Nodes Links) (Dic State Length) # data :: Either (Dic Nodes Links) (Dic State Length)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment