Commit 94e4f930 authored by delanoe's avatar delanoe

[GRAPH] Now graph as json is saved according to its distance as key (needs to...

[GRAPH] The graph JSON is now saved under its distance as key (the bridgeness parameter still needs to be added to the key).
parent cc0cecce
......@@ -392,7 +392,7 @@ DEFAULT_N_DOCS_HAVING_NGRAM = 5
# Graph constraints to compute the graph:
# Modes: live graph generation, graph asynchronously computed or errors detected
# here are the maximum size of corpus and maplist required to compute the graph
graph_constraints = {'corpusMax' : 100
graph_constraints = {'corpusMax' : 500
,'corpusMin' : 40
,'mapList' : 50
}
......@@ -19,9 +19,13 @@ def filterMatrix(matrix, mapList_id, groupList_id):
cooc = matrix & (mapList * group_list)
return cooc
# computeGraph
def cooc2graph( cooc_id, cooc_matrix, field1="ngrams", field2="ngrams", distance=None, bridgeness=None):
print("GRAPH#%d ... Computing cooccurrences." % (cooc_id))
# Check if already computed cooc
# (cooc_id, cooc) = count(countCooccurrences)
print("GRAPH#%d ... Clustering with distance %s ." % (cooc_id,distance))
G, partition, ids, weight = clusterByDistances ( cooc_matrix
, field1="ngrams", field2="ngrams"
......@@ -36,13 +40,16 @@ def cooc2graph( cooc_id, cooc_matrix, field1="ngrams", field2="ngrams", distance
node.hyperdata[distance] = dict()
node.hyperdata[distance]["data"] = data
node.save_hyperdata()
session.commit()
print("GRAPH#%d ... Returning data as json." % cooc_id)
return data
@shared_task
def countCooccurrences( corpus_id=None , test= False
def countCooccurrences( corpus_id=None , test= False
, field1='ngrams' , field2='ngrams'
, start=None , end=None
, mapList_id=None , groupList_id=None
......@@ -70,9 +77,6 @@ def countCooccurrences( corpus_id=None , test= False
'''
# TODO : add hyperdata here
# Security test
field1,field2 = str(field1), str(field2)
# Parameters to save in hyperdata of the Node Cooc
parameters = dict()
parameters['field1'] = field1
......@@ -81,10 +85,10 @@ def countCooccurrences( corpus_id=None , test= False
# Get corpus as Python object
corpus = session.query(Node).filter(Node.id==corpus_id).first()
# Get node
# Get node of the Graph
if not coocNode_id:
coocNode_id0 = ( session.query( Node.id )
coocNode_id = ( session.query( Node.id )
.filter( Node.typename == "COOCCURRENCES"
, Node.name == "GRAPH EXPLORER"
, Node.parent_id == corpus.id
......@@ -101,8 +105,8 @@ def countCooccurrences( corpus_id=None , test= False
session.commit()
coocNode_id = coocNode.id
else :
coocNode_id = coocNode_id[0]
coocNode_id = int(coocNode_id[0])
if reset == True :
session.query( NodeNgramNgram ).filter( NodeNgramNgram.node_id == coocNode_id ).delete()
session.commit()
......@@ -233,18 +237,19 @@ def countCooccurrences( corpus_id=None , test= False
matrix = WeightedMatrix(cooc_query)
print("Node #%d Filtering the matrix with Map and Group Lists." % coocNode_id)
print("GRAPH #%s Filtering the matrix with Map and Group Lists." % coocNode_id)
cooc = filterMatrix(matrix, mapList_id, groupList_id)
parameters['MapList_id'] = str(mapList_id)
parameters['GroupList_id'] = str(groupList_id)
if save_on_db:
# Saving the cooccurrences
cooc.save(coocNode_id)
print("Node Cooccurrence Matrix saved")
print("GRAPH#%s ... Node Cooccurrence Matrix saved" % coocNode_id)
# Saving the parameters
print("Saving parameters in Node %d" % coocNode_id)
print("GRAPH#%s ... Parameters saved in Node." % coocNode_id)
coocNode = session.query(Node).filter(Node.id==coocNode_id).first()
coocNode.hyperdata[distance] = dict()
coocNode.hyperdata[distance]["parameters"] = parameters
......@@ -252,8 +257,7 @@ def countCooccurrences( corpus_id=None , test= False
session.commit()
data = cooc2graph(coocNode.id, cooc, distance=distance, bridgeness=bridgeness)
print(data)
# Log message
return data
else:
data = cooc2graph(coocNode_id, cooc, distance=distance)
......
This diff is collapsed.
......@@ -3,59 +3,13 @@
from gargantext.util.db import session
from gargantext.models.nodes import Node
from graph.graph import get_graph
from graph.utils import compress_graph, format_html
from gargantext.util.http import APIView, APIException\
, JsonHttpResponse, requires_auth
from gargantext.constants import graph_constraints
from traceback import format_tb
def compress_graph(graphdata):
    """
    Shrink a graph dict in place so that it is cheaper to send over the net.

    graph data is usually a dict with 2 slots:
      "nodes": [{"id":4103, "type":"terms", "attributes":{"clust_default": 0}, "size":29, "label":"regard"},...]
      "links": [{"t": 998,"s": 768,"w": 0.0425531914893617},...]

    Two reductions are applied:
      - each link's weight is formatted with 3 decimals
      - node properties get shorter names (eg: s/clust_default/cl/)

    result format:
      "nodes": [{"id":4103, "at":{"cl": 0}, "s":29, "lb":"regard"},...]
      "links": [{"t": 998,"s": 768,"w": "0.043"},...]

    Note that format() returns a string, so every weight comes back as a
    str (rounded, not truncated).

    Node properties that are absent are simply left out of the result
    instead of raising KeyError; a node without "type" is treated like
    the default "terms" type.

    The same (mutated) graphdata object is returned.
    """
    for link in graphdata['links']:
        link['w'] = format(link['w'], '.3f')   # keep only 3 decimals

    # long name -> short name, applied in this order on every node
    renamed_keys = (('label', 'lb'), ('attributes', 'at'), ('size', 's'))

    for node in graphdata['nodes']:
        for long_key, short_key in renamed_keys:
            if long_key in node:
                node[short_key] = node.pop(long_key)

        attributes = node.get('at')
        if isinstance(attributes, dict) and 'clust_default' in attributes:
            attributes['cl'] = attributes.pop('clust_default')

        # "terms" is the default type for our format: so we don't need it
        node_type = node.pop('type', 'terms')
        if node_type != 'terms':
            node['t'] = node_type

    return graphdata
def format_html(link):
    """
    Build an html link adapted to our json message format.

    The link is used both as the href target and as the visible text.
    It is HTML-escaped first (quotes included), because the href
    attribute is delimited by single quotes: an unescaped quote or
    angle bracket in the link would otherwise break out of the markup.
    """
    # local import: keeps the function self-contained
    from html import escape

    safe_link = escape(str(link), quote=True)
    return "<a class='msglink' href='%s'>%s</a>" % (safe_link, safe_link)
# TODO check authentication
class Graph(APIView):
'''
REST part for graphs.
......@@ -94,8 +48,7 @@ class Graph(APIView):
type_ = str(request.GET.get ('type' , 'node_link' ))
distance = str(request.GET.get ('distance' , 'conditional'))
# Get default value if no map list
# Get default map List of corpus
if mapList_id == 0 :
mapList_id = ( session.query ( Node.id )
.filter( Node.typename == "MAPLIST"
......@@ -107,7 +60,6 @@ class Graph(APIView):
mapList_id = mapList_id[0]
if mapList_id == None :
# todo add as an error msg ?
raise ValueError("MAPLIST node needed for cooccurrences")
......@@ -135,23 +87,13 @@ class Graph(APIView):
try:
# Check if parameters are accepted
if (field1 in accepted_field1) and (field2 in accepted_field2):
if start is not None and end is not None :
data = get_graph( corpus=corpus, cooc_id = cooc_id
#, field1=field1 , field2=field2
, mapList_id = mapList_id , groupList_id = groupList_id
, start=start , end=end
, threshold =threshold , distance=distance
, saveOnly=saveOnly
)
else:
data = get_graph( corpus = corpus, cooc_id = cooc_id
#, field1=field1, field2=field2
, mapList_id = mapList_id , groupList_id = groupList_id
, threshold = threshold
, distance = distance
, bridgeness = bridgeness
, saveOnly=saveOnly
)
data = get_graph( corpus=corpus, cooc_id = cooc_id
, field1=field1 , field2=field2
, mapList_id = mapList_id , groupList_id = groupList_id
, start=start , end=end
, threshold =threshold , distance=distance
, saveOnly=saveOnly
)
# data :: Either (Dic Nodes Links) (Dic State Length)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment