Commit 5e0e536f authored by Romain Loth's avatar Romain Loth

Merge testing (commit 'd9d93ae0') into romain-testing

(importing the graph advances in my branch)
parents 1e1fff88 5900edd5
......@@ -395,7 +395,7 @@ DEFAULT_N_DOCS_HAVING_NGRAM = 5
# Graph constraints to compute the graph:
# Modes: live graph generation, graph asynchronously computed or errors detected
# here are the maximum size of corpus and maplist required to compute the graph
graph_constraints = {'corpusMax' : 599
graph_constraints = {'corpusMax' : 100
,'corpusMin' : 40
,'mapList' : 50
......@@ -6,17 +6,14 @@ from rest_framework.views import APIView
from rest_framework.authentication import SessionAuthentication, BasicAuthentication
from rest_framework.permissions import IsAuthenticated
from gargantext.models import Node, Ngram, NodeNgram, NodeNodeNgram, NodeNode
from gargantext.constants import RESOURCETYPES, NODETYPES, get_resource
from gargantext.models import Node, Ngram, NodeNgram, NodeNodeNgram, NodeNode
from gargantext.util.db import session, delete, func, bulk_insert
from gargantext.util.db_cache import cache, or_
from gargantext.util.validation import validate
from gargantext.models import Node, Ngram, NodeNgram, NodeNodeNgram, NodeNode
from gargantext.constants import RESOURCETYPES, NODETYPES, get_resource
from gargantext.util.http import ValidationException, APIView, JsonHttpResponse, get_parameters
from gargantext.util.files import upload
from gargantext.util.db import session, delete, func, bulk_insert
from gargantext.util.http import ValidationException, APIView, JsonHttpResponse, get_parameters
from gargantext.util.scheduling import scheduled
from gargantext.util.validation import validate
Module Graph Explorer: from text to graph
## How to contribute ?
Some solutions:
1) please report to
2) fix with git repo and pull request
## Graph Explorer main
0) All of the Graph Explorer
1) Main view of the graph explorer:
2) Data are retrieved as REST:
3) Graph is generated ( through different steps
-> Graph Explorer
-> My graph View
-> REST API to get Data
2) Graph is generated ( through different steps
a) check the constraints (graph_constraints) in gargantext/constants.py
b) Cooccurrences are computed (live or asynchronously):
c) Threshold and distances:
d) clustering:
c) links between communities:
b) Data are retrieved as REST check REST parameters
get_graph: check Graph parameters
compute_graph: compute graph
1) Cooccurrences are computed (live or asynchronously):
2) Threshold and distances:
3) clustering:
4) links between communities:
d) compress graph before returning it:
4) Additional features:
a) intersection of graphs:
## How to contribute ?
Some solutions:
1) please report to
2) fix with git repo and pull request
1) save parameters in hyperdata
2) graph explorer:
* save current graph
2) myGraphs view:
myGraphs view:
* progress bar
* Show already computed graphs vs to be computed with parameters
* show parameters
* copy / paste and change some parameters to generate new graph
......@@ -8,22 +8,27 @@ from networkx.readwrite import json_graph
def filterByBridgeness(G,partition,ids,weight,bridgeness,type,field1,field2):
What is bridgeness ?
Bridgeness = measure to control links (bridges) between communities.
# Data are stored in a dict(), (== hashmap by default for Python)
# Data are stored in a dict(), (== hashmap by default with Python)
data = dict()
if type == "node_link":
nodesB_dict = {}
for node_id in G.nodes():
nodesB_dict [ ids[node_id][1] ] = True
# TODO the query below is not optimized (do it do_distance).
the_label = session.query(Ngram.terms).filter(
the_label = ", ".join(the_label)
G.node[node_id]['label'] = the_label
G.node[node_id]['size'] = weight[node_id]
G.node[node_id]['type'] = ids[node_id][0].replace("ngrams","terms")
G.node[node_id]['attributes'] = { "clust_default": partition[node_id]} # new format
# G.add_edge(node, "cluster " + str(partition[node]), weight=3)
......@@ -65,12 +70,20 @@ def filterByBridgeness(G,partition,ids,weight,bridgeness,type,field1,field2):
if bridgeness > 0:
for c1 in com_link.keys():
for c2 in com_link[c1].keys():
index = round(bridgeness*len(com_link[c1][c2]) / (len(com_ids[c1]) + len(com_ids[c2])))
index = round(
bridgeness * len( com_link[c1][c2] )
/ #----------------------------------#
( len(com_ids[c1]) + len(com_ids[c2] ))
#print((c1,len(com_ids[c1])), (c2,len(com_ids[c2])), index)
if index > 0:
for link in sorted(com_link[c1][c2], key=lambda x: x[2], reverse=True)[:index]:
for link in sorted( com_link[c1][c2]
, key=lambda x: x[2]
, reverse=True)[:index]:
#print(c1, c2, link[2])
info = {"s": link[0], "t": link[1], "w": link[2]}
from gargantext.models import Node, Ngram, NodeNgram, NodeNgramNgram, \
NodeHyperdata, HyperdataKey
from gargantext.util.db import session, aliased, bulk_insert, func
from gargantext.util.db import session, aliased, func
from gargantext.util.lists import WeightedMatrix, UnweightedList, Translations
from sqlalchemy import desc, asc, or_, and_
from datetime import datetime
#import inspect
import datetime
from celery import shared_task
def filterMatrix(matrix, mapList_id, groupList_id):
mapList = UnweightedList( mapList_id )
mapList = UnweightedList( mapList_id )
group_list = Translations ( groupList_id )
cooc = matrix & (mapList * group_list)
return cooc
def countCooccurrences( corpus_id=None , test= False
def countCooccurrences( corpus_id=None , cooc_id=None
, field1='ngrams' , field2='ngrams'
, start=None , end=None
, mapList_id=None , groupList_id=None
, distance=None , bridgeness=None
, n_min=1, n_max=None , limit=1000
, coocNode_id=None , reset=True
, isMonopartite=True , threshold = 3
, save_on_db= False, # just return the WeightedMatrix,
# (don't write to DB)
, save_on_db= True , reset=True
Compute the cooccurrence matrix and save it, returning NodeNgramNgram.node_id
For the moment list of paramters are not supported because, lists need to
For the moment, lists of parameters are not supported because the lists need to
be merged before.
corpus :: Corpus
mapList_id :: Int
groupList_id :: Int
For the moment, start and end are simple, only year is implemented yet
start :: TimeStamp -- example: '2010-05-30 02:00:00+02'
end :: TimeStamp
limit :: Int
# TODO : add hyperdata here
# Security test
field1,field2 = str(field1), str(field2)
# Parameters to save in hyperdata of the Node Cooc
# FIXME remove the lines below after factorization of parameters
parameters = dict()
parameters['field1'] = field1
parameters['field2'] = field2
......@@ -57,17 +47,17 @@ def countCooccurrences( corpus_id=None , test= False
# Get corpus as Python object
corpus = session.query(Node).filter(
# Get node
if not coocNode_id:
coocNode_id0 = ( session.query( )
# Get node of the Graph
if not cooc_id:
cooc_id = ( session.query( )
.filter( Node.typename == "COOCCURRENCES"
, Node.parent_id ==
if not coocNode_id:
if not cooc_id:
coocNode = corpus.add_child(
typename = "COOCCURRENCES",
name = "GRAPH (in corpus %s)" %
......@@ -75,12 +65,16 @@ def countCooccurrences( corpus_id=None , test= False
coocNode_id =
cooc_id =
else :
coocNode_id = coocNode_id[0]
cooc_id = int(cooc_id[0])
print("GRAPH #%s ... Loading cooccurrences computed already." % cooc_id)
cooc = session.query( NodeNgramNgram.ngram1_id, NodeNgramNgram.ngram2_id, NodeNgramNgram.weight ).filter( NodeNgramNgram.node_id == cooc_id ).all()
if reset == True :
session.query( NodeNgramNgram ).filter( NodeNgramNgram.node_id == coocNode_id ).delete()
session.query( NodeNgramNgram ).filter( NodeNgramNgram.node_id == cooc_id ).delete()
......@@ -161,8 +155,8 @@ def countCooccurrences( corpus_id=None , test= False
# Cooc between the dates start and end
if start is not None:
#date_start = datetime.datetime.strptime ("2001-2-3 10:11:12", "%Y-%m-%d %H:%M:%S")
# TODO : more complexe date format here.
date_start = datetime.datetime.strptime (str(start), "%Y-%m-%d")
# TODO : more precise date format here (day is smaller grain actually).
date_start = datetime.strptime (str(start), "%Y-%m-%d")
date_start_utc = date_start.strftime("%Y-%m-%d %H:%M:%S")
......@@ -177,8 +171,8 @@ def countCooccurrences( corpus_id=None , test= False
if end is not None:
# TODO : more complexe date format here.
date_end = datetime.datetime.strptime (str(end), "%Y-%m-%d")
# TODO : more precise date format here (day is smaller grain actually).
date_end = datetime.strptime (str(end), "%Y-%m-%d")
date_end_utc = date_end.strftime("%Y-%m-%d %H:%M:%S")
......@@ -208,22 +202,30 @@ def countCooccurrences( corpus_id=None , test= False
#cooc_query = cooc_query.order_by(desc('cooc_score'))
matrix = WeightedMatrix(cooc_query)
print("GRAPH #%s Filtering the matrix with Map and Group Lists." % cooc_id)
cooc = filterMatrix(matrix, mapList_id, groupList_id)
parameters['MapList_id'] = str(mapList_id)
parameters['GroupList_id'] = str(mapList_id)
parameters['MapList_id'] = str(mapList_id)
parameters['GroupList_id'] = str(groupList_id)
# TODO factorize savings on db
if save_on_db:
# Saving cooc Matrix
# Saving the cooccurrences
print("GRAPH #%s ... Node Cooccurrence Matrix saved" % cooc_id)
# Saving the parameters
coocNode = session.query(Node).filter(
coocNode.hyperdata = parameters
print("GRAPH #%s ... Parameters saved in Node." % cooc_id)
coocNode = session.query(Node).filter(
coocNode.hyperdata[distance] = dict()
coocNode.hyperdata[distance]["parameters"] = parameters
# Log message
print("Cooccurrence Matrix saved")
#data = cooc2graph(, cooc, distance=distance, bridgeness=bridgeness)
#return data
return cooc
return(, cooc)
......@@ -16,16 +16,16 @@ import networkx as nx
def clusterByDistances( cooc_matrix
, field1=None, field2=None
, distance='conditional'):
, distance=None):
do_distance :: Coocs[nga, ngb => ccweight] -> (Graph, Partition, {ids}, {weight})
clusterByDistance :: Coocs[nga, ngb => ccweight] -> (Graph, Partition, {ids}, {weight})
# implicit global session
authorized = ['conditional', 'distributional', 'cosine']
if distance not in authorized:
distance = 'conditional'
raise ValueError("Distance must be in %s" % str(authorized))
matrix = defaultdict(lambda : defaultdict(float))
ids = defaultdict(lambda : defaultdict(int))
......@@ -4,28 +4,88 @@ from gargantext.util.lists import WeightedMatrix, UnweightedList, Transla
from gargantext.util.http import JsonHttpResponse
from gargantext.models import Node, Ngram, NodeNgram, NodeNgramNgram, NodeHyperdata
#from gargantext.util.toolchain.ngram_coocs import compute_coocs
from graph.cooccurrences import countCooccurrences, filterMatrix
from graph.cooccurrences import countCooccurrences
from graph.distances import clusterByDistances
from graph.bridgeness import filterByBridgeness
from gargantext.util.scheduling import scheduled
from gargantext.constants import graph_constraints
from datetime import datetime
from celery import shared_task
from datetime import datetime
def compute_graph( corpus_id=None , cooc_id=None
, field1='ngrams' , field2='ngrams'
, start=None , end=None
, mapList_id=None , groupList_id=None
, distance=None , bridgeness=None
, n_min=1, n_max=None , limit=1000
, isMonopartite=True , threshold = 3
, save_on_db= True , reset=True
) :
All steps to compute a graph:
1) count Cooccurrences (function countCooccurrences)
main parameters: threshold, isMonopartite
2) filter and cluster By Distances (function clusterByDistances)
main parameter: distance
TODO option clustering='louvain'
or 'percolation' or 'random walk' or ...
3) filter By Bridgeness (function filterByBridgeness)
main parameter: bridgeness
4) format the graph (formatGraph)
main parameter: format_
print("GRAPH # ... Computing cooccurrences.")
(cooc_id, cooc_matrix) = countCooccurrences( corpus_id=corpus_id, cooc_id=cooc_id
, field1=field1, field2=field2
, start=start , end =end
, mapList_id=mapList_id , groupList_id=groupList_id
, isMonopartite=True , threshold = threshold
, distance=distance , bridgeness=bridgeness
, save_on_db = True
print("GRAPH #%d ... Cooccurrences computed." % (cooc_id))
print("GRAPH #%d ... Clustering with %s distance." % (cooc_id,distance))
G, partition, ids, weight = clusterByDistances ( cooc_matrix
, field1="ngrams", field2="ngrams"
, distance=distance
print("GRAPH #%d ... Filtering by bridgeness %d." % (cooc_id, bridgeness))
data = filterByBridgeness(G,partition,ids,weight,bridgeness,"node_link",field1,field2)
print("GRAPH #%d ... Saving Graph in hyperdata as json." % cooc_id)
node = session.query(Node).filter( == cooc_id).first()
if node.hyperdata.get(distance, None) is None:
node.hyperdata[distance] = dict()
node.hyperdata[distance][bridgeness] = data
print("GRAPH #%d ... Returning data as json." % cooc_id)
return data
def get_graph( request=None , corpus=None
, field1='ngrams' , field2='ngrams'
, mapList_id = None , groupList_id = None
, cooc_id=None , type='node_link'
, start=None , end=None
, threshold=1
, distance='conditional'
, isMonopartite=True # By default, we compute terms/terms graph
, bridgeness=5
, saveOnly=None
#, size=1000
, distance='conditional', bridgeness=5
, threshold=1 , isMonopartite=True
, saveOnly=True
) :
Get_graph : main steps:
0) Check the parameters
......@@ -33,157 +93,141 @@ def get_graph( request=None , corpus=None
get_graph :: GraphParameters -> Either (Dic Nodes Links) (Dic State Length)
where type Length = Int
get_graph first checks the parameters and return either graph data or a dic with
get_graph first checks the parameters and returns either graph data or a dict with
state "type" with an integer to indicate the size of the parameter
(maybe we could add a String in that step to factor and give here the error message)
1) count Cooccurrences (function countCooccurrences)
main parameters: threshold
2) filter and cluster By Distances (function clusterByDistances)
main parameter: distance
3) filter By Bridgeness (function filterByBridgeness)
main parameter: bridgeness
4) format the graph (formatGraph)
main parameter: format_
1) compute_graph (see function above)
2) return graph
before_cooc =
# case of Cooccurrences have not been computed already
if cooc_id == None:
# case of mapList not big enough
# ==============================
# if we do not have any mapList_id already
if mapList_id is None:
mapList_id = session.query( == "MAPLIST").first()[0]
mapList_size_query = session.query(NodeNgram).filter(NodeNgram.node_id == mapList_id)
mapList_size = mapList_size_query.count()
if mapList_size < graph_constraints['mapList']:
# Do not compute the graph if mapList is not big enough
return {'state': "mapListError", "length" : mapList_size}
# Instantiate query for case of corpus not big enough
# ===================================================
corpus_size_query = (session.query(Node)
.filter(Node.parent_id ==
# filter by date if any start date
# --------------------------------
if start is not None:
#date_start = datetime.datetime.strptime ("2001-2-3 10:11:12", "%Y-%m-%d %H:%M:%S")
date_start = datetime.strptime (str(start), "%Y-%m-%d")
date_start_utc = date_start.strftime("%Y-%m-%d %H:%M:%S")
corpus_size_query = (corpus_size_query.join( Start
, Start.node_id ==
.filter( Start.key == 'publication_date')
.filter( Start.value_utc >= date_start_utc)
# filter by date if any end date
# --------------------------------
if end is not None:
date_end = datetime.strptime (str(end), "%Y-%m-%d")
date_end_utc = date_end.strftime("%Y-%m-%d %H:%M:%S")
corpus_size_query = (corpus_size_query.join( End
, End.node_id ==
.filter( End.key == 'publication_date')
.filter( End.value_utc <= date_end_utc )
# Case of graph has been computed already
if cooc_id is not None:
print("GRAPH#%d ... Loading data already computed." % int(cooc_id))
node = session.query(Node).filter( == cooc_id).first()
# Structure of the Node.hyperdata[distance][bridgeness]
# All parameters (but distance and bridgeness)
# are in Node.hyperdata["parameters"]
# Check distance of the graph
if node.hyperdata.get(distance, None) is not None:
graph = node.hyperdata[distance]
# Check bridgeness of the graph
if graph.get(str(bridgeness), None) is not None:
return graph[str(bridgeness)]
# Case of graph has not been computed already
# First, check the parameters
# Case of mapList not big enough
# ==============================
# if we do not have any mapList_id already
if mapList_id is None:
mapList_id = session.query( == "MAPLIST").first()[0]
mapList_size = session.query(NodeNgram).filter(NodeNgram.node_id == mapList_id).count()
if mapList_size < graph_constraints['mapList']:
# Do not compute the graph if mapList is not big enough
return {'state': "mapListError", "length" : mapList_size}
# Instantiate query for case of corpus not big enough
# ===================================================
corpus_size_query = (session.query(Node)
.filter(Node.parent_id ==
# Filter corpus by date if any start date
# ---------------------------------------
if start is not None:
#date_start = datetime.datetime.strptime ("2001-2-3 10:11:12", "%Y-%m-%d %H:%M:%S")
date_start = datetime.strptime (str(start), "%Y-%m-%d")
date_start_utc = date_start.strftime("%Y-%m-%d %H:%M:%S")
corpus_size_query = (corpus_size_query.join( Start
, Start.node_id ==
.filter( Start.key == 'publication_date')
.filter( Start.value_utc >= date_start_utc)
# Filter corpus by date if any end date
# -------------------------------------
if end is not None:
date_end = datetime.strptime (str(end), "%Y-%m-%d")
date_end_utc = date_end.strftime("%Y-%m-%d %H:%M:%S")
corpus_size_query = (corpus_size_query.join( End
, End.node_id ==
.filter( End.key == 'publication_date')
.filter( End.value_utc <= date_end_utc )
# Finally test if the size of the corpora is big enough
# --------------------------------
corpus_size = corpus_size_query.count()
if saveOnly is not None and saveOnly == "True":
scheduled(compute_graph)(, cooc_id=cooc_id
#, field1="ngrams", field2="ngrams"
, start=start , end =end
, mapList_id=mapList_id , groupList_id=groupList_id
, isMonopartite=True , threshold = threshold
, distance=distance , bridgeness=bridgeness
, save_on_db = True
#, limit=size
return {"state" : "saveOnly"}
elif corpus_size > graph_constraints['corpusMax']:
# Then compute cooc asynchronously with celery
scheduled(compute_graph)(, cooc_id=cooc_id
#, field1="ngrams", field2="ngrams"
, start=start , end =end
, mapList_id=mapList_id , groupList_id=groupList_id
, isMonopartite=True , threshold = threshold
, distance=distance , bridgeness=bridgeness
, save_on_db = True
#, limit=size
# Dict to inform user that corpus maximum is reached
# then graph is computed asynchronously
return {"state" : "corpusMax", "length" : corpus_size}
elif corpus_size <= graph_constraints['corpusMin']:
# Do not compute the graph if corpus is not big enough
return {"state" : "corpusMin", "length" : corpus_size}
# Finally test if the size of the corpora is big enough
# --------------------------------
corpus_size = corpus_size_query.count()
if saveOnly is not None and saveOnly == "True":
#, field1="ngrams", field2="ngrams"
, start=start , end =end
, mapList_id=mapList_id , groupList_id=groupList_id
, isMonopartite=True , threshold = threshold
, save_on_db = True
#, limit=size
return {"state" : "saveOnly"}
if corpus_size > graph_constraints['corpusMax']:
# Then compute cooc asynchronously with celery
#, field1="ngrams", field2="ngrams"
, start=start , end =end
, mapList_id=mapList_id , groupList_id=groupList_id
, isMonopartite=True , threshold = threshold
, save_on_db = True
#, limit=size
# Dic to inform user that corpus maximum is reached then
# graph is computed asynchronously
return {"state" : "corpusMax", "length" : corpus_size}
elif corpus_size <= graph_constraints['corpusMin']:
# Do not compute the graph if corpus is not big enough
return {"state" : "corpusMin", "length" : corpus_size}
# If graph_constraints are ok then compute the graph in live
cooc_matrix = countCooccurrences(
#, field1="ngrams", field2="ngrams"
, start=start , end =end
, mapList_id=mapList_id , groupList_id=groupList_id
, isMonopartite=True , threshold = threshold
, save_on_db = True
#, limit=size
print("Getting data for matrix %d", int(cooc_id))
matrix = WeightedMatrix(int(cooc_id))
cooc_matrix = filterMatrix(matrix, mapList_id, groupList_id)
# fyi
after_cooc =
print("... Cooccurrences took %f s." % (after_cooc - before_cooc).total_seconds())
# If graph_constraints are ok then compute the graph in live
data = compute_graph(, cooc_id=cooc_id
#, field1="ngrams", field2="ngrams"
, start=start , end =end
, mapList_id=mapList_id , groupList_id=groupList_id
, isMonopartite=True , threshold = threshold
, distance=distance , bridgeness=bridgeness
, save_on_db = True
#, limit=size
# case when 0 coocs are observed (usually b/c not enough ngrams in maplist)
if len(cooc_matrix.items) == 0:
print("GET_GRAPH: 0 coocs in matrix")
data = {'nodes':[], 'links':[]} # empty data
# normal case
G, partition, ids, weight = clusterByDistances ( cooc_matrix
, field1="ngrams", field2="ngrams"
, distance=distance
after_cluster =
print("... Clustering took %f s." % (after_cluster - after_cooc).total_seconds())
data = filterByBridgeness(G,partition,ids,weight,bridgeness,type,field1,field2)
after_filter =
print("... Filtering took %f s." % (after_filter - after_cluster).total_seconds())
if len(data) == 0:
print("GRAPH # ... GET_GRAPH: 0 coocs in matrix")
data = {'nodes':[], 'links':[]} # empty data
return data
#from rest_framework.authentication import SessionAuthentication, BasicAuthentication
from gargantext.util.db import session
from gargantext.models.nodes import Node
from graph.graph import get_graph
from graph.utils import compress_graph, format_html
from gargantext.util.http import APIView, APIException\
, JsonHttpResponse, requires_auth
from gargantext.constants import graph_constraints
from traceback import format_tb
def compress_graph(graphdata):
graph data is usually a dict with 2 slots:
"nodes": [{"id":4103, "type":"terms", "attributes":{"clust_default": 0}, "size":29, "label":"regard"},...]
"links": [{"t": 998,"s": 768,"w": 0.0425531914893617},...]
To send this data over the net, this function can reduce a lot of its size:
- keep less decimals for float value of each link's weight
- use shorter names for node properties (eg: s/clust_default/cl/)
result format:
"nodes": [{"id":4103, "at":{"cl": 0}, "s":29, "lb":"regard"},...]
"links": [{"t": 998,"s": 768,"w": 0.042},...]
for link in graphdata['links']:
link['w'] = format(link['w'], '.3f') # keep only 3 decimals
for node in graphdata['nodes']:
node['lb'] = node['label']
del node['label']
node['at'] = node['attributes']
del node['attributes']
node['at']['cl'] = node['at']['clust_default']
del node['at']['clust_default']
node['s'] = node['size']
del node['size']
from traceback import format_tb
if node['type'] == "terms":
# its the default type for our format: so we don't need it
del node['type']
node['t'] = node['type']
del node['type']
return graphdata
def format_html(link):
Build an html link adapted to our json message format
return "<a class='msglink' href='%s'>%s</a>" % (link, link)
# TODO check authentication
class Graph(APIView):
REST part for graphs.
......@@ -75,6 +27,16 @@ class Graph(APIView):
# Get the node we are working with
corpus = session.query(Node).filter(
# TODO Parameters to save in hyperdata of the Node Cooc
# WARNING: we could factorize the parameters as dict but ...
# ... it causes a bug in asynchronous function !
# Check celery upgrades before.
# Example (for the future):
# parameters = dict()
# parameters['field1'] = field1
# parameters['field2'] = field2
# Get all the parameters in the URL
cooc_id = request.GET.get ('cooc_id' , None )
saveOnly = request.GET.get ('saveOnly' , None )
......@@ -94,8 +56,8 @@ class Graph(APIView):
type_ = str(request.GET.get ('type' , 'node_link' ))
distance = str(request.GET.get ('distance' , 'conditional'))
# Get default value if no map list
# Get default map List of corpus
if mapList_id == 0 :
mapList_id = ( session.query ( )
.filter( Node.typename == "MAPLIST"
......@@ -107,7 +69,6 @@ class Graph(APIView):
mapList_id = mapList_id[0]
if mapList_id == None :
# todo add as an error msg ?
raise ValueError("MAPLIST node needed for cooccurrences")
......@@ -123,36 +84,26 @@ class Graph(APIView):
groupList_id = groupList_id[0]
if groupList_id == None :
# todo add as an error msg ?
raise ValueError("GROUPLIST node needed for cooccurrences")
# Check the options
# Declare accepted fields
accepted_field1 = ['ngrams', 'journal', 'source', 'authors']
accepted_field2 = ['ngrams', ]
options = ['start', 'end', 'threshold', 'distance', 'cooc_id' ]
# Test params
# Check if parameters are accepted
if (field1 in accepted_field1) and (field2 in accepted_field2):
if start is not None and end is not None :
data = get_graph( corpus=corpus, cooc_id = cooc_id
#, field1=field1 , field2=field2
, mapList_id = mapList_id , groupList_id = groupList_id
, start=start , end=end
, threshold =threshold , distance=distance
, saveOnly=saveOnly
data = get_graph( corpus = corpus, cooc_id = cooc_id
#, field1=field1, field2=field2
, mapList_id = mapList_id , groupList_id = groupList_id
, threshold = threshold
, distance = distance
, bridgeness = bridgeness
, saveOnly=saveOnly
data = get_graph( corpus=corpus, cooc_id = cooc_id
, field1=field1 , field2=field2
, mapList_id = mapList_id , groupList_id = groupList_id
, start=start , end=end
, threshold =threshold
, distance=distance , bridgeness=bridgeness
, saveOnly=saveOnly
# data :: Either (Dic Nodes Links) (Dic State Length)
def compress_graph(graphdata):
    """
    Shrink a graph dict in place so that it is cheaper to send over the net.

    graph data is usually a dict with 2 slots:
      "nodes": [{"id":4103, "type":"terms", "attributes":{"clust_default": 0}, "size":29, "label":"regard"},...]
      "links": [{"t": 998,"s": 768,"w": 0.0425531914893617},...]

    The size reduction comes from:
      - keeping only 3 decimals for each link's weight
      - using shorter names for node properties (eg: s/clust_default/cl/)
      - dropping the node type when it is "terms" (the default type)

    result format:
      "nodes": [{"id":4103, "at":{"cl": 0}, "s":29, "lb":"regard"},...]
      "links": [{"t": 998,"s": 768,"w": "0.043"},...]

    Note that the weight becomes a string, because it is produced by
    format(..., '.3f').

    The input dict (and its nested node/link dicts) is modified and returned.
    A node missing one of the expected keys raises KeyError.
    """
    for link in graphdata['links']:
        # keep only 3 decimals (format() returns a str, e.g. '0.043')
        link['w'] = format(link['w'], '.3f')

    for node in graphdata['nodes']:
        # rename each property to its short form
        node['lb'] = node.pop('label')
        node['at'] = node.pop('attributes')
        node['at']['cl'] = node['at'].pop('clust_default')
        node['s'] = node.pop('size')

        # "terms" is the default type for our format: so we don't need it
        node_type = node.pop('type')
        if node_type != "terms":
            node['t'] = node_type

    return graphdata
def format_html(link):
    """
    Build an html link adapted to our json message format.

    The same value is used both as the href target and as the visible text.
    It is inserted as-is (no HTML escaping is applied here).
    """
    return "<a class='msglink' href='%s'>%s</a>" % (link, link)
......@@ -14,6 +14,8 @@ def explorer(request, project_id, corpus_id):
Graph explorer, also known as TinaWebJS, using SigmaJS.
Nodes are ngrams (from title, abstract or journal name).
Links represent proximity measure.
Data are received in RESTfull mode (see
# we pass our corpus
......@@ -46,7 +48,10 @@ def explorer(request, project_id, corpus_id):
def myGraphs(request, project_id, corpus_id):
List all of my Graphs
List all of my Graphs.
Each Graph has one Node of Cooccurrences.
Each Graph is saved in the hyperdata of its Node.
user = cache.User[]
......@@ -26,7 +26,7 @@
<div class="col-md-5 content">
{% for key, value in coocs_count.items %}
{% if key == %}
......@@ -40,7 +40,7 @@
<li> ~{{ value }} nodes with distances:
<a href="/projects/{{}}/corpora/{{}}/explorer?cooc_id={{}}&distance=distributional&bridgeness=5">
<a href="/projects/{{}}/corpora/{{}}/explorer?cooc_id={{}}&distance=conditional&bridgeness=5">
<span class="glyphicon glyphicon-eye-open" aria-hidden="true"></span>
......@@ -102,10 +102,13 @@
<div class="panel-body">
<div class="container">
Newsletters : <a target="blank" href="">subscribe</a>
<li>User mailing-list: soon</li>
<li>User mailing-list: soon </li>
<li>Devel mailing-list: soon</li>
......@@ -294,7 +294,7 @@
<span class="glyphicon glyphicon-registration-mark" aria-hidden="true"></span>
, version,
, version,
<a href="" target="blank" title="Institution that enables this project.">
<span class="glyphicon glyphicon-copyright-mark" aria-hidden="true"></span>
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment