Commit ce761204 authored by sim's avatar sim

Remove graph module

parent 6a0506a4
Module Graph Explorer: from text to graph
=========================================
## Graph Explorer main
0) All urls.py of the Graph Explorer
1) Main view of the graph explorer: views.py
-> Graph Explorer
-> My graph View
-> REST API to get Data
2) Graph is generated (graph.py) through different steps
a) check the constraints (graph_constraints) in gargantext/constants.py
b) Data are retrieved as REST
rest.py: check REST parameters
c) graph.py:
get_graph: check Graph parameters
compute_graph: compute graph
1) Cooccurrences are computed (live or asynchronously): cooccurrences.py
2) Threshold and distances: distances.py
3) clustering: louvain.py
4) links between communities: bridgeness.py
d) compress graph before returning it: utils.py
3) Additional features:
a) intersection of graphs: intersection.py
## How to contribute ?
Some solutions:
1) please report to dev@gargantext.org
2) fix with git repo and pull request
## TODO
myGraphs view:
* progress bar
* Show already computed graphs vs to be computed with parameters
* show parameters
* copy / paste and change some parameters to generate new graph
# Article coming soon
from gargantext.util.db import session
from gargantext.models.ngrams import Ngram
from collections import defaultdict
from networkx.readwrite import json_graph
def filterByBridgeness(G,partition,ids,weight,bridgeness,type,field1,field2):
    '''
    Bridgeness = measure to control links (bridges) between communities.

    G          :: graph whose nodes are ngram ids (networkx 1.x API is used:
                  G.node, G.edges_iter)
    partition  :: dict node_id -> community id
    ids        :: dict node_id -> (field, node_id)
    weight     :: dict node_id -> node size
    bridgeness :: number; < 0 keeps every edge, 0 keeps only the edges inside
                  a community, > 0 additionally keeps the strongest edges
                  between each pair of communities
    type       :: "node_link" | "adjacency" | "bestpartition"
    field1, field2 :: when both are 'ngrams' only the ngram graph is returned

    Returns a dict {"nodes": [...], "links": [...]} for "node_link", the
    networkx node-link dict for "adjacency", the partition itself for
    "bestpartition" and an empty dict for any other type.
    '''
    # Data are stored in a dict(), (== hashmap by default with Python)
    data = dict()

    if type == "node_link":
        nodesB_dict = {}
        for node_id in G.nodes():
            nodesB_dict[ids[node_id][1]] = True

            # TODO the query below is not optimized (do it do_distance).
            label_row = session.query(Ngram.terms).filter(Ngram.id==node_id).first()
            # A node whose ngram is missing from the database must not abort
            # the whole export: fall back on its id as label.
            if label_row is None:
                the_label = str(node_id)
            else:
                the_label = ", ".join(label_row)

            G.node[node_id]['label'] = the_label
            G.node[node_id]['size'] = weight[node_id]
            G.node[node_id]['type'] = ids[node_id][0].replace("ngrams","terms")
            G.node[node_id]['attributes'] = { "clust_default": partition[node_id]} # new format

        links = []

        if bridgeness > 0:
            # com_link[c1][c2] = edges between communities c1 < c2
            com_link = defaultdict(lambda: defaultdict(list))
            # com_ids[c] = nodes of community c
            com_ids = defaultdict(list)
            for node_id, community in partition.items():
                com_ids[community].append(node_id)

        for e in G.edges_iter():
            s = e[0]
            t = e[1]
            # Named edge_weight so that the `weight` parameter is not shadowed
            edge_weight = G[ids[s][1]][ids[t][1]]["weight"]

            if bridgeness < 0:
                # No filtering at all
                links.append({ "s": ids[s][1]
                             , "t": ids[t][1]
                             , "w": edge_weight
                             })
            elif partition[s] == partition[t]:
                # Edges inside a community are always kept
                links.append({ "s": ids[s][1]
                             , "t": ids[t][1]
                             , "w": edge_weight
                             })
            elif bridgeness > 0:
                # Order the pair of communities so that an edge is registered
                # whatever the orientation in which the graph yields it.
                c_low, c_high = sorted((partition[s], partition[t]))
                com_link[c_low][c_high].append((s, t, edge_weight))

        if bridgeness > 0:
            for c1 in com_link.keys():
                for c2 in com_link[c1].keys():
                    # Number of bridges kept: proportional to the number of
                    # links between both communities, relative to their sizes
                    index = round(
                                   bridgeness * len( com_link[c1][c2] )
                                 / #----------------------------------#
                                 ( len(com_ids[c1]) + len(com_ids[c2] ))
                                 )
                    if index > 0:
                        strongest = sorted( com_link[c1][c2]
                                          , key=lambda x: x[2]
                                          , reverse=True)[:index]
                        for link in strongest:
                            links.append({"s": link[0], "t": link[1], "w": link[2]})

        B = json_graph.node_link_data(G)
        B["links"] = links

        if field1 == field2 == 'ngrams' :
            data["nodes"] = B["nodes"]
            data["links"] = B["links"]
        else:
            # NOTE(review): get_graphA and corpus are not defined in the
            # visible source; unless they are provided elsewhere this branch
            # raises NameError.
            A = get_graphA( "journal" , nodesB_dict , B["links"] , corpus )
            print("#nodesA:",len(A["nodes"]))
            print("#linksAA + #linksAB:",len(A["links"]))
            print("#nodesB:",len(B["nodes"]))
            print("#linksBB:",len(B["links"]))
            data["nodes"] = A["nodes"] + B["nodes"]
            data["links"] = A["links"] + B["links"]
            print(" total nodes :",len(data["nodes"]))
            print(" total links :",len(data["links"]))
            print("")

    elif type == "adjacency":
        for node in G.nodes():
            try:
                G.node[node]['name'] = node
                G.node[node]['group'] = partition[node]
            except Exception as error:
                # Best effort: a node without community is only reported
                print("error02: ",error)
        data = json_graph.node_link_data(G)

    elif type == 'bestpartition':
        return(partition)

    return(data)
from gargantext.models import Node, Ngram, NodeNgram, NodeNgramNgram, \
NodeHyperdata, HyperdataKey
from gargantext.util.db import session, aliased, func
from gargantext.util.lists import WeightedMatrix, UnweightedList, Translations
from sqlalchemy import desc, asc, or_, and_
from datetime import datetime
def filterMatrix(matrix, mapList_id, groupList_id):
    '''
    Restrict a cooccurrence matrix with the map list and the group list.

    The UnweightedList built from mapList_id is combined (operator *) with the
    Translations built from groupList_id, and the matrix is then intersected
    (operator &) with that combination.
    '''
    allowed_terms = UnweightedList(mapList_id) * Translations(groupList_id)
    return matrix & allowed_terms
def countCooccurrences( corpus_id=None         , cooc_id=None
                      , field1='ngrams'        , field2='ngrams'
                      , start=None             , end=None
                      , mapList_id=None        , groupList_id=None
                      , distance=None          , bridgeness=None
                      , n_min=1, n_max=None    , limit=1000
                      , isMonopartite=True     , threshold = 3
                      , save_on_db= True       , reset=True
                      ):
    '''
    Compute the cooccurence matrix and save it, returning NodeNgramNgram.node_id
    For the moment list of parameters are not supported because, lists need to
    be merged before.

    corpus_id     :: Int -- id of the corpus Node
    cooc_id       :: Int -- id of an existing COOCCURRENCES node (optional)
    mapList_id    :: Int
    groupList_id  :: Int
    start         :: TimeStamp -- parsed with the format "%Y-%m-%d"
    end           :: TimeStamp -- parsed with the format "%Y-%m-%d"
    n_min, n_max  :: Int -- bounds (inclusive) on the size `n` of the ngrams
    threshold     :: Int -- minimal cooccurrence count kept
    reset         :: Bool -- with an existing cooc_id: True recomputes its
                     content, False loads and returns what is stored
    save_on_db    :: Bool

    distance, bridgeness and limit are accepted but not used here.

    Returns (cooc_id, matrix) when the matrix is loaded or saved, and the
    matrix alone when save_on_db is false.
    '''
    # FIXME remove the lines below after factorization of parameters
    parameters = dict()
    parameters['field1'] = field1
    parameters['field2'] = field2

    # Get corpus as Python object
    corpus = session.query(Node).filter(Node.id==corpus_id).first()

    # Get node of the Graph
    if not cooc_id:
        cooc_id = ( session.query( Node.id )
                           .filter( Node.typename  == "COOCCURRENCES"
                                  , Node.name      == "GRAPH EXPLORER"
                                  , Node.parent_id == corpus.id
                                  )
                           .first()
                  )
        if not cooc_id:
            coocNode = corpus.add_child(
                            typename = "COOCCURRENCES",
                            name = "GRAPH (in corpus %s)" % corpus.id
                            )
            session.add(coocNode)
            session.commit()
            cooc_id = coocNode.id
        else :
            cooc_id = int(cooc_id[0])

    # when cooc_id preexisted, but we want to continue (reset = True)
    # (to give new contents to this cooc_id)
    elif reset:
        print("GRAPH #%s ... Counting new cooccurrences data." % cooc_id)
        session.query( NodeNgramNgram ).filter( NodeNgramNgram.node_id == cooc_id ).delete()
        session.commit()

    # when cooc_id preexisted and we just want to load it (reset = False)
    else:
        print("GRAPH #%s ... Loading cooccurrences computed already." % cooc_id)
        cooc = ( session.query( NodeNgramNgram.ngram1_id
                              , NodeNgramNgram.ngram2_id
                              , NodeNgramNgram.weight
                              )
                        .filter( NodeNgramNgram.node_id == cooc_id )
                        .all()
               )
        return(int(cooc_id),WeightedMatrix(cooc))

    NodeNgramX = aliased(NodeNgram)

    # Simple Cooccurrences
    cooc_score = func.count(NodeNgramX.node_id).label('cooc_score')

    # A kind of Euclidean distance cooccurrences
    #cooc_score = func.sqrt(func.sum(NodeNgramX.weight * NodeNgramY.weight)).label('cooc_score')

    NodeNgramY = aliased(NodeNgram)
    if isMonopartite :
        cooc_query = (session.query( NodeNgramX.ngram_id
                                   , NodeNgramY.ngram_id
                                   , cooc_score
                                   )
                             .join( Node
                                  , Node.id == NodeNgramX.node_id
                                  )
                             .join( NodeNgramY
                                  , NodeNgramY.node_id == Node.id
                                  )
                             .filter( Node.parent_id==corpus.id
                                    , Node.typename=="DOCUMENT"
                                    )
                     )
    else :
        # NOTE(review): NodeHyperdataNgram and Hyperdata are not imported in
        # the visible source; unless they are provided elsewhere this branch
        # raises NameError.
        cooc_query = (session.query( NodeHyperdataNgram.ngram_id
                                   , NodeNgramY.ngram_id
                                   , cooc_score
                                   )
                             .join( Node
                                  , Node.id == NodeHyperdataNgram.node_id
                                  )
                             .join( NodeNgramY
                                  , NodeNgramY.node_id == Node.id
                                  )
                             .join( Hyperdata
                                  , Hyperdata.id == NodeHyperdataNgram.hyperdata_id
                                  )
                             .filter( Node.parent_id == corpus.id
                                    , Node.typename  == "DOCUMENT"
                                    )
                             .filter( Hyperdata.name == field1 )
                     )

    # Size of the ngrams between n_min and n_max
    if n_min is not None or n_max is not None:
        if isMonopartite:
            NgramX = aliased(Ngram)
            cooc_query = cooc_query.join ( NgramX
                                         , NgramX.id == NodeNgramX.ngram_id
                                         )
        NgramY = aliased(Ngram)
        cooc_query = cooc_query.join ( NgramY
                                     , NgramY.id == NodeNgramY.ngram_id
                                     )

    if n_min is not None:
        cooc_query = cooc_query.filter(NgramY.n >= n_min)
        if isMonopartite:
            cooc_query = cooc_query.filter(NgramX.n >= n_min)

    if n_max is not None:
        # Upper bound: compare with n_max (the lower bound is applied above)
        cooc_query = cooc_query.filter(NgramY.n <= n_max)
        if isMonopartite:
            cooc_query = cooc_query.filter(NgramX.n <= n_max)

    # Cooc between the dates start and end
    if start is not None:
        # TODO : more precise date format here (day is smaller grain actually).
        date_start = datetime.strptime (str(start), "%Y-%m-%d")
        date_start_utc = date_start.strftime("%Y-%m-%d %H:%M:%S")

        Start=aliased(NodeHyperdata)
        cooc_query = (cooc_query.join( Start
                                     , Start.node_id == Node.id
                                     )
                                .filter( Start.key == 'publication_date')
                                .filter( Start.value_utc >= date_start_utc)
                     )
        parameters['start'] = date_start_utc

    if end is not None:
        # TODO : more precise date format here (day is smaller grain actually).
        date_end = datetime.strptime (str(end), "%Y-%m-%d")
        date_end_utc = date_end.strftime("%Y-%m-%d %H:%M:%S")

        End=aliased(NodeHyperdata)
        cooc_query = (cooc_query.join( End
                                     , End.node_id == Node.id
                                     )
                                .filter( End.key == 'publication_date')
                                .filter( End.value_utc <= date_end_utc )
                     )
        parameters['end'] = date_end_utc

    if isMonopartite:
        # Cooc is symetric, take only the main cooccurrences and cut at the limit
        cooc_query = cooc_query.filter(NodeNgramX.ngram_id < NodeNgramY.ngram_id)

    cooc_query = cooc_query.having(cooc_score >= threshold)

    if isMonopartite:
        cooc_query = cooc_query.group_by(NodeNgramX.ngram_id, NodeNgramY.ngram_id)
    else:
        cooc_query = cooc_query.group_by(NodeHyperdataNgram.ngram_id, NodeNgramY.ngram_id)

    # Order according some scores
    # If ordering is really needed, use Ordered Index (faster)
    #cooc_query = cooc_query.order_by(desc('cooc_score'))

    matrix = WeightedMatrix(cooc_query)

    print("GRAPH #%s Filtering the matrix with Map and Group Lists." % cooc_id)
    cooc = filterMatrix(matrix, mapList_id, groupList_id)

    parameters['MapList_id'] = str(mapList_id)
    parameters['GroupList_id'] = str(groupList_id)

    # TODO factorize savings on db
    if not save_on_db:
        # NOTE: only the matrix is returned here, not a (cooc_id, matrix) pair
        return cooc

    # Saving the cooccurrences
    cooc.save(cooc_id)
    print("GRAPH #%s ... Node Cooccurrence Matrix saved" % cooc_id)

    # Saving the parameters
    print("GRAPH #%s ... Parameters saved in Node." % cooc_id)
    coocNode = session.query(Node).filter(Node.id==cooc_id).first()
    coocNode.hyperdata["parameters"] = parameters
    coocNode.save_hyperdata()
    session.commit()

    #data = cooc2graph(coocNode.id, cooc, distance=distance, bridgeness=bridgeness)
    return(coocNode.id, cooc)
import math
import numpy as np
import pandas as pd
import networkx as nx
from copy import copy
from collections import defaultdict
from math import log,sqrt
#from operator import itemgetter
from gargantext.models import Node, NodeNgram, NodeNgramNgram, \
NodeHyperdata
from gargantext.util.db import session, aliased
from .louvain import best_partition
def clusterByDistances( cooc_matrix
                      , field1=None, field2=None
                      , distance=None):
    '''
    clusterByDistance :: Coocs[nga, ngb => ccweight] -> (Graph, Partition, {ids}, {weight})

    Build a graph from a cooccurrence matrix with the chosen distance, remove
    its isolated nodes and cluster it with best_partition.

    cooc_matrix    :: object whose `items` attribute maps (ngram1_id, ngram2_id)
                      to a cooccurrence weight
    field1, field2 :: labels stored in `ids` next to each ngram id
    distance       :: 'conditional' | 'distributional' | 'cosine'

    Returns (G, partition, ids, weight) where ids[ngram_id] is a
    (field, ngram_id) pair and weight[ngram_id] is the sum of the
    cooccurrence weights in which the ngram appears.

    Raises ValueError when distance is not one of the three authorized values.
    '''
    # implicit global session
    authorized = ['conditional', 'distributional', 'cosine']
    if distance not in authorized:
        raise ValueError("Distance must be in %s" % str(authorized))

    # matrix[a][b] == matrix[b][a] == cooccurrence weight of (a, b)
    matrix = defaultdict(lambda : defaultdict(float))
    ids = defaultdict(lambda : defaultdict(int))
    labels = dict()  # never read in this function
    weight = dict()

    for cooc in cooc_matrix.items:
        ngram1_id = cooc[0]
        ngram2_id = cooc[1]
        ccweight = cooc_matrix.items[cooc]

        # The matrix is filled symmetrically
        matrix[ngram1_id][ngram2_id] = ccweight
        matrix[ngram2_id][ngram1_id] = ccweight

        ids[ngram1_id] = (field1, ngram1_id)
        ids[ngram2_id] = (field2, ngram2_id)

        weight[ngram1_id] = weight.get(ngram1_id, 0) + ccweight
        weight[ngram2_id] = weight.get(ngram2_id, 0) + ccweight

    x = pd.DataFrame(matrix).fillna(0)

    if distance == 'conditional':
        x = x / x.sum(axis=1)
        #y = y / y.sum(axis=0)

        xs = x.sum(axis=1) - x
        ys = x.sum(axis=0) - x

        # top included or excluded
        n = ( xs + ys) / (2 * (x.shape[0] - 1))
        # top generic or specific
        m = ( xs - ys) / (2 * (x.shape[0] - 1))

        n = n.sort_index(inplace=False)
        m = m.sort_index(inplace=False)

        nodes_included = 10000 #int(round(size/20,0))
        #nodes_excluded = int(round(size/10,0))

        nodes_specific = 10000 #int(round(size/10,0))
        #nodes_generic = int(round(size/10,0))

        # TODO use the included score for the node size
        n_index = pd.Index.intersection(x.index, n.index[:nodes_included])
        # Generic:
        #m_index = pd.Index.intersection(x.index, m.index[:nodes_generic])
        # Specific:
        m_index = pd.Index.intersection(x.index, m.index[-nodes_specific:])
        #m_index = pd.Index.intersection(x.index, n.index[:nodes_included])

        x_index = pd.Index.union(n_index, m_index)
        xx = x[list(x_index)].T[list(x_index)]

        # Removing unconnected nodes
        xxx = xx.values
        # Automatic threshold: the smallest of the row maxima
        threshold = min(xxx.max(axis=1))
        matrix_filtered = np.where(xxx >= threshold, xxx, 0)
        #matrix_filtered = matrix_filtered.resize((90,90))

        G = nx.from_numpy_matrix(np.matrix(matrix_filtered))
        # Positions in the numpy matrix are relabelled with the ngram ids
        G = nx.relabel_nodes(G, dict(enumerate([ ids[id_][1] for id_ in list(xx.columns)])))

    elif distance == 'cosine':
        scd = defaultdict(lambda : defaultdict(int))

        for i in matrix.keys():
            for j in matrix.keys():
                numerator = sum(
                                [
                                  matrix[i][k] * matrix[j][k]
                                  for k in matrix.keys()
                                  if i != j and k != i and k != j
                                ]
                               )

                # NOTE(review): both factors sum over row i, so the score is
                # not symmetric in (i, j); the edge filter below relies on
                # scd[i][j] != scd[j][i] -- confirm this is intended before
                # changing it.
                denominator = sqrt(
                                   sum([
                                         matrix[i][k]
                                         for k in matrix.keys()
                                         if k != i and k != j #and matrix[i][k] > 0
                                       ])
                                   *
                                   sum([
                                         matrix[i][k]
                                         for k in matrix.keys()
                                         if k != i and k != j #and matrix[i][k] > 0
                                       ])
                                  )

                try:
                    scd[i][j] = numerator / denominator
                except Exception as error:
                    # Any failure of the division gives a null score
                    scd[i][j] = 0

        # Automatic threshold: the smallest of the column maxima
        minmax = min([ max([ scd[i][j] for i in scd.keys()]) for j in scd.keys()])

        G = nx.DiGraph()
        G.add_edges_from(
                          [
                            (i, j, {'weight': scd[i][j]})
                            for i in scd.keys() for j in scd.keys()
                            if i != j and scd[i][j] > minmax and scd[i][j] > scd[j][i]
                          ]
                        )

    elif distance == 'distributional':
        mi = defaultdict(lambda : defaultdict(int))
        total_cooc = x.sum().sum()

        # mi[i][j]: log of the observed weight over (si * sj) / total_cooc
        for i in matrix.keys():
            si = sum([matrix[i][j] for j in matrix[i].keys() if i != j])
            for j in matrix[i].keys():
                sj = sum([matrix[j][k] for k in matrix[j].keys() if j != k])
                if i!=j :
                    mi[i][j] = log( matrix[i][j] / ((si * sj) / total_cooc) )

        r = defaultdict(lambda : defaultdict(int))

        for i in matrix.keys():
            for j in matrix.keys():
                sumMin = sum(
                              [
                                min(mi[i][k], mi[j][k])
                                for k in matrix.keys()
                                if i != j and k != i and k != j and mi[i][k] > 0
                              ]
                            )

                sumMi = sum(
                             [
                               mi[i][k]
                               for k in matrix.keys()
                               if k != i and k != j and mi[i][k] > 0
                             ]
                           )

                try:
                    r[i][j] = sumMin / sumMi
                except Exception as error:
                    # Any failure of the division gives a null score
                    r[i][j] = 0

        # Need to filter the weak links, automatic threshold here
        minmax = min([ max([ r[i][j] for i in r.keys()]) for j in r.keys()])

        G = nx.DiGraph()
        G.add_edges_from(
                          [
                            (i, j, {'weight': r[i][j]})
                            for i in r.keys() for j in r.keys()
                            if i != j and r[i][j] > minmax and r[i][j] > r[j][i]
                          ]
                        )

        # degree_max = max([(n, d) for n,d in G.degree().items()], key=itemgetter(1))[1]
        # nodes_to_remove = [n for (n,d) in G.degree().items() if d <= round(degree_max/2)]
        # G.remove_nodes_from(nodes_to_remove)

    # Removing too connected nodes (find automatic way to do it)
    #edges_to_remove = [ e for e in G.edges_iter() if
    # nodes_to_remove = [n for n in degree if degree[n] <= 1]
    # G.remove_nodes_from(nodes_to_remove)

    # Only referenced by the disabled code below
    def getWeight(item):
        return item[1]
    #
    # node_degree = sorted(G.degree().items(), key=getWeight, reverse=True)
    # #print(node_degree)
    # nodes_too_connected = [n[0] for n in node_degree[0:(round(len(node_degree)/5))]]
    #
    # for n in nodes_too_connected:
    #     n_edges = list()
    #     for v in nx.neighbors(G,n):
    #         #print((n, v), G[n][v]['weight'], ":", (v,n), G[v][n]['weight'])
    #         n_edges.append(((n, v), G[n][v]['weight']))
    #
    #     n_edges_sorted = sorted(n_edges, key=getWeight, reverse=True)
    #     #G.remove_edges_from([ e[0] for e in n_edges_sorted[round(len(n_edges_sorted)/2):]])
    #     #G.remove_edges_from([ e[0] for e in n_edges_sorted[(round(len(nx.neighbors(G,n))/3)):]])
    #     G.remove_edges_from([ e[0] for e in n_edges_sorted[10:]])

    G.remove_nodes_from(nx.isolates(G))

    # The clustering is computed on the undirected version of the graph
    partition = best_partition(G.to_undirected())

    return(G,partition,ids,weight)
from celery import shared_task
from datetime import datetime
from gargantext.util.db import session, aliased
from gargantext.util.lists import WeightedMatrix, UnweightedList, Translations
from gargantext.util.http import JsonHttpResponse
from gargantext.models import Node, Ngram, NodeNgram, NodeNgramNgram, NodeHyperdata
from gargantext.util.scheduling import scheduled
from gargantext.constants import graph_constraints
from .cooccurrences import countCooccurrences
from .distances import clusterByDistances
from .bridgeness import filterByBridgeness
from .mail_notification import notify_owner
from .growth import compute_growth
@shared_task
def compute_graph( corpus_id=None      , cooc_id=None
                 , field1='ngrams'     , field2='ngrams'
                 , start=None          , end=None
                 , mapList_id=None     , groupList_id=None
                 , distance=None       , bridgeness=None
                 , n_min=1, n_max=None , limit=1000
                 , isMonopartite=True  , threshold = 3
                 , save_on_db= True    , reset=True
                 ) :
    '''
    All steps to compute a graph:

    1) count Cooccurrences (function countCooccurrences)
            main parameters: threshold, isMonopartite

    2) filter and cluster By Distances (function clusterByDistances)
            main parameter: distance
            TODO option clustering='louvain'
                 or 'percolation' or 'random walk' or ...

    3) filter By Bridgeness (function filterByBridgeness)
            main parameter: bridgeness

    4) format the graph (formatGraph)
            main parameter: format_

    The graph is then stored in the hyperdata of the cooccurrence node, under
    hyperdata[distance][str(bridgeness)], next to the number of nodes and
    edges, and returned.

    NOTE: isMonopartite and save_on_db are always forwarded as True and the
    clustering always uses the "ngrams" fields, whatever the arguments;
    n_min, n_max and limit are not forwarded.
    '''
    print("GRAPH # ... Computing cooccurrences.")
    (cooc_id, cooc_matrix) = countCooccurrences( corpus_id=corpus_id, cooc_id=cooc_id
                                               , field1=field1, field2=field2
                                               , start=start , end =end
                                               , mapList_id=mapList_id , groupList_id=groupList_id
                                               , isMonopartite=True , threshold = threshold
                                               , distance=distance , bridgeness=bridgeness
                                               , save_on_db = True , reset = reset
                                               )
    print("GRAPH #%d ... Cooccurrences computed." % (cooc_id))

    print("GRAPH #%d ... Clustering with %s distance." % (cooc_id,distance))
    G, partition, ids, weight = clusterByDistances ( cooc_matrix
                                                   , field1="ngrams", field2="ngrams"
                                                   , distance=distance
                                                   )

    # %s (not %d): bridgeness defaults to None and may not be an integer
    print("GRAPH #%d ... Filtering by bridgeness %s." % (cooc_id, bridgeness))
    data = filterByBridgeness(G,partition,ids,weight,bridgeness,"node_link",field1,field2)

    if start is not None and end is not None:
        growth = dict()
        for (ng_id, score) in compute_growth(corpus_id, groupList_id, mapList_id, start, end):
            growth[ng_id] = float(score) + 100 # for the normalization, should not be negative

        for node in data['nodes']:
            node['attributes']['growth'] = growth[node['id']]

    print("GRAPH #%d ... Saving Graph in hyperdata as json." % cooc_id)
    node = session.query(Node).filter(Node.id == cooc_id).first()

    if node.hyperdata.get(distance, None) is None:
        print("GRAPH #%d ... Distance %s has not been computed already." % (cooc_id, distance))
        node.hyperdata[distance] = dict()

    # The key is a string, as in the lookup done by get_graph
    node.hyperdata[distance][str(bridgeness)] = data
    node.hyperdata[distance]["nodes"] = len(G.nodes())
    node.hyperdata[distance]["edges"] = len(G.edges())

    node.save_hyperdata()
    session.commit()

    print("GRAPH #%d ... Notify by email owner of the graph." % cooc_id)
    # The notification itself is disabled:
    #corpus = session.query(Node).filter(Node.id==corpus_id).first()
    #notify_owner(corpus, cooc_id, distance, bridgeness)

    print("GRAPH #%d ... Returning data as json." % cooc_id)
    return data
def get_graph( request=None          , corpus=None
             , field1='ngrams'       , field2='ngrams'
             , mapList_id = None     , groupList_id = None
             , cooc_id=None          , type='node_link'
             , start=None            , end=None
             , distance='conditional', bridgeness=5
             , threshold=1           , isMonopartite=True
             , saveOnly=True
             ) :
    '''
    Get_graph : main steps:
    0) Check the parameters

    get_graph :: GraphParameters -> Either (Dic Nodes Links) (Dic State Length)
        where type Length = Int

    get_graph first checks the parameters and return either graph data or a dict with
    state "type" with an integer to indicate the size of the parameter
    (maybe we could add a String in that step to factor and give here the error message)

    1) compute_graph (see function above)
    2) return graph

    Possible "state" values of the returned dict: "mapListError", "saveOnly",
    "corpusMax" and "corpusMin".

    NOTE: the asynchronous "saveOnly" answer is only given when saveOnly is
    the string "True"; any other truthy value still creates a new empty
    node when no cooc_id is given.
    '''
    overwrite_node_contents = False
    # Name and date of the target node, reported by the "saveOnly" answer
    cooc_name = None
    cooc_date = None

    # Case of graph has been computed already
    if cooc_id is not None:
        print("GRAPH#%d ... Loading data already computed." % int(cooc_id))
        node = session.query(Node).filter(Node.id == cooc_id).first()
        cooc_name = node.name
        cooc_date = node.date

        # Structure of the Node.hyperdata[distance][bridgeness]
        # All parameters (but distance and bridgeness)
        # are in Node.hyperdata["parameters"]

        # Check distance of the graph
        if node.hyperdata.get(distance, None) is not None:
            graph = node.hyperdata[distance]

            # Check bridgeness of the graph
            if graph.get(str(bridgeness), None) is not None:
                return graph[str(bridgeness)]

    # new graph: we give it an empty node with new id and status
    elif saveOnly:
        # NB: we do creation already here (instead of same in countCooccurrences)
        #     to guarantee a unique ref id to the saveOnly graph (async generation)
        new_node = corpus.add_child(
                        typename = "COOCCURRENCES",
                        name = "GRAPH (in corpus %s)" % corpus.id
                        )
        session.add(new_node)
        session.commit()
        cooc_id = new_node.id
        cooc_name = new_node.name
        cooc_date = new_node.date
        # and the empty content will need redoing by countCooccurrences
        overwrite_node_contents = True
        print("GRAPH #%d ... Created new empty data node for saveOnly" % int(cooc_id))

    # Case of graph has not been computed already
    # First, check the parameters

    # Case of mapList not big enough
    # ==============================

    # if we do not have any mapList_id already
    if mapList_id is None:
        # NOTE(review): this query is not restricted to the given corpus --
        # confirm that taking the first MAPLIST of the database is intended.
        mapList_row = session.query(Node.id).filter(Node.typename == "MAPLIST").first()
        if mapList_row is None:
            # No map list at all: same answer as for a too small map list
            return {'state': "mapListError", "length" : 0}
        mapList_id = mapList_row[0]

    mapList_size = session.query(NodeNgram).filter(NodeNgram.node_id == mapList_id).count()

    if mapList_size < graph_constraints['mapList']:
        # Do not compute the graph if mapList is not big enough
        return {'state': "mapListError", "length" : mapList_size}

    # Instantiate query for case of corpus not big enough
    # ===================================================
    corpus_size_query = (session.query(Node)
                                .filter(Node.typename=="DOCUMENT")
                                .filter(Node.parent_id == corpus.id)
                        )

    # Filter corpus by date if any start date
    # ---------------------------------------
    if start is not None:
        date_start = datetime.strptime (str(start), "%Y-%m-%d")
        date_start_utc = date_start.strftime("%Y-%m-%d %H:%M:%S")

        Start=aliased(NodeHyperdata)
        corpus_size_query = (corpus_size_query.join( Start
                                                   , Start.node_id == Node.id
                                                   )
                                              .filter( Start.key == 'publication_date')
                                              .filter( Start.value_utc >= date_start_utc)
                            )

    # Filter corpus by date if any end date
    # -------------------------------------
    if end is not None:
        date_end = datetime.strptime (str(end), "%Y-%m-%d")
        date_end_utc = date_end.strftime("%Y-%m-%d %H:%M:%S")

        End=aliased(NodeHyperdata)
        corpus_size_query = (corpus_size_query.join( End
                                                   , End.node_id == Node.id
                                                   )
                                              .filter( End.key == 'publication_date')
                                              .filter( End.value_utc <= date_end_utc )
                            )

    # Finally test if the size of the corpora is big enough
    # --------------------------------
    corpus_size = corpus_size_query.count()

    # Same arguments for the live and for the asynchronous computations
    compute_kwargs = dict( corpus_id=corpus.id , cooc_id=cooc_id
                         #, field1="ngrams", field2="ngrams"
                         , start=start , end=end
                         , mapList_id=mapList_id , groupList_id=groupList_id
                         , isMonopartite=True , threshold=threshold
                         , distance=distance , bridgeness=bridgeness
                         , save_on_db=True , reset=overwrite_node_contents
                         #, limit=size
                         )

    if saveOnly is not None and saveOnly == "True":
        scheduled(compute_graph)(**compute_kwargs)
        return { "state"      : "saveOnly"
               , "target_id"  : cooc_id
               , "target_name": cooc_name
               , "target_date": cooc_date
               }

    elif corpus_size > graph_constraints['corpusMax']:
        # Then compute cooc asynchronously with celery
        scheduled(compute_graph)(**compute_kwargs)
        # Dict to inform user that corpus maximum is reached
        # then graph is computed asynchronously
        return {"state" : "corpusMax", "length" : corpus_size}

    elif corpus_size <= graph_constraints['corpusMin']:
        # Do not compute the graph if corpus is not big enough
        return {"state" : "corpusMin", "length" : corpus_size}

    else:
        # If graph_constraints are ok then compute the graph in live
        data = compute_graph(**compute_kwargs)

    # case when 0 coocs are observed (usually b/c not enough ngrams in maplist)
    if len(data) == 0:
        print("GRAPH # ... GET_GRAPH: 0 coocs in matrix")
        data = {'nodes':[], 'links':[]} # empty data

    return data
"""
Computes ngram growth on periods
"""
from gargantext.models import Node, NodeNgram, NodeNodeNgram, NodeNgramNgram
from gargantext.util.db_cache import cache
from gargantext.util.db import session, bulk_insert, aliased, \
func, get_engine # = sqlalchemy.func like sum() or count()
from datetime import datetime
def timeframes(start, end):
    """
    timeframes :: String -> String -> (UTCTime, UTCTime, UTCTime)

    Parse both dates (format "%Y-%m-%d") and return three datetimes bounding
    two consecutive periods of the same length: the period that precedes
    `start`, then the period from `start` to `end`.
    """
    day_format = "%Y-%m-%d"
    period_start = datetime.strptime(str(start), day_format)
    period_end = datetime.strptime(str(end), day_format)
    period_length = period_end - period_start
    return (period_start - period_length, period_start, period_end)
def compute_growth(corpus_id, groupList_id, mapList_id, start, end):
    """
    compute_growth :: Int -> Int -> Int -> String -> String
                   -> [(Int, Numeric)]

    this function uses SQL function in
    /srv/gargantext/install/gargamelle/sqlFunctions.sql

    First compute occurrences of ngrams in mapList (with groups) on the first
    period, then on the second and finally returns growth.

    Directly computed with Postgres Database (C) for optimization.

    The three dates given to the SQL function come from timeframes(start, end).
    Returns the result of the execution of the query.
    """
    # Local import, only needed to bind the parameters of the query
    from sqlalchemy import text

    connection = get_engine()
    (date_0, date_1, date_2) = timeframes(start, end)

    # The values are bound by the driver instead of being pasted into the SQL
    # string, so that no argument can alter the statement.
    query = text("""SELECT * FROM OCC_HIST( :corpus_id
                                          , :groupList_id
                                          , :mapList_id
                                          , :date_0
                                          , :date_1
                                          , :date_2
                                          )
                 """)

    return(connection.execute( query
                             , corpus_id    = corpus_id
                             , groupList_id = groupList_id
                             , mapList_id   = mapList_id
                             # dates are sent as text, as they were rendered before
                             , date_0 = str(date_0)
                             , date_1 = str(date_1)
                             , date_2 = str(date_2)
                             ))
from gargantext.models import Node, Ngram, NodeNgram, NodeNgramNgram, \
HyperdataKey
from gargantext.util.db import session, aliased, bulk_insert, func
from gargantext.util.lists import WeightedMatrix, UnweightedList, Translations
from gargantext.util.http import JsonHttpResponse
from sqlalchemy import desc, asc, or_, and_, func
import datetime
import ast
import networkx as nx
def doc_freq(corpus_id, node_ids):
    '''
    doc_freq :: Corpus_id -> [Ngram_id] -> [(Ngram_id, Int)]

    For each ngram of node_ids, count the DOCUMENT nodes of the corpus that
    are linked to it with a strictly positive weight.
    '''
    documents_count = func.count(NodeNgram.node_id)

    query = session.query(NodeNgram.ngram_id, documents_count)
    query = query.join(Node, NodeNgram.node_id == Node.id)
    # Only the documents of the corpus
    query = query.filter(Node.parent_id == corpus_id, Node.typename== 'DOCUMENT')
    # Only the requested ngrams, when they really occur
    query = query.filter(NodeNgram.weight > 0, NodeNgram.ngram_id.in_(node_ids))
    query = query.group_by(NodeNgram.ngram_id)

    return query.all()
def doc_ngram_representativity(corpus_id, node_ids):
    '''
    doc_ngram_representativity :: Corpus_ID -> [Ngram_id] -> Dict Ngram_id Float

    For each ngram returned by doc_freq, give the share of the DOCUMENT nodes
    of the corpus in which it appears (document frequency divided by the
    number of documents of the corpus).
    '''
    corpus_documents = session.query(Node).filter( Node.parent_id == corpus_id
                                                 , Node.typename == 'DOCUMENT'
                                                 )
    documents_total = corpus_documents.count()

    return { ngram_id: frequency / documents_total
             for ngram_id, frequency in doc_freq(corpus_id, node_ids)
           }
def compare_corpora(Corpus_id_A, Corpus_id_B, node_ids):
    '''
    compare_corpora :: Corpus_id -> Corpus_id -> [Ngram_id] -> Dict Ngram_id Float

    Given two corpora:
    - if both ids are the same, it returns the document frequency of each
      ngram (as given by doc_freq);
    - if they are different, it returns for each ngram found in corpus A
          doc_ngram_representativity(Corpus_id_B) / doc_ngram_representativity(Corpus_id_A)
      An ngram of A that is absent from B gets the smallest ratio observed,
      or 0 when both corpora have no ngram in common.
    '''
    if int(Corpus_id_A) == int(Corpus_id_B):
        return { ngram_id: frequency
                 for ngram_id, frequency in doc_freq(Corpus_id_A, node_ids)
               }

    data_A = doc_ngram_representativity(Corpus_id_A, node_ids)
    data_B = doc_ngram_representativity(Corpus_id_B, node_ids)

    result = dict()
    missing_in_B = list()
    for ngram_id, share_in_A in data_A.items():
        if ngram_id in data_B:
            result[ngram_id] = data_B[ngram_id] / share_in_A
        else:
            missing_in_B.append(ngram_id)

    # default=0: with no common ngram there is no ratio to take the minimum of
    minimum = min(result.values(), default=0)
    for ngram_id in missing_in_B:
        result[ngram_id] = minimum

    return result
def intersection(request , corpuses_ids, measure='cooc'):
    '''
    intersection :: (str(Int) + "a" str(Int)) -> Dict(Ngram.id :: Int, Score :: Int)
    intersection = returns as Json Http Response the intersection of two graphs

    request      :: POST request whose "nodeids" field is the literal of a list
                    of ngram ids (the visible nodes of the initial semantic map)
    corpuses_ids :: both corpus ids joined by the letter "a", e.g. "12a34"
    measure      :: not used

    A request that is not a POST, or that has no "nodeids", is answered with
    an empty JSON object.
    '''
    has_node_ids = ( request.method == 'POST'
                     and "nodeids" in request.POST
                     and len(request.POST["nodeids"]) > 0
                   )
    if not has_node_ids:
        # A view must always answer: nothing to compare gives no score
        return JsonHttpResponse({})

    # Here are the visible nodes of the initial semantic map.
    # literal_eval only parses Python literals, it does not execute code
    node_ids = [int(i) for i in ast.literal_eval(request.POST["nodeids"])]

    corpora = [int(i) for i in corpuses_ids.split('a')]

    # corpora[1] will be the corpus to compare
    return JsonHttpResponse(compare_corpora(corpora[0], corpora[1], node_ids))
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
This module implements community detection.
"""
__all__ = ["partition_at_level", "modularity", "best_partition", "generate_dendogram", "induced_graph"]
__author__ = """Thomas Aynaud (thomas.aynaud@lip6.fr)"""
# Copyright (C) 2009 by
# Thomas Aynaud <thomas.aynaud@lip6.fr>
# All rights reserved.
# BSD license.
# Adapted to python 3 by anoe
# NOTE(review): presumably the maximal number of passes of the algorithm, -1
# standing for "no limit" -- its use is outside the visible source, confirm.
__PASS_MAX = -1
# NOTE(review): presumably the smallest modularity gain considered as an
# improvement -- its use is outside the visible source, confirm.
__MIN = 0.0000001
import networkx as nx
import sys
import types
import array
def partition_at_level(dendogram, level) :
    """Return the partition of the nodes at the given level

    A dendogram is a list of partitions: the first one maps the nodes to
    their communities, and each following one maps the communities of the
    previous level to bigger communities. Level 0 holds the smallest
    communities and level len(dendogram) - 1 the biggest ones.

    Parameters
    ----------
    dendogram : list of dict
       a list of partitions, the keys of item i+1 being the values of item i.
    level : int
       the level, which belongs to [0..len(dendogram)-1]

    Returns
    -------
    partition : dictionary
       A dictionary where keys are the nodes and the values are the set it belongs to.
       It is a copy: the dendogram is left untouched.

    Raises
    ------
    KeyError
       If a community of one level is missing from the next level
    IndexError
       If the level is too high for the dendogram

    Examples
    --------
    >>> dendo = [{0: 0, 1: 0, 2: 1}, {0: 0, 1: 0}]
    >>> partition_at_level(dendo, 1)
    {0: 0, 1: 0, 2: 0}
    """
    result = dendogram[0].copy()
    for step in range(level) :
        merges = dendogram[step + 1]
        # The new communities are all resolved before the result is updated
        result.update({node: merges[community] for node, community in result.items()})
    return result
def modularity(partition, graph) :
    """Compute the modularity of a partition of a graph

    Parameters
    ----------
    partition : dict
       the partition of the nodes, i.e a dictionary where keys are the nodes and values the communities
    graph : networkx.Graph
       the networkx graph which is decomposed

    Returns
    -------
    modularity : float
       The modularity

    Raises
    ------
    KeyError
       If the partition is not a partition of all graph nodes
    ValueError
        If the graph has no link
    TypeError
        If graph is not exactly a networkx.Graph (subclasses are refused too)

    References
    ----------
    .. 1. Newman, M.E.J. & Girvan, M. Finding and evaluating community structure in networks. Physical Review E 69, 26113(2004).

    Examples
    --------
    >>> G=nx.erdos_renyi_graph(100, 0.01)
    >>> part = best_partition(G)
    >>> modularity(part, G)
    """
    # Exact type test: a subclass such as a directed graph is rejected as well
    if type(graph) != nx.Graph :
        raise TypeError("Bad graph type, use only non directed graph")

    internal_weight = {}
    community_degree = {}
    total_weight = graph.size(weight='weight')
    if total_weight == 0 :
        raise ValueError("A graph without link has an undefined modularity")

    for node in graph :
        community = partition[node]
        community_degree[community] = ( community_degree.get(community, 0.)
                                        + graph.degree(node, weight = 'weight') )
        for neighbor, attributes in graph[node].items() :
            edge_weight = attributes.get("weight", 1)
            if partition[neighbor] != community :
                continue
            if neighbor == node :
                # A self loop counts fully
                share = float(edge_weight)
            else :
                # Any other internal edge is met from both of its ends
                share = float(edge_weight) / 2.
            internal_weight[community] = internal_weight.get(community, 0.) + share

    score = 0.
    for community in set(partition.values()) :
        inside = internal_weight.get(community, 0.) / total_weight
        expected = (community_degree.get(community, 0.) / (2.*total_weight))**2
        score += inside - expected
    return score
def best_partition(graph, partition=None):
    """Compute the partition of the graph nodes which maximises the modularity
    (or try..) using the Louvain heuristics

    This is the partition of highest modularity, i.e. the highest partition
    of the dendogram generated by the Louvain algorithm.

    Parameters
    ----------
    graph : networkx.Graph
        the networkx graph which is decomposed
    partition : dict, optional
        the algorithm will start using this partition of the nodes. It's a
        dictionary where keys are the nodes and values the communities

    Returns
    -------
    partition : dictionary
        The partition, i.e. the last level of the dendogram computed by
        generate_dendogram

    Raises
    ------
    TypeError
        If the graph is not a networkx.Graph (raised by generate_dendogram)

    See Also
    --------
    generate_dendogram to obtain all the decompositions levels

    Notes
    -----
    Uses Louvain algorithm

    References
    ----------
    .. 1. Blondel, V.D. et al. Fast unfolding of communities in large
       networks. J. Stat. Mech 10008, 1-12(2008).

    Examples
    --------
    >>> #Basic usage
    >>> G=nx.erdos_renyi_graph(100, 0.01)
    >>> part = best_partition(G)

    >>> #other example to display a graph with its community :
    >>> #better with karate_graph() as defined in networkx examples
    >>> #erdos renyi don't have true community structure
    >>> G = nx.erdos_renyi_graph(30, 0.05)
    >>> #first compute the best partition
    >>> partition = best_partition(G)
    >>> #drawing
    >>> size = float(len(set(partition.values())))
    >>> pos = nx.spring_layout(G)
    >>> count = 0.
    >>> for com in set(partition.values()) :
    >>>     count = count + 1.
    >>>     list_nodes = [nodes for nodes in partition.keys()
    >>>                                 if partition[nodes] == com]
    >>>     nx.draw_networkx_nodes(G, pos, list_nodes, node_size = 20,
                                    node_color = str(count / size))
    >>> nx.draw_networkx_edges(G,pos, alpha=0.5)
    >>> plt.show()
    """
    levels = generate_dendogram(graph, partition)
    # the last level of the dendogram is the one returned
    top_level = len(levels) - 1
    return partition_at_level(levels, top_level)
def generate_dendogram(graph, part_init=None):
    """Find communities in the graph and return the associated dendogram

    A dendogram is a tree and each level is a partition of the graph
    nodes. Level 0 is the first partition, which contains the smallest
    communities, and the best is len(dendogram) - 1. The higher the level
    is, the bigger are the communities

    Parameters
    ----------
    graph : networkx.Graph
        the networkx graph which will be decomposed
    part_init : dict, optional
        the algorithm will start using this partition of the nodes. It's a
        dictionary where keys are the nodes and values the communities

    Returns
    -------
    dendogram : list of dictionaries
        a list of partitions, ie dictionaries where keys of the i+1 are the
        values of the i. and where keys of the first are the nodes of graph.
        For a graph without any edge the list holds a single partition in
        which every node is its own community.

    Raises
    ------
    TypeError
        If the graph is not a networkx.Graph

    See Also
    --------
    best_partition

    Notes
    -----
    Uses Louvain algorithm

    References
    ----------
    .. 1. Blondel, V.D. et al. Fast unfolding of communities in large
       networks. J. Stat. Mech 10008, 1-12(2008).

    Examples
    --------
    >>> G=nx.erdos_renyi_graph(100, 0.01)
    >>> dendo = generate_dendogram(G)
    >>> for level in range(len(dendo) - 1) :
    >>>     print("partition at level", level, "is", partition_at_level(dendo, level))
    """
    if type(graph) != nx.Graph:
        raise TypeError("Bad graph type, use only non directed graph")

    # special case, when there is no link
    # the best partition is everyone in its community
    if graph.number_of_edges() == 0:
        part = dict([])
        for node in graph.nodes():
            part[node] = node
        # wrapped in a list: callers index the result as a list of levels
        return [part]

    current_graph = graph.copy()
    status = Status()
    status.init(current_graph, part_init)
    status_list = list()

    # the first level is always kept, whatever the modularity gain
    __one_level(current_graph, status)
    mod = __modularity(status)
    partition = __renumber(status.node2com)
    status_list.append(partition)
    current_graph = induced_graph(partition, current_graph)
    status.init(current_graph)

    # further levels are kept only while they improve modularity by __MIN
    while True:
        __one_level(current_graph, status)
        new_mod = __modularity(status)
        if new_mod - mod < __MIN:
            break
        partition = __renumber(status.node2com)
        status_list.append(partition)
        mod = new_mod
        current_graph = induced_graph(partition, current_graph)
        status.init(current_graph)
    return status_list[:]
def induced_graph(partition, graph):
    """Produce the graph where nodes are the communities

    there is a link of weight w between communities if the sum of the
    weights of the links between their elements is w

    Parameters
    ----------
    partition : dict
        a dictionary where keys are graph nodes and values the part the
        node belongs to
    graph : networkx.Graph
        the initial graph

    Returns
    -------
    g : networkx.Graph
        a networkx graph where nodes are the parts

    Examples
    --------
    >>> n = 5
    >>> g = nx.complete_graph(2*n)
    >>> part = dict([])
    >>> for node in g.nodes() :
    >>>     part[node] = node % 2
    >>> ind = induced_graph(part, g)
    >>> goal = nx.Graph()
    >>> goal.add_weighted_edges_from([(0,1,n*n),(0,0,n*(n-1)/2), (1, 1, n*(n-1)/2)])
    >>> nx.is_isomorphic(ind, goal)
    True
    """
    ret = nx.Graph()
    ret.add_nodes_from(partition.values())

    # edges(data=True) is available on both networkx 1.x and 2.x,
    # whereas edges_iter() only exists on 1.x
    for node1, node2, datas in graph.edges(data=True):
        weight = datas.get("weight", 1)
        com1 = partition[node1]
        com2 = partition[node2]
        # weight already accumulated between the two communities (0 if none)
        w_prec = ret.get_edge_data(com1, com2, {"weight": 0}).get("weight", 1)
        ret.add_edge(com1, com2, weight=w_prec + weight)
    return ret
def __renumber(dictionary):
    """Renumber the values of the dictionary from 0 to n

    Each distinct value receives the next free integer, in the order in
    which the values are first met while iterating over the keys. The input
    dictionary is left untouched; a renumbered copy is returned.
    """
    renumbered = dictionary.copy()
    fresh_ids = dict([])
    for key, old_value in dictionary.items():
        if fresh_ids.get(old_value, -1) == -1:
            # first time this value is met: give it the next free id
            fresh_ids[old_value] = len(fresh_ids)
        renumbered[key] = fresh_ids[old_value]
    return renumbered
def __load_binary(data):
    """Load binary graph as used by the cpp implementation of this algorithm

    Parameters
    ----------
    data : str or file object
        either the path of the binary file, or an already opened binary
        file object. A file opened here from a path is closed before
        returning; a file object given by the caller is left open.

    Returns
    -------
    graph : networkx.Graph
        graph whose nodes are range(number of nodes read from the file)

    Notes
    -----
    The file is read as unsigned ints ("I"): the number of nodes, then one
    cumulative degree per node, then the concatenated neighbor lists.
    """
    # `str` replaces types.StringType, which does not exist on Python 3
    opened_here = isinstance(data, str)
    if opened_here:
        data = open(data, "rb")
    try:
        reader = array.array("I")
        reader.fromfile(data, 1)
        num_nodes = reader.pop()

        reader = array.array("I")
        reader.fromfile(data, num_nodes)
        cum_deg = reader.tolist()
        # the last cumulative degree is the total number of link entries
        num_links = reader.pop()

        reader = array.array("I")
        reader.fromfile(data, num_links)
        links = reader.tolist()
    finally:
        if opened_here:
            data.close()

    graph = nx.Graph()
    graph.add_nodes_from(range(num_nodes))
    prec_deg = 0
    for index in range(num_nodes):
        last_deg = cum_deg[index]
        neighbors = links[prec_deg:last_deg]
        graph.add_edges_from([(index, int(neigh)) for neigh in neighbors])
        prec_deg = last_deg
    return graph
def __one_level(graph, status):
    """Compute one level of communities

    Repeatedly sweeps over the nodes of graph; each node is taken out of its
    community and inserted into the neighboring community giving the largest
    positive gain (or back into its own one). status is updated in place.
    The sweeps stop when no node moved, when __PASS_MAX sweeps were done, or
    when a sweep improved the modularity by less than __MIN.
    """
    sweeps_done = 0
    moved = True
    previous_mod = __modularity(status)
    latest_mod = previous_mod

    while moved and sweeps_done != __PASS_MAX:
        previous_mod = latest_mod
        moved = False
        sweeps_done += 1

        for node in graph.nodes():
            home = status.node2com[node]
            degc_totw = status.gdegrees.get(node, 0.) / (status.total_weight*2.)
            neigh_communities = __neighcom(node, graph, status)
            __remove(node, home, neigh_communities.get(home, 0.), status)

            # by default the node goes back where it came from
            target = home
            best_gain = 0
            for com, dnc in neigh_communities.items():
                gain = dnc - status.degrees.get(com, 0.) * degc_totw
                if gain > best_gain:
                    best_gain = gain
                    target = com

            __insert(node, target, neigh_communities.get(target, 0.), status)
            if target != home:
                moved = True

        latest_mod = __modularity(status)
        if latest_mod - previous_mod < __MIN:
            break
class Status :
    """
    To handle several data in one struct.

    Could be replaced by named tuple, but don't want to depend on python 2.6

    Attributes
    ----------
    node2com : dict
        node -> community of the node
    total_weight : number
        size of the graph as returned by graph.size(...)
    degrees : dict
        community -> sum of the degrees of its nodes
    gdegrees : dict
        node -> degree of the node
    internals : dict
        community -> weight of the links inside the community
    loops : dict
        node -> weight of the self-loop of the node (filled by init when no
        starting partition is given)
    """
    # Class-level defaults; __init__ gives every instance its own containers.
    node2com = {}
    total_weight = 0
    internals = {}
    degrees = {}
    gdegrees = {}

    def __init__(self) :
        self.node2com = dict([])
        self.total_weight = 0
        self.degrees = dict([])
        self.gdegrees = dict([])
        self.internals = dict([])
        self.loops = dict([])

    def __str__(self) :
        return ("node2com : " + str(self.node2com) + " degrees : "
            + str(self.degrees) + " internals : " + str(self.internals)
            + " total_weight : " + str(self.total_weight))

    def copy(self) :
        """Return a new Status holding copies of every container of this one"""
        new_status = Status()
        new_status.node2com = self.node2com.copy()
        new_status.internals = self.internals.copy()
        new_status.degrees = self.degrees.copy()
        new_status.gdegrees = self.gdegrees.copy()
        new_status.loops = self.loops.copy()
        new_status.total_weight = self.total_weight
        return new_status

    @staticmethod
    def _weighted_degree(graph, node) :
        """Weighted degree of node as a float, trying both keyword spellings"""
        try:
            return float(graph.degree(node, weighted = True))
        except TypeError:
            # this graph API does not accept the `weighted` keyword
            return float(graph.degree(node, weight = 'weight'))

    def init(self, graph, part = None) :
        """Initialize the status of a graph

        Without part, every node is put alone in its own community, numbered
        in the iteration order of graph.nodes(). With part (a dictionary
        node -> community), the communities of part are used.

        Raises ValueError on a negative node degree (no part given) or on a
        non positive edge weight (part given).
        """
        count = 0
        self.node2com = dict([])
        self.total_weight = 0
        self.degrees = dict([])
        self.gdegrees = dict([])
        self.internals = dict([])
        # drop the self-loops recorded for a previously initialised graph
        self.loops = dict([])
        try:
            self.total_weight = graph.size(weighted = True)
        except TypeError:
            # this graph API does not accept the `weighted` keyword
            self.total_weight = graph.size(weight='weight')
        if part is None :
            for node in graph.nodes() :
                self.node2com[node] = count
                deg = self._weighted_degree(graph, node)
                if deg < 0 :
                    raise ValueError("Bad graph type, use positive weights")
                self.degrees[count] = deg
                self.gdegrees[node] = deg
                self.loops[node] = float(graph.get_edge_data(node, node,
                                                 {"weight":0}).get("weight", 1))
                self.internals[count] = self.loops[node]
                count = count + 1
        else :
            for node in graph.nodes() :
                com = part[node]
                self.node2com[node] = com
                deg = self._weighted_degree(graph, node)
                self.degrees[com] = self.degrees.get(com, 0) + deg
                self.gdegrees[node] = deg
                inc = 0.
                for neighbor, datas in graph[node].items() :
                    weight = datas.get("weight", 1)
                    if weight <= 0 :
                        raise ValueError("Bad graph type, use positive weights")
                    if part[neighbor] == com :
                        if neighbor == node :
                            inc += float(weight)
                        else :
                            # an internal edge is met from both of its ends
                            inc += float(weight) / 2.
                self.internals[com] = self.internals.get(com, 0) + inc
def __neighcom(node, graph, status):
    """
    Compute the communities in the neighborhood of node in the graph given
    with the decomposition node2com

    Returns a dictionary community -> sum of the weights of the links going
    from node to that community. The self-loop of node is ignored and a link
    without "weight" counts for 1.
    """
    weight_by_community = {}
    for neighbor, attributes in graph[node].items():
        if neighbor == node:
            continue
        community = status.node2com[neighbor]
        link_weight = attributes.get("weight", 1)
        weight_by_community[community] = weight_by_community.get(community, 0) + link_weight
    return weight_by_community
def __remove(node, com, weight, status):
    """ Remove node from community com and modify status

    The degree of the node is subtracted from the degree of com, weight and
    the self-loop of the node are subtracted from the internal weight of
    com, and the node is flagged as belonging to no community (-1).
    """
    degree_left = status.degrees.get(com, 0.) - status.gdegrees.get(node, 0.)
    internal_left = status.internals.get(com, 0.) - weight - status.loops.get(node, 0.)
    status.degrees[com] = degree_left
    status.internals[com] = float(internal_left)
    status.node2com[node] = -1
def __insert(node, com, weight, status):
    """ Insert node into community and modify status

    The node is recorded as a member of com, its degree is added to the
    degree of com, and weight plus the self-loop of the node are added to
    the internal weight of com.
    """
    status.node2com[node] = com
    degree_now = status.degrees.get(com, 0.) + status.gdegrees.get(node, 0.)
    internal_now = status.internals.get(com, 0.) + weight + status.loops.get(node, 0.)
    status.degrees[com] = degree_now
    status.internals[com] = float(internal_now)
def __modularity(status):
    """
    Compute the modularity of the partition of the graph fastly using status
    precomputed

    Only the values stored in status are read (total_weight, node2com,
    internals, degrees). The result is 0. when the total weight is not
    positive.
    """
    links = float(status.total_weight)
    communities = set(status.node2com.values())
    if not links > 0:
        return 0.

    total = 0.
    for community in communities:
        inside = status.internals.get(community, 0.)
        deg = status.degrees.get(community, 0.)
        total = total + inside / links - ((deg / (2.*links))**2)
    return total
def __main():
    """Main function to mimic C++ version behavior

    Reads the binary graph named by the first command line argument, prints
    the modularity of its best partition on stderr, then one
    "node community" line per node on stdout. The usage text is printed when
    the argument is missing or the file cannot be read.
    """
    try:
        filename = sys.argv[1]
        graphfile = __load_binary(filename)
        partition = best_partition(graphfile)
        # print(..., file=...) replaces the Python 2 only `print >> stream`
        print(str(modularity(partition, graphfile)), file=sys.stderr)
        for elem, part in partition.items():
            print(str(elem) + " " + str(part))
    except (IndexError, IOError):
        print("Usage : ./community filename")
        print("find the communities in graph filename and display the dendogram")
        print("Parameters:")
        print("filename is a binary file as generated by the ")
        print("convert utility distributed with the C implementation")


if __name__ == "__main__":
    __main()
from gargantext.models.users import User
from gargantext.util.db import session
from django.core.mail import send_mail
from gargantext.settings import BASE_URL
def notify_owner(corpus,cooc_id,distance,bridgeness):
    '''
    Tell the owner of a corpus, by mail, that the graph is computed.

    corpus     :: Node whose user_id, name, parent_id and id are read
    cooc_id    :: id put in the "cooc_id" parameter of the graph link
    distance   :: value put in the "distance" parameter of the graph link
    bridgeness :: value put in the "bridgeness" parameter of the graph link

    Nothing is sent (a line is printed instead) when the owner cannot be
    found or has no email address.
    '''
    user = session.query(User).filter(User.id == corpus.user_id).first()

    if user is None :
        # .first() returns None when no user matches: nobody to notify
        print("No user found for id %s, graph notification not sent" % (corpus.user_id,))
        return

    message = '''
Bonjour,
votre graph vient de se terminer dans votre corpus intitulé:
%s
Vous pouvez accéder et renommer votre Graph à l'adresse:
http://%s/projects/%d/corpora/%d/explorer?cooc_id=%d&distance=%s&bridgeness=%d
Nous restons à votre disposition pour tout complément d'information.
Cordialement
--
L'équipe de Gargantext (CNRS)
''' % (corpus.name, BASE_URL, corpus.parent_id, corpus.id, cooc_id, distance, bridgeness)

    # truthiness covers both the empty string and a missing (None) address
    if user.email :
        send_mail('[Gargantext] Votre Graph est calculé'
                 , message
                 , 'team@gargantext.org'
                 , [user.email], fail_silently=False )
    else:
        print("User %s (%d), has no email" % (user.username, user.id) )
from traceback import format_tb
from gargantext.util.db import session
from gargantext.models.nodes import Node
from gargantext.util.http import APIView, APIException, \
JsonHttpResponse, requires_auth
from gargantext.constants import graph_constraints
from .graph import get_graph
from .utils import compress_graph, format_html
class Graph(APIView):
    '''
    REST part for graphs.
    '''

    @staticmethod
    def _default_list_id(corpus, typename):
        '''
        Id of the first child Node of corpus having the given typename
        ("MAPLIST" or "GROUPLIST").
        Raises ValueError when the corpus has no such node.
        '''
        row = ( session.query ( Node.id )
                       .filter( Node.typename  == typename
                              , Node.parent_id == corpus.id
                              )
                       .first()
              )
        # .first() gives None when nothing matches: test it before indexing
        if row is None or row[0] is None :
            raise ValueError("%s node needed for cooccurrences" % typename)
        return row[0]

    @staticmethod
    def _state_response(request, corpus, data):
        '''
        Build the JSON answer for a get_graph result carrying a "state" key
        (i.e. everything but the normal graph data).
        '''
        state = data["state"]

        if state == "saveOnly":
            # async data case
            link = "http://%s/projects/%d/corpora/%d/myGraphs" % (request.get_host(), corpus.parent_id, corpus.id)
            return JsonHttpResponse({
                'id': data["target_id"],
                'name': data["target_name"],
                'date': data["target_date"],
                'msg': '''Your graph is being saved:
%s
''' % format_html(link)
                }, status=200)

        if state == "corpusMin":
            # async data case
            link = "http://%s/projects/%d/" % (request.get_host(), corpus.parent_id)
            return JsonHttpResponse({
                'msg': '''Problem: your corpus is too small (only %d documents).
Solution: Add more documents (more than %d documents)
in order to get a graph.
You can manage your corpus here:
%s
''' % ( data["length"]
      , graph_constraints['corpusMin']
      , format_html(link)
      ),
                }, status=400)

        if state == "mapListError":
            # async data case
            link = 'http://%s/projects/%d/corpora/%d/terms' % (request.get_host(), corpus.parent_id, corpus.id)
            return JsonHttpResponse({
                'msg': '''Problem: your map list is too small (currently %d terms).
Solution: Add some terms (more than %d terms)
in order to get a graph.
You can manage your map terms here:
%s
''' % ( data["length"]
      , graph_constraints['mapList']
      , format_html(link)
      ),
                }, status=400)

        if state == "corpusMax":
            # async data case
            link = 'http://%s/projects/%d/corpora/%d/myGraphs' % (request.get_host(), corpus.parent_id, corpus.id)
            return JsonHttpResponse({
                'msg': '''Warning: Async graph generation since your corpus is
big (about %d documents).
Wait a while and discover your graph very soon.
Click on the link below and see your current graph
processing on top of the list:
%s
''' % (data["length"], format_html(link)),
                }, status=200)

        return JsonHttpResponse({
            'msg': '''Programming error.''',
            }, status=400)

    def get(self, request, project_id, corpus_id):
        '''
        Graph.get :: Get graph data as REST api.
        Get all the parameters first
        graph?field1=ngrams&field2=ngrams&
        graph?field1=ngrams&field2=ngrams&start=''&end=''

        NB  save new graph mode
            (option saveOnly=True without a cooc_id)
             can return the new cooc id in the json
             before counting + filling data in async

        Answers:
          401 (plain text) when the user is not authenticated
          200 with the compressed graph in the normal case
          200/400 with a 'msg' for the states reported by get_graph
          400 with a 'msg' for rejected fields, an empty graph, an
              unsupported format or any unexpected error
        Raises ValueError when the corpus has no MAPLIST / GROUPLIST node
        and none was given in the URL.
        '''
        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            # HttpResponse is not among the names imported by this module
            from django.http import HttpResponse
            return HttpResponse('Unauthorized', status=401)

        # Get the node we are working with
        corpus = session.query(Node).filter(Node.id==corpus_id).first()

        # TODO Parameters to save in hyperdata of the Node Cooc
        # WARNING: we could factorize the parameters as dict but ...
        #         ... it causes a bug in asynchronous function !
        # Check celery upgrades before.
        # Example (for the future):
        #        parameters = dict()
        #        parameters['field1'] = field1
        #        parameters['field2'] = field2

        # Get all the parameters in the URL
        cooc_id      = request.GET.get     ('cooc_id'   , None         )
        saveOnly     = request.GET.get     ('saveOnly'  , None         )

        field1       = str(request.GET.get ('field1'    , 'ngrams'     ))
        field2       = str(request.GET.get ('field2'    , 'ngrams'     ))

        start        = request.GET.get     ('start'     , None         )
        end          = request.GET.get     ('end'       , None         )

        mapList_id   = int(request.GET.get ('mapList'   , 0            ))
        groupList_id = int(request.GET.get ('groupList' , 0            ))

        threshold    = int(request.GET.get ('threshold' , 1            ))
        bridgeness   = int(request.GET.get ('bridgeness', -1           ))
        format_      = str(request.GET.get ('format'    , 'json'       ))
        type_        = str(request.GET.get ('type'      , 'node_link'  ))
        distance     = str(request.GET.get ('distance'  , 'conditional'))

        # Get default map List of corpus
        if mapList_id == 0 :
            mapList_id = self._default_list_id(corpus, "MAPLIST")

        # Get default value if no group list
        if groupList_id  == 0 :
            groupList_id = self._default_list_id(corpus, "GROUPLIST")

        # Declare accepted fields
        accepted_field1 = ['ngrams', 'journal', 'source', 'authors']
        accepted_field2 = ['ngrams',                               ]
        options         = ['start', 'end', 'threshold', 'distance', 'cooc_id' ]

        try:
            # Check if parameters are accepted
            if (field1 not in accepted_field1) or (field2 not in accepted_field2):
                # parameters error case
                return JsonHttpResponse({
                    'msg': '''Usage warning
Please choose only one field from each range:
- "field1": %s
- "field2": %s
- "options": %s''' % (accepted_field1, accepted_field2, options)
                    }, status=400)

            data = get_graph( corpus=corpus, cooc_id = cooc_id
                            , field1=field1           , field2=field2
                            , mapList_id = mapList_id , groupList_id = groupList_id
                            , start=start             , end=end
                            , threshold =threshold
                            , distance=distance       , bridgeness=bridgeness
                            , saveOnly=saveOnly
                            )

            # data :: Either (Dic Nodes Links) (Dic State Length)
            # data_test :: Either String Bool
            data_test = data.get("state", True)

            if data_test is not True:
                # All other cases than the normal one
                return self._state_response(request, corpus, data)

            # empty data case (only checked when both slots are present)
            nodes = data.get("nodes")
            links = data.get("links")
            if nodes is not None and links is not None \
               and len(nodes) < 2 and len(links) < 2:
                return JsonHttpResponse({
                    'msg': '''Empty graph warning
No cooccurences found in this corpus for the words of this maplist
(maybe add more terms to the maplist or increase the size of your
corpus ?)''',
                    }, status=400)

            # normal case --------------------------------
            if format_ == 'json':
                return JsonHttpResponse(
                         compress_graph(data),
                         status=200
                       )
            # --------------------------------------------

            # a view must always answer: any other format is refused
            return JsonHttpResponse({
                'msg': 'Unsupported format "%s" (only "json" is available)' % format_
                }, status=400)

        # for any other errors that we forgot to test
        except Exception as error:
            print(error)
            return JsonHttpResponse({
                'msg' : 'Unknown error (showing the trace):\n%s' % "\n".join(format_tb(error.__traceback__))
                }, status=400)
{% extends "pages/menu.html" %}
{% load staticfiles %}
{% block css %}
<link rel="stylesheet" href="{% static "lib/jquery/1.11.2/jquery-ui.css" %}" media="screen">
<link rel="stylesheet" href="{% static "lib/graphExplorer/libs/css2/freshslider.css" %}" media="screen">
<link rel="stylesheet" href="{% static "lib/graphExplorer/libs/css2/custom.css" %}" media="screen">
<link rel="stylesheet" href="{% static "lib/graphExplorer/libs/css2/sidebar.css" %}" media="screen">
<style>
#topPapers{
margin: 7px;
padding: 5px 0px 5px 5px;
}
#topPapers ul {
list-style-type: none;
}
#ctlzoom {
position: absolute;
left: 1%;
width: 7%;
bottom: auto;
list-style: none;
padding: 0;
margin-top: 1em;
}
#category0 ul li { margin: 0 12px 12px 0; }
#category1 ul li { margin: 0 12px 12px 0; }
/* will be adjusted later to match rendered height of dafixedtop from menu template */
#topspace {
height: 90px;
}
</style>
<!--
<link rel="stylesheet" href="{% static "lib/graphExplorer/libs/bootswatch/css/bootswatch.css" %}">
<link rel="stylesheet" href="{% static "lib/graphExplorer/libs/css2/font.css" %}" type="text/css">
-->
</head>
{% endblock %}
{% block content %}
<!-- this is a vertical spacer used to start everything below dafixedtop -->
<div id="topspace">
</div>
<!-- this is the tweakbar -->
<div class="container-fluid navbar-default">
<div id="defaultop" class="row">
<div id="left" class="col-sm-8 col-md-8 col-lg-7">
<ul class="nav navbar-nav">
<li class="basicitem"><a class="help" style="display:inline-block;padding:15px;" id="edition">
</a></li>
<!--
<li>
<a>
<select id="aselector" onchange="console.log('salut monde')" class="selectpicker" data-style="btn btn-success btn-sm" data-width="auto">
<option value="Document" selected>Scholars</option>
<option value="NGram">Keywords</option>
</select>
</a>
</li>
-->
<li class='basicitem'><a>
<button type="button" id="changetype" class="btn btn-success btn-sm">Change Type</button>
</a></li>
<!-- <li class='basicitem'><a>
<button type="button" id="changelevel" class="btn btn-info btn-sm" disabled>Change Level</button>
</a></li> -->
<!-- TODO fix: category0 -> category1 switching -->
<li class='basicitem'><a>
<!-- Create a subgraph -->
<ul id="category0" class="nav">
<li><small class="help" id="nodeweight">Nodes</small> <div id="slidercat0nodesweight" class="settingslider"></div></li>
<li><small class="help" id="edgeweight">Edges</small> <div id="slidercat0edgesweight" class="settingslider"></div></li>
</ul>
</a></li>
<!-- <li class='basicitem'><a>
Compare<br>
<img width="30" title="Compare with other corpus!" onclick="GetUserPortfolio(); $('#corpuses').modal('show');" src="{% static "img/INTER.png" %}"></img>
</a></li> -->
<li class='basicitem'><a>
<!-- TODO fix: category0 -> category1 switching -->
<small class="help" id="labelsize">Label size<br></small>
<div id="slidercat0nodessize" class="settingslider"></div>
</a></li>
<li class='basicitem'><a >
<small class='help' id="colorgraph">Colors<br></small>
<div class="colorgraph_div"></div>
</a></li>
<li class='basicitem'><a>
<small class='help' id="sizegraph">Sizes<br></small>
<div class="sizegraph_div"></div>
</a></li>
<li class='basicitem'><a>
<small class='help' id="selectorsize">Selector size<br></small>
<div id="unranged-value" class="settingslider"></div>
</a></li>
<!--
<li>
<a>
<button type="button" onclick="partialGraph.stopForceAtlas2();" class="btn btn-sm">wu</button>
</a>
</li>
-->
</ul>
</div><!-- /div#left -->
<div id="right" class="col-sm-3 col-md-3 col-lg-4">
<div class='row' id="searchnav">
<div class="col-sm-1 col-md-1 col-lg-1" style="font-size:75%; line-height:90%; padding:0">
<input id="checkboxdiv" onclick="alertCheckBox(this);"
title="Add next search results to current selection"
class="btn btn-info"
type="checkbox"></input>
<small class='help' id="addgraph">Add</small>
</div>
<!-- the smaller the viewport, the larger the relative search box size -->
<div class="col-sm-11 col-md-11 col-lg-11" >
<div id="search_input_group" class="input-group input-group-sm">
<span class="input-group-btn">
<button id="searchbutton"
title="Search the topic in the map"
class="btn btn-info"
type="button">
<span class="glyphicon glyphicon-search">
</span>
</button>
</span>
<!-- ########## THE SEARCH BAR ########## -->
<input id="searchinput"
type="text"
class="form-control"
placeholder="Search" />
<!-- #################################### -->
<span id="search" class="input-group-btn help">
</span>
</div>
<!-- messages below the search bar -->
<div id="unused_msg"></div>
</div>
</div>
<!--
<ul class="nav navbar-nav navbar-right">
<li><a href="#">Link</a></li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown">Dropdown <b class="caret"></b></a>
<ul class="dropdown-menu">
<li><a href="#">Action</a></li>
<li><a href="#">Another action</a></li>
<li><a href="#">Something else here</a></li>
<li class="divider"></li>
<li><a href="#">Separated link</a></li>
</ul>
</li>
</ul>
-->
</div>
</div><!-- /.row#defaultop -->
</div><!-- /.container.fluid -->
<div id="graphid" style="display:none">{{graphurl}}</div>
<input type="hidden" id="maplist_id" value="{{ maplist_id }}"></input>
<div id="jquerytemplatenb" style="display:none">{{user.id}}</div>
<div id="wrapper">
<div id="zonecentre">
<!-- Page content -->
<div id="sigma-example"></div>
<div style="visibility: hidden;" id="sigma-othergraph"></div>
<img id="semLoader" style="position:absolute; top:50%; left:40%; width:80px;" src="{% static "img/loading-bar.gif" %}"></img>
<ul id="ctlzoom">
<li >
<a style="display:inline-block;padding:5px;" id="exploration" class="help"></a>
</li>
<!-- <div class="content-header">
<button id="menu-toggle">X</button>
</div> -->
<!--
<li>
<a href="#" id="geomapicon" onclick="$('#geomapmodal').modal('show'); callGeomap();">
<img title="World Map Distribution" width="34px" src="{% static "img/world.png" %}" ></img>
</a>
</li>
-->
<li>
<a href="#" id="edgesButton"> </a>
</li>
<li>
<a href="#" id="lensButton"> </a>
</li>
<li>
<a href="#" id="zoomPlusButton" title="S'approcher"> </a>
</li>
<li id="zoomSliderzone">
<div id="zoomSlider"></div>
</li>
<li>
<a href="#" id="zoomMinusButton" title="S'éloigner"> </a>
</li>
<li>
<a href="#" id="snapicon" onclick="saveGraphIMG();" >
<img title="Take a photo!" width="34px" src="{% static "img/camera.png" %}" ></img>
</a>
</li>
<li>
<a href="#" id="saveAs">
<img width="30px" title="Save As..." src="{% static "img/save.png" %}" ></img>
</a>
</li>
</ul>
</div>
<!-- Sidebar -->
<div id="sidecolumn">
<div id="unfold" class="help">
<!-- button to be replaced by handle when sidecolumn is hidden -->
<a href="#" id="aUnfold" class="rightarrow"></a>
</div>
<div id="tips"></div>
<div id="names"></div>
<div id="ngrams_actions" class="help"></div>
<br>
<div id="tab-container" class='tab-container' style="display: none;">
<ul class='etabs'>
<li id="taboppos" class='tab'><a href="#tabs1">Opposite-Neighbors</a></li>
<li id="tabneigh" class='tab'><a href="#tabs2">Neighbors</a></li>
</ul>
<div class='panel-container'>
<div id="tabs1">
<div id="opossiteNodes"></div>
</div>
<div id="tabs2">
<div id="sameNodes"></div>
</div>
</div>
</div>
<!-- <div id="topPapers"></div> -->
<div id="tab-container-top" class='tab-container' style="display: none;">
<ul class='etabs'>
<li id="tabmed" class='tab active'><a id="pubs-legend" href="#tabs3">Pubs</a></li>
<li id="tabgps" class='tab'><a href="#tabs3"></a></li>
</ul>
<div class='panel-container'>
<div id="tabs3">
<div id="topPapers"></div>
</div>
<div id="tabs4">
<div id="topProposals"></div>
</div>
</div>
</div>
<div id="information"></div>
</div>
</div>
<div id="savemodal" class="modal fade">
<div class="modal-dialog">
<div class="modal-content">
<div class="modal-header">
<button type="button" class="close" data-dismiss="modal" aria-hidden="true">×</button>
<h4 class="modal-title">Exporting GEXF file</h4>
</div>
<div class="modal-body form-horizontal">
What do you want to save?:
<div class="form-group">
<label class="col-lg-2 control-label"> </label>
<div class="col-lg-10">
<div class="radio">
<label>
<input type="radio" name="optionsRadios" id="fullgraph" value="option1" checked="true">
Full Graph
</label>
</div>
<div class="radio">
<label>
<input type="radio" name="optionsRadios" id="visgraph" value="option2">
Visible Graph
</label>
</div>
</div>
</div>
</div>
<div class="modal-body form-horizontal">
Which attributes do you want to keep?:
<div class="form-group">
<label class="col-lg-2 control-label"> </label>
<div class="col-lg-10">
<div class="checkbox">
<label>
<input type="checkbox" name="optionsRadios" id="check_size" value="option1">
Size
</label>
</div>
<div class="checkbox">
<label>
<input type="checkbox" name="optionsRadios" id="check_color" value="option2">
Color
</label>
</div>
</div>
</div>
</div>
<div class="modal-footer">
<button id="closesavemodal" type="button" class="btn btn-default" data-dismiss="modal">Close</button>
<button type="button" class="btn btn-primary" onclick="saveGraph();">Save to local file</button>
<button type="button" class="btn btn-primary" onclick="saveInServer();">Save to cloud</button>
</div>
<div class="modal-footer" id="savemsg"></div>
</div>
</div>
</div>
<div id="msgmodal" class="modal fade">
<div class="modal-dialog">
<div class="modal-content">
<div class="modal-header">
<button type="button" class="close" data-dismiss="modal" aria-hidden="true">×</button>
<h4 id="msgtitle" class="modal-title">
Graph generation message
</h4>
</div>
<div class="modal-body form-horizontal">
<div id="msgcontent"></div>
</div>
<!--
<div class="modal-footer">
<a class="btn btn-default" href="http://gargantext.org" >Ok</a>
</div>
-->
</div>
</div>
</div>
<div id="corpuses" class="modal fade">
<div class="modal-dialog">
<div class="modal-content">
<div class="modal-header">
<button type="button" class="close" data-dismiss="modal" aria-hidden="true">×</button>
<h3 class="modal-title">
<span class="glyphicon glyphicon-transfer" aria-hidden="true"></span>
Corpus Comparison Tool
</h3>
</div>
<div class="modal-body form-horizontal">
<h4>
<span class="glyphicon glyphicon-hand-right" aria-hidden="true"></span>
Choose one corpus among your projects:
</h4>
<div style="color:red;" id="selected_corpus"></div>
<div id="user_portfolio">
</div>
<div class="modal-footer">
<button id="closecorpuses" type="button" class="btn btn-default" data-dismiss="modal">
<span class="glyphicon glyphicon-remove" aria-hidden="true" ></span>
Close
</button>
<button id="add_corpus_tab" type="button" class="btn btn-primary" disabled onclick='printCorpuses();'>
<span class="glyphicon glyphicon-ok" aria-hidden="true" ></span>
Compare
</button>
</div>
</div>
</div>
</div>
<div id="modalloader" class="modal fade">
<div id="loader" class="loader">
<img src="{% static "img/ajax-loader.gif" %}" ></img>
</div>
<div id="closeloader" data-dismiss="modal"></div>
</div>
<script src="{% static "lib/jquery/1.11.1/jquery.min.js" %}" type="text/javascript"></script>
<script src="{% static "lib/jquery/1.11.2/jquery-ui.js" %}" type="text/javascript"></script>
<script src="{% static "lib/graphExplorer/libs/jquery/jquery.ba-dotimeout.min.js" %}" type="text/javascript"></script>
<script src="{% static "lib/graphExplorer/libs/jquery/jquery.mousewheel.min.js" %}" type="text/javascript"></script>
<script type="text/javascript" src="{% static "lib/graphExplorer/libs/freshslider.1.0.js" %}"></script>
<script type="text/javascript" src="{% static "lib/graphExplorer/libs/readmore.js" %}"></script>
<script type="text/javascript" src="{% static "lib/graphExplorer/libs/jquery/jquery.easytabs.min.js" %}"></script>
<script src="{% static "lib/graphExplorer/libs/bootstrap/js/bootstrap-modal.js" %}" type="text/javascript"></script>
<script src="{% static "lib/graphExplorer/libs/bootstrap/js/bootstrap-hover-dropdown.min.js" %}" type="text/javascript"></script>
<script src="{% static "lib/graphExplorer/tinawebJS/globalUtils.js" %}" type="text/javascript"></script>
<script src="{% static "lib/graphExplorer/tinawebJS/plugins/jLouvain.js" %}" type="text/javascript"></script>
<script src="{% static "lib/graphExplorer/tinawebJS/sigma.min.js" %}" type="text/javascript" language="javascript"></script>
<script src="{% static "lib/graphExplorer/tinawebJS/sigma.forceatlas2.js" %}" type="text/javascript" language="javascript"></script>
<script src="{% static "lib/graphExplorer/settings_explorerjs.js" %}" type="text/javascript" language="javascript"></script>
<script src="{% static "lib/graphExplorer/tinawebJS/sigma.parseCustom.js" %}" type="text/javascript" language="javascript"></script>
<script src="{% static "lib/graphExplorer/extras_explorerjs.js" %}" type="text/javascript" language="javascript"></script>
<script src="{% static "lib/graphExplorer/tinawebJS/sigmaUtils.js" %}" type="text/javascript" language="javascript"></script>
<script src="{% static "lib/graphExplorer/tinawebJS/methods.js" %}" type="text/javascript" language="javascript"></script>
<script src="{% static "lib/graphExplorer/tinawebJS/minimap.js" %}" type="text/javascript" language="javascript"></script>
<script src="{% static "lib/graphExplorer/tinawebJS/enviroment.js" %}" type="text/javascript" language="javascript"></script>
<script src="{% static "lib/graphExplorer/tinawebJS/asyncFA2.js" %}" type="text/javascript" language="javascript"></script>
<script src="{% static "lib/graphExplorer/tinawebJS/Tinaweb.js" %}" type="text/javascript" language="javascript"></script>
<script src="{% static "lib/graphExplorer/tinawebJS/main.js" %}" type="text/javascript" language="javascript"></script>
<script type="text/javascript">
// set exact height of topspace
// (replaces the fixed 90px of the #topspace CSS rule with the height
//  measured on #dafixedtop once the page is rendered)
var dafixedtopHeight = $('#dafixedtop').height() ;
$('#topspace').height(dafixedtopHeight) ;
/**
 * Open `url` in the secondary browser window named 'popUpWindow'
 * (800x700, placed at 10,10, resizable and scrollable, with toolbar,
 * menubar, location bar, directories and status bar turned off).
 *
 * The handle returned by window.open() is stored in `popupWindow`.
 */
function newPopup(url) {
    // Feature list handed to window.open(), joined with commas.
    var windowFeatures = [
        'height=700',
        'width=800',
        'left=10',
        'top=10',
        'resizable=yes',
        'scrollbars=yes',
        'toolbar=no',
        'menubar=no',
        'location=no',
        'directories=no',
        'status=no'
    ].join(',');
    popupWindow = window.open(url, 'popUpWindow', windowFeatures);
}
// Initialise the tabs of #tab-container through the jQuery `easytabs` plugin.
// NOTE(review): updateHash:false presumably prevents tab switches from
// rewriting the URL hash -- confirm against the EasyTabs documentation.
$('#tab-container').easytabs({updateHash:false});
// $('#tab-container-top').easytabs({updateHash:false});
/**
 * Ask the server API to save the graph currently displayed, and report the
 * outcome in the #savemsg element.
 *
 * The API URL is the current page URL (path + query string) prefixed with
 * '/api', on the same scheme and host as the page:
 *   - if the URL already ends with "explorer?cooc_id=<number>", the graph is
 *     considered already saved and only a link to its API URL is shown;
 *   - otherwise an ajax GET is sent with the extra parameter saveOnly=True
 *     and the `msg` field of the response is displayed.
 */
function saveInServer() {
    // send an ajax GET to server API with same params as in URL
    var currentPath = window.location.pathname + window.location.search.replace(/#$/,'')
    console.warn("currentPath", currentPath)

    // Reuse the page's own scheme: a hard-coded 'http://' would be blocked
    // as mixed content when the page itself is served over https.
    var apiBase = window.location.protocol + '//' + window.location.host + '/api'

    // current path shows it's already in db
    if (currentPath.match(/\/explorer\?cooc_id=[0-9]+$/)) {
        var supposedLink = apiBase + currentPath
        $('#savemsg').html(
            'Your graph has its own "cooc_id" so it must be already saved in the cloud at:'
            + '<a href="' + supposedLink + '">' + supposedLink + '</a>'
        )
        return
    }

    // current path has normal params (field1 field2, etc.)
    $('#savemsg').html('Saving in server...')

    // Start the query string with '?' when the page URL has none, so that
    // saveOnly is always sent as a real parameter.
    var separator = (currentPath.indexOf('?') === -1) ? '?' : '&'
    var apiPath = apiBase + currentPath + separator + 'saveOnly=True'

    $.ajax({
        url: apiPath,
        type: 'GET',
        success: function(response) {
            // the return message (may be absent: do not crash on it)
            var msg = (response && typeof response.msg === 'string')
                    ? response.msg
                    : 'Request completed.'
            $('#savemsg').html(msg.replace(/\n/g, '<br/>'))
        },
        error: function(result) {
            $('#savemsg').html('Saving failed !')
            console.warn('saveInServer problem:', result);
        }
    });
}
</script>
{% endblock %}
from django.conf.urls import url
# Module "Graph Explorer"
from .rest import Graph
from .views import explorer, myGraphs
from .intersection import intersection
# TODO : factor urls
# url will have this pattern:
# ^explorer/$corpus_id/view
# ^explorer/$corpus_id/data.json
# ^explorer/$corpus_id/intersection
# GET ^api/projects/(\d+)/corpora/(\d+)/explorer$ -> data in json format
# Routes of the Graph Explorer module: the explorer page and the list of
# graphs of a corpus (both under a project/corpus pair), plus the
# intersection of graphs.
urlpatterns = [
    url(r'^projects/(\d+)/corpora/(\d+)/explorer$', explorer),
    url(r'^projects/(\d+)/corpora/(\d+)/myGraphs$', myGraphs),
    url(r'^explorer/intersection/(\w+)$', intersection),
]
def compress_graph(graphdata):
    """
    Shrink a graph payload, in place, before sending it over the network.

    graphdata is a dict with 2 slots:
      "nodes": [{"id":4103, "type":"terms", "attributes":{"clust_default": 0}, "size":29, "label":"regard"},...]
      "links": [{"t": 998,"s": 768,"w": 0.0425531914893617},...]

    The size is reduced by:
      - keeping only 3 decimals for the weight of each link
      - using shorter names for node properties (eg: s/clust_default/cl/)
      - dropping the node type when it is "terms" (the default type)

    Result format:
      "nodes": [{"id":4103, "at":{"cl": 0}, "s":29, "lb":"regard"},...]
      "links": [{"t": 998,"s": 768,"w": 0.043},...]

    The same dict object is modified and returned. A KeyError is raised if a
    link has no 'w' or a node lacks one of 'label', 'attributes',
    'attributes.clust_default', 'size' or 'type'.
    """
    for link in graphdata['links']:
        # round() keeps the weight a number, as in the documented format;
        # format(..., '.3f') would turn it into a (longer) quoted string.
        link['w'] = round(link['w'], 3)

    for node in graphdata['nodes']:
        node['lb'] = node.pop('label')

        attributes = node.pop('attributes')
        attributes['cl'] = attributes.pop('clust_default')
        node['at'] = attributes

        node['s'] = node.pop('size')

        # "terms" is the default type of our format: only other types are kept
        node_type = node.pop('type')
        if node_type != "terms":
            node['t'] = node_type

    return graphdata
def format_html(link):
    """
    Wrap `link` in an html anchor of class 'msglink' (the link is used both
    as target and as visible text), for use in our json messages.
    """
    target = str(link)
    return "<a class='msglink' href='" + target + "'>" + target + "</a>"
from gargantext.util.http import *
from gargantext.util.db import *
from gargantext.util.db_cache import cache
from gargantext.models import *
from gargantext.constants import *
from gargantext.settings import *
from gargantext.constants import USER_LANG
from datetime import datetime
from gargantext.views.pages.main import get_user_params
@requires_auth
def explorer(request, project_id, corpus_id):
    '''
    Render the graph explorer page (template 'explorer.html') of a corpus.

    The explorer is also known as TinaWebJS and uses SigmaJS. Nodes are
    ngrams (from title or abstract or journal name), links represent a
    proximity measure. The graph data itself is fetched by the page in
    RESTful mode (see rest.py).

    Raises Http404 when the corpus does not exist and answers
    HttpResponseForbidden when the user does not own it.
    '''
    graph_corpus = cache.Node[corpus_id]
    current_user = cache.User[request.user.id]

    # access control
    if graph_corpus is None:
        raise Http404()
    if not current_user.owns(graph_corpus):
        return HttpResponseForbidden()

    # id of the corpus' map list, needed by the page for modifications
    maplist_id = graph_corpus.children(typename="MAPLIST").first().id

    # the project is only used for project.id in corpusBannerTop
    parent_project = cache.Node[project_id]

    template_context = {
        'debug': settings.DEBUG,
        'request': request,
        'user': request.user,
        'date': datetime.now(),
        'project': parent_project,
        'corpus': graph_corpus,
        'maplist_id': maplist_id,
        'view': 'graph',
        'user_parameters': get_user_params(request.user),
        'languages': USER_LANG,
    }
    return render(
        request=request,
        template_name='explorer.html',
        context=template_context,
    )
@requires_auth
def myGraphs(request, project_id, corpus_id):
    '''
    List all the graphs of a corpus (template 'pages/corpora/myGraphs.html').

    Each graph is one COOCCURRENCES child node of the corpus; the graph
    itself is saved in the hyperdata of that node. For each of them an
    approximate number of graph nodes is computed and passed to the template
    as `coocs_count` (dict: cooccurrence node id -> count).

    Raises Http404 when the corpus does not exist and answers
    HttpResponseForbidden when the user does not own it (same access control
    as the `explorer` view).
    '''
    user = cache.User[request.user.id]

    # we pass our corpus
    corpus = cache.Node[corpus_id]

    # security check: graphs must only be listed for the owner of the corpus
    if corpus is None:
        raise Http404()
    if not user.owns(corpus):
        return HttpResponseForbidden()

    # and the project just for project.id in corpusBannerTop
    project = cache.Node[project_id]

    coocs = corpus.children('COOCCURRENCES', order=True).all()

    coocs_count = dict()
    for cooc in coocs:
        # FIXME : approximate number of nodes (not exactly what user sees in explorer)
        # Need to be connected with Graph Clustering
        cooc_nodes = (session.query(Ngram.id, func.count(Ngram.id))
                             .join(NodeNgramNgram, NodeNgramNgram.ngram1_id == Ngram.id)
                             .filter(NodeNgramNgram.node_id == cooc.id)
                             .filter(NodeNgramNgram.weight >= 1)
                             .group_by(Ngram.id)
                             .all()
                     )
        # keep only the ngrams appearing in more than one cooccurrence row
        coocs_count[cooc.id] = sum(1 for _, occurrences in cooc_nodes if occurrences > 1)

    print("coocs_count a posteriori", coocs_count)

    return render(
        template_name='pages/corpora/myGraphs.html',
        request=request,
        context={
            'debug': settings.DEBUG,
            'request': request,
            'user': request.user,
            'date': datetime.now(),
            'project': project,
            'resourcename': get_resource_by_name(corpus),
            'corpus': corpus,
            'view': 'myGraph',
            'coocs': coocs,
            'coocs_count': coocs_count,
            'user_parameters': get_user_params(request.user),
            'languages': USER_LANG,
        },
    )
......@@ -43,7 +43,6 @@ CELERYBEAT_SCHEDULER = 'djcelery.schedulers.DatabaseScheduler'
CELERY_IMPORTS = (
"gargantext.util.toolchain",
"gargantext.util.crawlers",
"gargantext.graph.graph",
"gargantext.moissonneurs.pubmed",
"gargantext.moissonneurs.istex",
"gargantext.util.ngramlists_tools",
......@@ -65,7 +64,6 @@ INSTALLED_APPS = [
'rest_framework',
'djcelery',
'gargantext.annotations',
'gargantext.graph',
'gargantext.moissonneurs',
'gargantext',
]
......
......@@ -5,7 +5,6 @@ Views are shared between these modules:
- `pages`, to present HTML views to the user
- `contents`, for Python-generated contents
- `annotations`, to annotate local context of a corpus (as global context)
- `graph explorer`, to explore graphs
"""
from django.conf.urls import include, url
......@@ -21,9 +20,6 @@ import gargantext.views.pages.urls
from gargantext.annotations import urls as annotations_urls
from gargantext.annotations.views import main as annotations_main_view
# Module for graph service
import gargantext.graph.urls
# Module Scrapers
import gargantext.moissonneurs.urls
......@@ -34,9 +30,6 @@ urlpatterns = [ url(r'^admin/' , admin.site.urls
, url(r'^favicon.ico$', Redirect.as_view( url=static.url('favicon.ico')
, permanent=False), name="favicon" )
# Module Graph
, url(r'^' , include( gargantext.graph.urls ) )
# Module Annotation
# tempo: unchanged doc-annotations routes --
, url(r'^annotations/', include( annotations_urls ) )
......
......@@ -10,7 +10,7 @@ from . import ngrams
from . import metrics
from . import ngramlists
from . import analytics
from gargantext.graph.rest import Graph
urlpatterns = [ url(r'^nodes$' , nodes.NodeListResource.as_view())
, url(r'^nodes/(\d+)$' , nodes.NodeResource.as_view())
......@@ -37,14 +37,6 @@ urlpatterns = [ url(r'^nodes$' , nodes.NodeListResource.as_view()
# Metrics
, url(r'^projects/(\d+)/corpora/(\d+)/metrics$', metrics.CorpusMetrics.as_view())
# GraphExplorer
, url(r'^projects/(\d+)/corpora/(\d+)/explorer$', Graph.as_view())
# data for graph explorer (json)
# GET /api/projects/43198/corpora/111107/explorer?
# Corresponding view is : /projects/43198/corpora/111107/explorer?
# Parameters (example):
# explorer?field1=ngrams&field2=ngrams&distance=conditional&bridgeness=5&start=1996-6-1&end=2002-10-5
# Ngrams
, url(r'^ngrams/?$' , ngrams.ApiNgrams.as_view())
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment