Commit a5449bd7 authored by delanoe

[URLS] Cleaning the urls, adding goals to factor graph urls (+ fix a...

[URLS] Cleaning the urls, adding goals to factor graph urls (+ fix a deprecated function in distances.py)
parent 41486f88
...@@ -8,9 +8,10 @@ Views are shared between these modules: ...@@ -8,9 +8,10 @@ Views are shared between these modules:
- `graph explorer`, to explore graphs - `graph explorer`, to explore graphs
""" """
from django.conf.urls import include, url from django.conf.urls import include, url
from django.contrib import admin
from django.contrib import admin from django.views.generic.base import RedirectView as Redirect
from django.contrib.staticfiles.storage import staticfiles_storage as static
import gargantext.views.api.urls import gargantext.views.api.urls
import gargantext.views.pages.urls import gargantext.views.pages.urls
...@@ -22,28 +23,26 @@ from annotations.views import main as annotations_main_view ...@@ -22,28 +23,26 @@ from annotations.views import main as annotations_main_view
# Module "Graph Explorer" # Module "Graph Explorer"
#from graphExplorer import urls as graphExplorer_urls #from graphExplorer import urls as graphExplorer_urls
from graphExplorer.rest import Graph import graphExplorer.urls
from graphExplorer.views import explorer
# Module Scrapers # Module Scrapers
from scrapers import urls as scrapers_urls import scrapers.urls
urlpatterns = [ url(r'^admin/' , admin.site.urls ) urlpatterns = [ url(r'^admin/' , admin.site.urls )
, url(r'^api/' , include( gargantext.views.api.urls ) ) , url(r'^api/' , include( gargantext.views.api.urls ) )
, url(r'^' , include( gargantext.views.pages.urls ) ) , url(r'^' , include( gargantext.views.pages.urls ) )
, url(r'^favicon.ico$', Redirect.as_view( url=static.url('favicon.ico')
, permanent=False), name="favicon")
# Module "Graph Explorer"
, url(r'^' , include( graphExplorer.urls ) )
# Module Annotation # Module Annotation
# tempo: unchanged doc-annotations routes -- # tempo: unchanged doc-annotations routes --
, url(r'^annotations/', include( annotations_urls ) ) , url(r'^annotations/', include( annotations_urls ) )
, url(r'^projects/(\d+)/corpora/(\d+)/documents/(\d+)/$', annotations_main_view) , url(r'^projects/(\d+)/corpora/(\d+)/documents/(\d+)/$', annotations_main_view)
# Module "Graph Explorer" # Module Scrapers
, url(r'^projects/(\d+)/corpora/(\d+)/explorer$', explorer ) , url(r'^scrapers/' , include( scrapers.urls ) )
, url(r'^projects/(\d+)/corpora/(\d+)/graph$' , Graph.as_view())
# to be removed:
, url(r'^projects/(\d+)/corpora/(\d+)/node_link.json$', Graph.as_view())
#url(r'^projects/(\d+)/corpora/(\d+)/explorer$', include(graphExplorer.urls))
# Scrapers module
, url(r'^scrapers/' , include( scrapers_urls ) )
] ]
...@@ -20,7 +20,7 @@ def clusterByDistances( cooc_id ...@@ -20,7 +20,7 @@ def clusterByDistances( cooc_id
''' '''
do_distance :: Int -> (Graph, Partition, {ids}, {weight}) do_distance :: Int -> (Graph, Partition, {ids}, {weight})
''' '''
# implicit global session # implicit global session
authorized = ['conditional', 'distributional', 'cosine'] authorized = ['conditional', 'distributional', 'cosine']
...@@ -35,7 +35,7 @@ def clusterByDistances( cooc_id ...@@ -35,7 +35,7 @@ def clusterByDistances( cooc_id
Cooc = aliased(NodeNgramNgram) Cooc = aliased(NodeNgramNgram)
query = session.query(Cooc).filter(Cooc.node_id==cooc_id).all() query = session.query(Cooc).filter(Cooc.node_id==cooc_id).all()
for cooc in query: for cooc in query:
matrix[cooc.ngram1_id][cooc.ngram2_id] = cooc.weight matrix[cooc.ngram1_id][cooc.ngram2_id] = cooc.weight
matrix[cooc.ngram2_id][cooc.ngram1_id] = cooc.weight matrix[cooc.ngram2_id][cooc.ngram1_id] = cooc.weight
...@@ -60,8 +60,8 @@ def clusterByDistances( cooc_id ...@@ -60,8 +60,8 @@ def clusterByDistances( cooc_id
# top generic or specific # top generic or specific
m = ( xs - ys) / (2 * (x.shape[0] - 1)) m = ( xs - ys) / (2 * (x.shape[0] - 1))
n = n.sort(inplace=False) n = n.sort_index(inplace=False)
m = m.sort(inplace=False) m = m.sort_index(inplace=False)
nodes_included = 500 #int(round(size/20,0)) nodes_included = 500 #int(round(size/20,0))
#nodes_excluded = int(round(size/10,0)) #nodes_excluded = int(round(size/10,0))
...@@ -88,7 +88,7 @@ def clusterByDistances( cooc_id ...@@ -88,7 +88,7 @@ def clusterByDistances( cooc_id
G = nx.from_numpy_matrix(np.matrix(matrix_filtered)) G = nx.from_numpy_matrix(np.matrix(matrix_filtered))
G = nx.relabel_nodes(G, dict(enumerate([ ids[id_][1] for id_ in list(xx.columns)]))) G = nx.relabel_nodes(G, dict(enumerate([ ids[id_][1] for id_ in list(xx.columns)])))
elif distance == 'cosine': elif distance == 'cosine':
scd = defaultdict(lambda : defaultdict(int)) scd = defaultdict(lambda : defaultdict(int))
...@@ -101,16 +101,16 @@ def clusterByDistances( cooc_id ...@@ -101,16 +101,16 @@ def clusterByDistances( cooc_id
if i != j and k != i and k != j if i != j and k != i and k != j
] ]
) )
denominator = sqrt( denominator = sqrt(
sum([ sum([
matrix[i][k] matrix[i][k]
for k in matrix.keys() for k in matrix.keys()
if k != i and k != j #and matrix[i][k] > 0 if k != i and k != j #and matrix[i][k] > 0
]) ])
* *
sum([ sum([
matrix[i][k] matrix[i][k]
for k in matrix.keys() for k in matrix.keys()
if k != i and k != j #and matrix[i][k] > 0 if k != i and k != j #and matrix[i][k] > 0
]) ])
...@@ -127,7 +127,7 @@ def clusterByDistances( cooc_id ...@@ -127,7 +127,7 @@ def clusterByDistances( cooc_id
G = nx.DiGraph() G = nx.DiGraph()
G.add_edges_from( G.add_edges_from(
[ [
(i, j, {'weight': scd[i][j]}) (i, j, {'weight': scd[i][j]})
for i in scd.keys() for j in scd.keys() for i in scd.keys() for j in scd.keys()
if i != j and scd[i][j] > minmax and scd[i][j] > scd[j][i] if i != j and scd[i][j] > minmax and scd[i][j] > scd[j][i]
] ]
...@@ -138,16 +138,16 @@ def clusterByDistances( cooc_id ...@@ -138,16 +138,16 @@ def clusterByDistances( cooc_id
elif distance == 'distributional': elif distance == 'distributional':
mi = defaultdict(lambda : defaultdict(int)) mi = defaultdict(lambda : defaultdict(int))
total_cooc = x.sum().sum() total_cooc = x.sum().sum()
for i in matrix.keys(): for i in matrix.keys():
si = sum([matrix[i][j] for j in matrix[i].keys() if i != j]) si = sum([matrix[i][j] for j in matrix[i].keys() if i != j])
for j in matrix[i].keys(): for j in matrix[i].keys():
sj = sum([matrix[j][k] for k in matrix[j].keys() if j != k]) sj = sum([matrix[j][k] for k in matrix[j].keys() if j != k])
if i!=j : if i!=j :
mi[i][j] = log( matrix[i][j] / ((si * sj) / total_cooc) ) mi[i][j] = log( matrix[i][j] / ((si * sj) / total_cooc) )
r = defaultdict(lambda : defaultdict(int)) r = defaultdict(lambda : defaultdict(int))
for i in matrix.keys(): for i in matrix.keys():
for j in matrix.keys(): for j in matrix.keys():
sumMin = sum( sumMin = sum(
...@@ -157,10 +157,10 @@ def clusterByDistances( cooc_id ...@@ -157,10 +157,10 @@ def clusterByDistances( cooc_id
if i != j and k != i and k != j and mi[i][k] > 0 if i != j and k != i and k != j and mi[i][k] > 0
] ]
) )
sumMi = sum( sumMi = sum(
[ [
mi[i][k] mi[i][k]
for k in matrix.keys() for k in matrix.keys()
if k != i and k != j and mi[i][k] > 0 if k != i and k != j and mi[i][k] > 0
] ]
...@@ -170,19 +170,19 @@ def clusterByDistances( cooc_id ...@@ -170,19 +170,19 @@ def clusterByDistances( cooc_id
r[i][j] = sumMin / sumMi r[i][j] = sumMin / sumMi
except Exception as error: except Exception as error:
r[i][j] = 0 r[i][j] = 0
# Need to filter the weak links, automatic threshold here # Need to filter the weak links, automatic threshold here
minmax = min([ max([ r[i][j] for i in r.keys()]) for j in r.keys()]) minmax = min([ max([ r[i][j] for i in r.keys()]) for j in r.keys()])
G = nx.DiGraph() G = nx.DiGraph()
G.add_edges_from( G.add_edges_from(
[ [
(i, j, {'weight': r[i][j]}) (i, j, {'weight': r[i][j]})
for i in r.keys() for j in r.keys() for i in r.keys() for j in r.keys()
if i != j and r[i][j] > minmax and r[i][j] > r[j][i] if i != j and r[i][j] > minmax and r[i][j] > r[j][i]
] ]
) )
# degree_max = max([(n, d) for n,d in G.degree().items()], key=itemgetter(1))[1] # degree_max = max([(n, d) for n,d in G.degree().items()], key=itemgetter(1))[1]
# nodes_to_remove = [n for (n,d) in G.degree().items() if d <= round(degree_max/2)] # nodes_to_remove = [n for (n,d) in G.degree().items() if d <= round(degree_max/2)]
# G.remove_nodes_from(nodes_to_remove) # G.remove_nodes_from(nodes_to_remove)
...@@ -197,7 +197,7 @@ def clusterByDistances( cooc_id ...@@ -197,7 +197,7 @@ def clusterByDistances( cooc_id
def getWeight(item): def getWeight(item):
return item[1] return item[1]
# #
# node_degree = sorted(G.degree().items(), key=getWeight, reverse=True) # node_degree = sorted(G.degree().items(), key=getWeight, reverse=True)
# #print(node_degree) # #print(node_degree)
# nodes_too_connected = [n[0] for n in node_degree[0:(round(len(node_degree)/5))]] # nodes_too_connected = [n[0] for n in node_degree[0:(round(len(node_degree)/5))]]
......
from django.conf.urls import patterns, url from django.conf.urls import patterns, url
from graphExplorer import views
# /!\ urls patterns here are *without* the trailing slash #from graphExplorer import views
from graphExplorer.intersection import getCorpusIntersection
urlpatterns = patterns('', # Module "Graph Explorer"
url(r'^register/$', views.Register.as_view()), # Register from graphExplorer.rest import Graph
url(r'^login/$', views.Login.as_view()), # Login from graphExplorer.views import explorer
from graphExplorer.intersection import getCorpusIntersection
)
# TODO : factor urls
# url will have this pattern:
# ^explorer/$corpus_id/view
# ^explorer/$corpus_id/data.json
# ^explorer/$corpus_id/intersection
urlpatterns = [ url(r'^explorer/intersection/(\w+)$', getCorpusIntersection )
, url(r'^projects/(\d+)/corpora/(\d+)/explorer$', explorer )
, url(r'^projects/(\d+)/corpora/(\d+)/graph$' , Graph.as_view())
, url(r'^projects/(\d+)/corpora/(\d+)/node_link.json$', Graph.as_view())
]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment