Commit c0c0431c authored by PkSM3

Merge branch 'unstable' of ssh://delanoe.org:1979/gargantext into samuel

parents 082984a9 8c0e06ad
......@@ -7,6 +7,7 @@ from gargantext_web.db import Node, Ngram, NodeNgram, NodeNgramNgram, \
NodeNodeNgram, NodeHyperdata, Hyperdata
from gargantext_web.db import session, cache, get_or_create_node, bulk_insert
from analysis.lists import WeightedMatrix, UnweightedList, Translations
import inspect
# keep list
......@@ -27,7 +28,7 @@ def cooc(corpus=None
stop_id :: Int
group_id :: Int
For the moment, start and ens are simple, only year is implemented yet
For the moment, start and end are simple, only year is implemented yet
start :: TimeStamp -- example: '2010-05-30 02:00:00+02'
end :: TimeStamp
limit :: Int
......@@ -37,17 +38,17 @@ def cooc(corpus=None
, name_str="Cooccurrences corpus " + str(corpus.id) + "list_id: " + str(miam_id)
)
# TODO : save parameters in Node
# args, _, _, parameters = inspect.getargvalues(inspect.currentframe())
# print(parameters)
# for parameter in parameters.keys():
# print(parameters[parameter])
# node_cooc.hyperdata[parameter] = parameters[parameter]
#
# session.add(node_cooc)
# session.commit()
# print(node_cooc.hyperdata)
args, _, _, parameters = inspect.getargvalues(inspect.currentframe())
hyperdata = dict()
for parameter in parameters.keys():
if parameter != 'corpus' and parameter != 'node_cooc':
hyperdata[parameter] = parameters[parameter]
node_cooc.hyperdata = hyperdata
session.add(node_cooc)
session.commit()
session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==node_cooc.id).delete()
session.commit()
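The block above replaces the commented-out draft with live code: inspect.getargvalues(inspect.currentframe()) captures the calling function's arguments so they can be stored as hyperdata on the cooccurrence node. A minimal standalone sketch of the same pattern (function name and arguments here are hypothetical; the sketch snapshots the locals with dict() instead of unpacking the tuple, which also stays safe on Python 3.13+ where f_locals is a live proxy):

import inspect

def cooc_sketch(corpus=None, start=None, end=None, limit=100):
    # snapshot the call arguments before any other local variable exists
    parameters = dict(inspect.getargvalues(inspect.currentframe()).locals)
    hyperdata = dict()
    for parameter in parameters.keys():
        if parameter != 'corpus':   # skip objects that should not be serialized
            hyperdata[parameter] = parameters[parameter]
    return hyperdata

print(cooc_sketch(start='2010-05-30 02:00:00+02', limit=50))
# {'start': '2010-05-30 02:00:00+02', 'end': None, 'limit': 50}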
......@@ -109,7 +110,8 @@ def cooc(corpus=None
# Cooc is symmetric, take only the main cooccurrences and cut at the limit
cooc_query = (cooc_query
.filter(NodeNgramX.ngram_id < NodeNgramY.ngram_id)
.having(cooc_score > 1)
.having(cooc_score > 2)
#.having(cooc_score > 1)
.group_by(NodeNgramX.ngram_id, NodeNgramY.ngram_id)
.order_by(desc('cooc_score'))
......
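Since cooccurrence is symmetric (score(x, y) == score(y, x)), the filter NodeNgramX.ngram_id < NodeNgramY.ngram_id keeps one triangle of the matrix without losing information, and the raised HAVING threshold (> 2 instead of > 1) then drops weak pairs. A plain-Python sketch of the same filtering on made-up data:

# symmetric cooccurrence counts: both (x, y) and (y, x) are present
cooc = {(1, 2): 3, (2, 1): 3, (1, 3): 2, (3, 1): 2, (2, 3): 1, (3, 2): 1}

# keep one triangle of the matrix and apply the new threshold
main_cooc = {pair: score for pair, score in cooc.items()
             if pair[0] < pair[1] and score > 2}

print(sorted(main_cooc.items(), key=lambda kv: -kv[1]))
# [((1, 2), 3)]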
......@@ -21,7 +21,8 @@ import networkx as nx
from networkx.readwrite import json_graph
from rest_v1_0.api import JsonHttpResponse
from analysis.louvain import best_partition
from analysis.louvain import best_partition, generate_dendogram, partition_at_level
from ngram.lists import listIds
......@@ -229,10 +230,10 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz
#print(n)
#print(m)
nodes_included = 200 #int(round(size/20,0))
nodes_included = 300 #int(round(size/20,0))
#nodes_excluded = int(round(size/10,0))
nodes_specific = 200 #int(round(size/10,0))
nodes_specific = 300 #int(round(size/10,0))
#nodes_generic = int(round(size/10,0))
# TODO use the included score for the node size
......@@ -263,11 +264,11 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz
#edges_to_remove = [ e for e in G.edges_iter() if
degree = G.degree()
nodes_to_remove = [n for n in degree if degree[n] ==0]
nodes_to_remove = [n for n in degree if degree[n] <= 1]
G.remove_nodes_from(nodes_to_remove)
uG = G.to_undirected()
partition = best_partition(uG)
print(partition)
print("Density of the graph:", nx.density(G))
except:
print("-" * 30)
......@@ -315,7 +316,8 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz
except Exception as error:
print("error02: ",error)
data = json_graph.node_link_data(G)
elif type == 'bestpartition':
return(partition)
# data = json_graph.node_link_data(G, attrs={\
# 'source':'source',\
......@@ -325,5 +327,5 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz
# #'color':'color',\
# 'id':'id',})
#print(data)
return data
return(data)
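get_cooc now answers two shapes: type 'node_link' returns the JSON-serializable graph, while the new 'bestpartition' branch returns the raw node-to-community mapping. A toy sketch of the two return shapes (the function name and data are hypothetical):

import networkx as nx
from networkx.readwrite import json_graph

G = nx.Graph([(1, 2), (2, 3), (1, 3)])

def get_cooc_sketch(type='node_link'):
    if type == 'node_link':
        # dict with 'nodes' and 'links' keys, ready for json.dumps
        return json_graph.node_link_data(G)
    elif type == 'bestpartition':
        # node -> community id, as Louvain's best_partition returns it
        return {n: 0 for n in G}   # a single community in this toy graph

print(get_cooc_sketch('node_link')['links'])
print(get_cooc_sketch('bestpartition'))   # {1: 0, 2: 0, 3: 0}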
......@@ -24,9 +24,10 @@ import array
def partition_at_level(dendogram, level) :
"""Return the partition of the nodes at the given level
A dendogram is a tree and each level is a partition of the graph nodes.
Level 0 is the first partition, which contains the smallest communities, and the best is len(dendogram) - 1.
The higher the level is, the bigger are the communities
A dendogram is a tree and each level is a partition of the graph
nodes. Level 0 is the first partition, which contains the smallest
communities, and the best is len(dendogram) - 1. The higher the
level is, the bigger are the communities
Parameters
----------
......@@ -47,20 +48,22 @@ def partition_at_level(dendogram, level) :
See Also
--------
best_partition which directly combines partition_at_level and generate_dendogram to obtain the partition of highest modularity
best_partition which directly combines partition_at_level and
generate_dendogram to obtain the partition of highest modularity
Examples
--------
>>> G=nx.erdos_renyi_graph(100, 0.01)
>>> dendo = generate_dendogram(G)
>>> for level in range(len(dendo) - 1) :
>>> print "partition at level", level, "is", partition_at_level(dendo, level)
>>> print("partition at level", level, "is", partition_at_level(dendo, level))
"""
partition = dendogram[0].copy()
for index in range(1, level + 1) :
for node, community in tuple(partition.items()) :
partition[node] = dendogram[index][community]
return partition
return(partition)
def modularity(partition, graph) :
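The reflowed docstring is easier to check against the code: partition_at_level starts from the level-0 partition and repeatedly relabels each node by the community its current community maps to at the next level. A self-contained run of exactly that loop on a hand-built two-level dendogram (data made up):

# level 0: nodes -> small communities; level 1: merges communities 0 and 1
dendogram = [
    {'a': 0, 'b': 0, 'c': 1, 'd': 1, 'e': 2},
    {0: 0, 1: 0, 2: 1},
]

def partition_at_level(dendogram, level):
    partition = dendogram[0].copy()
    for index in range(1, level + 1):
        for node, community in tuple(partition.items()):
            partition[node] = dendogram[index][community]
    return partition

for level in range(len(dendogram)):
    print("partition at level", level, "is", partition_at_level(dendogram, level))
# partition at level 0 is {'a': 0, 'b': 0, 'c': 1, 'd': 1, 'e': 2}
# partition at level 1 is {'a': 0, 'b': 0, 'c': 0, 'd': 0, 'e': 1}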
......@@ -191,7 +194,10 @@ def best_partition(graph, partition = None) :
def generate_dendogram(graph, part_init = None) :
"""Find communities in the graph and return the associated dendogram
A dendogram is a tree and each level is a partition of the graph nodes. Level 0 is the first partition, which contains the smallest communities, and the best is len(dendogram) - 1. The higher the level is, the bigger are the communities
A dendogram is a tree and each level is a partition of the graph
nodes. Level 0 is the first partition, which contains the smallest
communities, and the best is len(dendogram) - 1. The higher the level
is, the bigger are the communities
Parameters
......@@ -199,13 +205,17 @@ def generate_dendogram(graph, part_init = None) :
graph : networkx.Graph
the networkx graph which will be decomposed
part_init : dict, optional
the algorithm will start using this partition of the nodes. It's a dictionary where keys are the nodes and values the communities
the algorithm will start using this partition of the nodes. It's a
dictionary where keys are the nodes and values the communities
Returns
-------
dendogram : list of dictionaries
a list of partitions, i.e. dictionaries where keys of level i+1 are the values of level i, and keys of the first are the nodes of the graph
a list of partitions, i.e. dictionaries where keys of level i+1 are the
values of level i, and keys of the first are the nodes of the graph
Raises
------
TypeError
......@@ -270,7 +280,8 @@ def generate_dendogram(graph, part_init = None) :
def induced_graph(partition, graph) :
"""Produce the graph where nodes are the communities
there is a link of weight w between communities if the sum of the weights of the links between their elements is w
there is a link of weight w between communities if the sum of the
weights of the links between their elements is w
Parameters
----------
......@@ -383,11 +394,11 @@ def __one_level(graph, status) :
incr = dnc - status.degrees.get(com, 0.) * degc_totw
if incr > best_increase :
best_increase = incr
best_com = com
best_com = com
__insert(node, best_com,
neigh_communities.get(best_com, 0.), status)
if best_com != com_node :
modif = True
modif = True
new_mod = __modularity(status)
if new_mod - cur_mod < __MIN :
break
......
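For induced_graph, the reworded docstring means: collapse each community to one node, and give the edge between two community-nodes the sum of the weights of all original links between their members. A sketch of that aggregation with networkx (toy data; the loop is an illustration, not the library's implementation):

import networkx as nx

G = nx.Graph()
G.add_weighted_edges_from([(1, 2, 1.0), (2, 3, 1.0), (3, 4, 2.0), (4, 5, 1.0)])
partition = {1: 'A', 2: 'A', 3: 'A', 4: 'B', 5: 'B'}

induced = nx.Graph()
for u, v, data in G.edges(data=True):
    cu, cv = partition[u], partition[v]
    # accumulate the weights of all original links between the two communities
    prev = induced.get_edge_data(cu, cv, {'weight': 0.0})['weight']
    induced.add_edge(cu, cv, weight=prev + data.get('weight', 1.0))

print(sorted(induced.edges(data=True)))
# [('A', 'A', {'weight': 2.0}), ('A', 'B', {'weight': 2.0}), ('B', 'B', {'weight': 1.0})]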
# Without this, we couldn't use the Django environment
from admin.env import *
from gargantext_web.views import empty_trash
empty_trash()
......@@ -50,7 +50,6 @@ def apply_workflow(corpus_id):
#ngrams2miam(user_id=corpus.user_id, corpus_id=corpus_id)
update_processing(corpus, 0)
#@transaction.commit_manually
@shared_task
def empty_trash(corpus_id):
nodes = models.Node.objects.filter(type_id=cache.NodeType['Trash'].id).all()
......
......@@ -430,11 +430,11 @@ def move_to_trash(node_id):
if DEBUG is False :
# TODO for the future maybe add id of node
empty_trash.apply_async("corpus_id")
empty_trash.apply_async([1,])
else:
empty_trash("corpus_id")
return(previous_type_id)
#return(previous_type_id)
except Exception as error:
print("can not move to trash Node" + str(node_id) + ":" + str(error))
......@@ -471,18 +471,16 @@ def delete_node(request, node_id):
if node.user_id != user.id:
return HttpResponseForbidden()
previous_type_id = move_to_trash(node_id)
previous_type_id = node.type_id
node_parent_id = node.parent_id
move_to_trash(node_id)
if previous_type_id == cache.NodeType['Corpus'].id:
return HttpResponseRedirect('/project/' + str(node.parent_id))
return HttpResponseRedirect('/project/' + str(node_parent_id))
else:
return HttpResponseRedirect('/projects/')
if settings.DEBUG == True:
empty_trash()
def delete_corpus(request, project_id, node_id):
# ORM Django
with transaction.atomic():
......
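The delete_node reordering fixes a use-after-trash bug: move_to_trash changes the node's type, so type_id and parent_id must be read before the call rather than after. A reduced sketch of the pattern (models and trash logic are stubs):

class Node:
    def __init__(self, type_id, parent_id):
        self.type_id = type_id
        self.parent_id = parent_id

def move_to_trash(node):
    node.type_id = 'Trash'   # mutates the node, losing the previous type

def delete_node(node):
    # capture the values *before* move_to_trash overwrites them
    previous_type_id = node.type_id
    node_parent_id = node.parent_id
    move_to_trash(node)
    if previous_type_id == 'Corpus':
        return '/project/' + str(node_parent_id)
    return '/projects/'

print(delete_node(Node('Corpus', 42)))   # /project/42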
......@@ -13,12 +13,18 @@ from analysis.cooccurrences import cooc
from gargantext_web.db import session, cache, get_or_create_node, bulk_insert
from gargantext_web.db import NodeNgramNgram, NodeNodeNgram
from sqlalchemy import desc, asc, or_, and_, Date, cast, select
def specificity(cooc_id=None, corpus=None):
def specificity(cooc_id=None, corpus=None, limit=100):
'''
Compute the specificity, simple calculus.
'''
cooccurrences = session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==cooc_id).all()
cooccurrences = (session.query(NodeNgramNgram)
.filter(NodeNgramNgram.node_id==cooc_id)
.order_by(NodeNgramNgram.score)
.limit(limit)
)
matrix = defaultdict(lambda : defaultdict(float))
......@@ -61,7 +67,7 @@ def compute_specificity(corpus,limit=100):
list_cvalue = get_or_create_node(nodetype='Cvalue', corpus=corpus)
cooc_id = cooc(corpus=corpus, cvalue_id=list_cvalue.id,limit=limit)
specificity(cooc_id=cooc_id,corpus=corpus)
specificity(cooc_id=cooc_id,corpus=corpus,limit=limit)
dbg.show('specificity')
......
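specificity now orders the cooccurrences by score and truncates to limit rows before building the matrix. Note that order_by(NodeNgramNgram.score) is ascending, so combined with limit it keeps the lowest-scored pairs; if the strongest pairs were intended, desc(NodeNgramNgram.score) would be needed. A plain-Python sketch of the order-then-limit step (rows are made up):

from collections import defaultdict

# (ngram_x, ngram_y, score) rows, standing in for the NodeNgramNgram query
cooccurrences = [(1, 2, 5.0), (1, 3, 2.0), (2, 3, 8.0), (3, 4, 1.0)]
limit = 3

# ascending order + limit keeps the lowest scores, as in the diff
kept = sorted(cooccurrences, key=lambda row: row[2])[:limit]

matrix = defaultdict(lambda: defaultdict(float))
for x, y, score in kept:
    matrix[x][y] = score

print(kept)   # [(3, 4, 1.0), (1, 3, 2.0), (1, 2, 5.0)]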
......@@ -6,6 +6,8 @@ from ngram.specificity import compute_specificity
from ngram.group import compute_groups
from ngram.miam import compute_miam
from gargantext_web.db import get_or_create_node
#from gargantext_web.celery import update_processing
def ngram_workflow(corpus, n=5000):
'''
......@@ -17,21 +19,27 @@ def ngram_workflow(corpus, n=5000):
compute_cvalue(corpus,limit=part) # size
part = round(part * 0.6)
part = round(part * 0.4)
print('spec part:', part)
compute_specificity(corpus,limit=part)
part = round(part * 0.5)
# compute_stop(corpus)
compute_groups(corpus,limit_inf=part, limit_sup=n)
limit_inf = round(part * 1)
limit_sup = round(part * 5)
print(limit_inf,limit_sup)
compute_groups(corpus,limit_inf=limit_inf, limit_sup=limit_sup)
# compute_miam(corpus,limit=part) # size
compute_tfidf(corpus)
#corpus=session.query(Node).filter(Node.id==244250).first()
#corpus=session.query(Node).filter(Node.id==257579).first()
#ngram_workflow(corpus)
#update_processing(corpus, 0)
#cvalue = get_or_create_node(corpus=corpus,nodetype='Cvalue')
#print(session.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==cvalue.id).count())
......
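The retuned workflow thresholds are easier to see with concrete numbers. Assuming part enters this hunk as 1000 (the real starting value is computed earlier in ngram_workflow and is outside the shown context, so 1000 is hypothetical):

part = 1000                  # hypothetical value entering the hunk

part = round(part * 0.4)     # new factor, was 0.6
print('spec part:', part)    # spec part: 400
# compute_specificity(corpus, limit=part)

part = round(part * 0.5)     # 200
limit_inf = round(part * 1)  # 200
limit_sup = round(part * 5)  # 1000, where the old call used limit_sup=n
print(limit_inf, limit_sup)  # 200 1000
# compute_groups(corpus, limit_inf=limit_inf, limit_sup=limit_sup)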