Commit c0c0431c authored by PkSM3

Merge branch 'unstable' of ssh://delanoe.org:1979/gargantext into samuel

parents 082984a9 8c0e06ad
@@ -7,6 +7,7 @@ from gargantext_web.db import Node, Ngram, NodeNgram, NodeNgramNgram, \
    NodeNodeNgram, NodeHyperdata, Hyperdata
from gargantext_web.db import session, cache, get_or_create_node, bulk_insert
from analysis.lists import WeightedMatrix, UnweightedList, Translations
+import inspect

# keep list
@@ -27,7 +28,7 @@ def cooc(corpus=None
    stop_id :: Int
    group_id :: Int
-   For the moment, start and ens are simple, only year is implemented yet
+   For the moment, start and end are simple, only year is implemented yet
    start :: TimeStamp -- example: '2010-05-30 02:00:00+02'
    end :: TimeStamp
    limit :: Int
@@ -37,17 +38,17 @@ def cooc(corpus=None
        , name_str="Cooccurrences corpus " + str(corpus.id) + "list_id: " + str(miam_id)
        )
-   # TODO : save parameters in Node
-   # args, _, _, parameters = inspect.getargvalues(inspect.currentframe())
-   # print(parameters)
-   # for parameter in parameters.keys():
-   #     print(parameters[parameter])
-   #     node_cooc.hyperdata[parameter] = parameters[parameter]
-   #
-   # session.add(node_cooc)
-   # session.commit()
-   # print(node_cooc.hyperdata)
+   args, _, _, parameters = inspect.getargvalues(inspect.currentframe())
+   hyperdata = dict()
+   for parameter in parameters.keys():
+       if parameter != 'corpus' and parameter != 'node_cooc':
+           hyperdata[parameter] = parameters[parameter]
+   node_cooc.hyperdata = hyperdata
+   session.add(node_cooc)
+   session.commit()

    session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==node_cooc.id).delete()
    session.commit()
@@ -109,7 +110,8 @@ def cooc(corpus=None
    # Cooc is symetric, take only the main cooccurrences and cut at the limit
    cooc_query = (cooc_query
        .filter(NodeNgramX.ngram_id < NodeNgramY.ngram_id)
-       .having(cooc_score > 1)
+       .having(cooc_score > 2)
+       #.having(cooc_score > 1)
        .group_by(NodeNgramX.ngram_id, NodeNgramY.ngram_id)
        .order_by(desc('cooc_score'))
...
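Note on the hunk above: the commented-out block is replaced by live code that snapshots the function's own arguments with inspect.getargvalues and stores them as the cooccurrence node's hyperdata. A minimal sketch of the same idiom, with an illustrative signature rather than the project's real one:

```python
import inspect

def cooc(corpus=None, miam_id=None, stop_id=None, group_id=None, limit=None):
    # At the top of a function, the frame's locals are exactly its parameters:
    # getargvalues returns (args, varargs, keywords, locals).
    args, _, _, parameters = inspect.getargvalues(inspect.currentframe())
    # Keep everything except objects that should not be serialised (here: corpus).
    hyperdata = {name: parameters[name] for name in args if name != 'corpus'}
    return hyperdata

print(cooc(corpus=object(), miam_id=5, limit=100))
# {'miam_id': 5, 'stop_id': None, 'group_id': None, 'limit': 100}
```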
@@ -21,7 +21,8 @@ import networkx as nx
from networkx.readwrite import json_graph
from rest_v1_0.api import JsonHttpResponse
-from analysis.louvain import best_partition
+from analysis.louvain import best_partition, generate_dendogram, partition_at_level
from ngram.lists import listIds
@@ -229,10 +230,10 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz
    #print(n)
    #print(m)
-   nodes_included = 200 #int(round(size/20,0))
+   nodes_included = 300 #int(round(size/20,0))
    #nodes_excluded = int(round(size/10,0))
-   nodes_specific = 200 #int(round(size/10,0))
+   nodes_specific = 300 #int(round(size/10,0))
    #nodes_generic = int(round(size/10,0))
    # TODO user the included score for the node size
@@ -263,11 +264,11 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz
        #edges_to_remove = [ e for e in G.edges_iter() if
        degree = G.degree()
-       nodes_to_remove = [n for n in degree if degree[n] ==0]
+       nodes_to_remove = [n for n in degree if degree[n] <= 1]
        G.remove_nodes_from(nodes_to_remove)
        uG = G.to_undirected()
        partition = best_partition(uG)
+       print(partition)
        print("Density of the graph:", nx.density(G))
    except:
        print("-" * 30)
@@ -315,7 +316,8 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz
        except Exception as error:
            print("error02: ",error)
        data = json_graph.node_link_data(G)
+   elif type == 'bestpartition':
+       return(partition)
    # data = json_graph.node_link_data(G, attrs={\
    #     'source':'source',\
@@ -325,5 +327,5 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz
    #     #'color':'color',\
    #     'id':'id',})
    #print(data)
-   return data
+   return(data)
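best_partition returns a plain dict mapping each node to a community id, which is what the new 'bestpartition' branch hands back. A sketch of how a caller might regroup that mapping by community, using the standalone python-louvain package and a toy graph rather than the project's cooccurrence graph:

```python
from collections import defaultdict

import networkx as nx
from community import best_partition  # python-louvain; analysis.louvain bundles an equivalent

G = nx.karate_club_graph()
partition = best_partition(G)          # {node: community_id, ...}

communities = defaultdict(list)
for node, com in partition.items():
    communities[com].append(node)

for com, nodes in sorted(communities.items()):
    print("community", com, "->", len(nodes), "nodes")
```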
@@ -24,9 +24,10 @@ import array
def partition_at_level(dendogram, level) :
    """Return the partition of the nodes at the given level

-   A dendogram is a tree and each level is a partition of the graph nodes.
-   Level 0 is the first partition, which contains the smallest communities, and the best is len(dendogram) - 1.
-   The higher the level is, the bigger are the communities
+   A dendogram is a tree and each level is a partition of the graph
+   nodes. Level 0 is the first partition, which contains the smallest
+   communities, and the best is len(dendogram) - 1. The higher the
+   level is, the bigger are the communities

    Parameters
    ----------
@@ -47,20 +48,22 @@ def partition_at_level(dendogram, level) :
    See Also
    --------
-   best_partition which directly combines partition_at_level and generate_dendogram to obtain the partition of highest modularity
+   best_partition which directly combines partition_at_level and
+   generate_dendogram to obtain the partition of highest modularity

    Examples
    --------
    >>> G=nx.erdos_renyi_graph(100, 0.01)
    >>> dendo = generate_dendogram(G)
    >>> for level in range(len(dendo) - 1) :
-   >>> print "partition at level", level, "is", partition_at_level(dendo, level)
+   >>> print("partition at level", level, "is", partition_at_level(dendo, level))
    """
    partition = dendogram[0].copy()
    for index in range(1, level + 1) :
        for node, community in tuple(partition.items()) :
            partition[node] = dendogram[index][community]
-   return partition
+   return(partition)

def modularity(partition, graph) :
@@ -191,7 +194,10 @@ def best_partition(graph, partition = None) :
def generate_dendogram(graph, part_init = None) :
    """Find communities in the graph and return the associated dendogram

-   A dendogram is a tree and each level is a partition of the graph nodes. Level 0 is the first partition, which contains the smallest communities, and the best is len(dendogram) - 1. The higher the level is, the bigger are the communities
+   A dendogram is a tree and each level is a partition of the graph
+   nodes. Level 0 is the first partition, which contains the smallest
+   communities, and the best is len(dendogram) - 1. The higher the level
+   is, the bigger are the communities

    Parameters
@@ -199,13 +205,17 @@ def generate_dendogram(graph, part_init = None) :
    graph : networkx.Graph
        the networkx graph which will be decomposed
    part_init : dict, optionnal
-       the algorithm will start using this partition of the nodes. It's a dictionary where keys are their nodes and values the communities
+       the algorithm will start using this partition of the nodes. It's a
+       dictionary where keys are their nodes and values the communities

    Returns
    -------
    dendogram : list of dictionaries
-       a list of partitions, ie dictionnaries where keys of the i+1 are the values of the i. and where keys of the first are the nodes of graph
+       a list of partitions, ie dictionnaries where keys of the i+1 are the
+       values of the i. and where keys of the first are the nodes of graph

    Raises
    ------
    TypeError
@@ -270,7 +280,8 @@ def generate_dendogram(graph, part_init = None) :
def induced_graph(partition, graph) :
    """Produce the graph where nodes are the communities

-   there is a link of weight w between communities if the sum of the weights of the links between their elements is w
+   there is a link of weight w between communities if the sum of the
+   weights of the links between their elements is w

    Parameters
    ----------
@@ -383,11 +394,11 @@ def __one_level(graph, status) :
                incr = dnc - status.degrees.get(com, 0.) * degc_totw
                if incr > best_increase :
                    best_increase = incr
                    best_com = com
            __insert(node, best_com,
                     neigh_communities.get(best_com, 0.), status)
            if best_com != com_node :
                modif = True
        new_mod = __modularity(status)
        if new_mod - cur_mod < __MIN :
            break
...
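The reflowed docstrings above describe the Louvain dendrogram: level 0 is the finest partition and len(dendogram) - 1 the coarsest, best-modularity one, which is what best_partition returns directly. A short sketch of the three functions together, assuming the repository's analysis.louvain module is importable (the names match the import added in the API file):

```python
import networkx as nx
from analysis.louvain import best_partition, generate_dendogram, partition_at_level

G = nx.erdos_renyi_graph(100, 0.05)
dendo = generate_dendogram(G)

# Walk the hierarchy from the finest to the coarsest partition.
for level in range(len(dendo)):
    parts = partition_at_level(dendo, level)
    print("level", level, "->", len(set(parts.values())), "communities")

# best_partition(G) is the shortcut for partition_at_level(dendo, len(dendo) - 1)
# computed on a freshly generated dendogram.
```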
+# Without this, we couldn't use the Django environment
+from admin.env import *
+from gargantext_web.views import empty_trash
+empty_trash()
@@ -50,7 +50,6 @@ def apply_workflow(corpus_id):
    #ngrams2miam(user_id=corpus.user_id, corpus_id=corpus_id)
    update_processing(corpus, 0)
-#@transaction.commit_manually
@shared_task
def empty_trash(corpus_id):
    nodes = models.Node.objects.filter(type_id=cache.NodeType['Trash'].id).all()
...
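The new four-line script relies on admin.env to make the Django project usable from a plain Python process before calling the view-level empty_trash. The module's contents are not shown in this diff; a typical bootstrap of that kind looks roughly like this (settings path assumed for illustration):

```python
# Assumed shape of admin/env.py -- not taken from the repository.
import os
import django

os.environ.setdefault("DJANGO_SETTINGS_MODULE", "gargantext_web.settings")
django.setup()  # load settings and populate the app registry
```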
@@ -430,11 +430,11 @@ def move_to_trash(node_id):
        if DEBUG is False :
            # TODO for the future maybe add id of node
-           empty_trash.apply_async("corpus_id")
+           empty_trash.apply_async([1,])
        else:
            empty_trash("corpus_id")
-       return(previous_type_id)
+       #return(previous_type_id)
    except Exception as error:
        print("can not move to trash Node" + str(node_id) + ":" + str(error))
@@ -471,18 +471,16 @@ def delete_node(request, node_id):
    if node.user_id != user.id:
        return HttpResponseForbidden()
-   previous_type_id = move_to_trash(node_id)
+   previous_type_id = node.type_id
+   node_parent_id = node.parent_id
+   move_to_trash(node_id)
    if previous_type_id == cache.NodeType['Corpus'].id:
-       return HttpResponseRedirect('/project/' + str(node.parent_id))
+       return HttpResponseRedirect('/project/' + str(node_parent_id))
    else:
        return HttpResponseRedirect('/projects/')
-   if settings.DEBUG == True:
-       empty_trash()

def delete_corpus(request, project_id, node_id):
    # ORM Django
    with transaction.atomic():
...
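The change from empty_trash.apply_async("corpus_id") to empty_trash.apply_async([1,]) matters because Celery's apply_async takes the task's positional arguments as a list or tuple; a bare string would be unpacked character by character when the worker runs the task. A small reminder of the calling convention (the task body here is illustrative):

```python
from celery import shared_task

@shared_task
def empty_trash(corpus_id):
    print("emptying trash for corpus", corpus_id)

# Positional arguments packed in a list, as in the diff:
empty_trash.apply_async([1])
# Equivalent explicit form, optionally delayed:
empty_trash.apply_async(args=(1,), countdown=10)
```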
@@ -13,12 +13,18 @@ from analysis.cooccurrences import cooc
from gargantext_web.db import session, cache, get_or_create_node, bulk_insert
from gargantext_web.db import NodeNgramNgram, NodeNodeNgram
+from sqlalchemy import desc, asc, or_, and_, Date, cast, select

-def specificity(cooc_id=None, corpus=None):
+def specificity(cooc_id=None, corpus=None, limit=100):
    '''
    Compute the specificity, simple calculus.
    '''
-   cooccurrences = session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==cooc_id).all()
+   cooccurrences = (session.query(NodeNgramNgram)
+                    .filter(NodeNgramNgram.node_id==cooc_id)
+                    .order_by(NodeNgramNgram.score)
+                    .limit(limit)
+                    )

    matrix = defaultdict(lambda : defaultdict(float))
@@ -61,7 +67,7 @@ def compute_specificity(corpus,limit=100):
    list_cvalue = get_or_create_node(nodetype='Cvalue', corpus=corpus)
    cooc_id = cooc(corpus=corpus, cvalue_id=list_cvalue.id,limit=limit)
-   specificity(cooc_id=cooc_id,corpus=corpus)
+   specificity(cooc_id=cooc_id,corpus=corpus,limit=limit)
    dbg.show('specificity')
...
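Dropping .all() in specificity leaves cooccurrences as a SQLAlchemy Query that is ordered by score and capped at limit rows; the query only executes when the matrix-building loop iterates over it. The same pattern against a throwaway in-memory table (model and columns are stand-ins, not the real gargantext schema):

```python
from sqlalchemy import create_engine, Column, Integer, Float
from sqlalchemy.orm import declarative_base, Session

Base = declarative_base()

class NodeNgramNgram(Base):                 # toy stand-in for the real model
    __tablename__ = 'nodengramngram'
    id = Column(Integer, primary_key=True)
    node_id = Column(Integer)
    ngramx_id = Column(Integer)
    ngramy_id = Column(Integer)
    score = Column(Float)

engine = create_engine('sqlite://')
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add_all([NodeNgramNgram(node_id=1, ngramx_id=i, ngramy_id=i + 1, score=float(i))
                     for i in range(10)])
    session.commit()

    # Same shape as the new specificity() query: filter, order, cap the size.
    cooccurrences = (session.query(NodeNgramNgram)
                     .filter(NodeNgramNgram.node_id == 1)
                     .order_by(NodeNgramNgram.score)
                     .limit(5))

    for c in cooccurrences:                 # the Query is iterable; no .all() needed
        print(c.ngramx_id, c.ngramy_id, c.score)
```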
@@ -6,6 +6,8 @@ from ngram.specificity import compute_specificity
from ngram.group import compute_groups
from ngram.miam import compute_miam
from gargantext_web.db import get_or_create_node
+#from gargantext_web.celery import update_processing

def ngram_workflow(corpus, n=5000):
    '''
@@ -17,21 +19,27 @@ def ngram_workflow(corpus, n=5000):
    compute_cvalue(corpus,limit=part) # size

-   part = round(part * 0.6)
+   part = round(part * 0.4)
+   print('spec part:', part)
    compute_specificity(corpus,limit=part)

    part = round(part * 0.5)
    # compute_stop(corpus)

-   compute_groups(corpus,limit_inf=part, limit_sup=n)
+   limit_inf = round(part * 1)
+   limit_sup = round(part * 5)
+   print(limit_inf,limit_sup)
+   compute_groups(corpus,limit_inf=limit_inf, limit_sup=limit_sup)

    # compute_miam(corpus,limit=part) # size
    compute_tfidf(corpus)

-#corpus=session.query(Node).filter(Node.id==244250).first()
+#corpus=session.query(Node).filter(Node.id==257579).first()
#ngram_workflow(corpus)
+#update_processing(corpus, 0)
#cvalue = get_or_create_node(corpus=corpus,nodetype='Cvalue')
#print(session.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==cvalue.id).count())
...
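The new coefficients redistribute the n-term budget across the workflow steps. A worked example of the arithmetic, assuming for illustration that part enters this hunk at 500 (its initial value is computed above the visible context):

```python
part = 500                      # hypothetical starting value, for illustration only

cvalue_limit = part             # compute_cvalue(corpus, limit=part)
part = round(part * 0.4)        # 200   (the commit lowers the factor from 0.6)
specificity_limit = part        # compute_specificity(corpus, limit=part)
part = round(part * 0.5)        # 100
limit_inf = round(part * 1)     # 100
limit_sup = round(part * 5)     # 500   -> compute_groups(limit_inf=100, limit_sup=500)
print(cvalue_limit, specificity_limit, limit_inf, limit_sup)    # 500 200 100 500
```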