Commit c0c0431c authored by PkSM3

Merge branch 'unstable' of ssh://delanoe.org:1979/gargantext into samuel

parents 082984a9 8c0e06ad
......@@ -7,6 +7,7 @@ from gargantext_web.db import Node, Ngram, NodeNgram, NodeNgramNgram, \
NodeNodeNgram, NodeHyperdata, Hyperdata
from gargantext_web.db import session, cache, get_or_create_node, bulk_insert
from analysis.lists import WeightedMatrix, UnweightedList, Translations
import inspect
# keep list
......@@ -27,7 +28,7 @@ def cooc(corpus=None
stop_id :: Int
group_id :: Int
For the moment, start and ens are simple, only year is implemented yet
For the moment, start and end are simple, only year is implemented yet
start :: TimeStamp -- example: '2010-05-30 02:00:00+02'
end :: TimeStamp
limit :: Int
......@@ -37,17 +38,17 @@ def cooc(corpus=None
, name_str="Cooccurrences corpus " + str(corpus.id) + "list_id: " + str(miam_id)
)
# TODO : save parameters in Node
# args, _, _, parameters = inspect.getargvalues(inspect.currentframe())
# print(parameters)
# for parameter in parameters.keys():
# print(parameters[parameter])
# node_cooc.hyperdata[parameter] = parameters[parameter]
#
# session.add(node_cooc)
# session.commit()
# print(node_cooc.hyperdata)
args, _, _, parameters = inspect.getargvalues(inspect.currentframe())
hyperdata = dict()
for parameter in parameters.keys():
if parameter != 'corpus' and parameter != 'node_cooc':
hyperdata[parameter] = parameters[parameter]
node_cooc.hyperdata = hyperdata
session.add(node_cooc)
session.commit()
session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==node_cooc.id).delete()
session.commit()
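The block above replaces the commented-out draft with live code: inspect.getargvalues(inspect.currentframe()) captures the calling function's arguments so they can be stored as hyperdata on the cooccurrence node. A minimal standalone sketch of the same pattern (function name and arguments here are hypothetical; the sketch snapshots the locals with dict() instead of unpacking the tuple, which also stays safe on Python 3.13+ where f_locals is a live proxy):

import inspect

def cooc_sketch(corpus=None, start=None, end=None, limit=100):
    # snapshot the call arguments before any other local variable exists
    parameters = dict(inspect.getargvalues(inspect.currentframe()).locals)
    hyperdata = dict()
    for parameter in parameters.keys():
        if parameter != 'corpus':   # skip objects that should not be serialized
            hyperdata[parameter] = parameters[parameter]
    return hyperdata

print(cooc_sketch(start='2010-05-30 02:00:00+02', limit=50))
# {'start': '2010-05-30 02:00:00+02', 'end': None, 'limit': 50}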
......@@ -109,7 +110,8 @@ def cooc(corpus=None
# Cooc is symmetric, take only the main cooccurrences and cut at the limit
cooc_query = (cooc_query
.filter(NodeNgramX.ngram_id < NodeNgramY.ngram_id)
.having(cooc_score > 1)
.having(cooc_score > 2)
#.having(cooc_score > 1)
.group_by(NodeNgramX.ngram_id, NodeNgramY.ngram_id)
.order_by(desc('cooc_score'))
......
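Since cooccurrence is symmetric (score(x, y) == score(y, x)), the filter NodeNgramX.ngram_id < NodeNgramY.ngram_id keeps one triangle of the matrix without losing information, and the raised HAVING threshold (> 2 instead of > 1) then drops weak pairs. A plain-Python sketch of the same filtering on made-up data:

# symmetric cooccurrence counts: both (x, y) and (y, x) are present
cooc = {(1, 2): 3, (2, 1): 3, (1, 3): 2, (3, 1): 2, (2, 3): 1, (3, 2): 1}

# keep one triangle of the matrix and apply the new threshold
main_cooc = {pair: score for pair, score in cooc.items()
             if pair[0] < pair[1] and score > 2}

print(sorted(main_cooc.items(), key=lambda kv: -kv[1]))
# [((1, 2), 3)]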
......@@ -21,7 +21,8 @@ import networkx as nx
from networkx.readwrite import json_graph
from rest_v1_0.api import JsonHttpResponse
from analysis.louvain import best_partition
from analysis.louvain import best_partition, generate_dendogram, partition_at_level
from ngram.lists import listIds
......@@ -229,10 +230,10 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz
#print(n)
#print(m)
nodes_included = 200 #int(round(size/20,0))
nodes_included = 300 #int(round(size/20,0))
#nodes_excluded = int(round(size/10,0))
nodes_specific = 200 #int(round(size/10,0))
nodes_specific = 300 #int(round(size/10,0))
#nodes_generic = int(round(size/10,0))
# TODO use the included score for the node size
......@@ -263,11 +264,11 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz
#edges_to_remove = [ e for e in G.edges_iter() if
degree = G.degree()
nodes_to_remove = [n for n in degree if degree[n] ==0]
nodes_to_remove = [n for n in degree if degree[n] <= 1]
G.remove_nodes_from(nodes_to_remove)
uG = G.to_undirected()
partition = best_partition(uG)
print(partition)
print("Density of the graph:", nx.density(G))
except:
print("-" * 30)
......@@ -315,7 +316,8 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz
except Exception as error:
print("error02: ",error)
data = json_graph.node_link_data(G)
elif type == 'bestpartition':
return(partition)
# data = json_graph.node_link_data(G, attrs={\
# 'source':'source',\
......@@ -325,5 +327,5 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz
# #'color':'color',\
# 'id':'id',})
#print(data)
return data
return(data)
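get_cooc now answers two shapes: type 'node_link' returns the JSON-serializable graph, while the new 'bestpartition' branch returns the raw node-to-community mapping. A toy sketch of the two return shapes (the function name and data are hypothetical):

import networkx as nx
from networkx.readwrite import json_graph

G = nx.Graph([(1, 2), (2, 3), (1, 3)])

def get_cooc_sketch(type='node_link'):
    if type == 'node_link':
        # dict with 'nodes' and 'links' keys, ready for json.dumps
        return json_graph.node_link_data(G)
    elif type == 'bestpartition':
        # node -> community id, as Louvain's best_partition returns it
        return {n: 0 for n in G}   # a single community in this toy graph

print(get_cooc_sketch('node_link')['links'])
print(get_cooc_sketch('bestpartition'))   # {1: 0, 2: 0, 3: 0}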
......@@ -24,9 +24,10 @@ import array
def partition_at_level(dendogram, level) :
"""Return the partition of the nodes at the given level
A dendogram is a tree and each level is a partition of the graph nodes.
Level 0 is the first partition, which contains the smallest communities, and the best is len(dendogram) - 1.
The higher the level is, the bigger are the communities
A dendogram is a tree and each level is a partition of the graph
nodes. Level 0 is the first partition, which contains the smallest
communities, and the best is len(dendogram) - 1. The higher the
level is, the bigger are the communities
Parameters
----------
......@@ -47,20 +48,22 @@ def partition_at_level(dendogram, level) :
See Also
--------
best_partition which directly combines partition_at_level and generate_dendogram to obtain the partition of highest modularity
best_partition which directly combines partition_at_level and
generate_dendogram to obtain the partition of highest modularity
Examples
--------
>>> G=nx.erdos_renyi_graph(100, 0.01)
>>> dendo = generate_dendogram(G)
>>> for level in range(len(dendo) - 1) :
>>> print "partition at level", level, "is", partition_at_level(dendo, level)
>>> print("partition at level", level, "is", partition_at_level(dendo, level))
"""
partition = dendogram[0].copy()
for index in range(1, level + 1) :
for node, community in tuple(partition.items()) :
partition[node] = dendogram[index][community]
return partition
return(partition)
def modularity(partition, graph) :
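The reflowed docstring is easier to check against the code: partition_at_level starts from the level-0 partition and repeatedly relabels each node by the community its current community maps to at the next level. A self-contained run of exactly that loop on a hand-built two-level dendogram (data made up):

# level 0: nodes -> small communities; level 1: merges communities 0 and 1
dendogram = [
    {'a': 0, 'b': 0, 'c': 1, 'd': 1, 'e': 2},
    {0: 0, 1: 0, 2: 1},
]

def partition_at_level(dendogram, level):
    partition = dendogram[0].copy()
    for index in range(1, level + 1):
        for node, community in tuple(partition.items()):
            partition[node] = dendogram[index][community]
    return partition

for level in range(len(dendogram)):
    print("partition at level", level, "is", partition_at_level(dendogram, level))
# partition at level 0 is {'a': 0, 'b': 0, 'c': 1, 'd': 1, 'e': 2}
# partition at level 1 is {'a': 0, 'b': 0, 'c': 0, 'd': 0, 'e': 1}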
......@@ -191,7 +194,10 @@ def best_partition(graph, partition = None) :
def generate_dendogram(graph, part_init = None) :
"""Find communities in the graph and return the associated dendogram
A dendogram is a tree and each level is a partition of the graph nodes. Level 0 is the first partition, which contains the smallest communities, and the best is len(dendogram) - 1. The higher the level is, the bigger are the communities
A dendogram is a tree and each level is a partition of the graph
nodes. Level 0 is the first partition, which contains the smallest
communities, and the best is len(dendogram) - 1. The higher the level
is, the bigger are the communities
Parameters
......@@ -199,13 +205,17 @@ def generate_dendogram(graph, part_init = None) :
graph : networkx.Graph
the networkx graph which will be decomposed
part_init : dict, optional
the algorithm will start using this partition of the nodes. It's a dictionary where keys are the nodes and values the communities
the algorithm will start using this partition of the nodes. It's a
dictionary where keys are the nodes and values the communities
Returns
-------
dendogram : list of dictionaries
a list of partitions, i.e. dictionaries where keys of level i+1 are the values of level i, and keys of the first are the nodes of the graph
a list of partitions, i.e. dictionaries where keys of level i+1 are the
values of level i, and keys of the first are the nodes of the graph
Raises
------
TypeError
......@@ -270,7 +280,8 @@ def generate_dendogram(graph, part_init = None) :
def induced_graph(partition, graph) :
"""Produce the graph where nodes are the communities
there is a link of weight w between communities if the sum of the weights of the links between their elements is w
there is a link of weight w between communities if the sum of the
weights of the links between their elements is w
Parameters
----------
......@@ -383,11 +394,11 @@ def __one_level(graph, status) :
incr = dnc - status.degrees.get(com, 0.) * degc_totw
if incr > best_increase :
best_increase = incr
best_com = com
best_com = com
__insert(node, best_com,
neigh_communities.get(best_com, 0.), status)
if best_com != com_node :
modif = True
modif = True
new_mod = __modularity(status)
if new_mod - cur_mod < __MIN :
break
......
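For induced_graph, the reworded docstring means: collapse each community to one node, and give the edge between two community-nodes the sum of the weights of all original links between their members. A sketch of that aggregation with networkx (toy data; the loop is an illustration, not the library's implementation):

import networkx as nx

G = nx.Graph()
G.add_weighted_edges_from([(1, 2, 1.0), (2, 3, 1.0), (3, 4, 2.0), (4, 5, 1.0)])
partition = {1: 'A', 2: 'A', 3: 'A', 4: 'B', 5: 'B'}

induced = nx.Graph()
for u, v, data in G.edges(data=True):
    cu, cv = partition[u], partition[v]
    # accumulate the weights of all original links between the two communities
    prev = induced.get_edge_data(cu, cv, {'weight': 0.0})['weight']
    induced.add_edge(cu, cv, weight=prev + data.get('weight', 1.0))

print(sorted(induced.edges(data=True)))
# [('A', 'A', {'weight': 2.0}), ('A', 'B', {'weight': 2.0}), ('B', 'B', {'weight': 1.0})]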
# Without this, we couldn't use the Django environment
from admin.env import *
from gargantext_web.views import empty_trash
empty_trash()
......@@ -50,7 +50,6 @@ def apply_workflow(corpus_id):
#ngrams2miam(user_id=corpus.user_id, corpus_id=corpus_id)
update_processing(corpus, 0)
#@transaction.commit_manually
@shared_task
def empty_trash(corpus_id):
nodes = models.Node.objects.filter(type_id=cache.NodeType['Trash'].id).all()
......
......@@ -430,11 +430,11 @@ def move_to_trash(node_id):
if DEBUG is False :
# TODO for the future maybe add id of node
empty_trash.apply_async("corpus_id")
empty_trash.apply_async([1,])
else:
empty_trash("corpus_id")
return(previous_type_id)
#return(previous_type_id)
except Exception as error:
print("can not move to trash Node" + str(node_id) + ":" + str(error))
......@@ -471,18 +471,16 @@ def delete_node(request, node_id):
if node.user_id != user.id:
return HttpResponseForbidden()
previous_type_id = move_to_trash(node_id)
previous_type_id = node.type_id
node_parent_id = node.parent_id
move_to_trash(node_id)
if previous_type_id == cache.NodeType['Corpus'].id:
return HttpResponseRedirect('/project/' + str(node.parent_id))
return HttpResponseRedirect('/project/' + str(node_parent_id))
else:
return HttpResponseRedirect('/projects/')
if settings.DEBUG == True:
empty_trash()
def delete_corpus(request, project_id, node_id):
# ORM Django
with transaction.atomic():
......
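The delete_node reordering fixes a use-after-trash bug: move_to_trash changes the node's type, so type_id and parent_id must be read before the call rather than after. A reduced sketch of the pattern (models and trash logic are stubs):

class Node:
    def __init__(self, type_id, parent_id):
        self.type_id = type_id
        self.parent_id = parent_id

def move_to_trash(node):
    node.type_id = 'Trash'   # mutates the node, losing the previous type

def delete_node(node):
    # capture the values *before* move_to_trash overwrites them
    previous_type_id = node.type_id
    node_parent_id = node.parent_id
    move_to_trash(node)
    if previous_type_id == 'Corpus':
        return '/project/' + str(node_parent_id)
    return '/projects/'

print(delete_node(Node('Corpus', 42)))   # /project/42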
......@@ -13,12 +13,18 @@ from analysis.cooccurrences import cooc
from gargantext_web.db import session, cache, get_or_create_node, bulk_insert
from gargantext_web.db import NodeNgramNgram, NodeNodeNgram
from sqlalchemy import desc, asc, or_, and_, Date, cast, select
def specificity(cooc_id=None, corpus=None):
def specificity(cooc_id=None, corpus=None, limit=100):
'''
Compute the specificity, simple calculus.
'''
cooccurrences = session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==cooc_id).all()
cooccurrences = (session.query(NodeNgramNgram)
.filter(NodeNgramNgram.node_id==cooc_id)
.order_by(NodeNgramNgram.score)
.limit(limit)
)
matrix = defaultdict(lambda : defaultdict(float))
......@@ -61,7 +67,7 @@ def compute_specificity(corpus,limit=100):
list_cvalue = get_or_create_node(nodetype='Cvalue', corpus=corpus)
cooc_id = cooc(corpus=corpus, cvalue_id=list_cvalue.id,limit=limit)
specificity(cooc_id=cooc_id,corpus=corpus)
specificity(cooc_id=cooc_id,corpus=corpus,limit=limit)
dbg.show('specificity')
......
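specificity now orders the cooccurrences by score and truncates to limit rows before building the matrix. Note that order_by(NodeNgramNgram.score) is ascending, so combined with limit it keeps the lowest-scored pairs; if the strongest pairs were intended, desc(NodeNgramNgram.score) would be needed. A plain-Python sketch of the order-then-limit step (rows are made up):

from collections import defaultdict

# (ngram_x, ngram_y, score) rows, standing in for the NodeNgramNgram query
cooccurrences = [(1, 2, 5.0), (1, 3, 2.0), (2, 3, 8.0), (3, 4, 1.0)]
limit = 3

# ascending order + limit keeps the lowest scores, as in the diff
kept = sorted(cooccurrences, key=lambda row: row[2])[:limit]

matrix = defaultdict(lambda: defaultdict(float))
for x, y, score in kept:
    matrix[x][y] = score

print(kept)   # [(3, 4, 1.0), (1, 3, 2.0), (1, 2, 5.0)]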
......@@ -6,6 +6,8 @@ from ngram.specificity import compute_specificity
from ngram.group import compute_groups
from ngram.miam import compute_miam
from gargantext_web.db import get_or_create_node
#from gargantext_web.celery import update_processing
def ngram_workflow(corpus, n=5000):
'''
......@@ -17,21 +19,27 @@ def ngram_workflow(corpus, n=5000):
compute_cvalue(corpus,limit=part) # size
part = round(part * 0.6)
part = round(part * 0.4)
print('spec part:', part)
compute_specificity(corpus,limit=part)
part = round(part * 0.5)
# compute_stop(corpus)
compute_groups(corpus,limit_inf=part, limit_sup=n)
limit_inf = round(part * 1)
limit_sup = round(part * 5)
print(limit_inf,limit_sup)
compute_groups(corpus,limit_inf=limit_inf, limit_sup=limit_sup)
# compute_miam(corpus,limit=part) # size
compute_tfidf(corpus)
#corpus=session.query(Node).filter(Node.id==244250).first()
#corpus=session.query(Node).filter(Node.id==257579).first()
#ngram_workflow(corpus)
#update_processing(corpus, 0)
#cvalue = get_or_create_node(corpus=corpus,nodetype='Cvalue')
#print(session.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==cvalue.id).count())
......
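The retuned workflow thresholds are easier to see with concrete numbers. Assuming part enters this hunk as 1000 (the real starting value is computed earlier in ngram_workflow and is outside the shown context, so 1000 is hypothetical):

part = 1000                  # hypothetical value entering the hunk

part = round(part * 0.4)     # new factor, was 0.6
print('spec part:', part)    # spec part: 400
# compute_specificity(corpus, limit=part)

part = round(part * 0.5)     # 200
limit_inf = round(part * 1)  # 200
limit_sup = round(part * 5)  # 1000, where the old call used limit_sup=n
print(limit_inf, limit_sup)  # 200 1000
# compute_groups(corpus, limit_inf=limit_inf, limit_sup=limit_sup)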