Commit cb3e51d9 authored Oct 02, 2016 by Romain Loth

Merge testing (commit 'd9d93ae0') into romain-testing
(importing the graph advances in my branch)

parents d5dfc9c4 d9d93ae0
Showing 13 changed files with 418 additions and 297 deletions (+418 −297)
gargantext/constants.py                  +1   −1
gargantext/views/api/api.py              +3   −6
graph/README.md                          +73  −14
graph/bridgeness.py                      +17  −4
graph/cooccurrences.py                   +44  −42
graph/distances.py                       +3   −3
graph/graph.py                           +194 −150
graph/rest.py                            +23  −72
graph/utils.py (new file)                +47  −0
graph/views.py                           +6   −1
templates/pages/corpora/myGraphs.html    +2   −2
templates/pages/main/about.html          +4   −1
templates/pages/menu.html                +1   −1
gargantext/constants.py

@@ -395,7 +395,7 @@ DEFAULT_N_DOCS_HAVING_NGRAM = 5
 # Graph constraints to compute the graph:
 # Modes: live graph generation, graph asynchronously computed or errors detected
 # here are the maximum size of corpus and maplist required to compute the graph
-graph_constraints = { 'corpusMax' : 599
+graph_constraints = { 'corpusMax' : 100
                     , 'corpusMin' : 40
                     , 'mapList'   : 50
                     }
gargantext/views/api/api.py

@@ -6,17 +6,14 @@ from rest_framework.views import APIView
The import block is deduplicated and reordered (same modules, alphabetical order); the resulting block:

 from rest_framework.authentication import SessionAuthentication, BasicAuthentication
 from rest_framework.permissions import IsAuthenticated
 from gargantext.constants import RESOURCETYPES, NODETYPES, get_resource
 from gargantext.models import Node, Ngram, NodeNgram, NodeNodeNgram, NodeNode
 from gargantext.util.db import session, delete, func, bulk_insert
 from gargantext.util.db_cache import cache, or_
 from gargantext.util.files import upload
 from gargantext.util.http import ValidationException, APIView, JsonHttpResponse, get_parameters
 from gargantext.util.scheduling import scheduled
 from gargantext.util.validation import validate
 #import
...
graph/README.md

The README is rewritten in this commit (+73 −14); the resulting file:

Module Graph Explorer: from text to graph
=========================================

## Graph Explorer main
0) All urls.py of the Graph Explorer
1) Main view of the graph explorer: views.py
   -> Graph Explorer
   -> My graph View
   -> REST API to get Data
2) Graph is generated (graph.py) through different steps
   a) check the constraints (graph_constraints) in gargantext/constants.py
   b) Data are retrieved as REST
      rest.py: check REST parameters
   c) graph.py:
      get_graph: check Graph parameters
      compute_graph: compute the graph in steps
        1) Cooccurrences are computed (live or asynchronously): cooccurrences.py
        2) Threshold and distances: distances.py
        3) clustering: louvain.py
        4) links between communities: bridgeness.py
   d) compress the graph before returning it: utils.py
3) Additional features:
   a) intersection of graphs: intersection.py

## How to contribute?
Some solutions:
1) please report to dev@gargantext.org
2) fix with git repo and pull request

## TODO
1) save parameters in hyperdata
2) graph explorer:
   * save current graph
3) myGraphs view:
   * progress bar
   * show already computed graphs vs. those still to be computed, with their parameters
   * show parameters
   * copy / paste and change some parameters to generate a new graph
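
Note: read end to end, the pipeline this README describes chains four functions. The sketch below follows that order under stated assumptions: a gargantext DB session is configured and the ids (corpus 1, maplist 2, grouplist 3) are placeholders, not real values.

    # Minimal sketch of the pipeline described above (placeholder ids).
    from graph.cooccurrences import countCooccurrences
    from graph.distances     import clusterByDistances
    from graph.bridgeness    import filterByBridgeness
    from graph.utils         import compress_graph

    cooc_id, cooc_matrix = countCooccurrences( corpus_id=1
                                             , mapList_id=2, groupList_id=3
                                             , threshold=3, isMonopartite=True
                                             , distance='conditional', save_on_db=True )
    G, partition, ids, weight = clusterByDistances( cooc_matrix
                                                  , field1="ngrams", field2="ngrams"
                                                  , distance='conditional' )
    data = filterByBridgeness(G, partition, ids, weight, 5, "node_link", 'ngrams', 'ngrams')
    data = compress_graph(data)   # shrink the json before sending it over the net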
graph/bridgeness.py

@@ -8,22 +8,27 @@ from networkx.readwrite import json_graph
 def filterByBridgeness(G, partition, ids, weight, bridgeness, type, field1, field2):
     '''
+    What is bridgeness?
+    Bridgeness = a measure to control links (bridges) between communities.
     '''
-    # Data are stored in a dict() (== hashmap by default for Python)
+    # Data are stored in a dict() (== hashmap by default with Python)
     data = dict()

     if type == "node_link":
         nodesB_dict = {}
         for node_id in G.nodes():
             #node, type(labels[node])
             nodesB_dict[ids[node_id][1]] = True
             # TODO the query below is not optimized (do it in do_distance).
             the_label = session.query(Ngram.terms).filter(Ngram.id == node_id).first()
             the_label = ", ".join(the_label)
             G.node[node_id]['label']      = the_label
             G.node[node_id]['size']       = weight[node_id]
             G.node[node_id]['type']       = ids[node_id][0].replace("ngrams", "terms")
             G.node[node_id]['attributes'] = {"clust_default": partition[node_id]}  # new format
             # G.add_edge(node, "cluster " + str(partition[node]), weight=3)
...
@@ -65,12 +70,20 @@ def filterByBridgeness(G,partition,ids,weight,bridgeness,type,field1,field2):
     if bridgeness > 0:
         for c1 in com_link.keys():
             for c2 in com_link[c1].keys():
-                index = round(bridgeness * len(com_link[c1][c2]) / (len(com_ids[c1]) + len(com_ids[c2])))
+                index = round(  bridgeness
+                              * len(com_link[c1][c2])
+                              / #----------------------------------#
+                                (len(com_ids[c1]) + len(com_ids[c2]))
+                             )

                 #print((c1,len(com_ids[c1])), (c2,len(com_ids[c2])), index)

                 if index > 0:
-                    for link in sorted(com_link[c1][c2], key=lambda x: x[2], reverse=True)[:index]:
+                    for link in sorted( com_link[c1][c2]
+                                      , key=lambda x: x[2]
+                                      , reverse=True
+                                      )[:index]:
                         #print(c1, c2, link[2])
                         info = {"s": link[0], "t": link[1], "w": link[2]}
                         links.append(info)
...
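
Note: the `index` computed above caps how many inter-community links (bridges) survive filtering. A worked illustration with invented numbers:

    bridgeness = 5                  # user parameter
    n_links    = 12                 # len(com_link[c1][c2]): candidate links between c1 and c2
    size_c1, size_c2 = 10, 20       # len(com_ids[c1]), len(com_ids[c2])

    index = round(bridgeness * n_links / (size_c1 + size_c2))
    print(index)   # 2 -> only the 2 heaviest links (sorted by weight x[2]) are kept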
graph/cooccurrences.py

 from gargantext.models import Node, Ngram, NodeNgram, NodeNgramNgram, \
                               NodeHyperdata, HyperdataKey
-from gargantext.util.db import session, aliased, bulk_insert, func
+from gargantext.util.db import session, aliased, func
 from gargantext.util.lists import WeightedMatrix, UnweightedList, Translations
 from sqlalchemy import desc, asc, or_, and_
-import datetime
-#import inspect
+from datetime import datetime
+from celery import shared_task

 def filterMatrix(matrix, mapList_id, groupList_id):
     mapList    = UnweightedList(mapList_id)
     group_list = Translations(groupList_id)
     cooc       = matrix & (mapList * group_list)
     return cooc

+@shared_task
-def countCooccurrences( corpus_id=None, test=False
+def countCooccurrences( corpus_id=None, cooc_id=None
                       , field1='ngrams', field2='ngrams'
                       , start=None, end=None
                       , mapList_id=None, groupList_id=None
+                      , distance=None, bridgeness=None
                       , n_min=1, n_max=None, limit=1000
-                      , coocNode_id=None, reset=True
                       , isMonopartite=True, threshold=3
-                      , save_on_db=False  # just return the WeightedMatrix (don't write to DB)
+                      , save_on_db=True, reset=True
                       ):
     '''
     Compute the cooccurrence matrix and save it, returning NodeNgramNgram.node_id
-    For the moment list of paramters are not supported because, lists need to
+    For the moment lists of parameters are not supported because lists need to
     be merged before.
     corpus        :: Corpus
     mapList_id    :: Int
     groupList_id  :: Int

+    For the moment, start and end are simple; only year is implemented yet
     start :: TimeStamp -- example: '2010-05-30 02:00:00+02'
     end   :: TimeStamp
     limit :: Int
     '''
-    # TODO : add hyperdata here
+    # Security test
     field1, field2 = str(field1), str(field2)

-    # Parameters to save in hyperdata of the Node Cooc
+    # FIXME remove the lines below after factorization of parameters
     parameters = dict()
     parameters['field1'] = field1
     parameters['field2'] = field2
...
@@ -57,17 +47,17 @@ def countCooccurrences( corpus_id=None ...
     # Get corpus as Python object
     corpus = session.query(Node).filter(Node.id == corpus_id).first()

-    # Get node
-    if not coocNode_id:
-        coocNode_id = (session.query(Node.id)
+    # Get node of the Graph
+    if not cooc_id:
+        cooc_id = (session.query(Node.id)
                        .filter( Node.typename  == "COOCCURRENCES"
                               , Node.name      == "GRAPH EXPLORER"
                               , Node.parent_id == corpus.id
                               )
                        .first()
                       )
-        if not coocNode_id:
+        if not cooc_id:
             coocNode = corpus.add_child(
                 typename = "COOCCURRENCES",
                 name     = "GRAPH (in corpus %s)" % corpus.id
...
@@ -75,12 +65,16 @@ def countCooccurrences( corpus_id=None ...
             session.add(coocNode)
             session.commit()
-            coocNode_id = coocNode.id
+            cooc_id = coocNode.id
         else:
-            coocNode_id = coocNode_id[0]
+            cooc_id = int(cooc_id[0])
+    else:
+        print("GRAPH #%s ... Loading cooccurrences computed already." % cooc_id)
+        cooc = session.query( NodeNgramNgram.ngram1_id
+                            , NodeNgramNgram.ngram2_id
+                            , NodeNgramNgram.weight
+                            ).filter(NodeNgramNgram.node_id == cooc_id).all()
+        return(int(cooc_id), WeightedMatrix(cooc))

     if reset == True:
-        session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id == coocNode_id).delete()
+        session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id == cooc_id).delete()
         session.commit()
...
@@ -161,8 +155,8 @@ def countCooccurrences( corpus_id=None ...
     # Cooc between the dates start and end
     if start is not None:
         #date_start = datetime.datetime.strptime ("2001-2-3 10:11:12", "%Y-%m-%d %H:%M:%S")
-        # TODO : more complexe date format here.
-        date_start = datetime.datetime.strptime(str(start), "%Y-%m-%d")
+        # TODO : more precise date format here (day is smaller grain actually).
+        date_start = datetime.strptime(str(start), "%Y-%m-%d")
         date_start_utc = date_start.strftime("%Y-%m-%d %H:%M:%S")
         Start = aliased(NodeHyperdata)
...
@@ -177,8 +171,8 @@ def countCooccurrences( corpus_id=None ...
     if end is not None:
-        # TODO : more complexe date format here.
-        date_end = datetime.datetime.strptime(str(end), "%Y-%m-%d")
+        # TODO : more precise date format here (day is smaller grain actually).
+        date_end = datetime.strptime(str(end), "%Y-%m-%d")
         date_end_utc = date_end.strftime("%Y-%m-%d %H:%M:%S")
         End = aliased(NodeHyperdata)
...
@@ -208,22 +202,30 @@ def countCooccurrences( corpus_id=None ...
     #cooc_query = cooc_query.order_by(desc('cooc_score'))
     matrix = WeightedMatrix(cooc_query)

+    print("GRAPH #%s Filtering the matrix with Map and Group Lists." % cooc_id)
     cooc = filterMatrix(matrix, mapList_id, groupList_id)

     parameters['MapList_id']   = str(mapList_id)
-    parameters['GroupList_id'] = str(mapList_id)
+    parameters['GroupList_id'] = str(groupList_id)

+    # TODO factorize savings on db
     if save_on_db:
-        # Saving cooc Matrix
-        cooc.save(coocNode_id)
+        # Saving the cooccurrences
+        cooc.save(cooc_id)
+        print("GRAPH #%s ... Node Cooccurrence Matrix saved" % cooc_id)

         # Saving the parameters
-        coocNode = session.query(Node).filter(Node.id == coocNode_id).first()
-        coocNode.hyperdata = parameters
+        print("GRAPH #%s ... Parameters saved in Node." % cooc_id)
+        coocNode = session.query(Node).filter(Node.id == cooc_id).first()
+        coocNode.hyperdata[distance] = dict()
+        coocNode.hyperdata[distance]["parameters"] = parameters
         session.add(coocNode)
         session.commit()

-        # Log message
-        print("Cooccurrence Matrix saved")
-
-    #data = cooc2graph(coocNode.id, cooc, distance=distance, bridgeness=bridgeness)
-    #return data
-    return cooc
+    return(coocNode.id, cooc)
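
Note: after this change countCooccurrences returns a pair (cooc_id, WeightedMatrix) instead of a bare matrix, and passing an existing cooc_id short-circuits the computation. A hedged usage sketch (ids are placeholders, and a configured DB session is assumed):

    cooc_id, cooc = countCooccurrences( corpus_id=42
                                      , mapList_id=7, groupList_id=8
                                      , threshold=3, distance='conditional'
                                      , save_on_db=True )
    # Passing the returned id back reloads the stored NodeNgramNgram rows
    # as a WeightedMatrix instead of recomputing them:
    cooc_id, cooc = countCooccurrences(corpus_id=42, cooc_id=cooc_id)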
graph/distances.py

@@ -16,16 +16,16 @@ import networkx as nx
 def clusterByDistances( cooc_matrix
                       , field1=None, field2=None
-                      , distance='conditional'):
+                      , distance=None):
     '''
-    do_distance :: Coocs[nga, ngb => ccweight] -> (Graph, Partition, {ids}, {weight})
+    clusterByDistance :: Coocs[nga, ngb => ccweight] -> (Graph, Partition, {ids}, {weight})
     '''
     # implicit global session
     authorized = ['conditional', 'distributional', 'cosine']
     if distance not in authorized:
-        distance = 'conditional'
+        raise ValueError("Distance must be in %s" % str(authorized))

     matrix = defaultdict(lambda : defaultdict(float))
     ids    = defaultdict(lambda : defaultdict(int))
...
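
Note: one behavioral change here is that an unknown distance now fails fast instead of silently falling back to 'conditional'. A small sketch (cooc_matrix stands for any WeightedMatrix):

    from graph.distances import clusterByDistances

    try:
        clusterByDistances(cooc_matrix, distance='euclidean')   # not an authorized distance
    except ValueError as err:
        print(err)   # Distance must be in ['conditional', 'distributional', 'cosine']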
graph/graph.py

@@ -4,28 +4,88 @@ from gargantext.util.lists import WeightedMatrix, UnweightedList, Transla
 from gargantext.util.http import JsonHttpResponse
 from gargantext.models import Node, Ngram, NodeNgram, NodeNgramNgram, NodeHyperdata

-#from gargantext.util.toolchain.ngram_coocs import compute_coocs
-from graph.cooccurrences import countCooccurrences
+from graph.cooccurrences import countCooccurrences, filterMatrix
 from graph.distances import clusterByDistances
 from graph.bridgeness import filterByBridgeness

 from gargantext.util.scheduling import scheduled
 from gargantext.constants import graph_constraints

+from celery import shared_task
 from datetime import datetime

+@shared_task
+def compute_graph( corpus_id=None, cooc_id=None
+                 , field1='ngrams', field2='ngrams'
+                 , start=None, end=None
+                 , mapList_id=None, groupList_id=None
+                 , distance=None, bridgeness=None
+                 , n_min=1, n_max=None, limit=1000
+                 , isMonopartite=True, threshold=3
+                 , save_on_db=True, reset=True
+                 ):
+    '''
+    All steps to compute a graph:
+    1) count Cooccurrences (function countCooccurrences)
+         main parameters: threshold, isMonopartite
+    2) filter and cluster By Distances (function clusterByDistances)
+         main parameter: distance
+         TODO option clustering='louvain' or 'percolation' or 'random walk' or ...
+    3) filter By Bridgeness (function filterByBridgeness)
+         main parameter: bridgeness
+    4) format the graph (formatGraph)
+         main parameter: format_
+    '''
+    print("GRAPH # ... Computing cooccurrences.")
+    (cooc_id, cooc_matrix) = countCooccurrences( corpus_id=corpus_id, cooc_id=cooc_id
+                                               , field1=field1, field2=field2
+                                               , start=start, end=end
+                                               , mapList_id=mapList_id, groupList_id=groupList_id
+                                               , isMonopartite=True, threshold=threshold
+                                               , distance=distance, bridgeness=bridgeness
+                                               , save_on_db=True
+                                               )
+    print("GRAPH #%d ... Cooccurrences computed." % (cooc_id))
+
+    print("GRAPH #%d ... Clustering with %s distance." % (cooc_id, distance))
+    G, partition, ids, weight = clusterByDistances( cooc_matrix
+                                                  , field1="ngrams", field2="ngrams"
+                                                  , distance=distance
+                                                  )
+
+    print("GRAPH #%d ... Filtering by bridgeness %d." % (cooc_id, bridgeness))
+    data = filterByBridgeness(G, partition, ids, weight, bridgeness, "node_link", field1, field2)
+
+    print("GRAPH #%d ... Saving Graph in hyperdata as json." % cooc_id)
+    node = session.query(Node).filter(Node.id == cooc_id).first()
+
+    if node.hyperdata.get(distance, None) is None:
+        node.hyperdata[distance] = dict()
+
+    node.hyperdata[distance][bridgeness] = data
+    node.save_hyperdata()
+    session.commit()
+
+    print("GRAPH #%d ... Returning data as json." % cooc_id)
+    return data

 def get_graph( request=None, corpus=None
              , field1='ngrams', field2='ngrams'
              , mapList_id=None, groupList_id=None
              , cooc_id=None, type='node_link'
              , start=None, end=None
-             , threshold=1
-             , distance='conditional'
-             , isMonopartite=True # By default, we compute terms/terms graph
-             , bridgeness=5
-             , saveOnly=None #, size=1000
+             , distance='conditional', bridgeness=5
+             , threshold=1, isMonopartite=True
+             , saveOnly=True
              ):
     '''
     Get_graph : main steps:
     0) Check the parameters

     get_graph :: GraphParameters -> Either (Dic Nodes Links) (Dic State Length)
         where type Length = Int

-    get_graph first checks the parameters and return either graph data or a dic with
+    get_graph first checks the parameters and return either graph data or a dict with
     state "type" with an integer to indicate the size of the parameter
     (maybe we could add a String in that step to factor and give here the error message)

-    1) count Cooccurrences (function countCooccurrences)
-         main parameters: threshold
-    2) filter and cluster By Distances (function clusterByDistances)
-         main parameter: distance
-    3) filter By Bridgeness (function filterByBridgeness)
-         main parameter: bridgeness
-    4) format the graph (formatGraph)
-         main parameter: format_
+    1) compute_graph (see function above)
+    2) return graph
     '''
...
@@ -33,157 +93,141 @@ def get_graph( request=None , corpus=None
-    before_cooc = datetime.now()
-
-    # case of Cooccurrences have not been computed already
-    if cooc_id == None:
+    # Case of graph has been computed already
+    if cooc_id is not None:
+        print("GRAPH#%d ... Loading data already computed." % int(cooc_id))
+        node = session.query(Node).filter(Node.id == cooc_id).first()
+
+        # Structure of the Node.hyperdata[distance][bridgeness]
+        # All parameters (but distance and bridgeness)
+        # are in Node.hyperdata["parameters"]
+
+        # Check distance of the graph
+        if node.hyperdata.get(distance, None) is not None:
+            graph = node.hyperdata[distance]
+
+            # Check bridgeness of the graph
+            if graph.get(str(bridgeness), None) is not None:
+                return graph[str(bridgeness)]
+
+    # Case of graph has not been computed already
+    # First, check the parameters

     # Case of mapList not big enough
     # ==============================
     # if we do not have any mapList_id already
     if mapList_id is None:
         mapList_id = session.query(Node.id).filter(Node.typename == "MAPLIST").first()[0]

     mapList_size = session.query(NodeNgram).filter(NodeNgram.node_id == mapList_id).count()

     if mapList_size < graph_constraints['mapList']:
         # Do not compute the graph if mapList is not big enough
         return {'state': "mapListError", "length": mapList_size}

     # Instantiate query for case of corpus not big enough
     # ===================================================
     corpus_size_query = (session.query(Node)
                                 .filter(Node.typename == "DOCUMENT")
                                 .filter(Node.parent_id == corpus.id)
                         )

     # Filter corpus by date if any start date
     # ---------------------------------------
     if start is not None:
         #date_start = datetime.datetime.strptime ("2001-2-3 10:11:12", "%Y-%m-%d %H:%M:%S")
         date_start = datetime.strptime(str(start), "%Y-%m-%d")
         date_start_utc = date_start.strftime("%Y-%m-%d %H:%M:%S")
         Start = aliased(NodeHyperdata)
         corpus_size_query = (corpus_size_query.join( Start
                                                    , Start.node_id == Node.id
                                                    )
                                               .filter(Start.key == 'publication_date')
                                               .filter(Start.value_utc >= date_start_utc)
                             )

     # Filter corpus by date if any end date
     # -------------------------------------
     if end is not None:
         date_end = datetime.strptime(str(end), "%Y-%m-%d")
         date_end_utc = date_end.strftime("%Y-%m-%d %H:%M:%S")
         End = aliased(NodeHyperdata)
         corpus_size_query = (corpus_size_query.join( End
                                                    , End.node_id == Node.id
                                                    )
                                               .filter(End.key == 'publication_date')
                                               .filter(End.value_utc <= date_end_utc)
                             )

     # Finally test if the size of the corpora is big enough
     # --------------------------------
     corpus_size = corpus_size_query.count()

     if saveOnly is not None and saveOnly == "True":
-        scheduled(countCooccurrences)( corpus_id=corpus.id
+        scheduled(compute_graph)( corpus_id=corpus.id, cooc_id=cooc_id
                                 #, field1="ngrams", field2="ngrams"
                                 , start=start, end=end
                                 , mapList_id=mapList_id, groupList_id=groupList_id
                                 , isMonopartite=True, threshold=threshold
+                                , distance=distance, bridgeness=bridgeness
                                 , save_on_db=True
                                 #, limit=size
                                 )
         return {"state": "saveOnly"}

-    if corpus_size > graph_constraints['corpusMax']:
+    elif corpus_size > graph_constraints['corpusMax']:
         # Then compute cooc asynchronously with celery
-        scheduled(countCooccurrences)( corpus_id=corpus.id
+        scheduled(compute_graph)( corpus_id=corpus.id, cooc_id=cooc_id
                                 #, field1="ngrams", field2="ngrams"
                                 , start=start, end=end
                                 , mapList_id=mapList_id, groupList_id=groupList_id
                                 , isMonopartite=True, threshold=threshold
+                                , distance=distance, bridgeness=bridgeness
                                 , save_on_db=True
                                 #, limit=size
                                 )
-        # Dic to inform user that corpus maximum is reached then
+        # Dict to inform user that corpus maximum is reached then
         # graph is computed asynchronously
         return {"state": "corpusMax", "length": corpus_size}

     elif corpus_size <= graph_constraints['corpusMin']:
         # Do not compute the graph if corpus is not big enough
         return {"state": "corpusMin", "length": corpus_size}

     else:
         # If graph_constraints are ok then compute the graph in live
-        cooc_matrix = countCooccurrences( corpus_id=corpus.id
-                                        #, field1="ngrams", field2="ngrams"
-                                        , start=start, end=end
-                                        , mapList_id=mapList_id, groupList_id=groupList_id
-                                        , isMonopartite=True, threshold=threshold
-                                        , save_on_db=True
-                                        #, limit=size
-                                        )
-
-    else:
-        print("Getting data for matrix %d", int(cooc_id))
-        matrix = WeightedMatrix(int(cooc_id))
-        #print(matrix)
-        cooc_matrix = filterMatrix(matrix, mapList_id, groupList_id)
-
-    after_cooc = datetime.now()
-    print("... Cooccurrences took %f s." % (after_cooc - before_cooc).total_seconds())
+        data = compute_graph( corpus_id=corpus.id, cooc_id=cooc_id
+                            #, field1="ngrams", field2="ngrams"
+                            , start=start, end=end
+                            , mapList_id=mapList_id, groupList_id=groupList_id
+                            , isMonopartite=True, threshold=threshold # fyi
+                            , distance=distance, bridgeness=bridgeness
+                            , save_on_db=True
+                            #, limit=size
+                            )

     # case when 0 coocs are observed (usually b/c not enough ngrams in maplist)
-    if len(cooc_matrix.items) == 0:
-        print("GET_GRAPH: 0 coocs in matrix")
-        data = {'nodes':[], 'links':[]}  # empty data
-    # normal case
-    else:
-        G, partition, ids, weight = clusterByDistances( cooc_matrix
-                                                      , field1="ngrams", field2="ngrams"
-                                                      , distance=distance
-                                                      )
-        after_cluster = datetime.now()
-        print("... Clustering took %f s." % (after_cluster - after_cooc).total_seconds())
-
-        data = filterByBridgeness(G, partition, ids, weight, bridgeness, type, field1, field2)
-        after_filter = datetime.now()
-        print("... Filtering took %f s." % (after_filter - after_cluster).total_seconds())
+    if len(data) == 0:
+        print("GRAPH # ... GET_GRAPH: 0 coocs in matrix")
+        data = {'nodes': [], 'links': []}  # empty data

     return data
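
Note: the cache that compute_graph writes and get_graph reads back lives in the hyperdata of the COOCCURRENCES node, keyed first by distance, then by bridgeness (stringified once it round-trips through json). An invented example of the resulting layout:

    # Node.hyperdata after one run with distance='conditional', bridgeness=5
    # (values invented for illustration):
    node.hyperdata = {
        "conditional": {
            "parameters": {"field1": "ngrams", "field2": "ngrams",
                           "MapList_id": "7", "GroupList_id": "8"},
            "5": {"nodes": [], "links": []},   # the json graph at bridgeness 5
        }
    }
    # get_graph returns node.hyperdata[distance][str(bridgeness)] when present
    # and only recomputes (or schedules compute_graph) on a cache miss.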
graph/rest.py

-#from rest_framework.authentication import SessionAuthentication, BasicAuthentication
 from gargantext.util.db import session
 from gargantext.models.nodes import Node
 from graph.graph import get_graph
+from graph.utils import compress_graph, format_html
 from gargantext.util.http import APIView, APIException \
                                , JsonHttpResponse, requires_auth
 from gargantext.constants import graph_constraints
 from traceback import format_tb

-def compress_graph(graphdata):
-    ...    # (72 deleted lines: compress_graph and format_html are moved,
-           #  unchanged, to the new file graph/utils.py shown below)
-def format_html(link):
-    ...

 # TODO check authentication
 class Graph(APIView):
     '''
     REST part for graphs.
...
@@ -75,6 +27,16 @@ class Graph(APIView):
         # Get the node we are working with
         corpus = session.query(Node).filter(Node.id == corpus_id).first()

+        # TODO Parameters to save in hyperdata of the Node Cooc
+        # WARNING: we could factorize the parameters as dict but ...
+        # ... it causes a bug in asynchronous function !
+        # Check celery upgrades before.
+        # Example (for the future):
+        # parameters = dict()
+        # parameters['field1'] = field1
+        # parameters['field2'] = field2

         # Get all the parameters in the URL
         cooc_id  = request.GET.get('cooc_id', None)
         saveOnly = request.GET.get('saveOnly', None)
...
@@ -94,8 +56,8 @@ class Graph(APIView):
         type_    = str(request.GET.get('type', 'node_link'))
         distance = str(request.GET.get('distance', 'conditional'))

-        # Get default value if no map list
+        # Get default map List of corpus
         if mapList_id == 0:
             mapList_id = (session.query(Node.id)
                                  .filter(Node.typename == "MAPLIST"
...
@@ -107,7 +69,6 @@ class Graph(APIView):
             mapList_id = mapList_id[0]

         if mapList_id == None:
-            # todo add as an error msg ?
             raise ValueError("MAPLIST node needed for cooccurrences")
...
@@ -123,36 +84,26 @@ class Graph(APIView):
             groupList_id = groupList_id[0]

         if groupList_id == None:
-            # todo add as an error msg ?
             raise ValueError("GROUPLIST node needed for cooccurrences")

-        # Check the options
+        # Declare accepted fields
         accepted_field1 = ['ngrams', 'journal', 'source', 'authors']
         accepted_field2 = ['ngrams',]
         options         = ['start', 'end', 'threshold', 'distance', 'cooc_id']

         try:
-            # Test params
+            # Check if parameters are accepted
             if (field1 in accepted_field1) and (field2 in accepted_field2):
-                if start is not None and end is not None:
-                    data = get_graph( corpus=corpus, cooc_id=cooc_id
-                                    , field1=field1, field2=field2
-                                    , mapList_id=mapList_id, groupList_id=groupList_id
-                                    , start=start, end=end
-                                    , threshold=threshold
-                                    , distance=distance
-                                    , saveOnly=saveOnly
-                                    )
-                else:
-                    data = get_graph( corpus=corpus, cooc_id=cooc_id
-                                    #, field1=field1, field2=field2
-                                    , mapList_id=mapList_id, groupList_id=groupList_id
-                                    , threshold=threshold
-                                    , distance=distance
-                                    , bridgeness=bridgeness
-                                    , saveOnly=saveOnly
-                                    )
+                data = get_graph( corpus=corpus, cooc_id=cooc_id
+                                #, field1=field1, field2=field2
+                                , mapList_id=mapList_id, groupList_id=groupList_id
+                                , start=start, end=end
+                                , threshold=threshold
+                                , distance=distance
+                                , bridgeness=bridgeness
+                                , saveOnly=saveOnly
+                                )

             # data :: Either (Dic Nodes Links) (Dic State Length)
...
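
Note: the route for this API view lives in the explorer's urls.py, which this commit does not touch, so the endpoint path below is hypothetical; the query parameters mirror the request.GET reads above.

    import requests

    resp = requests.get(
        "http://localhost:8000/api/graph",                  # placeholder endpoint
        params={"cooc_id": 123, "distance": "conditional",
                "bridgeness": 5, "saveOnly": "True"},
        cookies={"sessionid": "placeholder"},               # session auth assumed
    )
    print(resp.json())   # e.g. {"state": "saveOnly"}: graph scheduled via celery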
graph/utils.py (new file, 0 → 100644)

def compress_graph(graphdata):
    """
    graph data is usually a dict with 2 slots:
      "nodes": [{"id":4103, "type":"terms", "attributes":{"clust_default": 0}, "size":29, "label":"regard"},...]
      "links": [{"t": 998,"s": 768,"w": 0.0425531914893617},...]

    To send this data over the net, this function can reduce a lot of its size:
      - keep fewer decimals for the float value of each link's weight
      - use shorter names for node properties (eg: s/clust_default/cl/)

    result format:
      "nodes": [{"id":4103, "at":{"cl": 0}, "s":29, "lb":"regard"},...]
      "links": [{"t": 998,"s": 768,"w": 0.042},...]
    """
    for link in graphdata['links']:
        link['w'] = format(link['w'], '.3f')  # keep only 3 decimals

    for node in graphdata['nodes']:
        node['lb'] = node['label']
        del node['label']

        node['at'] = node['attributes']
        del node['attributes']

        node['at']['cl'] = node['at']['clust_default']
        del node['at']['clust_default']

        node['s'] = node['size']
        del node['size']

        if node['type'] == "terms":
            # it's the default type for our format: so we don't need it
            del node['type']
        else:
            node['t'] = node['type']
            del node['type']

    return graphdata


def format_html(link):
    """
    Build an html link adapted to our json message format
    """
    return "<a class='msglink' href='%s'>%s</a>" % (link, link)
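
Note: a quick before/after on a one-node, one-link toy graph (values invented) shows what compress_graph does; each weight becomes a 3-decimal string.

    graphdata = {
        "nodes": [{"id": 4103, "type": "terms",
                   "attributes": {"clust_default": 0},
                   "size": 29, "label": "regard"}],
        "links": [{"t": 998, "s": 768, "w": 0.0425531914893617}],
    }

    print(compress_graph(graphdata))
    # {'nodes': [{'id': 4103, 'lb': 'regard', 'at': {'cl': 0}, 's': 29}],
    #  'links': [{'t': 998, 's': 768, 'w': '0.043'}]}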
graph/views.py

@@ -14,6 +14,8 @@ def explorer(request, project_id, corpus_id):
     Graph explorer, also known as TinaWebJS, using SigmaJS.
     Nodes are ngrams (from title, abstract or journal name).
     Links represent a proximity measure.
+
+    Data are received in RESTful mode (see rest.py).
     '''
     # we pass our corpus
...
@@ -46,7 +48,10 @@ def explorer(request, project_id, corpus_id):
 @requires_auth
 def myGraphs(request, project_id, corpus_id):
     '''
-    List all of my Graphs
+    List all of my Graphs.
+
+    Each Graph has one Node of Cooccurrences.
+    Each Graph is saved in the hyperdata of that Node.
     '''
     user = cache.User[request.user.id]
...
templates/pages/corpora/myGraphs.html

@@ -26,7 +26,7 @@
 <div class="col-md-5 content">
     <li>
-        <h5>{{cooc.name}}</h5>
+        <h4>{{cooc.name}}</h4>
         {{cooc.date}}
         {% for key, value in coocs_count.items %}
             {% if key == cooc.id %}
...
@@ -40,7 +40,7 @@
     <li> ~{{ value }} nodes with distances:
         <ul>
             <li>
-                <a href="/projects/{{project.id}}/corpora/{{corpus.id}}/explorer?cooc_id={{cooc.id}}&distance=distributional&bridgeness=5">
+                <a href="/projects/{{project.id}}/corpora/{{corpus.id}}/explorer?cooc_id={{cooc.id}}&distance=conditional&bridgeness=5">
                     <span class="glyphicon glyphicon-eye-open" aria-hidden="true"></span>
                     Conditional
                 </a>
...
templates/pages/main/about.html

@@ -102,10 +102,13 @@
 <div class="panel-body">
     <div class="container">
         <ul>
+            <li> Newsletters: <a target="blank" href="https://phplist.iscpif.fr/?p=subscribe&id=2">subscribe</a> </li>
             <li> Mailing-lists
                 <ul>
                     <li> User mailing-list: soon </li>
                     <li> Devel mailing-list: soon </li>
                 </ul>
             </li>
...
templates/pages/menu.html

@@ -294,7 +294,7 @@
 <p>
     Gargantext <span class="glyphicon glyphicon-registration-mark" aria-hidden="true"></span>
-    , version 3.0.5.4,
+    , version 3.0.5.5,
     <a href="http://www.cnrs.fr" target="blank" title="Institution that enables this project.">
         Copyrights <span class="glyphicon glyphicon-copyright-mark" aria-hidden="true"></span>
...