humanities / gargantext / Commits

Commit ce761204
authored Oct 31, 2017 by sim
Remove graph module

parent 6a0506a4
Showing 18 changed files with 1 addition and 2667 deletions (+1, -2667)
Changed files:

  gargantext/graph/README.md                   +0   -91
  gargantext/graph/__init__.py                 +0    -0
  gargantext/graph/bridgeness.py               +0  -123
  gargantext/graph/cooccurrences.py            +0  -236
  gargantext/graph/distances.py                +0  -220
  gargantext/graph/graph.py                    +0  -274
  gargantext/graph/growth.py                   +0   -61
  gargantext/graph/intersection.py             +0  -103
  gargantext/graph/louvain.py                  +0  -554
  gargantext/graph/mail_notification.py        +0   -34
  gargantext/graph/rest.py                     +0  -227
  gargantext/graph/templates/explorer.html     +0  -546
  gargantext/graph/urls.py                     +0   -20
  gargantext/graph/utils.py                    +0   -49
  gargantext/graph/views.py                    +0  -111
  gargantext/settings.py                       +0    -2
  gargantext/urls.py                           +0    -7
  gargantext/views/api/urls.py                 +1    -9
gargantext/graph/README.md (deleted, 100644 → 0)
Module Graph Explorer: from text to graph
=========================================

## Graph Explorer main

0) All urls.py of the Graph Explorer
1) Main view of the graph explorer: views.py
   -> Graph Explorer
   -> My graph View
   -> REST API to get Data
2) Graph is generated (graph.py) through different steps (a minimal sketch of the whole chain follows this list)
   a) check the constraints (graph_constraints) in gargantext/constants.py
   b) Data are retrieved as REST
      rest.py: check REST parameters
   c) graph.py:
      get_graph: check Graph parameters
      compute_graph: compute graph
      1) Cooccurrences are computed (live or asynchronously): cooccurrences.py
      2) Threshold and distances: distances.py
      3) Clustering: louvain.py
      4) Links between communities: bridgeness.py
   d) compress graph before returning it: utils.py
4) Additional features:
   a) intersection of graphs: intersection.py
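A minimal sketch of that chain, assuming the functions defined in the modules above (countCooccurrences, clusterByDistances, filterByBridgeness, compress_graph) and the module paths as they stood before this commit. The real orchestration lives in graph.py (collapsed in this diff), so this is illustrative only:

```python
# Illustrative sketch, not the original graph.py.
from gargantext.graph.cooccurrences import countCooccurrences
from gargantext.graph.distances     import clusterByDistances
from gargantext.graph.bridgeness    import filterByBridgeness
from gargantext.graph.utils         import compress_graph

def compute_graph_sketch(corpus_id, mapList_id, groupList_id,
                         distance='conditional', bridgeness=5):
    # 1) cooccurrences.py: count co-occurrences and store them on a COOCCURRENCES node
    cooc_id, cooc_matrix = countCooccurrences(corpus_id=corpus_id,
                                              mapList_id=mapList_id,
                                              groupList_id=groupList_id)
    # 2) + 3) distances.py: distance measure, thresholding, then Louvain clustering
    G, partition, ids, weight = clusterByDistances(cooc_matrix,
                                                   field1='ngrams', field2='ngrams',
                                                   distance=distance)
    # 4) bridgeness.py: keep only the strongest links between communities
    data = filterByBridgeness(G, partition, ids, weight,
                              bridgeness, 'node_link', 'ngrams', 'ngrams')
    # utils.py: shrink the JSON payload before returning it to the explorer
    return compress_graph(data)
```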
## How to contribute?

Some solutions:
1) please report to dev@gargantext.org
2) fix with git repo and pull request

## TODO

myGraphs view:
* progress bar
* show already computed graphs vs. graphs still to be computed, with their parameters
* show parameters
* copy / paste and change some parameters to generate a new graph
gargantext/graph/__init__.py (deleted, 100644 → 0, empty file)
gargantext/graph/bridgeness.py (deleted, 100644 → 0)

# Article coming soon

from gargantext.util.db       import session
from gargantext.models.ngrams import Ngram

from collections        import defaultdict
from networkx.readwrite import json_graph


def filterByBridgeness(G, partition, ids, weight, bridgeness, type, field1, field2):
    '''
    Bridgeness = measure to control links (bridges) between communities.
    '''
    # Data are stored in a dict(), (== hashmap by default with Python)
    data = dict()

    if type == "node_link":
        nodesB_dict = {}

        for node_id in G.nodes():
            #node,type(labels[node])
            nodesB_dict[ids[node_id][1]] = True

            # TODO the query below is not optimized (do it do_distance).
            the_label = session.query(Ngram.terms).filter(Ngram.id == node_id).first()
            the_label = ", ".join(the_label)

            G.node[node_id]['label']      = the_label
            G.node[node_id]['size']       = weight[node_id]
            G.node[node_id]['type']       = ids[node_id][0].replace("ngrams", "terms")
            G.node[node_id]['attributes'] = {"clust_default": partition[node_id]}  # new format
            # G.add_edge(node, "cluster " + str(partition[node]), weight=3)

        links = []
        i = 1

        if bridgeness > 0:
            com_link = defaultdict(lambda: defaultdict(list))
            com_ids  = defaultdict(list)

            for k, v in partition.items():
                com_ids[v].append(k)

        for e in G.edges_iter():
            s = e[0]
            t = e[1]
            weight = G[ids[s][1]][ids[t][1]]["weight"]

            if bridgeness < 0:
                info = {"s": ids[s][1], "t": ids[t][1], "w": weight}
                links.append(info)

            else:
                if partition[s] == partition[t]:
                    info = {"s": ids[s][1], "t": ids[t][1], "w": weight}
                    links.append(info)

                if bridgeness > 0:
                    if partition[s] < partition[t]:
                        com_link[partition[s]][partition[t]].append((s, t, weight))

        if bridgeness > 0:
            for c1 in com_link.keys():
                for c2 in com_link[c1].keys():

                    index = round(  bridgeness * len(com_link[c1][c2])
                                  / #----------------------------------#
                                    (len(com_ids[c1]) + len(com_ids[c2]))
                                  )

                    #print((c1,len(com_ids[c1])), (c2,len(com_ids[c2])), index)

                    if index > 0:
                        for link in sorted(com_link[c1][c2],
                                           key=lambda x: x[2],
                                           reverse=True)[:index]:
                            #print(c1, c2, link[2])
                            info = {"s": link[0], "t": link[1], "w": link[2]}
                            links.append(info)

        B = json_graph.node_link_data(G)
        B["links"] = []
        B["links"] = links

        if field1 == field2 == 'ngrams':
            data["nodes"] = B["nodes"]
            data["links"] = B["links"]

        else:
            A = get_graphA("journal", nodesB_dict, B["links"], corpus)

            print("#nodesA:",              len(A["nodes"]))
            print("#linksAA + #linksAB:",  len(A["links"]))
            print("#nodesB:",              len(B["nodes"]))
            print("#linksBB:",             len(B["links"]))

            data["nodes"] = A["nodes"] + B["nodes"]
            data["links"] = A["links"] + B["links"]

            print("  total nodes :", len(data["nodes"]))
            print("  total links :", len(data["links"]))
            print("")

    elif type == "adjacency":
        for node in G.nodes():
            try:
                #node,type(labels[node])
                #G.node[node]['label'] = node
                G.node[node]['name']  = node
                #G.node[node]['size'] = weight[node]
                G.node[node]['group'] = partition[node]
                #G.add_edge(node, partition[node], weight=3)
            except Exception as error:
                print("error02: ", error)

        data = json_graph.node_link_data(G)

    elif type == 'bestpartition':
        return(partition)

    return(data)
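To make the bridgeness cut-off above concrete, here is a small worked example with invented community sizes; the formula is the one used inside filterByBridgeness:

```python
# Worked example with made-up numbers:
# community c1 has 40 nodes, c2 has 20 nodes, and they share 12 candidate links.
bridgeness  = 5
inter_links = 12
size_c1, size_c2 = 40, 20

index = round(bridgeness * inter_links / (size_c1 + size_c2))
# index == 1, so only the heaviest of the 12 inter-community links is kept.
```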
gargantext/graph/cooccurrences.py (deleted, 100644 → 0)

from gargantext.models    import Node, Ngram, NodeNgram, NodeNgramNgram, \
                                 NodeHyperdata, HyperdataKey

from gargantext.util.db    import session, aliased, func
from gargantext.util.lists import WeightedMatrix, UnweightedList, Translations

from sqlalchemy import desc, asc, or_, and_

from datetime import datetime


def filterMatrix(matrix, mapList_id, groupList_id):
    mapList    = UnweightedList(mapList_id)
    group_list = Translations(groupList_id)
    cooc       = matrix & (mapList * group_list)
    return cooc


def countCooccurrences(corpus_id=None, cooc_id=None,
                       field1='ngrams', field2='ngrams',
                       start=None, end=None,
                       mapList_id=None, groupList_id=None,
                       distance=None, bridgeness=None,
                       n_min=1, n_max=None, limit=1000,
                       isMonopartite=True, threshold=3,
                       save_on_db=True, reset=True):
    '''
    Compute the cooccurrence matrix and save it, returning NodeNgramNgram.node_id.
    For the moment, lists of parameters are not supported because lists need to
    be merged first.

    corpus       :: Corpus
    mapList_id   :: Int
    groupList_id :: Int
    start        :: TimeStamp -- example: '2010-05-30 02:00:00+02'
    end          :: TimeStamp
    limit        :: Int
    '''
    # FIXME remove the lines below after factorization of parameters
    parameters = dict()
    parameters['field1'] = field1
    parameters['field2'] = field2

    # Get corpus as Python object
    corpus = session.query(Node).filter(Node.id == corpus_id).first()

    # Get node of the Graph
    if not cooc_id:
        cooc_id = (session.query(Node.id)
                          .filter(Node.typename  == "COOCCURRENCES",
                                  Node.name      == "GRAPH EXPLORER",
                                  Node.parent_id == corpus.id)
                          .first()
                   )
        if not cooc_id:
            coocNode = corpus.add_child(
                typename = "COOCCURRENCES",
                name     = "GRAPH (in corpus %s)" % corpus.id
            )
            session.add(coocNode)
            session.commit()
            cooc_id = coocNode.id
        else:
            cooc_id = int(cooc_id[0])

    # when cooc_id preexisted, but we want to continue (reset = True)
    # (to give new contents to this cooc_id)
    elif reset:
        print("GRAPH #%s ... Counting new cooccurrences data." % cooc_id)
        session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id == cooc_id).delete()
        session.commit()

    # when cooc_id preexisted and we just want to load it (reset = False)
    else:
        print("GRAPH #%s ... Loading cooccurrences computed already." % cooc_id)
        cooc = (session.query(NodeNgramNgram.ngram1_id,
                              NodeNgramNgram.ngram2_id,
                              NodeNgramNgram.weight)
                       .filter(NodeNgramNgram.node_id == cooc_id)
                       .all()
                )
        return(int(cooc_id), WeightedMatrix(cooc))

    NodeNgramX = aliased(NodeNgram)

    # Simple Cooccurrences
    cooc_score = func.count(NodeNgramX.node_id).label('cooc_score')

    # A kind of Euclidean distance cooccurrences
    #cooc_score = func.sqrt(func.sum(NodeNgramX.weight * NodeNgramY.weight)).label('cooc_score')

    if isMonopartite:
        NodeNgramY = aliased(NodeNgram)

        cooc_query = (session.query(NodeNgramX.ngram_id, NodeNgramY.ngram_id, cooc_score)
                             .join(Node, Node.id == NodeNgramX.node_id)
                             .join(NodeNgramY, NodeNgramY.node_id == Node.id)
                             .filter(Node.parent_id == corpus.id, Node.typename == "DOCUMENT")
                      )
    else:
        NodeNgramY = aliased(NodeNgram)

        cooc_query = (session.query(NodeHyperdataNgram.ngram_id, NodeNgramY.ngram_id, cooc_score)
                             .join(Node, Node.id == NodeHyperdataNgram.node_id)
                             .join(NodeNgramY, NodeNgramY.node_id == Node.id)
                             .join(Hyperdata, Hyperdata.id == NodeHyperdataNgram.hyperdata_id)
                             .filter(Node.parent_id == corpus.id, Node.typename == "DOCUMENT")
                             .filter(Hyperdata.name == field1)
                      )

    # Size of the ngrams between n_min and n_max
    if n_min is not None or n_max is not None:
        if isMonopartite:
            NgramX = aliased(Ngram)
            cooc_query = cooc_query.join(NgramX, NgramX.id == NodeNgramX.ngram_id)

        NgramY = aliased(Ngram)
        cooc_query = cooc_query.join(NgramY, NgramY.id == NodeNgramY.ngram_id)

        if n_min is not None:
            cooc_query = (cooc_query.filter(NgramY.n >= n_min))
            if isMonopartite:
                cooc_query = cooc_query.filter(NgramX.n >= n_min)

        if n_max is not None:
            cooc_query = (cooc_query.filter(NgramY.n >= n_min))
            if isMonopartite:
                cooc_query = cooc_query.filter(NgramX.n >= n_min)

    # Cooc between the dates start and end
    if start is not None:
        #date_start = datetime.datetime.strptime ("2001-2-3 10:11:12", "%Y-%m-%d %H:%M:%S")
        # TODO : more precise date format here (day is smaller grain actually).
        date_start     = datetime.strptime(str(start), "%Y-%m-%d")
        date_start_utc = date_start.strftime("%Y-%m-%d %H:%M:%S")

        Start = aliased(NodeHyperdata)
        cooc_query = (cooc_query.join(Start, Start.node_id == Node.id)
                                .filter(Start.key == 'publication_date')
                                .filter(Start.value_utc >= date_start_utc)
                      )
        parameters['start'] = date_start_utc

    if end is not None:
        # TODO : more precise date format here (day is smaller grain actually).
        date_end     = datetime.strptime(str(end), "%Y-%m-%d")
        date_end_utc = date_end.strftime("%Y-%m-%d %H:%M:%S")

        End = aliased(NodeHyperdata)
        cooc_query = (cooc_query.join(End, End.node_id == Node.id)
                                .filter(End.key == 'publication_date')
                                .filter(End.value_utc <= date_end_utc)
                      )
        parameters['end'] = date_end_utc

    if isMonopartite:
        # Cooc is symmetric, take only the main cooccurrences and cut at the limit
        cooc_query = cooc_query.filter(NodeNgramX.ngram_id < NodeNgramY.ngram_id)

    cooc_query = cooc_query.having(cooc_score >= threshold)

    if isMonopartite:
        cooc_query = cooc_query.group_by(NodeNgramX.ngram_id, NodeNgramY.ngram_id)
    else:
        cooc_query = cooc_query.group_by(NodeHyperdataNgram.ngram_id, NodeNgramY.ngram_id)

    # Order according some scores
    # If ordering is really needed, use Ordered Index (faster)
    #cooc_query = cooc_query.order_by(desc('cooc_score'))

    matrix = WeightedMatrix(cooc_query)

    print("GRAPH #%s Filtering the matrix with Map and Group Lists." % cooc_id)
    cooc = filterMatrix(matrix, mapList_id, groupList_id)

    parameters['MapList_id']   = str(mapList_id)
    parameters['GroupList_id'] = str(groupList_id)

    # TODO factorize savings on db
    if save_on_db:
        # Saving the cooccurrences
        cooc.save(cooc_id)
        print("GRAPH #%s ... Node Cooccurrence Matrix saved" % cooc_id)

        # Saving the parameters
        print("GRAPH #%s ... Parameters saved in Node." % cooc_id)
        coocNode = session.query(Node).filter(Node.id == cooc_id).first()

        coocNode.hyperdata["parameters"] = dict()
        coocNode.hyperdata["parameters"] = parameters
        coocNode.save_hyperdata()
        session.commit()

        #data = cooc2graph(coocNode.id, cooc, distance=distance, bridgeness=bridgeness)
    else:
        return cooc

    return(coocNode.id, cooc)
gargantext/graph/distances.py (deleted, 100644 → 0)

import math
import numpy    as np
import pandas   as pd
import networkx as nx

from copy        import copy
from collections import defaultdict
from math        import log, sqrt
#from operator   import itemgetter

from gargantext.models  import Node, NodeNgram, NodeNgramNgram, \
                               NodeHyperdata
from gargantext.util.db import session, aliased

from .louvain import best_partition


def clusterByDistances(cooc_matrix, field1=None, field2=None, distance=None):
    '''
    clusterByDistance :: Coocs[nga, ngb => ccweight] -> (Graph, Partition, {ids}, {weight})
    '''
    # implicit global session
    authorized = ['conditional', 'distributional', 'cosine']
    if distance not in authorized:
        raise ValueError("Distance must be in %s" % str(authorized))

    matrix = defaultdict(lambda: defaultdict(float))
    ids    = defaultdict(lambda: defaultdict(int))
    labels = dict()
    weight = dict()

    for cooc in cooc_matrix.items:
        ngram1_id = cooc[0]
        ngram2_id = cooc[1]
        ccweight  = cooc_matrix.items[cooc]

        matrix[ngram1_id][ngram2_id] = ccweight
        matrix[ngram2_id][ngram1_id] = ccweight

        ids[ngram1_id] = (field1, ngram1_id)
        ids[ngram2_id] = (field2, ngram2_id)

        weight[ngram1_id] = weight.get(ngram1_id, 0) + ccweight
        weight[ngram2_id] = weight.get(ngram2_id, 0) + ccweight

    x = pd.DataFrame(matrix).fillna(0)

    if distance == 'conditional':
        x = x / x.sum(axis=1)
        #y = y / y.sum(axis=0)

        xs = x.sum(axis=1) - x
        ys = x.sum(axis=0) - x

        # top included or excluded
        n = (xs + ys) / (2 * (x.shape[0] - 1))
        # top generic or specific
        m = (xs - ys) / (2 * (x.shape[0] - 1))

        n = n.sort_index(inplace=False)
        m = m.sort_index(inplace=False)

        nodes_included = 10000  #int(round(size/20,0))
        #nodes_excluded = int(round(size/10,0))

        nodes_specific = 10000  #int(round(size/10,0))
        #nodes_generic = int(round(size/10,0))

        # TODO use the included score for the node size
        n_index = pd.Index.intersection(x.index, n.index[:nodes_included])
        # Generic:
        #m_index = pd.Index.intersection(x.index, m.index[:nodes_generic])
        # Specific:
        m_index = pd.Index.intersection(x.index, m.index[-nodes_specific:])
        #m_index = pd.Index.intersection(x.index, n.index[:nodes_included])

        x_index = pd.Index.union(n_index, m_index)
        xx = x[list(x_index)].T[list(x_index)]

        # Removing unconnected nodes
        xxx = xx.values
        threshold = min(xxx.max(axis=1))
        matrix_filtered = np.where(xxx >= threshold, xxx, 0)
        #matrix_filtered = matrix_filtered.resize((90,90))

        G = nx.from_numpy_matrix(np.matrix(matrix_filtered))
        G = nx.relabel_nodes(G, dict(enumerate([ids[id_][1] for id_ in list(xx.columns)])))

    elif distance == 'cosine':
        scd = defaultdict(lambda: defaultdict(int))

        for i in matrix.keys():
            for j in matrix.keys():
                numerator = sum(
                    [matrix[i][k] * matrix[j][k]
                        for k in matrix.keys()
                        if i != j and k != i and k != j
                    ]
                )

                denominator = sqrt(
                    sum([matrix[i][k]
                            for k in matrix.keys()
                            if k != i and k != j
                            #and matrix[i][k] > 0
                        ])
                    *
                    sum([matrix[i][k]
                            for k in matrix.keys()
                            if k != i and k != j
                            #and matrix[i][k] > 0
                        ])
                )

                try:
                    scd[i][j] = numerator / denominator
                except Exception as error:
                    scd[i][j] = 0

        minmax = min([max([scd[i][j] for i in scd.keys()]) for j in scd.keys()])

        G = nx.DiGraph()
        G.add_edges_from(
            [(i, j, {'weight': scd[i][j]})
                for i in scd.keys()
                for j in scd.keys()
                if i != j and scd[i][j] > minmax and scd[i][j] > scd[j][i]
            ]
        )

    elif distance == 'distributional':
        mi = defaultdict(lambda: defaultdict(int))
        total_cooc = x.sum().sum()

        for i in matrix.keys():
            si = sum([matrix[i][j] for j in matrix[i].keys() if i != j])
            for j in matrix[i].keys():
                sj = sum([matrix[j][k] for k in matrix[j].keys() if j != k])
                if i != j:
                    mi[i][j] = log(matrix[i][j] / ((si * sj) / total_cooc))

        r = defaultdict(lambda: defaultdict(int))

        for i in matrix.keys():
            for j in matrix.keys():
                sumMin = sum(
                    [min(mi[i][k], mi[j][k])
                        for k in matrix.keys()
                        if i != j and k != i and k != j and mi[i][k] > 0
                    ]
                )

                sumMi = sum(
                    [mi[i][k]
                        for k in matrix.keys()
                        if k != i and k != j and mi[i][k] > 0
                    ]
                )

                try:
                    r[i][j] = sumMin / sumMi
                except Exception as error:
                    r[i][j] = 0

        # Need to filter the weak links, automatic threshold here
        minmax = min([max([r[i][j] for i in r.keys()]) for j in r.keys()])

        G = nx.DiGraph()
        G.add_edges_from(
            [(i, j, {'weight': r[i][j]})
                for i in r.keys()
                for j in r.keys()
                if i != j and r[i][j] > minmax and r[i][j] > r[j][i]
            ]
        )

    # degree_max = max([(n, d) for n,d in G.degree().items()], key=itemgetter(1))[1]
    # nodes_to_remove = [n for (n,d) in G.degree().items() if d <= round(degree_max/2)]
    # G.remove_nodes_from(nodes_to_remove)

    # Removing too connected nodes (find automatic way to do it)
    #edges_to_remove = [ e for e in G.edges_iter() if
    # nodes_to_remove = [n for n in degree if degree[n] <= 1]
    # G.remove_nodes_from(nodes_to_remove)

    def getWeight(item):
        return item[1]

    #
    # node_degree = sorted(G.degree().items(), key=getWeight, reverse=True)
    # #print(node_degree)
    # nodes_too_connected = [n[0] for n in node_degree[0:(round(len(node_degree)/5))]]
    #
    # for n in nodes_too_connected:
    #     n_edges = list()
    #     for v in nx.neighbors(G,n):
    #         #print((n, v), G[n][v]['weight'], ":", (v,n), G[v][n]['weight'])
    #         n_edges.append(((n, v), G[n][v]['weight']))
    #
    #     n_edges_sorted = sorted(n_edges, key=getWeight, reverse=True)
    #     #G.remove_edges_from([ e[0] for e in n_edges_sorted[round(len(n_edges_sorted)/2):]])
    #     #G.remove_edges_from([ e[0] for e in n_edges_sorted[(round(len(nx.neighbors(G,n))/3)):]])
    #     G.remove_edges_from([ e[0] for e in n_edges_sorted[10:]])

    G.remove_nodes_from(nx.isolates(G))

    partition = best_partition(G.to_undirected())

    return(G, partition, ids, weight)
gargantext/graph/graph.py (deleted, 100644 → 0; diff collapsed)
gargantext/graph/growth.py (deleted, 100644 → 0)

"""
Computes ngram growth on periods
"""

from gargantext.models        import Node, NodeNgram, NodeNodeNgram, NodeNgramNgram
from gargantext.util.db_cache import cache
from gargantext.util.db       import session, bulk_insert, aliased, \
                                     func, get_engine  # = sqlalchemy.func like sum() or count()

from datetime import datetime


def timeframes(start, end):
    """
    timeframes :: String -> String -> (UTCTime, UTCTime, UTCTime)
    """
    start = datetime.strptime(str(start), "%Y-%m-%d")
    end   = datetime.strptime(str(end),   "%Y-%m-%d")

    date_0 = start - (end - start)
    date_1 = start
    date_2 = end

    return (date_0, date_1, date_2)


def compute_growth(corpus_id, groupList_id, mapList_id, start, end):
    """
    compute_graph :: Int -> UTCTime -> UTCTime -> Int -> Int
                  -> [(Int, Numeric)]

    This function uses the SQL function in
    /srv/gargantext/install/gargamelle/sqlFunctions.sql

    First compute occurrences of ngrams in mapList (with groups) on the first
    period, then on the second, and finally return the growth.
    Directly computed with the Postgres database (C) for optimization.
    """
    connection = get_engine()

    (date_0, date_1, date_2) = timeframes(start, end)

    query = """SELECT * FROM OCC_HIST( {corpus_id}
                                     , {groupList_id}
                                     , {mapList_id}
                                     , '{date_0}'
                                     , '{date_1}'
                                     , '{date_2}'
                                     )
            """.format(corpus_id    = corpus_id,
                       groupList_id = groupList_id,
                       mapList_id   = mapList_id,
                       date_0       = date_0,
                       date_1       = date_1,
                       date_2       = date_2)

    return(connection.execute(query))
gargantext/graph/intersection.py (deleted, 100644 → 0)

from gargantext.models     import Node, Ngram, NodeNgram, NodeNgramNgram, \
                                  HyperdataKey
from gargantext.util.db    import session, aliased, bulk_insert, func
from gargantext.util.lists import WeightedMatrix, UnweightedList, Translations
from gargantext.util.http  import JsonHttpResponse

from sqlalchemy import desc, asc, or_, and_, func

import datetime
import ast

import networkx as nx


def doc_freq(corpus_id, node_ids):
    '''
    doc_freq :: Corpus_id -> [(Ngram_id, Int)]
    Given a corpus, compute the number of documents that have the ngram in it.
    '''
    return (session.query(NodeNgram.ngram_id, func.count(NodeNgram.node_id))
                   .join(Node, NodeNgram.node_id == Node.id)
                   .filter(Node.parent_id == corpus_id, Node.typename == 'DOCUMENT')
                   .filter(NodeNgram.weight > 0, NodeNgram.ngram_id.in_(node_ids))
                   .group_by(NodeNgram.ngram_id)
                   .all()
            )


def doc_ngram_representativity(corpus_id, node_ids):
    '''
    doc_ngram_representativity :: Corpus_ID -> Dict Ngram_id Float
    Given a corpus, compute the share of documents that have the ngram in it.
    '''
    nodes_count = (session.query(Node)
                          .filter(Node.parent_id == corpus_id, Node.typename == 'DOCUMENT')
                          .count()
                   )

    result = dict()
    for ngram_id, somme in doc_freq(corpus_id, node_ids):
        result[ngram_id] = somme / nodes_count

    return result


def compare_corpora(Corpus_id_A, Corpus_id_B, node_ids):
    '''
    compare_corpora :: Corpus_id -> Corpus_id -> Dict Ngram_id Float
    Given two corpora:
    - if the corpora are the same, it returns
      (dict of document frequency per ngram as key)
    - if the corpora are different, it returns
      doc_ngram_representativity(Corpus_id_A) / doc_ngram_representativity(Corpus_id_B)
      (as dict per ngram as key)
    '''
    result = dict()

    if int(Corpus_id_A) == int(Corpus_id_B):
        for ngram_id, somme in doc_freq(Corpus_id_A, node_ids):
            result[ngram_id] = somme

    else:
        data_A = doc_ngram_representativity(Corpus_id_A, node_ids)
        data_B = doc_ngram_representativity(Corpus_id_B, node_ids)

        queue = list()

        for k in data_A.keys():
            if k not in data_B.keys():
                queue.append(k)
            else:
                result[k] = data_B[k] / data_A[k]

        maximum = max([result[k] for k in result.keys()])
        minimum = min([result[k] for k in result.keys()])

        for k in queue:
            result[k] = minimum

    return result


def intersection(request, corpuses_ids, measure='cooc'):
    '''
    intersection :: (str(Int) + "a" str(Int)) -> Dict(Ngram.id :: Int, Score :: Int)
    intersection = returns as Json Http Response the intersection of two graphs
    '''
    if request.method == 'POST' and "nodeids" in request.POST and len(request.POST["nodeids"]) > 0:

        node_ids = [int(i) for i in (ast.literal_eval(request.POST["nodeids"]))]
        # Here are the visible nodes of the initial semantic map.

        corpuses_ids = corpuses_ids.split('a')
        corpuses_ids = [int(i) for i in corpuses_ids]

        # corpus[1] will be the corpus to compare
        return JsonHttpResponse(compare_corpora(corpuses_ids[0], corpuses_ids[1], node_ids))
gargantext/graph/louvain.py (deleted, 100644 → 0; diff collapsed)
gargantext/graph/mail_notification.py (deleted, 100644 → 0)

from gargantext.models.users import User
from gargantext.util.db      import session

from django.core.mail     import send_mail
from gargantext.settings  import BASE_URL


def notify_owner(corpus, cooc_id, distance, bridgeness):
    user = session.query(User).filter(User.id == corpus.user_id).first()

    message = '''
    Bonjour,
    votre graph vient de se terminer dans votre corpus intitulé:
    %s

    Vous pouvez accéder et renommer votre Graph à l'adresse:
    http://%s/projects/%d/corpora/%d/explorer?cooc_id=%d&distance=%s&bridgeness=%d

    Nous restons à votre disposition pour tout complément d'information.
    Cordialement
    --
    L'équipe de Gargantext (CNRS)
    ''' % (corpus.name, BASE_URL, corpus.parent_id, corpus.id, cooc_id, distance, bridgeness)

    if user.email != "":
        send_mail('[Gargantext] Votre Graph est calculé',
                  message,
                  'team@gargantext.org',
                  [user.email],
                  fail_silently=False)
    else:
        print("User %s (%d), has no email" % (user.username, user.id))
gargantext/graph/rest.py (deleted, 100644 → 0; diff collapsed)
gargantext/graph/templates/explorer.html (deleted, 100644 → 0; diff collapsed)
gargantext/graph/urls.py (deleted, 100644 → 0)

from django.conf.urls import url

# Module "Graph Explorer"
from .rest         import Graph
from .views        import explorer, myGraphs
from .intersection import intersection

# TODO : factor urls
# url will have this pattern:
# ^explorer/$corpus_id/view
# ^explorer/$corpus_id/data.json
# ^explorer/$corpus_id/intersection

# GET ^api/projects/(\d+)/corpora/(\d+)/explorer$ -> data in json format

urlpatterns = [ url(r'^projects/(\d+)/corpora/(\d+)/explorer$', explorer)
              , url(r'^projects/(\d+)/corpora/(\d+)/myGraphs$', myGraphs)
              , url(r'^explorer/intersection/(\w+)$'          , intersection)
              ]
gargantext/graph/utils.py (deleted, 100644 → 0)

def compress_graph(graphdata):
    """
    graph data is usually a dict with 2 slots:
      "nodes": [{"id":4103, "type":"terms", "attributes":{"clust_default": 0}, "size":29, "label":"regard"},...]
      "links": [{"t": 998,"s": 768,"w": 0.0425531914893617},...]

    To send this data over the net, this function can reduce a lot of its size:
      - keep less decimals for float value of each link's weight
      - use shorter names for node properties (eg: s/clust_default/cl/)

    result format:
      "nodes": [{"id":4103, "at":{"cl": 0}, "s":29, "lb":"regard"},...]
      "links": [{"t": 998,"s": 768,"w": 0.042},...]
    """
    for link in graphdata['links']:
        link['w'] = format(link['w'], '.3f')  # keep only 3 decimals

    for node in graphdata['nodes']:
        node['lb'] = node['label']
        del node['label']

        #node['attributes']['growth'] = 0.8

        node['at'] = node['attributes']
        del node['attributes']

        node['at']['cl'] = node['at']['clust_default']
        del node['at']['clust_default']

        node['s'] = node['size']
        del node['size']

        if node['type'] == "terms":
            # its the default type for our format: so we don't need it
            del node['type']
        else:
            node['t'] = node['type']
            del node['type']

    return graphdata


def format_html(link):
    """
    Build an html link adapted to our json message format
    """
    return "<a class='msglink' href='%s'>%s</a>" % (link, link)
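For illustration only, a small usage sketch of compress_graph with invented data shaped like the docstring's example (the id, label and weight are made up; the import path is the one used before this commit):

```python
from gargantext.graph.utils import compress_graph  # pre-removal module path

# Hypothetical input, shaped like the "usual" graph data in the docstring.
sample = {
    "nodes": [{"id": 4103, "type": "terms",
               "attributes": {"clust_default": 0},
               "size": 29, "label": "regard"}],
    "links": [{"t": 998, "s": 768, "w": 0.0425531914893617}],
}

compressed = compress_graph(sample)
# compressed["nodes"] -> [{"id": 4103, "at": {"cl": 0}, "s": 29, "lb": "regard"}]
#   (the "terms" type is dropped because it is the default)
# compressed["links"] -> [{"t": 998, "s": 768, "w": "0.043"}]
#   (the weight is kept to 3 decimals, as a string produced by format())
```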
gargantext/graph/views.py (deleted, 100644 → 0)

from gargantext.util.http     import *
from gargantext.util.db       import *
from gargantext.util.db_cache import cache
from gargantext.models        import *
from gargantext.constants     import *
from gargantext.settings      import *

from gargantext.constants import USER_LANG

from datetime import datetime

from gargantext.views.pages.main import get_user_params


@requires_auth
def explorer(request, project_id, corpus_id):
    '''
    Graph explorer, also known as TinaWebJS, using SigmaJS.
    Nodes are ngrams (from title, abstract or journal name).
    Links represent a proximity measure.
    Data are received in RESTful mode (see rest.py).
    '''
    # we pass our corpus
    corpus = cache.Node[corpus_id]

    # security check
    user = cache.User[request.user.id]

    if corpus is None:
        raise Http404()

    if not user.owns(corpus):
        return HttpResponseForbidden()

    # get the maplist_id for modifications
    maplist_id = corpus.children(typename="MAPLIST").first().id

    # and the project just for project.id in corpusBannerTop
    project = cache.Node[project_id]

    # rendered page : explorer.html
    return render(
        template_name = 'explorer.html',
        request       = request,
        context       = {
            'debug'           : settings.DEBUG,
            'request'         : request,
            'user'            : request.user,
            'date'            : datetime.now(),
            'project'         : project,
            'corpus'          : corpus,
            'maplist_id'      : maplist_id,
            'view'            : 'graph',
            'user_parameters' : get_user_params(request.user),
            'languages'       : USER_LANG,
        },
    )


@requires_auth
def myGraphs(request, project_id, corpus_id):
    '''
    List all of my Graphs.
    Each Graph has one Node of Cooccurrences.
    Each Graph is saved in the hyperdata of its Node.
    '''
    user = cache.User[request.user.id]

    # we pass our corpus
    corpus = cache.Node[corpus_id]

    # and the project just for project.id in corpusBannerTop
    project = cache.Node[project_id]

    coocs = corpus.children('COOCCURRENCES', order=True).all()
    coocs_count = dict()

    for cooc in coocs:
        # FIXME : approximate number of nodes (not exactly what user sees in explorer)
        # Need to be connected with Graph Clustering
        cooc_nodes = (session.query(Ngram.id, func.count(Ngram.id))
                             .join(NodeNgramNgram, NodeNgramNgram.ngram1_id == Ngram.id)
                             .filter(NodeNgramNgram.node_id == cooc.id)
                             .filter(NodeNgramNgram.weight >= 1)
                             .group_by(Ngram.id)
                             .all()
                      )

        #coocs_count[cooc.id] = len(cooc_nodes)
        coocs_count[cooc.id] = len([cooc_node for cooc_node in cooc_nodes if cooc_node[1] > 1])

    print("coocs_count a posteriori", coocs_count)

    return render(
        template_name = 'pages/corpora/myGraphs.html',
        request       = request,
        context       = {
            'debug'           : settings.DEBUG,
            'request'         : request,
            'user'            : request.user,
            'date'            : datetime.now(),
            'project'         : project,
            'resourcename'    : get_resource_by_name(corpus),
            'corpus'          : corpus,
            'view'            : 'myGraph',
            'coocs'           : coocs,
            'coocs_count'     : coocs_count,
            'user_parameters' : get_user_params(request.user),
            'languages'       : USER_LANG,
        },
    )
gargantext/settings.py

@@ -43,7 +43,6 @@ CELERYBEAT_SCHEDULER = 'djcelery.schedulers.DatabaseScheduler'
 CELERY_IMPORTS = ( "gargantext.util.toolchain"
                  , "gargantext.util.crawlers"
-                 , "gargantext.graph.graph"
                  , "gargantext.moissonneurs.pubmed"
                  , "gargantext.moissonneurs.istex"
                  , "gargantext.util.ngramlists_tools"

@@ -65,7 +64,6 @@ INSTALLED_APPS = [
     'rest_framework'
     , 'djcelery'
     , 'gargantext.annotations'
-    , 'gargantext.graph'
     , 'gargantext.moissonneurs'
     , 'gargantext'
     ]
gargantext/urls.py

@@ -5,7 +5,6 @@ Views are shared between these modules:
 - `pages`, to present HTML views to the user
 - `contents`, for Python-generated contents
 - `annotations`, to annotate local context of a corpus (as global context)
-- `graph explorer`, to explore graphs
 """
 from django.conf.urls import include, url

@@ -21,9 +20,6 @@ import gargantext.views.pages.urls
 from gargantext.annotations import urls as annotations_urls
 from gargantext.annotations.views import main as annotations_main_view

-# Module for graph service
-import gargantext.graph.urls
-
 # Module Scrapers
 import gargantext.moissonneurs.urls

@@ -34,9 +30,6 @@ urlpatterns = [ url(r'^admin/' , admin.site.urls
               , url(r'^favicon.ico$', Redirect.as_view(url=static.url('favicon.ico')
                                                       , permanent=False), name="favicon")

-              # Module Graph
-              , url(r'^', include( gargantext.graph.urls ))
-
               # Module Annotation
               # tempo: unchanged doc-annotations routes --
               , url(r'^annotations/', include( annotations_urls ))
gargantext/views/api/urls.py

@@ -10,7 +10,7 @@ from . import ngrams
 from . import metrics
 from . import ngramlists
 from . import analytics
-from gargantext.graph.rest import Graph

 urlpatterns = [ url(r'^nodes$'       , nodes.NodeListResource.as_view())
               , url(r'^nodes/(\d+)$' , nodes.NodeResource.as_view())

@@ -37,14 +37,6 @@ urlpatterns = [ url(r'^nodes$' , nodes.NodeListResource.as_view()
                 # Metrics
               , url(r'^projects/(\d+)/corpora/(\d+)/metrics$', metrics.CorpusMetrics.as_view())

-                # GraphExplorer
-              , url(r'^projects/(\d+)/corpora/(\d+)/explorer$', Graph.as_view())
-                # data for graph explorer (json)
-                #     GET /api/projects/43198/corpora/111107/explorer?
-                #     Corresponding view is : /projects/43198/corpora/111107/explorer?
-                #     Parameters (example):
-                #     explorer?field1=ngrams&field2=ngrams&distance=conditional&bridgeness=5&start=1996-6-1&end=2002-10-5

                 # Ngrams
               , url(r'^ngrams/?$', ngrams.ApiNgrams.as_view())