Commit a40f95bb authored by delanoe

[FEAT GRAPH] Commit before factoring.

parent 0564f787
@@ -14,7 +14,9 @@ import numpy as np
import pandas as pd
import networkx as nx
def do_distance(cooc_id, field1=None, field2=None, isMonopartite=True, distance='conditional'):
def do_distance( cooc_id
, field1=None, field2=None
, isMonopartite=True, distance='conditional'):
'''
do_distance :: Int -> (Graph, Partition, {ids}, {weight})
'''
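# NOTE (editorial sketch, not part of this diff): an assumed call site for
# do_distance(), based only on the docstring signature
# Int -> (Graph, Partition, {ids}, {weight}); cooc_id and the unpacked
# names are hypothetical.
G, partition, ids, weights = do_distance( cooc_id
                                        , field1='ngrams', field2='ngrams'
                                        , isMonopartite=True
                                        , distance='conditional')
# G is expected to be a networkx Graph; partition maps node id -> community;
# ids and weights are dicts keyed by node id.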
@@ -4,7 +4,7 @@ from gargantext.util.http import JsonHttpResponse
from gargantext.models import Node, Ngram, NodeNgram, NodeNgramNgram
#from gargantext.util.toolchain.ngram_coocs import compute_coocs
from graphExplorer.distance import do_distance
from graphExplorer.distances import do_distance
from graphExplorer.cooccurrences import do_cooc
# Prelude lib
@@ -27,6 +27,7 @@ def get_cooc( request=None, corpus=None
, start=None , end=None
, threshold=1
, distance='conditional'
, isMonopartite=True # By default, we compute terms/terms graph
, size=1000
, bridgeness=5
, mapList_id = None , groupList_id = None
@@ -35,8 +36,6 @@ def get_cooc( request=None, corpus=None
get_cooc : computes the graph.
'''
data = {}
if mapList_id is None:
mapList_id = ( session.query ( Node.id )
@@ -61,19 +60,6 @@ def get_cooc( request=None, corpus=None
raise ValueError("GROUPLIST node needed for cooccurrences")
# compute_cooc needs group, fields etc.
# group_id = 3
SamuelFlag = False
# if field1 == field2 == 'ngrams' :
# isMonopartite = True
# SamuelFlag = True
# else:
# isMonopartite = False
isMonopartite = True # Always. So, compute the graph B and, from these B-nodes, build the graph-A
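# NOTE (editorial sketch, not part of this diff): a generic illustration of the
# strategy described above -- project a bipartite A-B graph onto its A side so
# that two A-nodes are linked when they share a B-node. The toy graph below is
# hypothetical; the actual construction is done by get_graphA() further down.
import networkx as nx
from networkx.algorithms import bipartite

AB_toy = nx.Graph()
AB_toy.add_edges_from([("sourceX", "term1"), ("sourceX", "term2"), ("sourceY", "term2")])
a_side = {"sourceX", "sourceY"}
# 'weight' on each A-A edge counts the B-nodes shared by the two A-nodes
graphA_toy = bipartite.weighted_projected_graph(AB_toy, a_side)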
# data deleted each time
#cooc_id = get_or_create_node(nodetype='Cooccurrence', corpus=corpus).id
if corpus is None:
corpus = session.query(Node).filter(Node.id==corpus_id).first()
@@ -90,6 +76,9 @@ def get_cooc( request=None, corpus=None
, isMonopartite=True
, distance=distance
)
# Data are stored in a dict() (Python's built-in hash map)
data = dict()
if type == "node_link":
nodesB_dict = {}
for node_id in G.nodes():
@@ -192,183 +181,4 @@ def get_cooc( request=None, corpus=None
return(data)
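# NOTE (editorial sketch, not part of this diff): an assumed invocation of
# get_cooc() with the isMonopartite flag introduced above; only parameters
# visible in this diff are used, and corpus, map_id and group_id are hypothetical.
graph_data = get_cooc( corpus=corpus, threshold=1
                     , distance='conditional', isMonopartite=True
                     , size=1000, bridgeness=5
                     , mapList_id=map_id, groupList_id=group_id )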
def get_graphA( nodeA_type , NodesB , links , corpus ):
from analysis.InterUnion import Utils
print(" = = = == = = = ")
print("In get_graphA(), corpus id:",corpus.id)
nodeA_type_id = cache.Hyperdata[nodeA_type].id
threshold_cotainf = 0.02
max_nodeid = -1
for nodeid in NodesB:
if nodeid > max_nodeid:
max_nodeid = nodeid
# = = = = [ 01. Getting ALL documents of the Corpus c ] = = = = #
Docs = {}
document_type_id = cache.NodeType['Document'].id
sql_query = 'select id from node_node where parent_id='+str(corpus.id)+' and type_id='+str(document_type_id)
cursor = connection.cursor()
cursor.execute(sql_query)
results = cursor.fetchall()
for i in results:
Docs[i[0]] = True
print("docs:",len(Docs.keys()))
# = = = = [ / 01. Getting ALL documents of the Corpus c ] = = = = #
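# NOTE (editorial sketch, not part of this diff): the same document query as
# step 01, written with query parameters instead of string concatenation,
# assuming the module-level Django `connection` used above.
cursor = connection.cursor()
cursor.execute( 'select id from node_node where parent_id = %s and type_id = %s'
              , [corpus.id, document_type_id])
Docs = {row[0]: True for row in cursor.fetchall()}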
# = = = = [ 02. Getting ALL Documents related with Ngrams of the carte semantic ] = = = = #
sql_query = 'select nodey_id,ngram_id from node_nodenodengram where ngram_id IN (' + ','.join(map(str, NodesB.keys())) + ")"
cursor = connection.cursor()
cursor.execute(sql_query)
results = cursor.fetchall()
# = = = = [ / 02. Getting ALL Documents related with Ngrams of the carte semantic ] = = = = #
# = = = = [ 03. Now we limit the retrieved Documents(step 02) to those belonging to the Corpus c ] = = = = ]
Docs_and_ = {
"nodesA":{},
"nodesB":{}
}
NodesB_and_Docs = {}
for i in results:
doc_id = i[0]
ngram_id = i[1]
if ngram_id in NodesB and doc_id in Docs:
if doc_id not in Docs_and_["nodesB"]:
Docs_and_["nodesB"][doc_id] = []
Docs_and_["nodesB"][doc_id].append( ngram_id )
if ngram_id not in NodesB_and_Docs:
NodesB_and_Docs[ngram_id] = []
NodesB_and_Docs[ngram_id].append( doc_id )
# = = = = [ / 03. Now we limit the retrieved Documents(step 02) to those belonging to the Corpus c ] = = = = ]
# # = = = = [ Getting Authors ] = = = = ]
# Authors = {}
# sql_query = 'select node_id,value_string from node_node_hyperdata where node_id IN (' + ','.join(map(str, Docs_and_["nodesB"].keys())) + ")"+' and hyperdata_id=10'# 10 -> authors
# cursor = connection.cursor()
# cursor.execute(sql_query)
# results = cursor.fetchall()
# for i in results:
# doc_id = i[0]
# authors = i[1].split(",")
# for a in authors:
# if a not in Authors:
# Authors[a] = 0
# Authors[a] += 1
# print("")
# print("#authors:")
# import pprint
# pprint.pprint(Authors)
# print("")
# # = = = = [ / Getting Authors ] = = = = ]
# = = = = [ 04. Getting A-elems and making the dictionaries] = = = = ]
sql_query = 'select node_id,value_string from node_node_hyperdata where node_id IN (' + \
','.join(map(str, Docs_and_["nodesB"].keys())) + ")"+' and hyperdata_id='+str(nodeA_type_id)
cursor = connection.cursor()
cursor.execute(sql_query)
results = cursor.fetchall()
A_Freq = {}
A_int2str = {}
A_str2int = {}
counter = max_nodeid+1
for i in results:
doc_id = i[0]
a = i[1]
if a not in A_str2int:
A_str2int[ a ] = counter
A_int2str[counter] = a
counter += 1
for i in results:
doc_id = i[0]
a = A_str2int[i[1]]
Docs_and_["nodesA"][doc_id] = a
if a not in A_Freq:
A_Freq[ a ] = 0
A_Freq[ a ] += 1
# = = = = [ / 04. Getting A-elems and making the dictionaries ] = = = = ]
# = = = = [ Filling graph-A ] = = = = ]
Graph_A = Utils()
for i in NodesB_and_Docs:
ngram = i
docs = NodesB_and_Docs[i]
k_A_clique = {}
for doc in docs:
k_A = Docs_and_["nodesA"][doc]
k_A_clique[k_A] = True
if len(k_A_clique.keys())>1:
Graph_A.addCompleteSubGraph( k_A_clique.keys() )
# = = = = [ / Filling graph-A ] = = = = ]
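# NOTE (editorial sketch, not part of this diff): analysis.InterUnion.Utils is
# an external helper; addCompleteSubGraph() is assumed here to connect every
# pair of the given nodes and to increment an edge 'weight' on repeated pairs
# (the 'weight' attribute read below depends on that). A minimal networkx
# equivalent under that assumption:
from itertools import combinations

def add_complete_subgraph(graph, nodes):
    # Link every pair of nodes; count repeated co-memberships in 'weight'.
    for s, t in combinations(nodes, 2):
        if graph.has_edge(s, t):
            graph[s][t]['weight'] += 1
        else:
            graph.add_edge(s, t, weight=1)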
# = = = = [ graph-A to JSON ] = = = = ]
A = Graph_A.G
for node_id in A.nodes():
A.node[node_id]['label'] = A_int2str[node_id]
A.node[node_id]['size'] = A_Freq[node_id]
A.node[node_id]['type'] = nodeA_type
A.node[node_id]['attributes'] = { "clust_default": 1 }
A_links = []
min_weight = 999999
max_weight = -1
Weights_Dist = {}
for e in A.edges_iter():
s = e[0]
t = e[1]
w = A[s][t]["weight"]
if w not in Weights_Dist:
Weights_Dist[ w ] = { "freq": 0 , "deleted":0 }
Weights_Dist[ w ]["freq"] += 1
if min_weight > w:
min_weight = w
if max_weight < w:
max_weight = w
edges2remove = []
for e in A.edges_iter():
s = e[0]
t = e[1]
w = A[s][t]["weight"]
if Weights_Dist [ w ]["freq"] < ( len(A)*3 ): # weight-threshold
info = {
"s":s ,
"t":t ,
"w": w / max_weight # normalization
}
A_links.append(info)
else:
# if Weights_Dist [ w ]["deleted"] < round(Weights_Dist [ w ]["freq"]*0.95):
atuple = (s,t)
edges2remove.append(atuple)
Weights_Dist [ w ]["deleted"] += 1
A.remove_edges_from( edges2remove )
A.remove_nodes_from(nx.isolates(A))
data = json_graph.node_link_data(A) # saving nodesA
AB = nx.Graph()
for i in NodesB_and_Docs:
b = i
docs = NodesB_and_Docs[i]
for doc in docs:
a = Docs_and_["nodesA"][doc]
if A.has_node(a):
AB.add_edge( a , b )
AB_links = []
for e in AB.edges_iter():
info = { "s": e[0], "t": e[1], "w": 1 }
AB_links.append(info)
data["links"] = A_links + AB_links # saving AA-links and AB-links
# = = = = [ / graph-A to JSON ] = = = = ]
return data
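# NOTE (editorial sketch, not part of this diff): the assumed shape of the
# returned payload. json_graph.node_link_data(A) produces a dict with "nodes"
# and "links" keys; the "links" entry is then overwritten with the hand-built
# A-A and A-B links, for example:
#
#   { "nodes": [ {"id": 42, "label": "...", "size": 3, "type": nodeA_type,
#                 "attributes": {"clust_default": 1}}, ... ],
#     "links": [ {"s": 42, "t": 57, "w": 0.5},        # A-A link, weight normalized
#                {"s": 42, "t": 7,  "w": 1}, ... ] }  # A-B link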