Commit 9d00f536 authored by delanoe

Merge branch 'samuel' into merge

parents 13816c28 568a9432
@@ -110,7 +110,6 @@ def do_distance(cooc_id, field1=None, field2=None, isMonopartite=True):
# G.remove_nodes_from(nodes_to_remove)
partition = best_partition(G.to_undirected())
print("Density of the graph:", nx.density(G))
return(G,partition,ids,weight)
@@ -124,32 +123,35 @@ def get_cooc(request=None, corpus=None
'''
get_cooc : computes the cooccurrence graph.
'''
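# Rough flow (as read from the code below): build the ngram cooccurrence graph B
# with do_cooc()/do_distance(); if field1 is not 'ngrams' (e.g. 'journal'),
# also build graph A with get_graphA() and merge both node and link lists into data.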
data = {}
#if session.query(Node).filter(Node.type_id==type_cooc_id, Node.parent_id==corpus_id).first() is None:
print("Coocurrences do not exist yet, create it.")
miam_id = get_or_create_node(nodetype='MiamList', corpus=corpus).id
stop_id = get_or_create_node(nodetype='StopList', corpus=corpus).id
group_id = get_or_create_node(nodetype='Group', corpus=corpus).id
if field1 == field2 == 'ngrams' :
isMonopartite = True
else:
isMonopartite = False
SamuelFlag = False
# if field1 == field2 == 'ngrams' :
# isMonopartite = True
# SamuelFlag = True
# else:
# isMonopartite = False
isMonopartite = True # Always: first compute graph B, then build graph A from these B-nodes.
# data deleted each time
#cooc_id = get_or_create_node(nodetype='Cooccurrence', corpus=corpus).id
cooc_id = do_cooc(corpus=corpus, field1=field1, field2=field2
cooc_id = do_cooc(corpus=corpus, field1="ngrams", field2="ngrams"
, miam_id=miam_id, group_id=group_id, stop_id=stop_id, limit=size
, isMonopartite=isMonopartite
, apax=apax)
, isMonopartite=isMonopartite , start=start , end=end , apax=apax)
G, partition, ids, weight = do_distance(cooc_id, field1=field1, field2=field2, isMonopartite=isMonopartite)
G, partition, ids, weight = do_distance(cooc_id, field1="ngrams", field2="ngrams", isMonopartite=isMonopartite)
if type == "node_link":
nodesB_dict = {}
for node_id in G.nodes():
try:
#node,type(labels[node])
G.node[node_id]['pk'] = ids[node_id][1]
nodesB_dict [ ids[node_id][1] ] = True
the_label = session.query(Ngram.terms).filter(Ngram.id==node_id).first()
the_label = ", ".join(the_label)
# TODO: the query below is not optimized (do it in do_distance).
@@ -163,20 +165,39 @@ def get_cooc(request=None, corpus=None
pass #PrintException()
#print("error01: ",error)
data = json_graph.node_link_data(G)
B = json_graph.node_link_data(G)
links = []
i=1
for e in G.edges_iter():
s = e[0]
t = e[1]
info = { "id":i , "source":ids[s][1] , "target":ids[t][1]}
info = {
"s":ids[s][1] ,
"t":ids[t][1] ,
"w": G[ids[s][1]][ids[t][1]]["weight"]
}
# print(info)
links.append(info)
i+=1
# print(data)
data["links"] = []
data["links"] = links
# print(B)
B["links"] = []
B["links"] = links
if field1 == field2 == 'ngrams' :
data["nodes"] = B["nodes"]
data["links"] = B["links"]
else:
A = get_graphA( "journal" , nodesB_dict , B["links"] , corpus )
print("#nodesA:",len(A["nodes"]))
print("#linksAA + #linksAB:",len(A["links"]))
print("#nodesB:",len(B["nodes"]))
print("#linksBB:",len(B["links"]))
data["nodes"] = A["nodes"] + B["nodes"]
data["links"] = A["links"] + B["links"]
print(" total nodes :",len(data["nodes"]))
print(" total links :",len(data["links"]))
print("")
elif type == "adjacency":
for node in G.nodes():
@@ -193,13 +214,186 @@ def get_cooc(request=None, corpus=None
elif type == 'bestpartition':
return(partition)
# data = json_graph.node_link_data(G, attrs={\
# 'source':'source',\
# 'target':'target',\
# 'weight':'weight',\
# #'label':'label',\
# #'color':'color',\
# 'id':'id',})
#print(data)
return(data)
def get_graphA( nodeA_type , NodesB , links , corpus ):
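# Arguments (as used below): nodeA_type is the hyperdata field for the A-nodes
# (e.g. "journal"); NodesB is a dict of the ngram ids on the semantic map;
# links carries the BB links (apparently unused in this version); corpus is the
# corpus node. Returns node_link data holding the A-nodes, AA-links and AB-links.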
from analysis.InterUnion import Utils
print(" = = = == = = = ")
print("In get_graphA(), corpus id:",corpus.id)
nodeA_type_id = cache.Hyperdata[nodeA_type].id
threshold_cotainf = 0.05
max_nodeid = -1
for nodeid in NodesB:
if nodeid > max_nodeid:
max_nodeid = nodeid
# = = = = [ 01. Getting ALL documents of the Corpus c ] = = = = #
Docs = {}
document_type_id = cache.NodeType['Document'].id
sql_query = 'select id from node_node where parent_id='+str(corpus.id)+' and type_id='+str(document_type_id)
cursor = connection.cursor()
cursor.execute(sql_query)
results = cursor.fetchall()
for i in results:
Docs[i[0]] = True
print("docs:",len(Docs.keys()))
# = = = = [ / 01. Getting ALL documents of the Corpus c ] = = = = #
# = = = = [ 02. Getting ALL Documents related to the Ngrams of the semantic map ] = = = = #
sql_query = 'select nodey_id,ngram_id from node_nodenodengram where ngram_id IN (' + ','.join(map(str, NodesB.keys())) + ")"
cursor = connection.cursor()
cursor.execute(sql_query)
results = cursor.fetchall()
# = = = = [ / 02. Getting ALL Documents related to the Ngrams of the semantic map ] = = = = #
# = = = = [ 03. Now we limit the retrieved Documents (step 02) to those belonging to the Corpus c ] = = = = ]
Docs_and_ = {
"nodesA":{},
"nodesB":{}
}
NodesB_and_Docs = {}
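# Docs_and_["nodesB"][doc_id] -> ngram ids of the map found in that document;
# NodesB_and_Docs[ngram_id] -> document ids of the corpus containing that ngram.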
for i in results:
doc_id = i[0]
ngram_id = i[1]
if ngram_id in NodesB and doc_id in Docs:
if doc_id not in Docs_and_["nodesB"]:
Docs_and_["nodesB"][doc_id] = []
Docs_and_["nodesB"][doc_id].append( ngram_id )
if ngram_id not in NodesB_and_Docs:
NodesB_and_Docs[ngram_id] = []
NodesB_and_Docs[ngram_id].append( doc_id )
# = = = = [ / 03. Now we limit the retrieved Documents (step 02) to those belonging to the Corpus c ] = = = = ]
# # = = = = [ Getting Authors ] = = = = ]
# Authors = {}
# sql_query = 'select node_id,value_string from node_node_hyperdata where node_id IN (' + ','.join(map(str, Docs_and_["nodesB"].keys())) + ")"+' and hyperdata_id=10'# 10 -> authors
# cursor = connection.cursor()
# cursor.execute(sql_query)
# results = cursor.fetchall()
# for i in results:
# doc_id = i[0]
# authors = i[1].split(",")
# for a in authors:
# if a not in Authors:
# Authors[a] = 0
# Authors[a] += 1
# print("")
# print("#authors:")
# import pprint
# pprint.pprint(Authors)
# print("")
# # = = = = [ / Getting Authors ] = = = = ]
# = = = = [ 04. Getting A-elems and making the dictionaries] = = = = ]
sql_query = 'select node_id,value_string from node_node_hyperdata where node_id IN (' + ','.join(map(str, Docs_and_["nodesB"].keys())) + ")"+' and hyperdata_id='+str(nodeA_type_id)
cursor = connection.cursor()
cursor.execute(sql_query)
results = cursor.fetchall()
A_Freq = {}
A_int2str = {}
A_str2int = {}
counter = max_nodeid+1
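# A-values (e.g. journal names) get integer ids starting right after the largest
# B-node id, so A-node and B-node ids cannot collide in the merged graph.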
for i in results:
doc_id = i[0]
a = i[1]
if a not in A_str2int:
A_str2int[ a ] = counter
A_int2str[counter] = a
counter += 1
for i in results:
doc_id = i[0]
a = A_str2int[i[1]]
Docs_and_["nodesA"][doc_id] = a
if a not in A_Freq:
A_Freq[ a ] = 0
A_Freq[ a ] += 1
# = = = = [ / 04. Getting A-elems and making the dictionaries ] = = = = ]
# = = = = [ Filling graph-A ] = = = = ]
Graph_A = Utils()
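# For each map ngram, collect the distinct A-values of its documents and connect
# them as a complete subgraph; addCompleteSubGraph() presumably accumulates edge
# weights across cliques (see analysis.InterUnion.Utils).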
for i in NodesB_and_Docs:
ngram = i
docs = NodesB_and_Docs[i]
k_A_clique = {}
for doc in docs:
k_A = Docs_and_["nodesA"][doc]
k_A_clique[k_A] = True
if len(k_A_clique.keys())>1:
Graph_A.addCompleteSubGraph( k_A_clique.keys() )
# = = = = [ / Filling graph-A ] = = = = ]
# = = = = [ graph-A to JSON ] = = = = ]
A = Graph_A.G
for node_id in A.nodes():
A.node[node_id]['label'] = A_int2str[node_id]
A.node[node_id]['size'] = A_Freq[node_id]
A.node[node_id]['type'] = nodeA_type
A.node[node_id]['attributes'] = { "clust_default": 1 }
A_links = []
min_weight = 999999
max_weight = -1
Weights_Dist = {}
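# Weights_Dist counts how many AA-edges carry each weight value; this distribution
# drives the pruning of over-represented weights below.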
for e in A.edges_iter():
s = e[0]
t = e[1]
w = A[s][t]["weight"]
if w not in Weights_Dist:
Weights_Dist[ w ] = { "freq": 0 , "deleted":0 }
Weights_Dist[ w ]["freq"] += 1
if min_weight > w:
min_weight = w
if max_weight < w:
max_weight = w
edges2remove = []
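# Pruning heuristic: an edge is kept only if its weight value occurs fewer than
# 3*len(A) times (its weight is then normalized by max_weight); edges with
# over-represented weight values are removed instead, and isolated nodes are
# dropped afterwards.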
for e in A.edges_iter():
s = e[0]
t = e[1]
w = A[s][t]["weight"]
if Weights_Dist [ w ]["freq"] < ( len(A)*3 ): # weight-threshold
info = {
"s":s ,
"t":t ,
"w": w / max_weight # normalization
}
A_links.append(info)
else:
# if Weights_Dist [ w ]["deleted"] < round(Weights_Dist [ w ]["freq"]*0.95):
atuple = (s,t)
edges2remove.append(atuple)
Weights_Dist [ w ]["deleted"] += 1
A.remove_edges_from( edges2remove )
A.remove_nodes_from(nx.isolates(A))
data = json_graph.node_link_data(A) # saving nodesA
AB = nx.Graph()
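# Bipartite AB links: each map ngram is connected (weight 1) to the A-nodes of
# the documents containing it, provided the A-node survived the pruning above.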
for i in NodesB_and_Docs:
b = i
docs = NodesB_and_Docs[i]
for doc in docs:
a = Docs_and_["nodesA"][doc]
if A.has_node(a):
AB.add_edge( a , b )
AB_links = []
for e in AB.edges_iter():
info = { "s": e[0], "t": e[1], "w": 1 }
AB_links.append(info)
data["links"] = A_links + AB_links # saving AA-links and AB-links
# = = = = [ / graph-A to JSON ] = = = = ]
return data
@@ -24,9 +24,15 @@ var latest,oldest;
var TheBuffer = false
function Push2Buffer( NewVal ) {
console.log( " = = = = = = = = " )
console.log( "Push2Buffer()" )
console.log( "\t"+NewVal )
if ( TheBuffer == false) {
if( ! NewVal ) {
var limits = [ new Date( oldest[0],oldest[1],oldest[2] ) , new Date( latest[0],latest[1],latest[2] ) ];
// var limits = [ new Date( oldest[0],oldest[1],oldest[2] ) , new Date( latest[0],latest[1],latest[2] ) ];
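// JavaScript Date months are 0-based, hence the month-1 below; the range is then
// padded by one day on each side, presumably so that documents on the boundary
// dates stay inside the selectable range.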
var limits = [new Date(oldest[0],oldest[1]-1,oldest[2]), new Date(latest[0],latest[1]-1,latest[2] ) ];
limits[0] = new Date(limits[0].setDate(limits[0].getDate()-1) );
limits[1] = new Date(limits[1].setDate(limits[1].getDate()+1) );
NewVal = limits;
}
console.log( " - - - - - - " )
@@ -42,7 +48,9 @@ function Push2Buffer( NewVal ) {
var past = TheBuffer[0]+"_"+TheBuffer[1]
if( ! NewVal ) {
var limits = [ new Date( oldest[0],oldest[1],oldest[2] ) , new Date( latest[0],latest[1],latest[2] ) ];
var limits = [new Date(oldest[0],oldest[1]-1,oldest[2]), new Date(latest[0],latest[1]-1,latest[2] ) ];
limits[0] = new Date(limits[0].setDate(limits[0].getDate()-1) );
limits[1] = new Date(limits[1].setDate(limits[1].getDate()+1) );
NewVal = limits;
}
var now = NewVal[0]+"_"+NewVal[1]
@@ -295,8 +303,9 @@ function Main_test( Data , SearchFilter ) {
oldest = t0;
latest = t1;
TheBuffer = [new Date(t0[0],(t0[1]-1),t0[2]), new Date(t1[0],(t1[1]-1),t1[2]+1)];
TheBuffer = [new Date(t0[0],t0[1]-1,t0[2]), new Date(t1[0],t1[1]-1,t1[2] ) ];
TheBuffer[0] = new Date(TheBuffer[0].setDate(TheBuffer[0].getDate()-1) );
TheBuffer[1] = new Date(TheBuffer[1].setDate(TheBuffer[1].getDate()+1) );
var arrayd3 = []
for(var e in Data) {
@@ -397,7 +406,7 @@ function Main_test( Data , SearchFilter ) {
.group(volumeByMonthGroup)
.centerBar(true)
.gap(0)
.x(d3.time.scale().domain([new Date(t0[0],t0[1],t0[2]), new Date(t1[0],t1[1],t1[2])]))
.x(d3.time.scale().domain([TheBuffer[0], TheBuffer[1] ]))
.round(d3.time.month.round)
.xUnits(d3.time.months)
.renderlet(function (chart) {
@@ -24,28 +24,24 @@ var latest,oldest;
var TheBuffer = false
var PossibleActions = []
var PossibleActions = [
{
"id":"to_delete",
"name": "Delete",
"color":"red"
},
// {
// "id":"to_keep",
// "name": "Keep",
// "color":"green"
// },
// {
// "id":"to_group",
// "name": "Group",
// "color":"blue"
// }
]
var action1 = {
"id":"to_delete",
"name": "Delete",
"color":"red"
}
// var action2 = {
// "id":"to_keep",
// "name": "Keep",
// "color":"green"
// }
// var action3 = {
// "id":"to_group",
// "name": "Group",
// "color":"blue"
// }
PossibleActions.push(action1)
// PossibleActions.push(action2)
// PossibleActions.push(action3)
var FlagsBuffer = {}
for(var i in PossibleActions) {
@@ -95,22 +95,53 @@
<div class="col-md-6">
<div class="jumbotron">
{% if processing > 0 %}
<h3> <img width="20px" src="{% static "js/libs/img2/loading-bar.gif" %}"></img> Graph (later)</h3>
{% if processing > 0 %}
<h3> <img width="20px" src="{% static "js/libs/img2/loading-bar.gif" %}"></img> Networks (later)</h3>
<ol>
<li>Terms</li>
<li>Journals and Terms</li>
<li>Authors and Terms</li>
</ol>
{% else %}
<h3><a href="/project/{{project.id}}/corpus/{{ corpus.id }}/explorer">Graph</a></h3>
{% endif %}
<h3> Networks </h3>
<ol>
<li>Visualize</li>
<li>Explore</li>
<li>Read</li>
<li data-url="/project/{{project.id}}/corpus/{{ corpus.id }}/explorer?field1=ngrams&amp;field2=ngrams" onclick='gotoexplorer(this)'><a>Terms</a></li>
<li data-url="/project/{{project.id}}/corpus/{{ corpus.id }}/explorer?field1=journal&amp;field2=ngrams" onclick='gotoexplorer(this)'><a>Journals and Terms</a></li>
<li>Authors and Terms</li>
</ol>
{% endif %}
<h4><a href="/project/{{project.id}}/corpus/{{corpus.id}}/">Back to corpus</a></h4>
</div>
</div>
</div>
</div>
<script type="text/javascript">
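// gotoexplorer(elem): opens the graph explorer for the clicked network type.
// If the timeline brush (TheBuffer) still covers the full corpus range, no date
// filter is added; otherwise the one-day padding is undone and start/end
// parameters (year-month-day) are appended to the explorer URL.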
function gotoexplorer(elem) {
var url_ = $(elem).data("url")
if (TheBuffer==false)
return window.open(url_,'_blank');
var current_timerange = TheBuffer
var time_limits = [new Date(oldest[0],oldest[1]-1,oldest[2]), new Date(latest[0],latest[1]-1,latest[2] ) ];
time_limits[0] = new Date(time_limits[0].setDate(time_limits[0].getDate()-1) );
time_limits[1] = new Date(time_limits[1].setDate(time_limits[1].getDate()+1) );
if( ( +current_timerange[0]===+time_limits[0] ) && ( +current_timerange[1]===+time_limits[1] ) ) {
url_ = url_ // nothing to do: the full time range is selected
} else {
var start__ = new Date(current_timerange[0].setDate(current_timerange[0].getDate()+1) );
var end__ = new Date(current_timerange[1].setDate(current_timerange[1].getDate()-1) );
var start_ = start__.getFullYear()+"-"+(start__.getMonth()+1)+"-"+start__.getDate()
var end_ = end__.getFullYear()+"-"+(end__.getMonth()+1)+"-"+end__.getDate()
url_ += "&start=" + start_ + "&end="+end_;
// url_ += "&start=" + start__.getFullYear() + "&end="+end__.getFullYear();
}
return window.open(url_,'_blank');
}
</script>
{% endblock %}