Commit 9d00f536 authored by delanoe

Merge branch 'samuel' into merge

parents 13816c28 568a9432
@@ -110,7 +110,6 @@ def do_distance(cooc_id, field1=None, field2=None, isMonopartite=True):
# G.remove_nodes_from(nodes_to_remove)
partition = best_partition(G.to_undirected())
print("Density of the graph:", nx.density(G))
return(G,partition,ids,weight)
@@ -124,32 +123,35 @@ def get_cooc(request=None, corpus=None
'''
get_cooc : computes the cooccurrence graph.
'''
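# Rough flow (as read from the code below): build the ngram cooccurrence graph B
# with do_cooc()/do_distance(); if field1 is not 'ngrams' (e.g. 'journal'),
# also build graph A with get_graphA() and merge both node and link lists into data.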
data = {}
#if session.query(Node).filter(Node.type_id==type_cooc_id, Node.parent_id==corpus_id).first() is None:
print("Coocurrences do not exist yet, create it.")
miam_id = get_or_create_node(nodetype='MiamList', corpus=corpus).id
stop_id = get_or_create_node(nodetype='StopList', corpus=corpus).id
group_id = get_or_create_node(nodetype='Group', corpus=corpus).id
if field1 == field2 == 'ngrams' :
isMonopartite = True
else:
isMonopartite = False
SamuelFlag = False
# if field1 == field2 == 'ngrams' :
# isMonopartite = True
# SamuelFlag = True
# else:
# isMonopartite = False
isMonopartite = True # Always: first compute graph B, then build graph A from these B-nodes.
# data deleted each time
#cooc_id = get_or_create_node(nodetype='Cooccurrence', corpus=corpus).id
cooc_id = do_cooc(corpus=corpus, field1=field1, field2=field2
cooc_id = do_cooc(corpus=corpus, field1="ngrams", field2="ngrams"
, miam_id=miam_id, group_id=group_id, stop_id=stop_id, limit=size
, isMonopartite=isMonopartite
, apax=apax)
, isMonopartite=isMonopartite , start=start , end=end , apax=apax)
G, partition, ids, weight = do_distance(cooc_id, field1=field1, field2=field2, isMonopartite=isMonopartite)
G, partition, ids, weight = do_distance(cooc_id, field1="ngrams", field2="ngrams", isMonopartite=isMonopartite)
if type == "node_link":
nodesB_dict = {}
for node_id in G.nodes():
try:
#node,type(labels[node])
G.node[node_id]['pk'] = ids[node_id][1]
nodesB_dict [ ids[node_id][1] ] = True
the_label = session.query(Ngram.terms).filter(Ngram.id==node_id).first()
the_label = ", ".join(the_label)
# TODO: the query below is not optimized (do it in do_distance).
@@ -163,20 +165,39 @@ def get_cooc(request=None, corpus=None
pass #PrintException()
#print("error01: ",error)
data = json_graph.node_link_data(G)
B = json_graph.node_link_data(G)
links = []
i=1
for e in G.edges_iter():
s = e[0]
t = e[1]
info = { "id":i , "source":ids[s][1] , "target":ids[t][1]}
info = {
"s":ids[s][1] ,
"t":ids[t][1] ,
"w": G[ids[s][1]][ids[t][1]]["weight"]
}
# print(info)
links.append(info)
i+=1
# print(data)
data["links"] = []
data["links"] = links
# print(B)
B["links"] = []
B["links"] = links
if field1 == field2 == 'ngrams' :
data["nodes"] = B["nodes"]
data["links"] = B["links"]
else:
A = get_graphA( "journal" , nodesB_dict , B["links"] , corpus )
print("#nodesA:",len(A["nodes"]))
print("#linksAA + #linksAB:",len(A["links"]))
print("#nodesB:",len(B["nodes"]))
print("#linksBB:",len(B["links"]))
data["nodes"] = A["nodes"] + B["nodes"]
data["links"] = A["links"] + B["links"]
print(" total nodes :",len(data["nodes"]))
print(" total links :",len(data["links"]))
print("")
elif type == "adjacency":
for node in G.nodes():
@@ -193,13 +214,186 @@ def get_cooc(request=None, corpus=None
elif type == 'bestpartition':
return(partition)
# data = json_graph.node_link_data(G, attrs={\
# 'source':'source',\
# 'target':'target',\
# 'weight':'weight',\
# #'label':'label',\
# #'color':'color',\
# 'id':'id',})
#print(data)
return(data)
def get_graphA( nodeA_type , NodesB , links , corpus ):
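# Arguments (as used below): nodeA_type is the hyperdata field for the A-nodes
# (e.g. "journal"); NodesB is a dict of the ngram ids on the semantic map;
# links carries the BB links (apparently unused in this version); corpus is the
# corpus node. Returns node_link data holding the A-nodes, AA-links and AB-links.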
from analysis.InterUnion import Utils
print(" = = = == = = = ")
print("In get_graphA(), corpus id:",corpus.id)
nodeA_type_id = cache.Hyperdata[nodeA_type].id
threshold_cotainf = 0.05
max_nodeid = -1
for nodeid in NodesB:
if nodeid > max_nodeid:
max_nodeid = nodeid
# = = = = [ 01. Getting ALL documents of the Corpus c ] = = = = #
Docs = {}
document_type_id = cache.NodeType['Document'].id
sql_query = 'select id from node_node where parent_id='+str(corpus.id)+' and type_id='+str(document_type_id)
cursor = connection.cursor()
cursor.execute(sql_query)
results = cursor.fetchall()
for i in results:
Docs[i[0]] = True
print("docs:",len(Docs.keys()))
# = = = = [ / 01. Getting ALL documents of the Corpus c ] = = = = #
# = = = = [ 02. Getting ALL Documents related to the Ngrams of the semantic map ] = = = = #
sql_query = 'select nodey_id,ngram_id from node_nodenodengram where ngram_id IN (' + ','.join(map(str, NodesB.keys())) + ")"
cursor = connection.cursor()
cursor.execute(sql_query)
results = cursor.fetchall()
# = = = = [ / 02. Getting ALL Documents related to the Ngrams of the semantic map ] = = = = #
# = = = = [ 03. Now we limit the retrieved Documents (step 02) to those belonging to the Corpus c ] = = = = ]
Docs_and_ = {
"nodesA":{},
"nodesB":{}
}
NodesB_and_Docs = {}
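# Docs_and_["nodesB"][doc_id] -> ngram ids of the map found in that document;
# NodesB_and_Docs[ngram_id] -> document ids of the corpus containing that ngram.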
for i in results:
doc_id = i[0]
ngram_id = i[1]
if ngram_id in NodesB and doc_id in Docs:
if doc_id not in Docs_and_["nodesB"]:
Docs_and_["nodesB"][doc_id] = []
Docs_and_["nodesB"][doc_id].append( ngram_id )
if ngram_id not in NodesB_and_Docs:
NodesB_and_Docs[ngram_id] = []
NodesB_and_Docs[ngram_id].append( doc_id )
# = = = = [ / 03. Now we limit the retrieved Documents (step 02) to those belonging to the Corpus c ] = = = = ]
# # = = = = [ Getting Authors ] = = = = ]
# Authors = {}
# sql_query = 'select node_id,value_string from node_node_hyperdata where node_id IN (' + ','.join(map(str, Docs_and_["nodesB"].keys())) + ")"+' and hyperdata_id=10'# 10 -> authors
# cursor = connection.cursor()
# cursor.execute(sql_query)
# results = cursor.fetchall()
# for i in results:
# doc_id = i[0]
# authors = i[1].split(",")
# for a in authors:
# if a not in Authors:
# Authors[a] = 0
# Authors[a] += 1
# print("")
# print("#authors:")
# import pprint
# pprint.pprint(Authors)
# print("")
# # = = = = [ / Getting Authors ] = = = = ]
# = = = = [ 04. Getting A-elems and making the dictionaries] = = = = ]
sql_query = 'select node_id,value_string from node_node_hyperdata where node_id IN (' + ','.join(map(str, Docs_and_["nodesB"].keys())) + ")"+' and hyperdata_id='+str(nodeA_type_id)
cursor = connection.cursor()
cursor.execute(sql_query)
results = cursor.fetchall()
A_Freq = {}
A_int2str = {}
A_str2int = {}
counter = max_nodeid+1
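# A-values (e.g. journal names) get integer ids starting right after the largest
# B-node id, so A-node and B-node ids cannot collide in the merged graph.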
for i in results:
doc_id = i[0]
a = i[1]
if a not in A_str2int:
A_str2int[ a ] = counter
A_int2str[counter] = a
counter += 1
for i in results:
doc_id = i[0]
a = A_str2int[i[1]]
Docs_and_["nodesA"][doc_id] = a
if a not in A_Freq:
A_Freq[ a ] = 0
A_Freq[ a ] += 1
# = = = = [ / 04. Getting A-elems and making the dictionaries ] = = = = ]
# = = = = [ Filling graph-A ] = = = = ]
Graph_A = Utils()
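# For each map ngram, collect the distinct A-values of its documents and connect
# them as a complete subgraph; addCompleteSubGraph() presumably accumulates edge
# weights across cliques (see analysis.InterUnion.Utils).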
for i in NodesB_and_Docs:
ngram = i
docs = NodesB_and_Docs[i]
k_A_clique = {}
for doc in docs:
k_A = Docs_and_["nodesA"][doc]
k_A_clique[k_A] = True
if len(k_A_clique.keys())>1:
Graph_A.addCompleteSubGraph( k_A_clique.keys() )
# = = = = [ / Filling graph-A ] = = = = ]
# = = = = [ graph-A to JSON ] = = = = ]
A = Graph_A.G
for node_id in A.nodes():
A.node[node_id]['label'] = A_int2str[node_id]
A.node[node_id]['size'] = A_Freq[node_id]
A.node[node_id]['type'] = nodeA_type
A.node[node_id]['attributes'] = { "clust_default": 1 }
A_links = []
min_weight = 999999
max_weight = -1
Weights_Dist = {}
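# Weights_Dist counts how many AA-edges carry each weight value; this distribution
# drives the pruning of over-represented weights below.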
for e in A.edges_iter():
s = e[0]
t = e[1]
w = A[s][t]["weight"]
if w not in Weights_Dist:
Weights_Dist[ w ] = { "freq": 0 , "deleted":0 }
Weights_Dist[ w ]["freq"] += 1
if min_weight > w:
min_weight = w
if max_weight < w:
max_weight = w
edges2remove = []
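# Pruning heuristic: an edge is kept only if its weight value occurs fewer than
# 3*len(A) times (its weight is then normalized by max_weight); edges with
# over-represented weight values are removed instead, and isolated nodes are
# dropped afterwards.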
for e in A.edges_iter():
s = e[0]
t = e[1]
w = A[s][t]["weight"]
if Weights_Dist [ w ]["freq"] < ( len(A)*3 ): # weight-threshold
info = {
"s":s ,
"t":t ,
"w": w / max_weight # normalization
}
A_links.append(info)
else:
# if Weights_Dist [ w ]["deleted"] < round(Weights_Dist [ w ]["freq"]*0.95):
atuple = (s,t)
edges2remove.append(atuple)
Weights_Dist [ w ]["deleted"] += 1
A.remove_edges_from( edges2remove )
A.remove_nodes_from(nx.isolates(A))
data = json_graph.node_link_data(A) # saving nodesA
AB = nx.Graph()
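# Bipartite AB links: each map ngram is connected (weight 1) to the A-nodes of
# the documents containing it, provided the A-node survived the pruning above.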
for i in NodesB_and_Docs:
b = i
docs = NodesB_and_Docs[i]
for doc in docs:
a = Docs_and_["nodesA"][doc]
if A.has_node(a):
AB.add_edge( a , b )
AB_links = []
for e in AB.edges_iter():
info = { "s": e[0], "t": e[1], "w": 1 }
AB_links.append(info)
data["links"] = A_links + AB_links # saving AA-links and AB-links
# = = = = [ / graph-A to JSON ] = = = = ]
return data
@@ -24,9 +24,15 @@ var latest,oldest;
var TheBuffer = false
function Push2Buffer( NewVal ) {
console.log( " = = = = = = = = " )
console.log( "Push2Buffer()" )
console.log( "\t"+NewVal )
if ( TheBuffer == false) {
if( ! NewVal ) {
var limits = [ new Date( oldest[0],oldest[1],oldest[2] ) , new Date( latest[0],latest[1],latest[2] ) ];
// var limits = [ new Date( oldest[0],oldest[1],oldest[2] ) , new Date( latest[0],latest[1],latest[2] ) ];
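// JavaScript Date months are 0-based, hence the month-1 below; the range is then
// padded by one day on each side, presumably so that documents on the boundary
// dates stay inside the selectable range.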
var limits = [new Date(oldest[0],oldest[1]-1,oldest[2]), new Date(latest[0],latest[1]-1,latest[2] ) ];
limits[0] = new Date(limits[0].setDate(limits[0].getDate()-1) );
limits[1] = new Date(limits[1].setDate(limits[1].getDate()+1) );
NewVal = limits;
}
console.log( " - - - - - - " )
@@ -42,7 +48,9 @@ function Push2Buffer( NewVal ) {
var past = TheBuffer[0]+"_"+TheBuffer[1]
if( ! NewVal ) {
var limits = [ new Date( oldest[0],oldest[1],oldest[2] ) , new Date( latest[0],latest[1],latest[2] ) ];
var limits = [new Date(oldest[0],oldest[1]-1,oldest[2]), new Date(latest[0],latest[1]-1,latest[2] ) ];
limits[0] = new Date(limits[0].setDate(limits[0].getDate()-1) );
limits[1] = new Date(limits[1].setDate(limits[1].getDate()+1) );
NewVal = limits;
}
var now = NewVal[0]+"_"+NewVal[1]
@@ -295,8 +303,9 @@ function Main_test( Data , SearchFilter ) {
oldest = t0;
latest = t1;
TheBuffer = [new Date(t0[0],(t0[1]-1),t0[2]), new Date(t1[0],(t1[1]-1),t1[2]+1)];
TheBuffer = [new Date(t0[0],t0[1]-1,t0[2]), new Date(t1[0],t1[1]-1,t1[2] ) ];
TheBuffer[0] = new Date(TheBuffer[0].setDate(TheBuffer[0].getDate()-1) );
TheBuffer[1] = new Date(TheBuffer[1].setDate(TheBuffer[1].getDate()+1) );
var arrayd3 = []
for(var e in Data) {
@@ -397,7 +406,7 @@ function Main_test( Data , SearchFilter ) {
.group(volumeByMonthGroup)
.centerBar(true)
.gap(0)
.x(d3.time.scale().domain([new Date(t0[0],t0[1],t0[2]), new Date(t1[0],t1[1],t1[2])]))
.x(d3.time.scale().domain([TheBuffer[0], TheBuffer[1] ]))
.round(d3.time.month.round)
.xUnits(d3.time.months)
.renderlet(function (chart) {
@@ -24,28 +24,24 @@ var latest,oldest;
var TheBuffer = false
var PossibleActions = []
var PossibleActions = [
{
"id":"to_delete",
"name": "Delete",
"color":"red"
},
// {
// "id":"to_keep",
// "name": "Keep",
// "color":"green"
// },
// {
// "id":"to_group",
// "name": "Group",
// "color":"blue"
// }
]
var action1 = {
"id":"to_delete",
"name": "Delete",
"color":"red"
}
// var action2 = {
// "id":"to_keep",
// "name": "Keep",
// "color":"green"
// }
// var action3 = {
// "id":"to_group",
// "name": "Group",
// "color":"blue"
// }
PossibleActions.push(action1)
// PossibleActions.push(action2)
// PossibleActions.push(action3)
var FlagsBuffer = {}
for(var i in PossibleActions) {
@@ -95,22 +95,53 @@
<div class="col-md-6">
<div class="jumbotron">
{% if processing > 0 %}
<h3> <img width="20px" src="{% static "js/libs/img2/loading-bar.gif" %}"></img> Graph (later)</h3>
{% if processing > 0 %}
<h3> <img width="20px" src="{% static "js/libs/img2/loading-bar.gif" %}"></img> Networks (later)</h3>
<ol>
<li>Terms</li>
<li>Journals and Terms</li>
<li>Authors and Terms</li>
</ol>
{% else %}
<h3><a href="/project/{{project.id}}/corpus/{{ corpus.id }}/explorer">Graph</a></h3>
{% endif %}
<h3> Networks </h3>
<ol>
<li>Visualize</li>
<li>Explore</li>
<li>Read</li>
<li data-url="/project/{{project.id}}/corpus/{{ corpus.id }}/explorer?field1=ngrams&amp;field2=ngrams" onclick='gotoexplorer(this)'><a>Terms</a></li>
<li data-url="/project/{{project.id}}/corpus/{{ corpus.id }}/explorer?field1=journal&amp;field2=ngrams" onclick='gotoexplorer(this)'><a>Journals and Terms</a></li>
<li>Authors and Terms</li>
</ol>
{% endif %}
<h4><a href="/project/{{project.id}}/corpus/{{corpus.id}}/">Back to corpus</a></h4>
</div>
</div>
</div>
</div>
<script type="text/javascript">
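// gotoexplorer(elem): opens the graph explorer for the clicked network type.
// If the timeline brush (TheBuffer) still covers the full corpus range, no date
// filter is added; otherwise the one-day padding is undone and start/end
// parameters (year-month-day) are appended to the explorer URL.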
function gotoexplorer(elem) {
var url_ = $(elem).data("url")
if (TheBuffer==false)
return window.open(url_,'_blank');
var current_timerange = TheBuffer
var time_limits = [new Date(oldest[0],oldest[1]-1,oldest[2]), new Date(latest[0],latest[1]-1,latest[2] ) ];
time_limits[0] = new Date(time_limits[0].setDate(time_limits[0].getDate()-1) );
time_limits[1] = new Date(time_limits[1].setDate(time_limits[1].getDate()+1) );
if( ( +current_timerange[0]===+time_limits[0] ) && ( +current_timerange[1]===+time_limits[1] ) ) {
url_ = url_ // nothing to do: the full time range is selected
} else {
var start__ = new Date(current_timerange[0].setDate(current_timerange[0].getDate()+1) );
var end__ = new Date(current_timerange[1].setDate(current_timerange[1].getDate()-1) );
var start_ = start__.getFullYear()+"-"+(start__.getMonth()+1)+"-"+start__.getDate()
var end_ = end__.getFullYear()+"-"+(end__.getMonth()+1)+"-"+end__.getDate()
url_ += "&start=" + start_ + "&end="+end_;
// url_ += "&start=" + start__.getFullYear() + "&end="+end__.getFullYear();
}
return window.open(url_,'_blank');
}
</script>
{% endblock %}