Commit d414ea78 authored by Castillo

stable version

parent bed4825d
...@@ -3,8 +3,6 @@ ...@@ -3,8 +3,6 @@
import findspark import findspark
findspark.init() findspark.init()
from pyspark import SparkContext
# from pyspark import SparkConf
from pyspark.mllib.fpm import FPGrowth from pyspark.mllib.fpm import FPGrowth
from InterUnion import Utils from InterUnion import Utils
import simplejson as json import simplejson as json
...@@ -188,17 +186,8 @@ class Period: ...@@ -188,17 +186,8 @@ class Period:
class Phylo: class Phylo:
def __init__(self , t=[] , minK=4 , minJ=0.0 , memm="4g" , ncores="12" ): def __init__(self , t=[] , minK=4 , minJ=0.0 , spark_context=False , ncores=10 ):
SparkContext.setSystemProperty('spark.executor.memory', memm) self.sc = spark_context
SparkContext.setSystemProperty('spark.driver.memory', memm)
SparkContext.setSystemProperty('spark.executor.cores', ncores)
SparkContext.setSystemProperty('spark.driver.cores', ncores)
self.sc = SparkContext("local["+ncores+"]","simple app")
print("")
print("")
pprint.pprint( self.sc._conf.getAll() )
print("")
print("")
self.utls = Utils() self.utls = Utils()
# self.years = t #list( range( t[0], t[1]+1 ) ) # combinations(self.years, 2) # self.years = t #list( range( t[0], t[1]+1 ) ) # combinations(self.years, 2)
self.p = { self.p = {
...@@ -226,6 +215,7 @@ class Phylo: ...@@ -226,6 +215,7 @@ class Phylo:
self.minjacc = minJ self.minjacc = minJ
# self.KxC = {} # self.KxC = {}
def get_atts( self , scn ): def get_atts( self , scn ):
d = { d = {
"sc" : scn , "sc" : scn ,
...@@ -343,99 +333,213 @@ class Phylo: ...@@ -343,99 +333,213 @@ class Phylo:
for idx in self.phylomm: for idx in self.phylomm:
if self.phylomm[idx]["count"] > 0: if self.phylomm[idx]["count"] > 0:
found_distances += self.phylomm[idx]["rdd_"].filter( lambda x: x[0]>=jacc_min ).collect() found_distances += self.phylomm[idx]["rdd_"].filter( lambda x: x[0]>=jacc_min ).collect()
for i in found_distances: # for i in found_distances:
print(i) # print(i)
print( "\t",jacc_min,"-> |JACCARD|:",len(found_distances) ) print( "\t",jacc_min,"-> |JACCARD|:",len(found_distances) )
timerange = [ 1982 , 2014 ] timerange = [ 1982 , 2014 ]
phylojson = lll.export_phylo( liens=found_distances , T=timerange , jacc_min=jacc_min ) phylojson = lll.export_phylo( liens=found_distances , T=timerange , jacc_min=jacc_min )
# for i in phylojson["nodes"]:
# print( i )
# print(phylojson["nodes"][i])
# print("")
# print(" - - ")
# for i in phylojson["links"]:
# print( i )
# print("")
# print(" - - ")
# print(" - - ")
# print(" - - ")
# print(" - - ")
# print(" - - ")
# print(" - - ")
# print(" - - ")
# print(" - - ")
nodes_md = self.get_opossites( found_distances ) nodes_md = self.get_opossites( found_distances )
nB2A = {} nB2A = {}
nA2B = {} nA2B = {}
for ID in nodes_md: NodesD_i2s = {}
NodesD_s2i = {}
NodesC = 0
for IDA_o in nodes_md:
IDA_s = "A_"+str(IDA_o)
if IDA_s not in NodesD_s2i:
NodesC += 1
NodesD_i2s[ NodesC ] = {
"ID_o": IDA_o,
"ID_s": IDA_s,
"ID_i": NodesC
}
NodesD_s2i[ IDA_s ] = NodesC
items_ = {} items_ = {}
for ii in nodes_md[ID]["items"]: for ii in nodes_md[IDA_o]["items"]:
items_[ii] = True IDB_s = "B_"+str( ii )
nA2B[ ID ] = items_ if IDB_s not in NodesD_s2i:
NodesC += 1
NodesD_i2s[ NodesC ] = {
"ID_o": ii,
"ID_s": IDB_s,
"ID_i": NodesC
}
NodesD_s2i[ IDB_s ] = NodesC
IDB_i = NodesD_s2i[ IDB_s ]
items_[ IDB_i ] = True
nA2B[ NodesD_s2i[ IDA_s ] ] = items_
for i in items_: for i in items_:
if i not in nB2A: if i not in nB2A:
nB2A[i] = {} nB2A[i] = {}
nB2A[i][ID] = True nB2A[i][ NodesD_s2i[ IDA_s ] ] = True
from n_partite_graph import nPartiteGraph from n_partite_graph import nPartiteGraph
bg = nPartiteGraph() bg = nPartiteGraph()
ress = bg.BiGraph_2( nA2B , nB2A ) graph_b = bg.BiGraph_2( nA2B , nB2A )
graphArray = ress[0] # [graphArray , nodesA_toB.keys() , nodesB_toA.keys() , len(Links)] GraphB = graph_b["G"]
if GraphB.number_of_edges()==0:
return { "nodes": [] , "links": [] }
Nodes = []
Links = []
for n in GraphB.nodes_iter():
# label = NodesD_i2s[ n ]["ID_s"]
node = {
"attributes":{},
"r_id": NodesD_i2s[ n ]["ID_s"],
"id": n,
"label": str( NodesD_i2s[ n ]["ID_o"] ),
"type": "mesh_term",
"size": GraphB.degree(n)
}
Nodes.append(node)
# print( node )
# print("")
# print("")
# print(" - - - - ")
# print(" - - - - ")
# print(" - - - - ")
# print("")
# print("")
C_liens = len( Links )+1
for e in GraphB.edges_iter():
s = e[0]
t = e[1]
link = {
"id": C_liens,
"s":s,
"t":t,
"w": GraphB[s][t]["weight"]
}
Links.append(link)
# print( "\t" , s ,"->", t )
# print( NodesD_i2s[ s ] )
# print( NodesD_i2s[ t ] )
# print("")
C_liens += 1
# print("")
# print("now links:")
# print("")
C_liens = len( Links )+1
for i in phylojson["links"]:
s_ = i["s"]
t_ = i["t"]
# print( "\tphylojson" , s_ ,"->", t_ )
# print( phylojson["nodes"][s_] )
# print( phylojson["nodes"][t_] )
s = "A_"+str(s_)
t = "A_"+str(t_)
# print( NodesD_s2i[ s ] ,"->", NodesD_s2i[ t ] )
ID_s = NodesD_s2i[ s ]
ID_t = NodesD_s2i[ t ]
link = {
"id": C_liens,
"s":ID_s,
"t":ID_t,
"type":"line",
"w": i["w"]
}
Links.append( link )
C_liens += 1
# for i in graphArray["nodes"]:
# print("_ ",i)
# print("_ ","")
# print("_ "," - - - - - - - - -")
graphArray["links"] += phylojson["links"]
# Links += phylojson["links"]
C_liens = len( graphArray["links"] )+1
C_liens = len( Links )+1
# print("") # print("")
# print(" - - - - - -") # print(" - - - - - -")
for cID in phylojson["nodes"]: for cID in phylojson["nodes"]:
# print( cID) ID_s = "A_"+str(cID)
# if i["label"] in phylojson["nodes"]: try:
graphArray["nodes"].append( phylojson["nodes"][cID] ) ID_i = NodesD_s2i[ ID_s ]
if cID in nA2B: # print( cID ,":",ID_i )
for ngram in nA2B[cID]:
# print( "\t",ngram ) node_ = phylojson["nodes"][cID]
node_["id"] = ID_i
link = { node_["label"] = cID
"id": C_liens, # node_["shape"] = "square"
"s":phylojson["nodes"][cID]["id"], # node_["type"] = "Cluster"
"t":ngram, # "x":float(coord[0]) ,
"w": 1 # "y":float(coord[1]) }
} Nodes.append( node_ )
C_liens += 1
graphArray["links"].append( link ) if ID_i in nA2B:
# # print("") for ngram in nA2B[ID_i]:
# print(" - - - - - -") # print( "\t",ngram )
# print("")
link = {
# for i in graphArray["links"]: "id": C_liens,
# print("_ ",i) "s":ID_i,
# print("_ ","") "t":ngram,
"w": 1
}
Links.append( link )
C_liens += 1
except:
xxx = 10
# a year-node
# # return { "nodes": [] , "links": [] }
# # # print("")
# # print(" - - - - - -")
# # print("")
# # for i in graphArray["links"]:
# # print("_ ",i)
# # print("_ ","")
# for i in graphArray["nodes"]:
# print( i)
# print( "")
# print( " - - - - - - - - -")
# for i in graphArray["links"]:
# print( i)
# print( "")
# # for i in graphArray["nodes"]:
# # print( i)
# # print( "")
# # print( " - - - - - - - - -")
# # for i in graphArray["links"]:
# # print( i)
# # print( "")
# print( "|V_phy|:", len(phylojson["nodes"])) # Nodes_DD = {}
# print( "|E_phy|:", len(phylojson["links"])) # for i in Nodes:
# print( "|V|:", len(graphArray["nodes"])) # print("_ ",i["id"])
# print( "|E|:", len(graphArray["links"]) ) # Nodes_DD[ i["id"] ] = i
# # print("_ ","")
# print("_ "," - - - - - - - - -")
# for i in Links:
# print("_ ",i["s"] ,"->", i["t"] )
# print( Nodes_DD[ i["s"] ] )
# print( Nodes_DD[ i["t"] ] )
# print("")
# print("_ "," - - - - - - - - -")
# # print( "|V_phy|:", len(phylojson["nodes"]))
# # print( "|E_phy|:", len(phylojson["links"]))
# # print( "|V|:", len(graphArray["nodes"]))
# # print( "|E|:", len(graphArray["links"]) )
graphArray = {
"nodes": Nodes,
"links": Links
}
return graphArray return graphArray
......
...@@ -225,6 +225,12 @@ class nPartiteGraph: ...@@ -225,6 +225,12 @@ class nPartiteGraph:
GraphB = Graph.G GraphB = Graph.G
GraphB.remove_nodes_from(nx.isolates(GraphB)) GraphB.remove_nodes_from(nx.isolates(GraphB))
GraphB = self.normalize_edges( GraphB ) GraphB = self.normalize_edges( GraphB )
graphArray = {
"nodes":[],
"links":[],
"G": GraphB
}
return graphArray
# print (len(GraphB)) # print (len(GraphB))
Nodes = [] Nodes = []
...@@ -249,10 +255,6 @@ class nPartiteGraph: ...@@ -249,10 +255,6 @@ class nPartiteGraph:
# Links.append(link) # Links.append(link)
# c += 1 # c += 1
graphArray = {
"nodes":Nodes,
"links":Links,
}
# pprint.pprint(graphArray["clusters"]) # pprint.pprint(graphArray["clusters"])
......
...@@ -352,7 +352,7 @@ class PhyloMaker: ...@@ -352,7 +352,7 @@ class PhyloMaker:
t = e[1] t = e[1]
# if "fake" not in AG[s][t]: # if "fake" not in AG[s][t]:
# print(e) # print(e)
infodict = {"s":Phy_D[s] , "t":Phy_D[t] , "w":AG[s][t]["weight"] , "type":"line" } infodict = {"s":s , "t":t , "w":AG[s][t]["weight"] , "type":"line" }
EdgesDict.append(infodict) EdgesDict.append(infodict)
Graph = { Graph = {
......
...@@ -30,6 +30,17 @@ runner = Runner(app) #*# ...@@ -30,6 +30,17 @@ runner = Runner(app) #*#
from bigindex import LoadShit from bigindex import LoadShit
import urllib import urllib
import findspark
findspark.init()
from pyspark import SparkContext
from pyspark import SparkConf
cfg = SparkConf().set('spark.driver.memory', "40g").set('spark.driver.cores', 20 ).setAppName("simple_app")
# .setMaster(cluster_url)
ncores = 20
sc__ = SparkContext(conf=cfg)
I = {} I = {}
class BabelForm(Form): class BabelForm(Form):
...@@ -147,13 +158,43 @@ def test_post(): ...@@ -147,13 +158,43 @@ def test_post():
# print ("intersecting:", float("{0:.3f}".format((t_f - t_i))) ,"[s]") ## # print ("intersecting:", float("{0:.3f}".format((t_f - t_i))) ,"[s]") ##
# print("") ## # print("") ##
sID = p_["scontext"] sID = query #p_["scontext"]
if sID not in I: if sID not in I:
# if len( I.keys() )==0:
# memm=p_["mram"]+"g" ,
# ncores=p_["ncores"]
# theconf = SparkConf().set('spark.driver.memory', memm).set('spark.driver.cores', ncores)
# # .setAppName("broadcastfail")
# # .setMaster(cluster_url)
# sc__ = SparkContext(conf=theconf)
# # SparkContext.setSystemProperty('spark.executor.memory', memm)
# # SparkContext.setSystemProperty('spark.driver.memory', memm)
# # SparkContext.setSystemProperty('spark.executor.cores', ncores)
# # SparkContext.setSystemProperty('spark.driver.cores', ncores)
# # sc__ = SparkContext("local["+ncores+"]","simple app")
print("")
print("")
pprint.pprint( sc__._conf.getAll() )
print("")
print("")
print( " - - -- - -" )
print( " - - -- - -" )
pprint.pprint( p_ )
print( " - - -- - -" )
print( " - - -- - -" )
periods_ = range( int(p_["from_"]) , int(p_["to_"])+1 ) periods_ = range( int(p_["from_"]) , int(p_["to_"])+1 )
# periods_ = range( 2008 , 2011 ) # periods_ = range( 2008 , 2011 )
minjaccard = float(p_["minsetdistance"]) minjaccard = float(p_["minsetdistance"])
I[ sID ] = Phylo( t=periods_ , minJ=float(p_["minsetdistance"]) , memm=p_["mram"]+"g" , ncores=p_["ncores"]) I[ sID ] = Phylo( t=periods_ , minJ=float(p_["minsetdistance"]) , spark_context=sc__ , ncores=ncores )
for k in p_: for k in p_:
...@@ -170,6 +211,7 @@ def test_post(): ...@@ -170,6 +211,7 @@ def test_post():
print( "" ) print( "" )
print( "" ) print( "" )
# executes a fp-growth per year # executes a fp-growth per year
stats , years , pairs , pairsD = I[ sID ].FPG_chain( t=periods_ , WL=records["IdList"] ) stats , years , pairs , pairsD = I[ sID ].FPG_chain( t=periods_ , WL=records["IdList"] )
I[ sID ].years , I[ sID ].pairs , I[ sID ].pairsD = years , pairs , pairsD I[ sID ].years , I[ sID ].pairs , I[ sID ].pairsD = years , pairs , pairsD
......
...@@ -322,6 +322,7 @@ $("#pubmed_scan").bind('click', function() { ...@@ -322,6 +322,7 @@ $("#pubmed_scan").bind('click', function() {
function get_ngrams( query ) { function get_ngrams( query ) {
console.log( "get_ngrams!!" )
console.log( query ) console.log( query )
var DD = { "elems": query} var DD = { "elems": query}
$.ajax({ $.ajax({
...@@ -329,21 +330,41 @@ function get_ngrams( query ) { ...@@ -329,21 +330,41 @@ function get_ngrams( query ) {
type: 'POST', type: 'POST',
data: DD , data: DD ,
success : function(data) { success : function(data) {
console.log( "get_ngrams!!" )
console.log( "data:" )
console.log( data )
for(var i in data){ for(var i in data){
K_i2s[i] = data[i] K_i2s[ i ] = data[i]
} }
console.log( "K_i2s:" )
console.log( K_i2s )
console.log("iter mesh_terms")
for(var i in dicts.nodes) { for(var i in dicts.nodes) {
if( dicts.nodes[i].type=="mesh_term" ) { if( dicts.nodes[i].type=="mesh_term" ) {
console.log ( dicts.nodes[i] ) // console.log ( dicts.nodes[i] )
// console.log ( K_i2s[dicts.nodes[i].label] )
// console.log ( K_i2s[Number(dicts.nodes[i].label)] )
// console.log("")
console.log( dicts.nodes[i].label )
console.log( K_i2s[dicts.nodes[i].label] )
var ID = dicts.nodes[i].id var ID = dicts.nodes[i].id
dicts.nodes[i].label = K_i2s[ID] var newlabel = K_i2s[dicts.nodes[i].label]
Nodes[ ID ].label = K_i2s[ID] if( typeof( newlabel )!="undefined" ) {
dicts.nodes[i].label = newlabel
Nodes[ ID ].label = newlabel
console.log( dicts.nodes[i] )
console.log( Nodes[ ID ] )
console.log("")
}
} }
} }
partialGraph.refresh() partialGraph.refresh()
partialGraph.draw() partialGraph.draw()
}, },
error: function(jqxhr,textStatus,errorThrown) { error: function(jqxhr,textStatus,errorThrown) {
console.log(jqxhr); console.log(jqxhr);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment