Commit 61dbb71b authored by Castillo

stable version

parent d414ea78
...@@ -10,6 +10,9 @@ import os ...@@ -10,6 +10,9 @@ import os
import datetime import datetime
import pprint import pprint
import itertools import itertools
import time
from itertools import combinations
import networkx as nx
def lineal_comparisons( years ): def lineal_comparisons( years ):
D = {} D = {}
...@@ -113,7 +116,11 @@ class Period: ...@@ -113,7 +116,11 @@ class Period:
# Psub = P.join( WL ).map(lambda x: (x[0],x[1][0])) # Psub = P.join( WL ).map(lambda x: (x[0],x[1][0]))
T = P.join( WL ).map(lambda x: x[1][0] ) T = P.join( WL ).map(lambda x: x[1][0] )
t_i = time.time() ##
self.P_c = T.count() self.P_c = T.count()
t_f = time.time() ##
T_t = "{0:.3f}".format((t_f - t_i)) +"[s]" ##
# # saving term frequencies # # saving term frequencies
# self.TF = T.flatMap(lambda xs: [x for x in xs]).map(lambda x: (x, 1)) # self.TF = T.flatMap(lambda xs: [x for x in xs]).map(lambda x: (x, 1))
...@@ -127,6 +134,8 @@ class Period: ...@@ -127,6 +134,8 @@ class Period:
# - - - - - - - - - - - - - - - - - - - - - # # - - - - - - - - - - - - - - - - - - - - - #
# self.FI = model.freqItemsets().filter(lambda x: len(x.items)>=minfsetsize and x.freq>=2) # self.FI = model.freqItemsets().filter(lambda x: len(x.items)>=minfsetsize and x.freq>=2)
# .sortBy(lambda x: x.freq , ascending=False).zipWithIndex().map( lambda x: ( x[1] , x[0][0] , x[0][1] ) ).persist() # .sortBy(lambda x: x.freq , ascending=False).zipWithIndex().map( lambda x: ( x[1] , x[0][0] , x[0][1] ) ).persist()
t_i = time.time() ##
self.FI = model.freqItemsets().filter(lambda x: len(x.items)>=minfsetsize and x.freq>=2) self.FI = model.freqItemsets().filter(lambda x: len(x.items)>=minfsetsize and x.freq>=2)
self.FI = self.FI.sortBy(lambda x: x.freq , ascending=False).zipWithIndex().map( lambda x : (x[1],x[0]) ).persist() self.FI = self.FI.sortBy(lambda x: x.freq , ascending=False).zipWithIndex().map( lambda x : (x[1],x[0]) ).persist()
#.filter(lambda x: x[1]<=100).map( lambda x: ( x[1] , x[0][0] , x[0][1] ) ).persist() #.filter(lambda x: x[1]<=100).map( lambda x: ( x[1] , x[0][0] , x[0][1] ) ).persist()
...@@ -140,6 +149,10 @@ class Period: ...@@ -140,6 +149,10 @@ class Period:
# # # # = = [ / Extracting Frequent Itemsets ] = = = # # # # # # # = = [ / Extracting Frequent Itemsets ] = = = # # #
self.FI_c = self.FI.count() self.FI_c = self.FI.count()
t_f = time.time() ##
FI_t = "{0:.3f}".format((t_f - t_i)) +"[s]" ##
# # print("") # # print("")
# # print("") # # print("")
# # print("----FI----",year) # # print("----FI----",year)
...@@ -174,7 +187,8 @@ class Period: ...@@ -174,7 +187,8 @@ class Period:
# print( i ) # print( i )
# print( T.take(3) ) # print( T.take(3) )
print( "\t\t\t|FIs|"," -> ",self.FI_c ) print( "\t\t\t|T|"," -> ",self.P_c , "\t",T_t )
print( "\t\t\t|FI|"," -> ",self.FI_c , "\t",FI_t )
print("") print("")
return self.P_c , self.FI_c return self.P_c , self.FI_c
...@@ -195,6 +209,7 @@ class Phylo: ...@@ -195,6 +209,7 @@ class Phylo:
"from_": { "type": int , "value": -1 }, "from_": { "type": int , "value": -1 },
"to_": { "type": int , "value": -1 }, "to_": { "type": int , "value": -1 },
"minfpgsupp": { "type": float , "value": 0.0001 }, "minfpgsupp": { "type": float , "value": 0.0001 },
"minfsetsupp": { "type": int , "value": 2 },
"minfsetsize": { "type": int , "value": 4 }, "minfsetsize": { "type": int , "value": 4 },
"minsetdistance": { "type": int , "value": 0 }, "minsetdistance": { "type": int , "value": 0 },
"mram": { "type": int , "value": 40 }, "mram": { "type": int , "value": 40 },
...@@ -235,7 +250,11 @@ class Phylo: ...@@ -235,7 +250,11 @@ class Phylo:
N = self.yearsD N = self.yearsD
K = self.minK K = self.minK
WL = self.sc.parallelize( WL ).map( lambda x: (int(x) , 1) ) if len(WL)>0:
WL = self.sc.parallelize( WL ).map( lambda x: (int(x) , 1) )
else:
WL = self.WL
Distribution = {} Distribution = {}
for y in t : for y in t :
period_ = Period( some_sc=self.sc , period=y , numpart=self.partitions ) period_ = Period( some_sc=self.sc , period=y , numpart=self.partitions )
...@@ -276,8 +295,10 @@ class Phylo: ...@@ -276,8 +295,10 @@ class Phylo:
} }
def get_opossites( self , found_distances ): def get_opossites( self , found_distances=[] , filter_s = {} ):
print( "AAAAAAH" )
print( filter_s )
data = {} data = {}
Nodes = {} Nodes = {}
...@@ -304,8 +325,14 @@ class Phylo: ...@@ -304,8 +325,14 @@ class Phylo:
for y in period_nodes: for y in period_nodes:
clusters = self.sc.parallelize( period_nodes[y] ) clusters = self.sc.parallelize( period_nodes[y] )
R = self.yearsD[y].FI.join( clusters ).map(lambda x : [ x[0] , list(x[1][0].items) , x[1][0].freq ] ).collect() R = self.yearsD[y].FI.join( clusters ).map(lambda x : [x[0] , list(x[1][0].items) , x[1][0].freq ] )
for i in R: if "minfsetsupp" in filter_s:
R = R.filter( lambda x : x[2]>=filter_s["minfsetsupp"] )
if "minfsetsize" in filter_s:
R = R.filter( lambda x : len(x[1])>=filter_s["minfsetsize"] )
RR = R.collect()
# pprint.pprint( RR )
for i in RR:
cID = str(y)+"c"+str(i[0]) cID = str(y)+"c"+str(i[0])
if cID not in data: if cID not in data:
data[ cID ] = { data[ cID ] = {
...@@ -318,7 +345,10 @@ class Phylo: ...@@ -318,7 +345,10 @@ class Phylo:
def filter_jaccard(self , jacc_min ): def filter_jaccard(self , filter_s = {} ):
jacc_min = filter_s["jacc_min"]
f__ = filter_s
# print("\tin filter_jaccard!!") # print("\tin filter_jaccard!!")
rname = datetime.datetime.now().isoformat()+"" rname = datetime.datetime.now().isoformat()+""
...@@ -338,17 +368,17 @@ class Phylo: ...@@ -338,17 +368,17 @@ class Phylo:
print( "\t",jacc_min,"-> |JACCARD|:",len(found_distances) ) print( "\t",jacc_min,"-> |JACCARD|:",len(found_distances) )
timerange = [ 1982 , 2014 ] timerange = [ 1982 , 2014 ]
phylojson = lll.export_phylo( liens=found_distances , T=timerange , jacc_min=jacc_min ) phylojson, Parents = lll.export_phylo( liens=found_distances , T=timerange , jacc_min=jacc_min )
nodes_md = self.get_opossites( found_distances ) nodes_md = self.get_opossites( found_distances , filter_s )
nB2A = {} nB2A = {}
nA2B = {} nA2B = {}
NodesD_i2s = {} NodesD_i2s = {}
NodesD_s2i = {} NodesD_s2i = {}
NodesC = 0 NodesC = 0
for IDA_o in nodes_md:
for IDA_o in sorted(nodes_md.keys()):
IDA_s = "A_"+str(IDA_o) IDA_s = "A_"+str(IDA_o)
if IDA_s not in NodesD_s2i: if IDA_s not in NodesD_s2i:
NodesC += 1 NodesC += 1
...@@ -361,6 +391,7 @@ class Phylo: ...@@ -361,6 +391,7 @@ class Phylo:
items_ = {} items_ = {}
# print( IDA_o )
for ii in nodes_md[IDA_o]["items"]: for ii in nodes_md[IDA_o]["items"]:
IDB_s = "B_"+str( ii ) IDB_s = "B_"+str( ii )
if IDB_s not in NodesD_s2i: if IDB_s not in NodesD_s2i:
...@@ -373,7 +404,8 @@ class Phylo: ...@@ -373,7 +404,8 @@ class Phylo:
NodesD_s2i[ IDB_s ] = NodesC NodesD_s2i[ IDB_s ] = NodesC
IDB_i = NodesD_s2i[ IDB_s ] IDB_i = NodesD_s2i[ IDB_s ]
items_[ IDB_i ] = True items_[ IDB_i ] = True
# print("\t",sorted(items_))
# print("")
nA2B[ NodesD_s2i[ IDA_s ] ] = items_ nA2B[ NodesD_s2i[ IDA_s ] ] = items_
for i in items_: for i in items_:
...@@ -381,6 +413,54 @@ class Phylo: ...@@ -381,6 +413,54 @@ class Phylo:
nB2A[i] = {} nB2A[i] = {}
nB2A[i][ NodesD_s2i[ IDA_s ] ] = True nB2A[i][ NodesD_s2i[ IDA_s ] ] = True
# ETAGES = {}
# print("")
# print("PARENTS!!:")
# for p in sorted( Parents.keys() ):
# TO_MERGE = nx.Graph()
# p_items = sorted(nA2B[ NodesD_s2i[ "A_"+str(p) ] ])
# print(p ,":", p_items)
# p_children = sorted(Parents[p])
# p_children_D = {}
# for j in p_children:
# child_items = nA2B[ NodesD_s2i[ "A_"+str(j) ] ]
# print("\t",j ,":", sorted(child_items) )
# if j in Parents:
# j_children = Parents[j]
# if j not in p_children_D:
# p_children_D[ j ] = set( j_children )
# if len(p_children_D)>0:
# for i in p_children_D:
# TO_MERGE.add_node( i )
# p_j_children_pairs = combinations(p_children_D.keys(), 2)
# for cc in p_j_children_pairs:
# CID1 = cc[0]
# CID2 = cc[1]
# if p_children_D[ CID1 ] == p_children_D[ CID2 ]:
# print( "\t\t\tsame content:",CID1,CID2 )
# TO_MERGE.add_edge( CID1 , CID2 )
# print("")
# h = nx.connected_components(TO_MERGE)
# for ss in h:
# if len(ss)>1:
# print("\t\t\t",ss)
# # print(ss)
# merge_this = {}
# for ss_i in ss:
# print("\t\t\t\t",ss_i)
# # merge_this.union( p_children_D[ ss_i ] )
# elems = p_children_D[ ss_i ]
# for ll in elems:
# merge_this[ ll ] = True
# merge_this = set(merge_this.keys())
# print("\t\t\t",merge_this )
# print("")
# print("")
from n_partite_graph import nPartiteGraph from n_partite_graph import nPartiteGraph
bg = nPartiteGraph() bg = nPartiteGraph()
graph_b = bg.BiGraph_2( nA2B , nB2A ) graph_b = bg.BiGraph_2( nA2B , nB2A )
...@@ -443,18 +523,20 @@ class Phylo: ...@@ -443,18 +523,20 @@ class Phylo:
s = "A_"+str(s_) s = "A_"+str(s_)
t = "A_"+str(t_) t = "A_"+str(t_)
# print( NodesD_s2i[ s ] ,"->", NodesD_s2i[ t ] ) # print( NodesD_s2i[ s ] ,"->", NodesD_s2i[ t ] )
ID_s = NodesD_s2i[ s ] if "hidden" not in i:
ID_t = NodesD_s2i[ t ] if s in NodesD_s2i and t in NodesD_s2i:
ID_s = NodesD_s2i[ s ]
link = { ID_t = NodesD_s2i[ t ]
"id": C_liens,
"s":ID_s, link = {
"t":ID_t, "id": C_liens,
"type":"line", "s":ID_s,
"w": i["w"] "t":ID_t,
} "type":"line",
Links.append( link ) "w": i["w"]
C_liens += 1 }
Links.append( link )
C_liens += 1
...@@ -470,68 +552,39 @@ class Phylo: ...@@ -470,68 +552,39 @@ class Phylo:
for cID in phylojson["nodes"]: for cID in phylojson["nodes"]:
ID_s = "A_"+str(cID) ID_s = "A_"+str(cID)
try: try:
ID_i = NodesD_s2i[ ID_s ] if ID_s in NodesD_s2i:
# print( cID ,":",ID_i ) ID_i = NodesD_s2i[ ID_s ]
# print( cID ,":",ID_i )
node_ = phylojson["nodes"][cID]
node_["id"] = ID_i node_ = phylojson["nodes"][cID]
node_["label"] = cID node_["id"] = ID_i
# node_["shape"] = "square" node_["label"] = cID
# node_["type"] = "Cluster" if cID in nodes_md :
# "x":float(coord[0]) , if "supp" in nodes_md[cID]:
# "y":float(coord[1]) } node_["supp"] = nodes_md[cID]["supp"]
Nodes.append( node_ )
# node_["shape"] = "square"
if ID_i in nA2B: # node_["type"] = "Cluster"
for ngram in nA2B[ID_i]: # "x":float(coord[0]) ,
# print( "\t",ngram ) # "y":float(coord[1]) }
Nodes.append( node_ )
link = {
"id": C_liens, if ID_i in nA2B:
"s":ID_i, for ngram in nA2B[ID_i]:
"t":ngram, # print( "\t",ngram )
"w": 1
} link = {
Links.append( link ) "id": C_liens,
C_liens += 1 "s":ID_i,
"t":ngram,
"w": 1
}
Links.append( link )
C_liens += 1
except: except:
xxx = 10 xxx = 10
# a year-node # a year-node
# # return { "nodes": [] , "links": [] }
# # # print("")
# # print(" - - - - - -")
# # print("")
# # for i in graphArray["links"]:
# # print("_ ",i)
# # print("_ ","")
# # for i in graphArray["nodes"]:
# # print( i)
# # print( "")
# # print( " - - - - - - - - -")
# # for i in graphArray["links"]:
# # print( i)
# # print( "")
# Nodes_DD = {}
# for i in Nodes:
# print("_ ",i["id"])
# Nodes_DD[ i["id"] ] = i
# # print("_ ","")
# print("_ "," - - - - - - - - -")
# for i in Links:
# print("_ ",i["s"] ,"->", i["t"] )
# print( Nodes_DD[ i["s"] ] )
# print( Nodes_DD[ i["t"] ] )
# print("")
# print("_ "," - - - - - - - - -")
# # print( "|V_phy|:", len(phylojson["nodes"])) # # print( "|V_phy|:", len(phylojson["nodes"]))
# # print( "|E_phy|:", len(phylojson["links"])) # # print( "|E_phy|:", len(phylojson["links"]))
# # print( "|V|:", len(graphArray["nodes"])) # # print( "|V|:", len(graphArray["nodes"]))
...@@ -601,17 +654,13 @@ class Phylo: ...@@ -601,17 +654,13 @@ class Phylo:
return { "diff_time": { "Distribution": Distribution , "years":nyears , "pairs":pairs , "pairsD":pairsD } } return { "diff_time": { "Distribution": Distribution , "years":nyears , "pairs":pairs , "pairsD":pairsD } }
# print("") # print("")
# print( "old jacc:", self.minjacc ) # print( "old jacc:", self.minjacc )
# print( "new jacc:", p_["minjaccard"] ) # print( "new jacc:", p_["minjaccard"] )
# phylojson = self.filter_jaccard ( p_["minjaccard"] ) # phylojson = self.filter_jaccard ( p_["minjaccard"] )
# print("") # print("")
return None return {}
......
...@@ -168,9 +168,9 @@ class PhyloMaker: ...@@ -168,9 +168,9 @@ class PhyloMaker:
for y in years: for y in years:
AG.add_node(str(y), label=y , fake=True ,shape="plaintext") AG.add_node(str(y), label=y , fake=True ,shape="plaintext")
for i in range(len(years)): for i in sorted(years):
try: try:
AG.add_edge(str(years[i]),str(years[i+1]),fake=True) AG.add_edge(str(i),str(i+1),fake=True,weight=1)
except: except:
pass pass
# - - - - - [ / Adding yearly-graph ] - - - - - # # - - - - - [ / Adding yearly-graph ] - - - - - #
...@@ -226,8 +226,18 @@ class PhyloMaker: ...@@ -226,8 +226,18 @@ class PhyloMaker:
# redundant_ = nx.DiGraph() # redundant_ = nx.DiGraph()
# for n in AG.nodes_iter(): Parents = { }
# node = AG.node[n] for n in AG.nodes_iter():
node = AG.node[n]
if "fake" not in node:
succesors = AG.neighbors( n )
if len( succesors )>0:
Parents[n] = sorted( succesors )
# print( n )
# for j in succesors:
# print( "\t",j )
# print("- - - - ")
# print("")
# if "fake" not in node: # if "fake" not in node:
# parents = AG.predecessors( n ) # parents = AG.predecessors( n )
# if len(parents)>=2: # if len(parents)>=2:
...@@ -350,9 +360,9 @@ class PhyloMaker: ...@@ -350,9 +360,9 @@ class PhyloMaker:
for e in B.edges_iter(): for e in B.edges_iter():
s = e[0] s = e[0]
t = e[1] t = e[1]
# if "fake" not in AG[s][t]:
# print(e)
infodict = {"s":s , "t":t , "w":AG[s][t]["weight"] , "type":"line" } infodict = {"s":s , "t":t , "w":AG[s][t]["weight"] , "type":"line" }
if "fake" in AG[s][t]:
infodict["hidden"] = True
EdgesDict.append(infodict) EdgesDict.append(infodict)
Graph = { Graph = {
...@@ -366,4 +376,4 @@ class PhyloMaker: ...@@ -366,4 +376,4 @@ class PhyloMaker:
end = time.time() end = time.time()
print(float("{0:.2f}".format(end - start)),"[s] : dot layout FIN") print(float("{0:.2f}".format(end - start)),"[s] : dot layout FIN")
return Graph return Graph, Parents
...@@ -111,7 +111,7 @@ def close_contexts(): ...@@ -111,7 +111,7 @@ def close_contexts():
def test_post(): def test_post():
pprint.pprint( request ) pprint.pprint( request )
query = "void" query = "void"
GG = False GG = { "nodes": [] , "links": [] }
stats = False stats = False
records = { "Count": 0 } records = { "Count": 0 }
if request.method == "POST": if request.method == "POST":
...@@ -218,17 +218,23 @@ def test_post(): ...@@ -218,17 +218,23 @@ def test_post():
# # pairs of years to be multiplied # # pairs of years to be multiplied
I[ sID ].temp_matching( thepairs = pairs ) I[ sID ].temp_matching( thepairs = pairs )
GG = I[ sID ].filter_jaccard ( jacc_min=minjaccard ) filters_ = {
"jacc_min": I[ sID ].p["minsetdistance"]["value"],
"minfsetsize": I[ sID ].p["minfsetsize"]["value"],
"minfsetsupp": I[ sID ].p["minfsetsupp"]["value"],
}
GG = I[ sID ].filter_jaccard ( filter_s=filters_ )
Ya = p_["from_"] if len( GG["links"] )>0:
Yb = p_["to_"] Ya = p_["from_"]
GG_v = str( len( GG["nodes"] ) ) Yb = p_["to_"]
GG_e = str( len( GG["links"] ) ) GG_v = str( len( GG["nodes"] ) )
query_file = query.replace(" ","_")+"__"+Ya+"-"+Yb+"__"+GG_v+"x"+GG_e GG_e = str( len( GG["links"] ) )
f = open( "static/Phylo/data/"+query_file+".json","w") query_file = query.replace(" ","_")+"__"+Ya+"-"+Yb+"__"+GG_v+"x"+GG_e
f.write( json.dumps( GG,indent=1 ) ) f = open( "static/Phylo/data/"+query_file+".json","w")
f.close() f.write( json.dumps( GG,indent=1 ) )
f.close()
else: else:
print("\nYour \"",sID,"\" instance has been MODIFIED.") print("\nYour \"",sID,"\" instance has been MODIFIED.")
...@@ -236,6 +242,7 @@ def test_post(): ...@@ -236,6 +242,7 @@ def test_post():
pprint.pprint( I[ sID ].p ) pprint.pprint( I[ sID ].p )
print( "" ) print( "" )
params_ = {} params_ = {}
# # Updating I[ sID ].p parameters with new ones # #
for k in p_: for k in p_:
if "scontext"!=k: if "scontext"!=k:
try: try:
...@@ -258,8 +265,25 @@ def test_post(): ...@@ -258,8 +265,25 @@ def test_post():
I[ sID ].pairsD = diff_ress["diff_time"]["pairsD"] I[ sID ].pairsD = diff_ress["diff_time"]["pairsD"]
I[ sID ].temp_matching( thepairs = diff_ress["diff_time"]["pairs"] ) I[ sID ].temp_matching( thepairs = diff_ress["diff_time"]["pairs"] )
GG = I[ sID ].filter_jaccard ( jacc_min=I[ sID ].p["minsetdistance"]["value"] ) filters_ = {
"jacc_min": I[ sID ].p["minsetdistance"]["value"],
"minfsetsize": I[ sID ].p["minfsetsize"]["value"],
"minfsetsupp": I[ sID ].p["minfsetsupp"]["value"],
}
GG = I[ sID ].filter_jaccard ( filter_s=filters_ )
else:
pairs = I[ sID ].pairs
I[ sID ].temp_matching( thepairs = pairs )
filters_ = {
"jacc_min": I[ sID ].p["minsetdistance"]["value"],
"minfsetsize": I[ sID ].p["minfsetsize"]["value"],
"minfsetsupp": I[ sID ].p["minfsetsupp"]["value"],
}
GG = I[ sID ].filter_jaccard ( filter_s=filters_ )
if len( GG["links"] )>0:
Ya = str(I[ sID ].p["from_"]["value"]) Ya = str(I[ sID ].p["from_"]["value"])
Yb = str(I[ sID ].p["to_"]["value"]) Yb = str(I[ sID ].p["to_"]["value"])
GG_v = str( len( GG["nodes"] ) ) GG_v = str( len( GG["nodes"] ) )
......
...@@ -126,13 +126,21 @@ function dict_diff(obj1, obj2) { ...@@ -126,13 +126,21 @@ function dict_diff(obj1, obj2) {
} }
var K_i2s = {} var K_i2s = {}
var K_oi2i = {}
// var K_s2i = {} // var K_s2i = {}
var loader_ = '<img width=20 src="/static/Phylo/libs/img2/loading-bar.gif"></img>' var loader_ = '<img width=20 src="/static/Phylo/libs/img2/loading-bar.gif"></img>'
var G = { var G = {
"params_t0" : {}, "params_t0" : {},
} }
var Clusters_2DEL = {}
var Terms_2DEL = {}
var POST_ = false var POST_ = false
// "scontext" // "scontext"
...@@ -154,6 +162,32 @@ function getParams(form , children_ ) { ...@@ -154,6 +162,32 @@ function getParams(form , children_ ) {
return p_; return p_;
} }
// Click handler for #remove_terms. The callback body is empty, so clicking
// the element currently does nothing.
$("#remove_terms").click( function(){
})
// Click handler for #remove_clusters: drops every currently selected node
// whose type is "Cluster" from the rendered graph (partialGraph) and from the
// client-side lookup tables (Nodes, dicts.nodes, dicts.D2N, Relations["1|1"]),
// then refreshes and redraws the graph. Does nothing when there is no selection.
$("#remove_clusters").click( function(){
    console.log("removing clusteeeers")
    if ( $.isEmptyObject( selections ) ) {
        return
    }
    for(var cID in selections) {
        var node = Nodes[cID]
        // This handler never removes ids from `selections`, so a selected id
        // may already be gone from Nodes (e.g. after a previous click). Skip
        // it instead of throwing on `.type`, which would abort the loop and
        // skip the redraw below.
        if( !node || node.type!="Cluster" ) {
            continue
        }
        partialGraph.dropNode(cID)
        try {
            delete Nodes[cID]
            delete dicts.nodes[cID]
            delete dicts.D2N[cID]
            delete Relations["1|1"][cID]
        } catch(err) {
            // Best-effort cleanup: one of the lookup tables may be missing.
            // Report the failure instead of swallowing it silently.
            console.warn( "remove_clusters: could not fully remove", cID, err )
        }
    }
    partialGraph.refresh()
    partialGraph.draw()
})
function send_params( D ) { function send_params( D ) {
var query = $("#pubmedquery").val().slice() var query = $("#pubmedquery").val().slice()
...@@ -209,7 +243,7 @@ function send_params( D ) { ...@@ -209,7 +243,7 @@ function send_params( D ) {
$("#pubmed_fetch").bind('click', function() { $("#pubmed_fetch").bind('click', function() {
console.log( "hola mundo" ) console.log( "pubmed_fetch" )
var URL = "<URL>" var URL = "<URL>"
...@@ -236,8 +270,20 @@ $("#pubmed_fetch").bind('click', function() { ...@@ -236,8 +270,20 @@ $("#pubmed_fetch").bind('click', function() {
var params_t1 = getParams("phyloform" , "input") var params_t1 = getParams("phyloform" , "input")
var params_diff = dict_diff( params_t1 , G["params_t0"] ) var params_diff = dict_diff( params_t1 , G["params_t0"] )
console.log("")
console.log("")
console.log("")
console.log("DIFF TIMES!!!")
console.log( params_t1 )
console.log( G["params_t0"] )
console.log(" - - - - - - - ")
console.log( params_diff )
console.log("")
console.log("")
console.log("")
// spark context has changed -> change everything // spark context has changed -> change everything
if("scontext" in params_diff) { if("query" in params_diff) {
return send_params( params_t1 ) return send_params( params_t1 )
} }
...@@ -331,32 +377,32 @@ function get_ngrams( query ) { ...@@ -331,32 +377,32 @@ function get_ngrams( query ) {
data: DD , data: DD ,
success : function(data) { success : function(data) {
console.log( "get_ngrams!!" ) console.log( "get_ngrams!!" )
console.log( "data:" ) // console.log( "data:" )
console.log( data ) // console.log( data )
for(var i in data){ for(var i in data){
K_i2s[ i ] = data[i] K_i2s[ i ] = data[i]
} }
console.log( "K_i2s:" ) // console.log( "K_i2s:" )
console.log( K_i2s ) // console.log( K_i2s )
console.log("iter mesh_terms") // console.log("iter mesh_terms")
for(var i in dicts.nodes) { for(var i in dicts.nodes) {
if( dicts.nodes[i].type=="mesh_term" ) { if( dicts.nodes[i].type=="mesh_term" ) {
// console.log ( dicts.nodes[i] ) // console.log ( dicts.nodes[i] )
// console.log ( K_i2s[dicts.nodes[i].label] ) // console.log ( K_i2s[dicts.nodes[i].label] )
// console.log ( K_i2s[Number(dicts.nodes[i].label)] ) // console.log ( K_i2s[Number(dicts.nodes[i].label)] )
// console.log("") // console.log("")
console.log( dicts.nodes[i].label ) // console.log( dicts.nodes[i].label )
console.log( K_i2s[dicts.nodes[i].label] ) // console.log( K_i2s[dicts.nodes[i].label] )
var ID = dicts.nodes[i].id var ID = dicts.nodes[i].id
var newlabel = K_i2s[dicts.nodes[i].label] var newlabel = K_i2s[dicts.nodes[i].label]
if( typeof( newlabel )!="undefined" ) { if( typeof( newlabel )!="undefined" ) {
K_oi2i [ newlabel ] = ID
dicts.nodes[i].label = newlabel dicts.nodes[i].label = newlabel
Nodes[ ID ].label = newlabel Nodes[ ID ].label = newlabel
console.log( dicts.nodes[i] )
console.log( Nodes[ ID ] )
console.log("")
} }
} }
} }
...@@ -364,6 +410,12 @@ function get_ngrams( query ) { ...@@ -364,6 +410,12 @@ function get_ngrams( query ) {
partialGraph.draw() partialGraph.draw()
labels = []
for(var kk in K_i2s ){
updateSearchLabels( kk , K_i2s[kk] , "mesh_term");
}
}, },
error: function(jqxhr,textStatus,errorThrown) { error: function(jqxhr,textStatus,errorThrown) {
......
...@@ -136,25 +136,60 @@ def test_workflow(): ...@@ -136,25 +136,60 @@ def test_workflow():
import time import time
print("hello") print("hello")
minsupp = 0.0001
numpart = 100
minfsetsize = 4
import findspark
findspark.init()
from pyspark.mllib.fpm import FPGrowth
from pyspark import SparkContext
from pyspark import SparkConf
cfg = SparkConf().set('spark.driver.memory', "40g").set('spark.driver.cores', 20 ).setAppName("simple_app")
ncores = 20
sc__ = SparkContext(conf=cfg)
from PhyloSpark import Phylo from PhyloSpark import Phylo
periods_ = [ 1983 , 1984 ]
the_ = Phylo( t=periods_ , memm="20g" , ncores="24" ) periods_ = range(2003,2005+1)
the_ = Phylo( t=periods_ , minJ=0.0 , spark_context=sc__ , ncores=ncores )
# WL = getWL( the_.sc , "/datasets/PubMed2014/chikungunya.txt" ) # WL = getWL( the_.sc , "/datasets/PubMed2014/chikungunya.txt" )
WL_path = "/datasets/PubMed2014/chikungunya.txt" WL_path = "/datasets/PubMed2014/chikungunya.txt"
WL = the_.sc.textFile( WL_path ).map( lambda line: (int(line.strip()) , 1) ) # WL_path = "/datasets/PubMed2014/gut_AND_brain.txt"
WL = sc__.textFile( WL_path ).map( lambda line: (int(line.strip()) , 1) )
# WL = getWL( the_.sc , "/datasets/PubMed2014/cell-aging.txt" ) # WL = getWL( the_.sc , "/datasets/PubMed2014/cell-aging.txt" )
# WL = getWL( the_.sc , "/datasets/PubMed2014/rheumatoid-arthritis.txt" ) # WL = getWL( the_.sc , "/datasets/PubMed2014/rheumatoid-arthritis.txt" )
for i in range(1983,2015): for i in range(2003,2005+1):
# start = time.time() start = time.time()
period = str(i) period = str(i)
print(period) # print(period)
Psub = interDataSet( the_.sc , period , WL ) T = interDataSet( sc__ , period , WL ).map(lambda x: x[1] )
# print("\t",len(ress.collect())) print("\t",period,"->",len(T.collect()))
# print( "\t", T.take(1))
model = FPGrowth.train(T, minSupport=minsupp, numPartitions=numpart)
FI_all_c = model.freqItemsets().count()
print("\t\t |FI|", FI_all_c)
t_i = time.time() ##
FI = model.freqItemsets().filter(lambda x: len(x.items)>=minfsetsize and x.freq>=2)
FI = FI.sortBy(lambda x: x.freq , ascending=False).zipWithIndex().map( lambda x : (x[1],x[0]) ).persist()
FI_c = FI.count()
t_f = time.time() ##
FI_t = "{0:.3f}".format((t_f - t_i)) +"[s]" ##
print("\t\t |FI_| ", FI_c , "\t",FI_t )
print("")
# end = time.time() # end = time.time()
# print("\t\t",end - start) # print("\t\t",end - start)
print("") print("")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment