Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
Phylum
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Castillo
Phylum
Commits
d414ea78
Commit
d414ea78
authored
Nov 15, 2016
by
Castillo
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
stable version
parent
bed4825d
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
258 additions
and
89 deletions
+258
-89
PhyloSpark.py
PhyloSpark.py
+182
-78
n_partite_graph.py
n_partite_graph.py
+6
-4
output_2json.py
output_2json.py
+1
-1
phylum_srv.py
phylum_srv.py
+44
-2
phylomain.js
static/phylomain.js
+25
-4
No files found.
PhyloSpark.py
View file @
d414ea78
...
@@ -3,8 +3,6 @@
...
@@ -3,8 +3,6 @@
import
findspark
import
findspark
findspark
.
init
()
findspark
.
init
()
from
pyspark
import
SparkContext
# from pyspark import SparkConf
from
pyspark.mllib.fpm
import
FPGrowth
from
pyspark.mllib.fpm
import
FPGrowth
from
InterUnion
import
Utils
from
InterUnion
import
Utils
import
simplejson
as
json
import
simplejson
as
json
...
@@ -188,17 +186,8 @@ class Period:
...
@@ -188,17 +186,8 @@ class Period:
class
Phylo
:
class
Phylo
:
def
__init__
(
self
,
t
=
[]
,
minK
=
4
,
minJ
=
0.0
,
memm
=
"4g"
,
ncores
=
"12"
):
def
__init__
(
self
,
t
=
[]
,
minK
=
4
,
minJ
=
0.0
,
spark_context
=
False
,
ncores
=
10
):
SparkContext
.
setSystemProperty
(
'spark.executor.memory'
,
memm
)
self
.
sc
=
spark_context
SparkContext
.
setSystemProperty
(
'spark.driver.memory'
,
memm
)
SparkContext
.
setSystemProperty
(
'spark.executor.cores'
,
ncores
)
SparkContext
.
setSystemProperty
(
'spark.driver.cores'
,
ncores
)
self
.
sc
=
SparkContext
(
"local["
+
ncores
+
"]"
,
"simple app"
)
print
(
""
)
print
(
""
)
pprint
.
pprint
(
self
.
sc
.
_conf
.
getAll
()
)
print
(
""
)
print
(
""
)
self
.
utls
=
Utils
()
self
.
utls
=
Utils
()
# self.years = t #list( range( t[0], t[1]+1 ) ) # combinations(self.years, 2)
# self.years = t #list( range( t[0], t[1]+1 ) ) # combinations(self.years, 2)
self
.
p
=
{
self
.
p
=
{
...
@@ -226,6 +215,7 @@ class Phylo:
...
@@ -226,6 +215,7 @@ class Phylo:
self
.
minjacc
=
minJ
self
.
minjacc
=
minJ
# self.KxC = {}
# self.KxC = {}
def
get_atts
(
self
,
scn
):
def
get_atts
(
self
,
scn
):
d
=
{
d
=
{
"sc"
:
scn
,
"sc"
:
scn
,
...
@@ -343,99 +333,213 @@ class Phylo:
...
@@ -343,99 +333,213 @@ class Phylo:
for
idx
in
self
.
phylomm
:
for
idx
in
self
.
phylomm
:
if
self
.
phylomm
[
idx
][
"count"
]
>
0
:
if
self
.
phylomm
[
idx
][
"count"
]
>
0
:
found_distances
+=
self
.
phylomm
[
idx
][
"rdd_"
]
.
filter
(
lambda
x
:
x
[
0
]
>=
jacc_min
)
.
collect
()
found_distances
+=
self
.
phylomm
[
idx
][
"rdd_"
]
.
filter
(
lambda
x
:
x
[
0
]
>=
jacc_min
)
.
collect
()
for
i
in
found_distances
:
#
for i in found_distances:
print
(
i
)
#
print(i)
print
(
"
\t
"
,
jacc_min
,
"-> |JACCARD|:"
,
len
(
found_distances
)
)
print
(
"
\t
"
,
jacc_min
,
"-> |JACCARD|:"
,
len
(
found_distances
)
)
timerange
=
[
1982
,
2014
]
timerange
=
[
1982
,
2014
]
phylojson
=
lll
.
export_phylo
(
liens
=
found_distances
,
T
=
timerange
,
jacc_min
=
jacc_min
)
phylojson
=
lll
.
export_phylo
(
liens
=
found_distances
,
T
=
timerange
,
jacc_min
=
jacc_min
)
# for i in phylojson["nodes"]:
# print( i )
# print(phylojson["nodes"][i])
# print("")
# print(" - - ")
# for i in phylojson["links"]:
# print( i )
# print("")
# print(" - - ")
# print(" - - ")
# print(" - - ")
# print(" - - ")
# print(" - - ")
# print(" - - ")
# print(" - - ")
# print(" - - ")
nodes_md
=
self
.
get_opossites
(
found_distances
)
nodes_md
=
self
.
get_opossites
(
found_distances
)
nB2A
=
{}
nB2A
=
{}
nA2B
=
{}
nA2B
=
{}
for
ID
in
nodes_md
:
NodesD_i2s
=
{}
NodesD_s2i
=
{}
NodesC
=
0
for
IDA_o
in
nodes_md
:
IDA_s
=
"A_"
+
str
(
IDA_o
)
if
IDA_s
not
in
NodesD_s2i
:
NodesC
+=
1
NodesD_i2s
[
NodesC
]
=
{
"ID_o"
:
IDA_o
,
"ID_s"
:
IDA_s
,
"ID_i"
:
NodesC
}
NodesD_s2i
[
IDA_s
]
=
NodesC
items_
=
{}
items_
=
{}
for
ii
in
nodes_md
[
ID
][
"items"
]:
for
ii
in
nodes_md
[
IDA_o
][
"items"
]:
items_
[
ii
]
=
True
IDB_s
=
"B_"
+
str
(
ii
)
nA2B
[
ID
]
=
items_
if
IDB_s
not
in
NodesD_s2i
:
NodesC
+=
1
NodesD_i2s
[
NodesC
]
=
{
"ID_o"
:
ii
,
"ID_s"
:
IDB_s
,
"ID_i"
:
NodesC
}
NodesD_s2i
[
IDB_s
]
=
NodesC
IDB_i
=
NodesD_s2i
[
IDB_s
]
items_
[
IDB_i
]
=
True
nA2B
[
NodesD_s2i
[
IDA_s
]
]
=
items_
for
i
in
items_
:
for
i
in
items_
:
if
i
not
in
nB2A
:
if
i
not
in
nB2A
:
nB2A
[
i
]
=
{}
nB2A
[
i
]
=
{}
nB2A
[
i
][
ID
]
=
True
nB2A
[
i
][
NodesD_s2i
[
IDA_s
]
]
=
True
from
n_partite_graph
import
nPartiteGraph
from
n_partite_graph
import
nPartiteGraph
bg
=
nPartiteGraph
()
bg
=
nPartiteGraph
()
ress
=
bg
.
BiGraph_2
(
nA2B
,
nB2A
)
graph_b
=
bg
.
BiGraph_2
(
nA2B
,
nB2A
)
graphArray
=
ress
[
0
]
# [graphArray , nodesA_toB.keys() , nodesB_toA.keys() , len(Links)]
GraphB
=
graph_b
[
"G"
]
if
GraphB
.
number_of_edges
()
==
0
:
return
{
"nodes"
:
[]
,
"links"
:
[]
}
Nodes
=
[]
Links
=
[]
for
n
in
GraphB
.
nodes_iter
():
# label = NodesD_i2s[ n ]["ID_s"]
node
=
{
"attributes"
:{},
"r_id"
:
NodesD_i2s
[
n
][
"ID_s"
],
"id"
:
n
,
"label"
:
str
(
NodesD_i2s
[
n
][
"ID_o"
]
),
"type"
:
"mesh_term"
,
"size"
:
GraphB
.
degree
(
n
)
}
Nodes
.
append
(
node
)
# print( node )
# print("")
# print("")
# print(" - - - - ")
# print(" - - - - ")
# print(" - - - - ")
# print("")
# print("")
C_liens
=
len
(
Links
)
+
1
for
e
in
GraphB
.
edges_iter
():
s
=
e
[
0
]
t
=
e
[
1
]
link
=
{
"id"
:
C_liens
,
"s"
:
s
,
"t"
:
t
,
"w"
:
GraphB
[
s
][
t
][
"weight"
]
}
Links
.
append
(
link
)
# print( "\t" , s ,"->", t )
# print( NodesD_i2s[ s ] )
# print( NodesD_i2s[ t ] )
# print("")
C_liens
+=
1
# print("")
# print("now links:")
# print("")
C_liens
=
len
(
Links
)
+
1
for
i
in
phylojson
[
"links"
]:
s_
=
i
[
"s"
]
t_
=
i
[
"t"
]
# print( "\tphylojson" , s_ ,"->", t_ )
# print( phylojson["nodes"][s_] )
# print( phylojson["nodes"][t_] )
s
=
"A_"
+
str
(
s_
)
t
=
"A_"
+
str
(
t_
)
# print( NodesD_s2i[ s ] ,"->", NodesD_s2i[ t ] )
ID_s
=
NodesD_s2i
[
s
]
ID_t
=
NodesD_s2i
[
t
]
link
=
{
"id"
:
C_liens
,
"s"
:
ID_s
,
"t"
:
ID_t
,
"type"
:
"line"
,
"w"
:
i
[
"w"
]
}
Links
.
append
(
link
)
C_liens
+=
1
# for i in graphArray["nodes"]:
# print("_ ",i)
# print("_ ","")
# print("_ "," - - - - - - - - -")
graphArray
[
"links"
]
+=
phylojson
[
"links"
]
# Links += phylojson["links"]
C_liens
=
len
(
graphArray
[
"links"
]
)
+
1
C_liens
=
len
(
Links
)
+
1
# print("")
# print("")
# print(" - - - - - -")
# print(" - - - - - -")
for
cID
in
phylojson
[
"nodes"
]:
for
cID
in
phylojson
[
"nodes"
]:
# print( cID)
ID_s
=
"A_"
+
str
(
cID
)
# if i["label"] in phylojson["nodes"]:
try
:
graphArray
[
"nodes"
]
.
append
(
phylojson
[
"nodes"
][
cID
]
)
ID_i
=
NodesD_s2i
[
ID_s
]
if
cID
in
nA2B
:
# print( cID ,":",ID_i )
for
ngram
in
nA2B
[
cID
]:
# print( "\t",ngram )
node_
=
phylojson
[
"nodes"
][
cID
]
node_
[
"id"
]
=
ID_i
link
=
{
node_
[
"label"
]
=
cID
"id"
:
C_liens
,
# node_["shape"] = "square"
"s"
:
phylojson
[
"nodes"
][
cID
][
"id"
],
# node_["type"] = "Cluster"
"t"
:
ngram
,
# "x":float(coord[0]) ,
"w"
:
1
# "y":float(coord[1]) }
}
Nodes
.
append
(
node_
)
C_liens
+=
1
graphArray
[
"links"
]
.
append
(
link
)
if
ID_i
in
nA2B
:
# # print("")
for
ngram
in
nA2B
[
ID_i
]:
# print(" - - - - - -")
# print( "\t",ngram )
# print("")
link
=
{
# for i in graphArray["links"]:
"id"
:
C_liens
,
# print("_ ",i)
"s"
:
ID_i
,
# print("_ ","")
"t"
:
ngram
,
"w"
:
1
}
Links
.
append
(
link
)
C_liens
+=
1
except
:
xxx
=
10
# a year-node
# # return { "nodes": [] , "links": [] }
# # # print("")
# # print(" - - - - - -")
# # print("")
# # for i in graphArray["links"]:
# # print("_ ",i)
# # print("_ ","")
# for i in graphArray["nodes"]:
# print( i)
# print( "")
# print( " - - - - - - - - -")
# for i in graphArray["links"]:
# print( i)
# print( "")
# # for i in graphArray["nodes"]:
# # print( i)
# # print( "")
# # print( " - - - - - - - - -")
# # for i in graphArray["links"]:
# # print( i)
# # print( "")
# print( "|V_phy|:", len(phylojson["nodes"]))
# Nodes_DD = {}
# print( "|E_phy|:", len(phylojson["links"]))
# for i in Nodes:
# print( "|V|:", len(graphArray["nodes"]))
# print("_ ",i["id"])
# print( "|E|:", len(graphArray["links"]) )
# Nodes_DD[ i["id"] ] = i
# # print("_ ","")
# print("_ "," - - - - - - - - -")
# for i in Links:
# print("_ ",i["s"] ,"->", i["t"] )
# print( Nodes_DD[ i["s"] ] )
# print( Nodes_DD[ i["t"] ] )
# print("")
# print("_ "," - - - - - - - - -")
# # print( "|V_phy|:", len(phylojson["nodes"]))
# # print( "|E_phy|:", len(phylojson["links"]))
# # print( "|V|:", len(graphArray["nodes"]))
# # print( "|E|:", len(graphArray["links"]) )
graphArray
=
{
"nodes"
:
Nodes
,
"links"
:
Links
}
return
graphArray
return
graphArray
...
...
n_partite_graph.py
View file @
d414ea78
...
@@ -225,6 +225,12 @@ class nPartiteGraph:
...
@@ -225,6 +225,12 @@ class nPartiteGraph:
GraphB
=
Graph
.
G
GraphB
=
Graph
.
G
GraphB
.
remove_nodes_from
(
nx
.
isolates
(
GraphB
))
GraphB
.
remove_nodes_from
(
nx
.
isolates
(
GraphB
))
GraphB
=
self
.
normalize_edges
(
GraphB
)
GraphB
=
self
.
normalize_edges
(
GraphB
)
graphArray
=
{
"nodes"
:[],
"links"
:[],
"G"
:
GraphB
}
return
graphArray
# print (len(GraphB))
# print (len(GraphB))
Nodes
=
[]
Nodes
=
[]
...
@@ -249,10 +255,6 @@ class nPartiteGraph:
...
@@ -249,10 +255,6 @@ class nPartiteGraph:
# Links.append(link)
# Links.append(link)
# c += 1
# c += 1
graphArray
=
{
"nodes"
:
Nodes
,
"links"
:
Links
,
}
# pprint.pprint(graphArray["clusters"])
# pprint.pprint(graphArray["clusters"])
...
...
output_2json.py
View file @
d414ea78
...
@@ -352,7 +352,7 @@ class PhyloMaker:
...
@@ -352,7 +352,7 @@ class PhyloMaker:
t
=
e
[
1
]
t
=
e
[
1
]
# if "fake" not in AG[s][t]:
# if "fake" not in AG[s][t]:
# print(e)
# print(e)
infodict
=
{
"s"
:
Phy_D
[
s
]
,
"t"
:
Phy_D
[
t
]
,
"w"
:
AG
[
s
][
t
][
"weight"
]
,
"type"
:
"line"
}
infodict
=
{
"s"
:
s
,
"t"
:
t
,
"w"
:
AG
[
s
][
t
][
"weight"
]
,
"type"
:
"line"
}
EdgesDict
.
append
(
infodict
)
EdgesDict
.
append
(
infodict
)
Graph
=
{
Graph
=
{
...
...
phylum_srv.py
View file @
d414ea78
...
@@ -30,6 +30,17 @@ runner = Runner(app) #*#
...
@@ -30,6 +30,17 @@ runner = Runner(app) #*#
from
bigindex
import
LoadShit
from
bigindex
import
LoadShit
import
urllib
import
urllib
import
findspark
findspark
.
init
()
from
pyspark
import
SparkContext
from
pyspark
import
SparkConf
cfg
=
SparkConf
()
.
set
(
'spark.driver.memory'
,
"40g"
)
.
set
(
'spark.driver.cores'
,
20
)
.
setAppName
(
"simple_app"
)
# .setMaster(cluster_url)
ncores
=
20
sc__
=
SparkContext
(
conf
=
cfg
)
I
=
{}
I
=
{}
class
BabelForm
(
Form
):
class
BabelForm
(
Form
):
...
@@ -147,13 +158,43 @@ def test_post():
...
@@ -147,13 +158,43 @@ def test_post():
# print ("intersecting:", float("{0:.3f}".format((t_f - t_i))) ,"[s]") ##
# print ("intersecting:", float("{0:.3f}".format((t_f - t_i))) ,"[s]") ##
# print("") ##
# print("") ##
sID
=
p_
[
"scontext"
]
sID
=
query
#p_["scontext"]
if
sID
not
in
I
:
if
sID
not
in
I
:
# if len( I.keys() )==0:
# memm=p_["mram"]+"g" ,
# ncores=p_["ncores"]
# theconf = SparkConf().set('spark.driver.memory', memm).set('spark.driver.cores', ncores)
# # .setAppName("broadcastfail")
# # .setMaster(cluster_url)
# sc__ = SparkContext(conf=theconf)
# # SparkContext.setSystemProperty('spark.executor.memory', memm)
# # SparkContext.setSystemProperty('spark.driver.memory', memm)
# # SparkContext.setSystemProperty('spark.executor.cores', ncores)
# # SparkContext.setSystemProperty('spark.driver.cores', ncores)
# # sc__ = SparkContext("local["+ncores+"]","simple app")
print
(
""
)
print
(
""
)
pprint
.
pprint
(
sc__
.
_conf
.
getAll
()
)
print
(
""
)
print
(
""
)
print
(
" - - -- - -"
)
print
(
" - - -- - -"
)
pprint
.
pprint
(
p_
)
print
(
" - - -- - -"
)
print
(
" - - -- - -"
)
periods_
=
range
(
int
(
p_
[
"from_"
])
,
int
(
p_
[
"to_"
])
+
1
)
periods_
=
range
(
int
(
p_
[
"from_"
])
,
int
(
p_
[
"to_"
])
+
1
)
# periods_ = range( 2008 , 2011 )
# periods_ = range( 2008 , 2011 )
minjaccard
=
float
(
p_
[
"minsetdistance"
])
minjaccard
=
float
(
p_
[
"minsetdistance"
])
I
[
sID
]
=
Phylo
(
t
=
periods_
,
minJ
=
float
(
p_
[
"minsetdistance"
])
,
memm
=
p_
[
"mram"
]
+
"g"
,
ncores
=
p_
[
"ncores"
]
)
I
[
sID
]
=
Phylo
(
t
=
periods_
,
minJ
=
float
(
p_
[
"minsetdistance"
])
,
spark_context
=
sc__
,
ncores
=
ncores
)
for
k
in
p_
:
for
k
in
p_
:
...
@@ -170,6 +211,7 @@ def test_post():
...
@@ -170,6 +211,7 @@ def test_post():
print
(
""
)
print
(
""
)
print
(
""
)
print
(
""
)
# executes a fp-growth per year
# executes a fp-growth per year
stats
,
years
,
pairs
,
pairsD
=
I
[
sID
]
.
FPG_chain
(
t
=
periods_
,
WL
=
records
[
"IdList"
]
)
stats
,
years
,
pairs
,
pairsD
=
I
[
sID
]
.
FPG_chain
(
t
=
periods_
,
WL
=
records
[
"IdList"
]
)
I
[
sID
]
.
years
,
I
[
sID
]
.
pairs
,
I
[
sID
]
.
pairsD
=
years
,
pairs
,
pairsD
I
[
sID
]
.
years
,
I
[
sID
]
.
pairs
,
I
[
sID
]
.
pairsD
=
years
,
pairs
,
pairsD
...
...
static/phylomain.js
View file @
d414ea78
...
@@ -322,6 +322,7 @@ $("#pubmed_scan").bind('click', function() {
...
@@ -322,6 +322,7 @@ $("#pubmed_scan").bind('click', function() {
function
get_ngrams
(
query
)
{
function
get_ngrams
(
query
)
{
console
.
log
(
"get_ngrams!!"
)
console
.
log
(
query
)
console
.
log
(
query
)
var
DD
=
{
"elems"
:
query
}
var
DD
=
{
"elems"
:
query
}
$
.
ajax
({
$
.
ajax
({
...
@@ -329,21 +330,41 @@ function get_ngrams( query ) {
...
@@ -329,21 +330,41 @@ function get_ngrams( query ) {
type
:
'POST'
,
type
:
'POST'
,
data
:
DD
,
data
:
DD
,
success
:
function
(
data
)
{
success
:
function
(
data
)
{
console
.
log
(
"get_ngrams!!"
)
console
.
log
(
"data:"
)
console
.
log
(
data
)
for
(
var
i
in
data
){
for
(
var
i
in
data
){
K_i2s
[
i
]
=
data
[
i
]
K_i2s
[
i
]
=
data
[
i
]
}
}
console
.
log
(
"K_i2s:"
)
console
.
log
(
K_i2s
)
console
.
log
(
"iter mesh_terms"
)
for
(
var
i
in
dicts
.
nodes
)
{
for
(
var
i
in
dicts
.
nodes
)
{
if
(
dicts
.
nodes
[
i
].
type
==
"mesh_term"
)
{
if
(
dicts
.
nodes
[
i
].
type
==
"mesh_term"
)
{
console
.
log
(
dicts
.
nodes
[
i
]
)
// console.log ( dicts.nodes[i] )
// console.log ( K_i2s[dicts.nodes[i].label] )
// console.log ( K_i2s[Number(dicts.nodes[i].label)] )
// console.log("")
console
.
log
(
dicts
.
nodes
[
i
].
label
)
console
.
log
(
K_i2s
[
dicts
.
nodes
[
i
].
label
]
)
var
ID
=
dicts
.
nodes
[
i
].
id
var
ID
=
dicts
.
nodes
[
i
].
id
dicts
.
nodes
[
i
].
label
=
K_i2s
[
ID
]
var
newlabel
=
K_i2s
[
dicts
.
nodes
[
i
].
label
]
Nodes
[
ID
].
label
=
K_i2s
[
ID
]
if
(
typeof
(
newlabel
)
!=
"undefined"
)
{
dicts
.
nodes
[
i
].
label
=
newlabel
Nodes
[
ID
].
label
=
newlabel
console
.
log
(
dicts
.
nodes
[
i
]
)
console
.
log
(
Nodes
[
ID
]
)
console
.
log
(
""
)
}
}
}
}
}
partialGraph
.
refresh
()
partialGraph
.
refresh
()
partialGraph
.
draw
()
partialGraph
.
draw
()
},
},
error
:
function
(
jqxhr
,
textStatus
,
errorThrown
)
{
error
:
function
(
jqxhr
,
textStatus
,
errorThrown
)
{
console
.
log
(
jqxhr
);
console
.
log
(
jqxhr
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment