Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
Phylum
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Castillo
Phylum
Commits
61dbb71b
Commit
61dbb71b
authored
Nov 18, 2016
by
Castillo
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
stable version
parent
d414ea78
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
294 additions
and
124 deletions
+294
-124
PhyloSpark.py
PhyloSpark.py
+134
-85
output_2json.py
output_2json.py
+17
-7
phylum_srv.py
phylum_srv.py
+36
-12
phylomain.js
static/phylomain.js
+64
-12
test.py
test.py
+43
-8
No files found.
PhyloSpark.py
View file @
61dbb71b
...
@@ -10,6 +10,9 @@ import os
...
@@ -10,6 +10,9 @@ import os
import
datetime
import
datetime
import
pprint
import
pprint
import
itertools
import
itertools
import
time
from
itertools
import
combinations
import
networkx
as
nx
def
lineal_comparisons
(
years
):
def
lineal_comparisons
(
years
):
D
=
{}
D
=
{}
...
@@ -113,7 +116,11 @@ class Period:
...
@@ -113,7 +116,11 @@ class Period:
# Psub = P.join( WL ).map(lambda x: (x[0],x[1][0]))
# Psub = P.join( WL ).map(lambda x: (x[0],x[1][0]))
T
=
P
.
join
(
WL
)
.
map
(
lambda
x
:
x
[
1
][
0
]
)
T
=
P
.
join
(
WL
)
.
map
(
lambda
x
:
x
[
1
][
0
]
)
t_i
=
time
.
time
()
##
self
.
P_c
=
T
.
count
()
self
.
P_c
=
T
.
count
()
t_f
=
time
.
time
()
##
T_t
=
"{0:.3f}"
.
format
((
t_f
-
t_i
))
+
"[s]"
##
# # saving term frequencies
# # saving term frequencies
# self.TF = T.flatMap(lambda xs: [x for x in xs]).map(lambda x: (x, 1))
# self.TF = T.flatMap(lambda xs: [x for x in xs]).map(lambda x: (x, 1))
...
@@ -127,6 +134,8 @@ class Period:
...
@@ -127,6 +134,8 @@ class Period:
# - - - - - - - - - - - - - - - - - - - - - #
# - - - - - - - - - - - - - - - - - - - - - #
# self.FI = model.freqItemsets().filter(lambda x: len(x.items)>=minfsetsize and x.freq>=2)
# self.FI = model.freqItemsets().filter(lambda x: len(x.items)>=minfsetsize and x.freq>=2)
# .sortBy(lambda x: x.freq , ascending=False).zipWithIndex().map( lambda x: ( x[1] , x[0][0] , x[0][1] ) ).persist()
# .sortBy(lambda x: x.freq , ascending=False).zipWithIndex().map( lambda x: ( x[1] , x[0][0] , x[0][1] ) ).persist()
t_i
=
time
.
time
()
##
self
.
FI
=
model
.
freqItemsets
()
.
filter
(
lambda
x
:
len
(
x
.
items
)
>=
minfsetsize
and
x
.
freq
>=
2
)
self
.
FI
=
model
.
freqItemsets
()
.
filter
(
lambda
x
:
len
(
x
.
items
)
>=
minfsetsize
and
x
.
freq
>=
2
)
self
.
FI
=
self
.
FI
.
sortBy
(
lambda
x
:
x
.
freq
,
ascending
=
False
)
.
zipWithIndex
()
.
map
(
lambda
x
:
(
x
[
1
],
x
[
0
])
)
.
persist
()
self
.
FI
=
self
.
FI
.
sortBy
(
lambda
x
:
x
.
freq
,
ascending
=
False
)
.
zipWithIndex
()
.
map
(
lambda
x
:
(
x
[
1
],
x
[
0
])
)
.
persist
()
#.filter(lambda x: x[1]<=100).map( lambda x: ( x[1] , x[0][0] , x[0][1] ) ).persist()
#.filter(lambda x: x[1]<=100).map( lambda x: ( x[1] , x[0][0] , x[0][1] ) ).persist()
...
@@ -140,6 +149,10 @@ class Period:
...
@@ -140,6 +149,10 @@ class Period:
# # # # = = [ / Extracting Frequent Itemsets ] = = = # # #
# # # # = = [ / Extracting Frequent Itemsets ] = = = # # #
self
.
FI_c
=
self
.
FI
.
count
()
self
.
FI_c
=
self
.
FI
.
count
()
t_f
=
time
.
time
()
##
FI_t
=
"{0:.3f}"
.
format
((
t_f
-
t_i
))
+
"[s]"
##
# # print("")
# # print("")
# # print("")
# # print("")
# # print("----FI----",year)
# # print("----FI----",year)
...
@@ -174,7 +187,8 @@ class Period:
...
@@ -174,7 +187,8 @@ class Period:
# print( i )
# print( i )
# print( T.take(3) )
# print( T.take(3) )
print
(
"
\t\t\t
|FIs|"
,
" -> "
,
self
.
FI_c
)
print
(
"
\t\t\t
|T|"
,
" -> "
,
self
.
P_c
,
"
\t
"
,
T_t
)
print
(
"
\t\t\t
|FI|"
,
" -> "
,
self
.
FI_c
,
"
\t
"
,
FI_t
)
print
(
""
)
print
(
""
)
return
self
.
P_c
,
self
.
FI_c
return
self
.
P_c
,
self
.
FI_c
...
@@ -195,6 +209,7 @@ class Phylo:
...
@@ -195,6 +209,7 @@ class Phylo:
"from_"
:
{
"type"
:
int
,
"value"
:
-
1
},
"from_"
:
{
"type"
:
int
,
"value"
:
-
1
},
"to_"
:
{
"type"
:
int
,
"value"
:
-
1
},
"to_"
:
{
"type"
:
int
,
"value"
:
-
1
},
"minfpgsupp"
:
{
"type"
:
float
,
"value"
:
0.0001
},
"minfpgsupp"
:
{
"type"
:
float
,
"value"
:
0.0001
},
"minfsetsupp"
:
{
"type"
:
int
,
"value"
:
2
},
"minfsetsize"
:
{
"type"
:
int
,
"value"
:
4
},
"minfsetsize"
:
{
"type"
:
int
,
"value"
:
4
},
"minsetdistance"
:
{
"type"
:
int
,
"value"
:
0
},
"minsetdistance"
:
{
"type"
:
int
,
"value"
:
0
},
"mram"
:
{
"type"
:
int
,
"value"
:
40
},
"mram"
:
{
"type"
:
int
,
"value"
:
40
},
...
@@ -235,7 +250,11 @@ class Phylo:
...
@@ -235,7 +250,11 @@ class Phylo:
N
=
self
.
yearsD
N
=
self
.
yearsD
K
=
self
.
minK
K
=
self
.
minK
WL
=
self
.
sc
.
parallelize
(
WL
)
.
map
(
lambda
x
:
(
int
(
x
)
,
1
)
)
if
len
(
WL
)
>
0
:
WL
=
self
.
sc
.
parallelize
(
WL
)
.
map
(
lambda
x
:
(
int
(
x
)
,
1
)
)
else
:
WL
=
self
.
WL
Distribution
=
{}
Distribution
=
{}
for
y
in
t
:
for
y
in
t
:
period_
=
Period
(
some_sc
=
self
.
sc
,
period
=
y
,
numpart
=
self
.
partitions
)
period_
=
Period
(
some_sc
=
self
.
sc
,
period
=
y
,
numpart
=
self
.
partitions
)
...
@@ -276,8 +295,10 @@ class Phylo:
...
@@ -276,8 +295,10 @@ class Phylo:
}
}
def
get_opossites
(
self
,
found_distances
):
def
get_opossites
(
self
,
found_distances
=
[]
,
filter_s
=
{}
):
print
(
"AAAAAAH"
)
print
(
filter_s
)
data
=
{}
data
=
{}
Nodes
=
{}
Nodes
=
{}
...
@@ -304,8 +325,14 @@ class Phylo:
...
@@ -304,8 +325,14 @@ class Phylo:
for
y
in
period_nodes
:
for
y
in
period_nodes
:
clusters
=
self
.
sc
.
parallelize
(
period_nodes
[
y
]
)
clusters
=
self
.
sc
.
parallelize
(
period_nodes
[
y
]
)
R
=
self
.
yearsD
[
y
]
.
FI
.
join
(
clusters
)
.
map
(
lambda
x
:
[
x
[
0
]
,
list
(
x
[
1
][
0
]
.
items
)
,
x
[
1
][
0
]
.
freq
]
)
.
collect
()
R
=
self
.
yearsD
[
y
]
.
FI
.
join
(
clusters
)
.
map
(
lambda
x
:
[
x
[
0
]
,
list
(
x
[
1
][
0
]
.
items
)
,
x
[
1
][
0
]
.
freq
]
)
for
i
in
R
:
if
"minfsetsupp"
in
filter_s
:
R
=
R
.
filter
(
lambda
x
:
x
[
2
]
>=
filter_s
[
"minfsetsupp"
]
)
if
"minfsetsize"
in
filter_s
:
R
=
R
.
filter
(
lambda
x
:
len
(
x
[
1
])
>=
filter_s
[
"minfsetsize"
]
)
RR
=
R
.
collect
()
# pprint.pprint( RR )
for
i
in
RR
:
cID
=
str
(
y
)
+
"c"
+
str
(
i
[
0
])
cID
=
str
(
y
)
+
"c"
+
str
(
i
[
0
])
if
cID
not
in
data
:
if
cID
not
in
data
:
data
[
cID
]
=
{
data
[
cID
]
=
{
...
@@ -318,7 +345,10 @@ class Phylo:
...
@@ -318,7 +345,10 @@ class Phylo:
def
filter_jaccard
(
self
,
jacc_min
):
def
filter_jaccard
(
self
,
filter_s
=
{}
):
jacc_min
=
filter_s
[
"jacc_min"
]
f__
=
filter_s
# print("\tin filter_jaccard!!")
# print("\tin filter_jaccard!!")
rname
=
datetime
.
datetime
.
now
()
.
isoformat
()
+
""
rname
=
datetime
.
datetime
.
now
()
.
isoformat
()
+
""
...
@@ -338,17 +368,17 @@ class Phylo:
...
@@ -338,17 +368,17 @@ class Phylo:
print
(
"
\t
"
,
jacc_min
,
"-> |JACCARD|:"
,
len
(
found_distances
)
)
print
(
"
\t
"
,
jacc_min
,
"-> |JACCARD|:"
,
len
(
found_distances
)
)
timerange
=
[
1982
,
2014
]
timerange
=
[
1982
,
2014
]
phylojson
=
lll
.
export_phylo
(
liens
=
found_distances
,
T
=
timerange
,
jacc_min
=
jacc_min
)
phylojson
,
Parents
=
lll
.
export_phylo
(
liens
=
found_distances
,
T
=
timerange
,
jacc_min
=
jacc_min
)
nodes_md
=
self
.
get_opossites
(
found_distances
)
nodes_md
=
self
.
get_opossites
(
found_distances
,
filter_s
)
nB2A
=
{}
nB2A
=
{}
nA2B
=
{}
nA2B
=
{}
NodesD_i2s
=
{}
NodesD_i2s
=
{}
NodesD_s2i
=
{}
NodesD_s2i
=
{}
NodesC
=
0
NodesC
=
0
for
IDA_o
in
nodes_md
:
for
IDA_o
in
sorted
(
nodes_md
.
keys
()):
IDA_s
=
"A_"
+
str
(
IDA_o
)
IDA_s
=
"A_"
+
str
(
IDA_o
)
if
IDA_s
not
in
NodesD_s2i
:
if
IDA_s
not
in
NodesD_s2i
:
NodesC
+=
1
NodesC
+=
1
...
@@ -361,6 +391,7 @@ class Phylo:
...
@@ -361,6 +391,7 @@ class Phylo:
items_
=
{}
items_
=
{}
# print( IDA_o )
for
ii
in
nodes_md
[
IDA_o
][
"items"
]:
for
ii
in
nodes_md
[
IDA_o
][
"items"
]:
IDB_s
=
"B_"
+
str
(
ii
)
IDB_s
=
"B_"
+
str
(
ii
)
if
IDB_s
not
in
NodesD_s2i
:
if
IDB_s
not
in
NodesD_s2i
:
...
@@ -373,7 +404,8 @@ class Phylo:
...
@@ -373,7 +404,8 @@ class Phylo:
NodesD_s2i
[
IDB_s
]
=
NodesC
NodesD_s2i
[
IDB_s
]
=
NodesC
IDB_i
=
NodesD_s2i
[
IDB_s
]
IDB_i
=
NodesD_s2i
[
IDB_s
]
items_
[
IDB_i
]
=
True
items_
[
IDB_i
]
=
True
# print("\t",sorted(items_))
# print("")
nA2B
[
NodesD_s2i
[
IDA_s
]
]
=
items_
nA2B
[
NodesD_s2i
[
IDA_s
]
]
=
items_
for
i
in
items_
:
for
i
in
items_
:
...
@@ -381,6 +413,54 @@ class Phylo:
...
@@ -381,6 +413,54 @@ class Phylo:
nB2A
[
i
]
=
{}
nB2A
[
i
]
=
{}
nB2A
[
i
][
NodesD_s2i
[
IDA_s
]
]
=
True
nB2A
[
i
][
NodesD_s2i
[
IDA_s
]
]
=
True
# ETAGES = {}
# print("")
# print("PARENTS!!:")
# for p in sorted( Parents.keys() ):
# TO_MERGE = nx.Graph()
# p_items = sorted(nA2B[ NodesD_s2i[ "A_"+str(p) ] ])
# print(p ,":", p_items)
# p_children = sorted(Parents[p])
# p_children_D = {}
# for j in p_children:
# child_items = nA2B[ NodesD_s2i[ "A_"+str(j) ] ]
# print("\t",j ,":", sorted(child_items) )
# if j in Parents:
# j_children = Parents[j]
# if j not in p_children_D:
# p_children_D[ j ] = set( j_children )
# if len(p_children_D)>0:
# for i in p_children_D:
# TO_MERGE.add_node( i )
# p_j_children_pairs = combinations(p_children_D.keys(), 2)
# for cc in p_j_children_pairs:
# CID1 = cc[0]
# CID2 = cc[1]
# if p_children_D[ CID1 ] == p_children_D[ CID2 ]:
# print( "\t\t\tsame content:",CID1,CID2 )
# TO_MERGE.add_edge( CID1 , CID2 )
# print("")
# h = nx.connected_components(TO_MERGE)
# for ss in h:
# if len(ss)>1:
# print("\t\t\t",ss)
# # print(ss)
# merge_this = {}
# for ss_i in ss:
# print("\t\t\t\t",ss_i)
# # merge_this.union( p_children_D[ ss_i ] )
# elems = p_children_D[ ss_i ]
# for ll in elems:
# merge_this[ ll ] = True
# merge_this = set(merge_this.keys())
# print("\t\t\t",merge_this )
# print("")
# print("")
from
n_partite_graph
import
nPartiteGraph
from
n_partite_graph
import
nPartiteGraph
bg
=
nPartiteGraph
()
bg
=
nPartiteGraph
()
graph_b
=
bg
.
BiGraph_2
(
nA2B
,
nB2A
)
graph_b
=
bg
.
BiGraph_2
(
nA2B
,
nB2A
)
...
@@ -443,18 +523,20 @@ class Phylo:
...
@@ -443,18 +523,20 @@ class Phylo:
s
=
"A_"
+
str
(
s_
)
s
=
"A_"
+
str
(
s_
)
t
=
"A_"
+
str
(
t_
)
t
=
"A_"
+
str
(
t_
)
# print( NodesD_s2i[ s ] ,"->", NodesD_s2i[ t ] )
# print( NodesD_s2i[ s ] ,"->", NodesD_s2i[ t ] )
ID_s
=
NodesD_s2i
[
s
]
if
"hidden"
not
in
i
:
ID_t
=
NodesD_s2i
[
t
]
if
s
in
NodesD_s2i
and
t
in
NodesD_s2i
:
ID_s
=
NodesD_s2i
[
s
]
link
=
{
ID_t
=
NodesD_s2i
[
t
]
"id"
:
C_liens
,
"s"
:
ID_s
,
link
=
{
"t"
:
ID_t
,
"id"
:
C_liens
,
"type"
:
"line"
,
"s"
:
ID_s
,
"w"
:
i
[
"w"
]
"t"
:
ID_t
,
}
"type"
:
"line"
,
Links
.
append
(
link
)
"w"
:
i
[
"w"
]
C_liens
+=
1
}
Links
.
append
(
link
)
C_liens
+=
1
...
@@ -470,68 +552,39 @@ class Phylo:
...
@@ -470,68 +552,39 @@ class Phylo:
for
cID
in
phylojson
[
"nodes"
]:
for
cID
in
phylojson
[
"nodes"
]:
ID_s
=
"A_"
+
str
(
cID
)
ID_s
=
"A_"
+
str
(
cID
)
try
:
try
:
ID_i
=
NodesD_s2i
[
ID_s
]
if
ID_s
in
NodesD_s2i
:
# print( cID ,":",ID_i )
ID_i
=
NodesD_s2i
[
ID_s
]
# print( cID ,":",ID_i )
node_
=
phylojson
[
"nodes"
][
cID
]
node_
[
"id"
]
=
ID_i
node_
=
phylojson
[
"nodes"
][
cID
]
node_
[
"label"
]
=
cID
node_
[
"id"
]
=
ID_i
# node_["shape"] = "square"
node_
[
"label"
]
=
cID
# node_["type"] = "Cluster"
if
cID
in
nodes_md
:
# "x":float(coord[0]) ,
if
"supp"
in
nodes_md
[
cID
]:
# "y":float(coord[1]) }
node_
[
"supp"
]
=
nodes_md
[
cID
][
"supp"
]
Nodes
.
append
(
node_
)
# node_["shape"] = "square"
if
ID_i
in
nA2B
:
# node_["type"] = "Cluster"
for
ngram
in
nA2B
[
ID_i
]:
# "x":float(coord[0]) ,
# print( "\t",ngram )
# "y":float(coord[1]) }
Nodes
.
append
(
node_
)
link
=
{
"id"
:
C_liens
,
if
ID_i
in
nA2B
:
"s"
:
ID_i
,
for
ngram
in
nA2B
[
ID_i
]:
"t"
:
ngram
,
# print( "\t",ngram )
"w"
:
1
}
link
=
{
Links
.
append
(
link
)
"id"
:
C_liens
,
C_liens
+=
1
"s"
:
ID_i
,
"t"
:
ngram
,
"w"
:
1
}
Links
.
append
(
link
)
C_liens
+=
1
except
:
except
:
xxx
=
10
xxx
=
10
# a year-node
# a year-node
# # return { "nodes": [] , "links": [] }
# # # print("")
# # print(" - - - - - -")
# # print("")
# # for i in graphArray["links"]:
# # print("_ ",i)
# # print("_ ","")
# # for i in graphArray["nodes"]:
# # print( i)
# # print( "")
# # print( " - - - - - - - - -")
# # for i in graphArray["links"]:
# # print( i)
# # print( "")
# Nodes_DD = {}
# for i in Nodes:
# print("_ ",i["id"])
# Nodes_DD[ i["id"] ] = i
# # print("_ ","")
# print("_ "," - - - - - - - - -")
# for i in Links:
# print("_ ",i["s"] ,"->", i["t"] )
# print( Nodes_DD[ i["s"] ] )
# print( Nodes_DD[ i["t"] ] )
# print("")
# print("_ "," - - - - - - - - -")
# # print( "|V_phy|:", len(phylojson["nodes"]))
# # print( "|V_phy|:", len(phylojson["nodes"]))
# # print( "|E_phy|:", len(phylojson["links"]))
# # print( "|E_phy|:", len(phylojson["links"]))
# # print( "|V|:", len(graphArray["nodes"]))
# # print( "|V|:", len(graphArray["nodes"]))
...
@@ -601,17 +654,13 @@ class Phylo:
...
@@ -601,17 +654,13 @@ class Phylo:
return
{
"diff_time"
:
{
"Distribution"
:
Distribution
,
"years"
:
nyears
,
"pairs"
:
pairs
,
"pairsD"
:
pairsD
}
}
return
{
"diff_time"
:
{
"Distribution"
:
Distribution
,
"years"
:
nyears
,
"pairs"
:
pairs
,
"pairsD"
:
pairsD
}
}
# print("")
# print("")
# print( "old jacc:", self.minjacc )
# print( "old jacc:", self.minjacc )
# print( "new jacc:", p_["minjaccard"] )
# print( "new jacc:", p_["minjaccard"] )
# phylojson = self.filter_jaccard ( p_["minjaccard"] )
# phylojson = self.filter_jaccard ( p_["minjaccard"] )
# print("")
# print("")
return
None
return
{}
...
...
output_2json.py
View file @
61dbb71b
...
@@ -168,9 +168,9 @@ class PhyloMaker:
...
@@ -168,9 +168,9 @@ class PhyloMaker:
for
y
in
years
:
for
y
in
years
:
AG
.
add_node
(
str
(
y
),
label
=
y
,
fake
=
True
,
shape
=
"plaintext"
)
AG
.
add_node
(
str
(
y
),
label
=
y
,
fake
=
True
,
shape
=
"plaintext"
)
for
i
in
range
(
len
(
years
)
):
for
i
in
sorted
(
years
):
try
:
try
:
AG
.
add_edge
(
str
(
years
[
i
]),
str
(
years
[
i
+
1
]),
fake
=
True
)
AG
.
add_edge
(
str
(
i
),
str
(
i
+
1
),
fake
=
True
,
weight
=
1
)
except
:
except
:
pass
pass
# - - - - - [ / Adding yearly-graph ] - - - - - #
# - - - - - [ / Adding yearly-graph ] - - - - - #
...
@@ -226,8 +226,18 @@ class PhyloMaker:
...
@@ -226,8 +226,18 @@ class PhyloMaker:
# redundant_ = nx.DiGraph()
# redundant_ = nx.DiGraph()
# for n in AG.nodes_iter():
Parents
=
{
}
# node = AG.node[n]
for
n
in
AG
.
nodes_iter
():
node
=
AG
.
node
[
n
]
if
"fake"
not
in
node
:
succesors
=
AG
.
neighbors
(
n
)
if
len
(
succesors
)
>
0
:
Parents
[
n
]
=
sorted
(
succesors
)
# print( n )
# for j in succesors:
# print( "\t",j )
# print("- - - - ")
# print("")
# if "fake" not in node:
# if "fake" not in node:
# parents = AG.predecessors( n )
# parents = AG.predecessors( n )
# if len(parents)>=2:
# if len(parents)>=2:
...
@@ -350,9 +360,9 @@ class PhyloMaker:
...
@@ -350,9 +360,9 @@ class PhyloMaker:
for
e
in
B
.
edges_iter
():
for
e
in
B
.
edges_iter
():
s
=
e
[
0
]
s
=
e
[
0
]
t
=
e
[
1
]
t
=
e
[
1
]
# if "fake" not in AG[s][t]:
# print(e)
infodict
=
{
"s"
:
s
,
"t"
:
t
,
"w"
:
AG
[
s
][
t
][
"weight"
]
,
"type"
:
"line"
}
infodict
=
{
"s"
:
s
,
"t"
:
t
,
"w"
:
AG
[
s
][
t
][
"weight"
]
,
"type"
:
"line"
}
if
"fake"
in
AG
[
s
][
t
]:
infodict
[
"hidden"
]
=
True
EdgesDict
.
append
(
infodict
)
EdgesDict
.
append
(
infodict
)
Graph
=
{
Graph
=
{
...
@@ -366,4 +376,4 @@ class PhyloMaker:
...
@@ -366,4 +376,4 @@ class PhyloMaker:
end
=
time
.
time
()
end
=
time
.
time
()
print
(
float
(
"{0:.2f}"
.
format
(
end
-
start
)),
"[s] : dot layout FIN"
)
print
(
float
(
"{0:.2f}"
.
format
(
end
-
start
)),
"[s] : dot layout FIN"
)
return
Graph
return
Graph
,
Parents
phylum_srv.py
View file @
61dbb71b
...
@@ -111,7 +111,7 @@ def close_contexts():
...
@@ -111,7 +111,7 @@ def close_contexts():
def
test_post
():
def
test_post
():
pprint
.
pprint
(
request
)
pprint
.
pprint
(
request
)
query
=
"void"
query
=
"void"
GG
=
False
GG
=
{
"nodes"
:
[]
,
"links"
:
[]
}
stats
=
False
stats
=
False
records
=
{
"Count"
:
0
}
records
=
{
"Count"
:
0
}
if
request
.
method
==
"POST"
:
if
request
.
method
==
"POST"
:
...
@@ -218,17 +218,23 @@ def test_post():
...
@@ -218,17 +218,23 @@ def test_post():
# # pairs of years to be multiplied
# # pairs of years to be multiplied
I
[
sID
]
.
temp_matching
(
thepairs
=
pairs
)
I
[
sID
]
.
temp_matching
(
thepairs
=
pairs
)
GG
=
I
[
sID
]
.
filter_jaccard
(
jacc_min
=
minjaccard
)
filters_
=
{
"jacc_min"
:
I
[
sID
]
.
p
[
"minsetdistance"
][
"value"
],
"minfsetsize"
:
I
[
sID
]
.
p
[
"minfsetsize"
][
"value"
],
"minfsetsupp"
:
I
[
sID
]
.
p
[
"minfsetsupp"
][
"value"
],
}
GG
=
I
[
sID
]
.
filter_jaccard
(
filter_s
=
filters_
)
Ya
=
p_
[
"from_"
]
if
len
(
GG
[
"links"
]
)
>
0
:
Yb
=
p_
[
"to_"
]
Ya
=
p_
[
"from_"
]
GG_v
=
str
(
len
(
GG
[
"nodes"
]
)
)
Yb
=
p_
[
"to_"
]
GG_e
=
str
(
len
(
GG
[
"links"
]
)
)
GG_v
=
str
(
len
(
GG
[
"nodes"
]
)
)
query_file
=
query
.
replace
(
" "
,
"_"
)
+
"__"
+
Ya
+
"-"
+
Yb
+
"__"
+
GG_v
+
"x"
+
GG_e
GG_e
=
str
(
len
(
GG
[
"links"
]
)
)
f
=
open
(
"static/Phylo/data/"
+
query_file
+
".json"
,
"w"
)
query_file
=
query
.
replace
(
" "
,
"_"
)
+
"__"
+
Ya
+
"-"
+
Yb
+
"__"
+
GG_v
+
"x"
+
GG_e
f
.
write
(
json
.
dumps
(
GG
,
indent
=
1
)
)
f
=
open
(
"static/Phylo/data/"
+
query_file
+
".json"
,
"w"
)
f
.
close
()
f
.
write
(
json
.
dumps
(
GG
,
indent
=
1
)
)
f
.
close
()
else
:
else
:
print
(
"
\n
Your
\"
"
,
sID
,
"
\"
instance has been MODIFIED."
)
print
(
"
\n
Your
\"
"
,
sID
,
"
\"
instance has been MODIFIED."
)
...
@@ -236,6 +242,7 @@ def test_post():
...
@@ -236,6 +242,7 @@ def test_post():
pprint
.
pprint
(
I
[
sID
]
.
p
)
pprint
.
pprint
(
I
[
sID
]
.
p
)
print
(
""
)
print
(
""
)
params_
=
{}
params_
=
{}
# # Updating I[ sID ].p parameters with new ones # #
for
k
in
p_
:
for
k
in
p_
:
if
"scontext"
!=
k
:
if
"scontext"
!=
k
:
try
:
try
:
...
@@ -258,8 +265,25 @@ def test_post():
...
@@ -258,8 +265,25 @@ def test_post():
I
[
sID
]
.
pairsD
=
diff_ress
[
"diff_time"
][
"pairsD"
]
I
[
sID
]
.
pairsD
=
diff_ress
[
"diff_time"
][
"pairsD"
]
I
[
sID
]
.
temp_matching
(
thepairs
=
diff_ress
[
"diff_time"
][
"pairs"
]
)
I
[
sID
]
.
temp_matching
(
thepairs
=
diff_ress
[
"diff_time"
][
"pairs"
]
)
GG
=
I
[
sID
]
.
filter_jaccard
(
jacc_min
=
I
[
sID
]
.
p
[
"minsetdistance"
][
"value"
]
)
filters_
=
{
"jacc_min"
:
I
[
sID
]
.
p
[
"minsetdistance"
][
"value"
],
"minfsetsize"
:
I
[
sID
]
.
p
[
"minfsetsize"
][
"value"
],
"minfsetsupp"
:
I
[
sID
]
.
p
[
"minfsetsupp"
][
"value"
],
}
GG
=
I
[
sID
]
.
filter_jaccard
(
filter_s
=
filters_
)
else
:
pairs
=
I
[
sID
]
.
pairs
I
[
sID
]
.
temp_matching
(
thepairs
=
pairs
)
filters_
=
{
"jacc_min"
:
I
[
sID
]
.
p
[
"minsetdistance"
][
"value"
],
"minfsetsize"
:
I
[
sID
]
.
p
[
"minfsetsize"
][
"value"
],
"minfsetsupp"
:
I
[
sID
]
.
p
[
"minfsetsupp"
][
"value"
],
}
GG
=
I
[
sID
]
.
filter_jaccard
(
filter_s
=
filters_
)
if
len
(
GG
[
"links"
]
)
>
0
:
Ya
=
str
(
I
[
sID
]
.
p
[
"from_"
][
"value"
])
Ya
=
str
(
I
[
sID
]
.
p
[
"from_"
][
"value"
])
Yb
=
str
(
I
[
sID
]
.
p
[
"to_"
][
"value"
])
Yb
=
str
(
I
[
sID
]
.
p
[
"to_"
][
"value"
])
GG_v
=
str
(
len
(
GG
[
"nodes"
]
)
)
GG_v
=
str
(
len
(
GG
[
"nodes"
]
)
)
...
...
static/phylomain.js
View file @
61dbb71b
...
@@ -126,13 +126,21 @@ function dict_diff(obj1, obj2) {
...
@@ -126,13 +126,21 @@ function dict_diff(obj1, obj2) {
}
}
var
K_i2s
=
{}
var
K_i2s
=
{}
var
K_oi2i
=
{}
// var K_s2i = {}
// var K_s2i = {}
var
loader_
=
'<img width=20 src="/static/Phylo/libs/img2/loading-bar.gif"></img>'
var
loader_
=
'<img width=20 src="/static/Phylo/libs/img2/loading-bar.gif"></img>'
var
G
=
{
var
G
=
{
"params_t0"
:
{},
"params_t0"
:
{},
}
}
var
Clusters_2DEL
=
{}
var
Terms_2DEL
=
{}
var
POST_
=
false
var
POST_
=
false
// "scontext"
// "scontext"
...
@@ -154,6 +162,32 @@ function getParams(form , children_ ) {
...
@@ -154,6 +162,32 @@ function getParams(form , children_ ) {
return
p_
;
return
p_
;
}
}
$
(
"#remove_terms"
).
click
(
function
(){
})
$
(
"#remove_clusters"
).
click
(
function
(){
console
.
log
(
"removing clusteeeers"
)
if
(
!
$
.
isEmptyObject
(
selections
))
{
for
(
var
cID
in
selections
)
{
if
(
Nodes
[
cID
].
type
==
"Cluster"
)
{
partialGraph
.
dropNode
(
cID
)
try
{
delete
Nodes
[
cID
]
delete
dicts
.
nodes
[
cID
]
delete
dicts
.
D2N
[
cID
]
delete
Relations
[
"1|1"
][
cID
]
}
catch
(
err
)
{
var
xxxxx
=
111
}
}
}
partialGraph
.
refresh
()
partialGraph
.
draw
()
}
})
function
send_params
(
D
)
{
function
send_params
(
D
)
{
var
query
=
$
(
"#pubmedquery"
).
val
().
slice
()
var
query
=
$
(
"#pubmedquery"
).
val
().
slice
()
...
@@ -209,7 +243,7 @@ function send_params( D ) {
...
@@ -209,7 +243,7 @@ function send_params( D ) {
$
(
"#pubmed_fetch"
).
bind
(
'click'
,
function
()
{
$
(
"#pubmed_fetch"
).
bind
(
'click'
,
function
()
{
console
.
log
(
"
hola mundo
"
)
console
.
log
(
"
pubmed_fetch
"
)
var
URL
=
"<URL>"
var
URL
=
"<URL>"
...
@@ -236,8 +270,20 @@ $("#pubmed_fetch").bind('click', function() {
...
@@ -236,8 +270,20 @@ $("#pubmed_fetch").bind('click', function() {
var
params_t1
=
getParams
(
"phyloform"
,
"input"
)
var
params_t1
=
getParams
(
"phyloform"
,
"input"
)
var
params_diff
=
dict_diff
(
params_t1
,
G
[
"params_t0"
]
)
var
params_diff
=
dict_diff
(
params_t1
,
G
[
"params_t0"
]
)
console
.
log
(
""
)
console
.
log
(
""
)
console
.
log
(
""
)
console
.
log
(
"DIFF TIMES!!!"
)
console
.
log
(
params_t1
)
console
.
log
(
G
[
"params_t0"
]
)
console
.
log
(
" - - - - - - - "
)
console
.
log
(
params_diff
)
console
.
log
(
""
)
console
.
log
(
""
)
console
.
log
(
""
)
// spark context has changed -> change everything
// spark context has changed -> change everything
if
(
"
scontext
"
in
params_diff
)
{
if
(
"
query
"
in
params_diff
)
{
return
send_params
(
params_t1
)
return
send_params
(
params_t1
)
}
}
...
@@ -331,32 +377,32 @@ function get_ngrams( query ) {
...
@@ -331,32 +377,32 @@ function get_ngrams( query ) {
data
:
DD
,
data
:
DD
,
success
:
function
(
data
)
{
success
:
function
(
data
)
{
console
.
log
(
"get_ngrams!!"
)
console
.
log
(
"get_ngrams!!"
)
console
.
log
(
"data:"
)
//
console.log( "data:" )
console
.
log
(
data
)
//
console.log( data )
for
(
var
i
in
data
){
for
(
var
i
in
data
){
K_i2s
[
i
]
=
data
[
i
]
K_i2s
[
i
]
=
data
[
i
]
}
}
console
.
log
(
"K_i2s:"
)
//
console.log( "K_i2s:" )
console
.
log
(
K_i2s
)
//
console.log( K_i2s )
console
.
log
(
"iter mesh_terms"
)
//
console.log("iter mesh_terms")
for
(
var
i
in
dicts
.
nodes
)
{
for
(
var
i
in
dicts
.
nodes
)
{
if
(
dicts
.
nodes
[
i
].
type
==
"mesh_term"
)
{
if
(
dicts
.
nodes
[
i
].
type
==
"mesh_term"
)
{
// console.log ( dicts.nodes[i] )
// console.log ( dicts.nodes[i] )
// console.log ( K_i2s[dicts.nodes[i].label] )
// console.log ( K_i2s[dicts.nodes[i].label] )
// console.log ( K_i2s[Number(dicts.nodes[i].label)] )
// console.log ( K_i2s[Number(dicts.nodes[i].label)] )
// console.log("")
// console.log("")
console
.
log
(
dicts
.
nodes
[
i
].
label
)
//
console.log( dicts.nodes[i].label )
console
.
log
(
K_i2s
[
dicts
.
nodes
[
i
].
label
]
)
//
console.log( K_i2s[dicts.nodes[i].label] )
var
ID
=
dicts
.
nodes
[
i
].
id
var
ID
=
dicts
.
nodes
[
i
].
id
var
newlabel
=
K_i2s
[
dicts
.
nodes
[
i
].
label
]
var
newlabel
=
K_i2s
[
dicts
.
nodes
[
i
].
label
]
if
(
typeof
(
newlabel
)
!=
"undefined"
)
{
if
(
typeof
(
newlabel
)
!=
"undefined"
)
{
K_oi2i
[
newlabel
]
=
ID
dicts
.
nodes
[
i
].
label
=
newlabel
dicts
.
nodes
[
i
].
label
=
newlabel
Nodes
[
ID
].
label
=
newlabel
Nodes
[
ID
].
label
=
newlabel
console
.
log
(
dicts
.
nodes
[
i
]
)
console
.
log
(
Nodes
[
ID
]
)
console
.
log
(
""
)
}
}
}
}
}
}
...
@@ -364,6 +410,12 @@ function get_ngrams( query ) {
...
@@ -364,6 +410,12 @@ function get_ngrams( query ) {
partialGraph
.
draw
()
partialGraph
.
draw
()
labels
=
[]
for
(
var
kk
in
K_i2s
){
updateSearchLabels
(
kk
,
K_i2s
[
kk
]
,
"mesh_term"
);
}
},
},
error
:
function
(
jqxhr
,
textStatus
,
errorThrown
)
{
error
:
function
(
jqxhr
,
textStatus
,
errorThrown
)
{
...
...
test.py
View file @
61dbb71b
...
@@ -136,25 +136,60 @@ def test_workflow():
...
@@ -136,25 +136,60 @@ def test_workflow():
import
time
import
time
print
(
"hello"
)
print
(
"hello"
)
minsupp
=
0.0001
numpart
=
100
minfsetsize
=
4
import
findspark
findspark
.
init
()
from
pyspark.mllib.fpm
import
FPGrowth
from
pyspark
import
SparkContext
from
pyspark
import
SparkConf
cfg
=
SparkConf
()
.
set
(
'spark.driver.memory'
,
"40g"
)
.
set
(
'spark.driver.cores'
,
20
)
.
setAppName
(
"simple_app"
)
ncores
=
20
sc__
=
SparkContext
(
conf
=
cfg
)
from
PhyloSpark
import
Phylo
from
PhyloSpark
import
Phylo
periods_
=
[
1983
,
1984
]
the_
=
Phylo
(
t
=
periods_
,
memm
=
"20g"
,
ncores
=
"24"
)
periods_
=
range
(
2003
,
2005
+
1
)
the_
=
Phylo
(
t
=
periods_
,
minJ
=
0.0
,
spark_context
=
sc__
,
ncores
=
ncores
)
# WL = getWL( the_.sc , "/datasets/PubMed2014/chikungunya.txt" )
# WL = getWL( the_.sc , "/datasets/PubMed2014/chikungunya.txt" )
WL_path
=
"/datasets/PubMed2014/chikungunya.txt"
WL_path
=
"/datasets/PubMed2014/chikungunya.txt"
WL
=
the_
.
sc
.
textFile
(
WL_path
)
.
map
(
lambda
line
:
(
int
(
line
.
strip
())
,
1
)
)
# WL_path = "/datasets/PubMed2014/gut_AND_brain.txt"
WL
=
sc__
.
textFile
(
WL_path
)
.
map
(
lambda
line
:
(
int
(
line
.
strip
())
,
1
)
)
# WL = getWL( the_.sc , "/datasets/PubMed2014/cell-aging.txt" )
# WL = getWL( the_.sc , "/datasets/PubMed2014/cell-aging.txt" )
# WL = getWL( the_.sc , "/datasets/PubMed2014/rheumatoid-arthritis.txt" )
# WL = getWL( the_.sc , "/datasets/PubMed2014/rheumatoid-arthritis.txt" )
for
i
in
range
(
1983
,
2015
):
for
i
in
range
(
2003
,
2005
+
1
):
#
start = time.time()
start
=
time
.
time
()
period
=
str
(
i
)
period
=
str
(
i
)
print
(
period
)
# print(period)
Psub
=
interDataSet
(
the_
.
sc
,
period
,
WL
)
T
=
interDataSet
(
sc__
,
period
,
WL
)
.
map
(
lambda
x
:
x
[
1
]
)
# print("\t",len(ress.collect()))
print
(
"
\t
"
,
period
,
"->"
,
len
(
T
.
collect
()))
# print( "\t", T.take(1))
model
=
FPGrowth
.
train
(
T
,
minSupport
=
minsupp
,
numPartitions
=
numpart
)
FI_all_c
=
model
.
freqItemsets
()
.
count
()
print
(
"
\t\t
|FI|"
,
FI_all_c
)
t_i
=
time
.
time
()
##
FI
=
model
.
freqItemsets
()
.
filter
(
lambda
x
:
len
(
x
.
items
)
>=
minfsetsize
and
x
.
freq
>=
2
)
FI
=
FI
.
sortBy
(
lambda
x
:
x
.
freq
,
ascending
=
False
)
.
zipWithIndex
()
.
map
(
lambda
x
:
(
x
[
1
],
x
[
0
])
)
.
persist
()
FI_c
=
FI
.
count
()
t_f
=
time
.
time
()
##
FI_t
=
"{0:.3f}"
.
format
((
t_f
-
t_i
))
+
"[s]"
##
print
(
"
\t\t
|FI_| "
,
FI_c
,
"
\t
"
,
FI_t
)
print
(
""
)
# end = time.time()
# end = time.time()
# print("\t\t",end - start)
# print("\t\t",end - start)
print
(
""
)
print
(
""
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment