humanities / gargantext · Commits · 03ac1095

Commit 03ac1095
Authored Mar 04, 2015 by PkSM3
Parent: 2d1a9b89

    [UPDATE] pushing for the big merge

Showing 7 changed files with 50 additions and 70 deletions (+50 −70)
Files changed:

  analysis/functions.py                     +1   −6
  gargantext_web/views.py                   +1   −21
  node/admin.py                             +0   −2
  node/models.py                            +39  −29
  parsing/FileParsers/PubmedFileParser.py   +1   −1
  scrap_pubmed/views.py                     +5   −9
  templates/explorer.html                   +3   −2
analysis/functions.py  (view file @ 03ac1095)

@@ -160,14 +160,12 @@ def get_cooc(request=None, corpus_id=None, cooc_id=None, type='node_link', n=150
     if Node.objects.filter(type=type_cooc, parent=corpus).first() is None:
         print("Coocurrences do not exist yet, create it.")
         whitelist = create_whitelist(request.user, corpus, size=n)
-        print("PRINTING WHITELIST:", whitelist)
         cooccurrence_node = create_cooc(user=request.user, corpus=corpus, whitelist=whitelist, size=n)
-        print(cooccurrence_node.id, "Cooc created")
     else:
         cooccurrence_node = Node.objects.filter(type=type_cooc, parent=corpus).first()

     for cooccurrence in NodeNgramNgram.objects.filter(node=cooccurrence_node):
-        # print(cooccurrence.ngramx.terms," <=> ",cooccurrence.ngramy.terms," : ",cooccurrence.score)
+        # print(cooccurrence.ngramx.terms," <=> ",cooccurrence.ngramy.terms,"\t",cooccurrence.score)
         ids[cooccurrence.ngramx.terms] = cooccurrence.ngramx.id
         ids[cooccurrence.ngramy.terms] = cooccurrence.ngramy.id

@@ -180,8 +178,6 @@ def get_cooc(request=None, corpus_id=None, cooc_id=None, type='node_link', n=150
         weight[cooccurrence.ngramy.terms] = weight.get(cooccurrence.ngramy.terms, 0) + cooccurrence.score
         weight[cooccurrence.ngramx.terms] = weight.get(cooccurrence.ngramx.terms, 0) + cooccurrence.score
-    print("\n===================\nNUMBER OF NGRAMS_2:", len(weight.keys()))
     df = pd.DataFrame(matrix).fillna(0)
     x = copy(df.values)
     x = x / x.sum(axis=1)

@@ -194,7 +190,6 @@ def get_cooc(request=None, corpus_id=None, cooc_id=None, type='node_link', n=150
     G = nx.from_numpy_matrix(matrix_filtered)
     G = nx.relabel_nodes(G, dict(enumerate([labels[label] for label in list(df.columns)])))
     #G = nx.relabel_nodes(G, dict(enumerate(df.columns)))
-    print("NUMBER OF NODES_2", len(G))
     # Removing too connected nodes (find automatic way to do it)
     # outdeg = G.degree()
     # to_remove = [n for n in outdeg if outdeg[n] >= 10]
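Note on the hunks above: get_cooc() accumulates pairwise scores into a term-keyed weight dict and a matrix, normalizes the matrix by its row sums, thresholds it, and converts it into a labeled networkx graph. A minimal standalone sketch of that pattern with toy data (not the project's code; from_numpy_array is the modern spelling of the from_numpy_matrix call seen in the diff):

    import networkx as nx
    import numpy as np
    import pandas as pd

    cooc = {0: {0: 0.0, 1: 3.0}, 1: {0: 3.0, 1: 0.0}}   # toy co-occurrence scores
    labels = {0: "brain", 1: "imaging"}                  # matrix index -> term

    df = pd.DataFrame(cooc).fillna(0)
    x = df.values.copy()
    x = x / x.sum(axis=1)                    # normalize by row sums, as written in get_cooc()
    matrix_filtered = np.where(x >= 0.5, x, 0)           # illustrative threshold

    G = nx.from_numpy_array(matrix_filtered)             # from_numpy_matrix in old networkx
    G = nx.relabel_nodes(G, {i: labels[i] for i in df.columns})
    # G now has a weighted edge between "brain" and "imaging"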
gargantext_web/views.py  (view file @ 03ac1095)

@@ -312,15 +312,12 @@ def project(request, project_id):
     if docs_total == 0 or docs_total is None:
         docs_total = 1
     # The donut will show: percentage by
     donut = [{'source': key,
               'count': donut_part[key],
               'part': round(donut_part[key] * 100 / docs_total)} \
                   for key in donut_part.keys()]

     dauser = User.objects.get(username=user)
     groups = len(dauser.groups.filter(name="PubMed_0.1"))
     print("*groupslen*:", groups)

@@ -330,8 +327,6 @@ def project(request, project_id):
     form = CustomForm(request.POST, request.FILES)
     if form.is_valid():
         name = form.cleaned_data['name']
         thefile = form.cleaned_data['file']
         resource_type = ResourceType.objects.get(name=str(form.cleaned_data['type']))

@@ -364,9 +359,7 @@ def project(request, project_id):
             type = node_type,
             name = name,
         )
         corpus.save()
         corpus.add_resource(
             user = request.user,
             type = resource_type,

@@ -386,13 +379,11 @@ def project(request, project_id):
             return HttpResponseRedirect('/project/' + str(project_id))
         except Exception as error:
             print('ee', error)
             form = CorpusForm(request=request)
             formResource = ResourceForm()
     else:
         print("bad form, bad form")
         return render(request, 'project.html', {

@@ -409,8 +400,7 @@ def project(request, project_id):
         })
     else:
         form = CustomForm()
     return render(request, 'project.html', {
         'form': form,
         'user': user,

@@ -666,8 +656,6 @@ def subcorpusJSON(request, project_id, corpus_id, start , end ):
     # return HttpResponse(html)
     return HttpResponse(serializer.data, content_type='application/json')

 def delete_project(request, node_id):
     Node.objects.filter(id=node_id).all().delete()
     return HttpResponseRedirect('/projects/')

@@ -676,7 +664,6 @@ def delete_corpus(request, project_id, corpus_id):
     Node.objects.filter(id=corpus_id).all().delete()
     return HttpResponseRedirect('/project/' + project_id)

 def chart(request, project_id, corpus_id):
     ''' Charts to compare, filter, count'''
     if MAINTENANCE: return HttpResponseRedirect('/maintenance/')

@@ -732,10 +719,6 @@ def graph(request, project_id, corpus_id):
     return HttpResponse(html)

 def exploration(request):
     if MAINTENANCE: return HttpResponseRedirect('/maintenance/')
     t = get_template('exploration.html')

@@ -793,8 +776,6 @@ def corpus_csv(request, project_id, corpus_id):
     return response

 def send_csv(request, corpus_id):
     '''
     Create the HttpResponse object with the appropriate CSV header.

@@ -835,7 +816,6 @@ def send_csv(request, corpus_id):
     return response

 # To get the data
 from gargantext_web.api import JsonHttpResponse
 from analysis.functions import get_cooc
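Note: the first hunk above guards the donut chart against an empty corpus before computing per-source percentages. A toy sketch of the same computation (illustrative numbers, not the view itself):

    donut_part = {'Pubmed': 30, 'ISI': 10}
    docs_total = sum(donut_part.values())
    if docs_total == 0 or docs_total is None:
        docs_total = 1                      # avoid division by zero on an empty corpus

    donut = [{'source': key,
              'count': donut_part[key],
              'part': round(donut_part[key] * 100 / docs_total)}
             for key in donut_part.keys()]
    # -> [{'source': 'Pubmed', 'count': 30, 'part': 75},
    #     {'source': 'ISI',    'count': 10, 'part': 25}]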
node/admin.py  (view file @ 03ac1095)

@@ -132,8 +132,6 @@ class CustomForm(forms.Form):
         # raise forms.ValidationError(_('We need a zip pls.'))
         return file_

 class CorpusForm(ModelForm):
     #parent = ModelChoiceField(EmptyQuerySet)
     def __init__(self, *args, **kwargs):
node/models.py  (view file @ 03ac1095)

@@ -222,12 +222,14 @@ class Node(CTENode):
         associations = defaultdict(float) # float or int?
         if isinstance(keys, dict):
             for key, weight in keys.items():
-                for ngram in extractor.extract_ngrams(self.metadata[key]):
+                text2process = str(self.metadata[key]).replace('[','').replace(']','')
+                for ngram in extractor.extract_ngrams(text2process):
                     terms = ' '.join([token for token, tag in ngram])
                     associations[ngram] += weight
         else:
             for key in keys:
-                for ngram in extractor.extract_ngrams(self.metadata[key]):
+                text2process = str(self.metadata[key]).replace('[','').replace(']','')
+                for ngram in extractor.extract_ngrams(text2process):
                     terms = ' '.join([token for token, tag in ngram])
                     associations[terms] += 1

         Node_Ngram.objects.bulk_create([
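Note: the change above preprocesses metadata fields before ngram extraction, since list-valued fields stringify with brackets that would pollute tokenization. A tiny illustration of that cleanup (the helper name is illustrative, not from the codebase):

    def clean_field(value):
        """Mimics: str(self.metadata[key]).replace('[', '').replace(']', '')"""
        return str(value).replace('[', '').replace(']', '')

    assert clean_field(['deep learning', 'MRI']) == "'deep learning', 'MRI'"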
@@ -318,18 +320,21 @@ class Node(CTENode):
                 language = langages_cache[metadata_values['language_iso2']] if 'language_iso2' in metadata_values else None,
             if isinstance(language, tuple):
                 language = language[0]
-            Node(
+            node = Node(
                 user_id = user_id,
                 type_id = type_id,
                 name = name,
                 parent = self,
                 language_id = language.id if language else None,
                 metadata = metadata_values
-            ).save()
+            )
+            node.save()
+            metadata_values["id"] = node.id

         # # make metadata filterable
         self.children.all().make_metadata_filterable()

         # # mark the resources as parsed for this node
         self.node_resource.update(parsed=True)

+        return metadata_list

     def extract_ngrams__MOV(self, array, keys, ngramsextractorscache=None, ngramscaches=None):
         if ngramsextractorscache is None:

@@ -369,7 +374,7 @@ class Node(CTENode):
                     associations[terms] += 1
             if (len(associations) > 0):
-                results.append([i, associations])
+                results.append([metadata["id"], associations])
             i += 1
         return results
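Note: the old Node(...).save() discarded the instance, so the database id assigned on save was unreachable; keeping a reference lets it be written back into the metadata dict, which is why extract_ngrams__MOV can now key its results by metadata["id"] in the second hunk. A generic sketch of the pattern with a stand-in class rather than the Django model:

    class FakeNode:
        _next_id = 1
        def save(self):
            self.id = FakeNode._next_id      # Django's save() likewise sets .id on INSERT
            FakeNode._next_id += 1

    metadata_values = {"title": "An example title"}
    node = FakeNode()
    node.save()
    metadata_values["id"] = node.id          # downstream results keyed by document id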
@@ -421,7 +426,7 @@ class Node(CTENode):
             ngramid += 1
         # *03* [ / making dictionaries for NGram_Text <=> NGram_ID ]

+        docs_X_terms = {}
         for i in FreqList: # foreach ID in Doc:
             docID = i[0]
             associations = i[1]

@@ -435,9 +440,10 @@ class Node(CTENode):
             ngrams_by_document = termsCount # i re-calculed this because of *02*
             terms = []
+            terms_occ = []
             if ngrams_by_document > 0:
                 for ngram_text, weight in associations.items():
                     if ngram_text in NGram2ID:
                         terms.append(NGram2ID[ngram_text])
                         # [ calculating TF-IDF ]
                         occurrences_of_ngram = weight

@@ -446,6 +452,9 @@ class Node(CTENode):
                         yy = FirstNgrams[ngram_text]["C"]
                         inverse_document_frequency = log(xx/yy) #log base e
                         tfidfScore = term_frequency * inverse_document_frequency
+                        terms_occ.append([NGram2ID[ngram_text], round(tfidfScore, 3)])
                         # [ / calculating TF-IDF ]
                         if "T" in FirstNgrams[ngram_text]:
                             FirstNgrams[ngram_text]["T"].append(tfidfScore)
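Note: a worked example of the TF-IDF computed above, assuming from context that xx is the corpus document count, yy = FirstNgrams[ngram_text]["C"] the number of documents containing the ngram, and term_frequency the within-document frequency:

    from math import log

    num_docs = 100          # xx
    docs_with_ngram = 10    # yy
    term_frequency = 0.05   # occurrences_of_ngram / ngrams_by_document

    inverse_document_frequency = log(num_docs / docs_with_ngram)  # natural log, as in the diff
    tfidf_score = term_frequency * inverse_document_frequency
    print(round(tfidf_score, 3))  # 0.115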
@@ -453,9 +462,13 @@ class Node(CTENode):
                         FirstNgrams[ngram_text]["T"] = [tfidfScore]

             if len(terms) > 1:
+                docs_X_terms[docID] = terms_occ
+                # print("docid:",docID)
+                # for i in terms:
+                # print("\t",ID2NGram[i])
                 calc.addCompleteSubGraph(terms)

-        return {"G": calc.G, "TERMS": ID2NGram, "metrics": FirstNgrams}
+        return {"G": calc.G, "TERMS": ID2NGram, "ii": docs_X_terms, "metrics": FirstNgrams}

     def do_coocmatrix__MOV(self, TERMS, G, n=150, type='node_link'):
         import pandas as pd
@@ -475,20 +488,19 @@ class Node(CTENode):
             n1 = e[0]
             n2 = e[1]
             w = G[n1][n2]['weight']
-            # print("\t",n1," <=> ",n2, " : ", G[n1][n2]['weight'],"\t",TERMS[n1]," <=> ",TERMS[n2], " : ", G[n1][n2]['weight'])
+            # print(n1," <=> ",n2, " : ", G[n1][n2]['weight'],"\t",TERMS[n1]," <=> ",TERMS[n2], "\t", G[n1][n2]['weight'])
             ids[TERMS[n1]] = n1
             ids[TERMS[n2]] = n2
             labels[n1] = TERMS[n1]
             labels[n2] = TERMS[n2]
             matrix[n1][n2] = w
             matrix[n2][n1] = w
-            weight[n2] = weight.get(n2, 0) + w
-            weight[n1] = weight.get(n1, 0) + w
+            weight[TERMS[n2]] = weight.get(TERMS[n2], 0) + w
+            weight[TERMS[n1]] = weight.get(TERMS[n1], 0) + w

-        print("\n===================\nNUMBER OF NGRAMS:", len(weight.keys()))
         df = pd.DataFrame(matrix).fillna(0)
         x = copy(df.values)
         x = x / x.sum(axis=1)
@@ -499,27 +511,23 @@ class Node(CTENode):
         #matrix_filtered = np.where(x > threshold, x, 0)
         #matrix_filtered = matrix_filtered.resize((90,90))
         G = nx.from_numpy_matrix(matrix_filtered)
-        G = nx.relabel_nodes(G, dict(enumerate([labels[label] for label in list(df.columns)])))
-        print("NUMBER OF NODES:", len(G))
+        # G = nx.relabel_nodes(G, dict(enumerate([ labels[label] for label in list(df.columns)])))
         partition = best_partition(G)
         data = []
         if type == "node_link":
             for community in set(partition.values()):
-                #print(community)
                 G.add_node("cluster " + str(community), hidden=1)
             for node in G.nodes():
                 try:
-                    #node,type(labels[node])
-                    G.node[node]['label'] = TERMS[node]
-                    G.node[node]['pk'] = node
+                    G.node[node]['label'] = node
+                    G.node[node]['name'] = node
+                    G.node[node]['pk'] = ids[str(node)]
                     G.node[node]['size'] = weight[node]
                     G.node[node]['group'] = partition[node]
                     G.add_edge(node, "cluster " + str(partition[node]), weight=3)
                 except Exception as error:
-                    print(error)
+                    print("ERROR:", error)
-            print("IMA IN node_link CASE")
             data = json_graph.node_link_data(G)
         elif type == "adjacency":

@@ -533,10 +541,8 @@ class Node(CTENode):
                     #G.add_edge(node, partition[node], weight=3)
                 except Exception as error:
                     print(error)
-            print("IMA IN adjacency CASE")
             data = json_graph.node_link_data(G)

-        print("* * * * FINISHED * * * *")
         return data
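Note on the @@ -499 hunk: the graph is clustered and serialized for the explorer. A standalone sketch of that pattern, assuming python-louvain's best_partition (the project imports a best_partition elsewhere) and a toy graph; G.nodes[...] is the networkx 2.x spelling of the G.node[...] seen in the diff:

    import networkx as nx
    from community import best_partition          # python-louvain
    from networkx.readwrite import json_graph

    G = nx.karate_club_graph()                    # stand-in for the co-occurrence graph
    partition = best_partition(G)                 # node -> community id

    # one hidden hub node per community, linked from its members
    for comm in set(partition.values()):
        G.add_node("cluster " + str(comm), hidden=1)
    for node, comm in partition.items():
        G.nodes[node]['group'] = comm
        G.add_edge(node, "cluster " + str(comm), weight=3)

    data = json_graph.node_link_data(G)           # JSON-serializable dict for the frontend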
@@ -554,14 +560,14 @@ class Node(CTENode):
         total += (end - start)
         print("LOG::TIME:_ " + datetime.datetime.now().isoformat() + " parse_resources()__MOV [s]", (end - start))

         print("LOG::TIME: In workflow() writeMetadata__MOV()")
         start = time.time()
-        self.writeMetadata__MOV(metadata_list=theMetadata)
+        theMetadata = self.writeMetadata__MOV(metadata_list=theMetadata)
         end = time.time()
         total += (end - start)
         print("LOG::TIME:_ " + datetime.datetime.now().isoformat() + " writeMetadata__MOV() [s]", (end - start))

         print("LOG::TIME: In workflow() extract_ngrams__MOV()")
         start = time.time()
         FreqList = self.extract_ngrams__MOV(theMetadata, keys=['title'])
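Note: the workflow timing above repeats the same start/end/total bookkeeping around every stage; a context-manager sketch (not in the codebase) producing the same LOG::TIME output:

    import datetime
    import time
    from contextlib import contextmanager

    @contextmanager
    def log_time(stage, totals):
        start = time.time()
        yield
        elapsed = time.time() - start
        totals[0] += elapsed                 # running total, as the workflow keeps
        print("LOG::TIME:_ " + datetime.datetime.now().isoformat()
              + " " + stage + " [s]", elapsed)

    totals = [0.0]
    with log_time("writeMetadata__MOV()", totals):
        time.sleep(0.01)                     # stand-in for the real stage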
@@ -580,10 +586,14 @@ class Node(CTENode):
         start = time.time()
         print("LOG::TIME: In workflow() do_coocmatrix()")
         jsongraph = self.do_coocmatrix__MOV(resultDict["TERMS"], resultDict["G"], n=150)
+        jsongraph["stats"] = resultDict["ii"]
         end = time.time()
         total += (end - start)
         print("LOG::TIME:_ " + datetime.datetime.now().isoformat() + " do_coocmatrix() [s]", (end - start))

+        # import pprint
+        # pprint.pprint(jsongraph)
         print("the user:", self.user)
         print("the project id:", self.parent.id)
         print("the corpus id:", self.id)
parsing/FileParsers/PubmedFileParser.py  (view file @ 03ac1095)

@@ -25,7 +25,7 @@ class PubmedFileParser(FileParser):
         metadata_path = {
             "journal"       : 'MedlineCitation/Article/Journal/Title',
             "title"         : 'MedlineCitation/Article/ArticleTitle',
-            "abstract"      : 'MedlineCitation/Article/Abstract/AbstractText',
+            # "abstract"    : 'MedlineCitation/Article/Abstract/AbstractText',
             "title2"        : 'MedlineCitation/Article/VernacularTitle',
             "language_iso3" : 'MedlineCitation/Article/Language',
             "doi"           : 'PubmedData/ArticleIdList/ArticleId[@type=doi]',
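Note: these metadata_path entries are ElementTree-style relative paths evaluated against each PubmedArticle element. A hedged sketch of how such a table is typically consumed (the FileParser machinery itself is not in this diff; the doi entry's [@type=doi] predicate is omitted here because ElementTree expects quoted attribute values):

    import xml.etree.ElementTree as ET

    xml = """<PubmedArticle>
      <MedlineCitation><Article>
        <Journal><Title>Nature</Title></Journal>
        <ArticleTitle>An example title</ArticleTitle>
        <Language>eng</Language>
      </Article></MedlineCitation>
    </PubmedArticle>"""

    metadata_path = {
        "journal":       'MedlineCitation/Article/Journal/Title',
        "title":         'MedlineCitation/Article/ArticleTitle',
        "language_iso3": 'MedlineCitation/Article/Language',
    }

    article = ET.fromstring(xml)
    metadata = {key: article.findtext(path) for key, path in metadata_path.items()}
    # {'journal': 'Nature', 'title': 'An example title', 'language_iso3': 'eng'}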
scrap_pubmed/views.py  (view file @ 03ac1095)

@@ -31,7 +31,7 @@ def getGlobalStats(request ):
     alist = ["bar", "foo"]
     if request.method == "POST":
-        N = 100
+        N = 10
         query = request.POST["query"]
         print("LOG::TIME:_ " + datetime.datetime.now().isoformat() + " query =", query)
         print("LOG::TIME:_ " + datetime.datetime.now().isoformat() + " N =", N)

@@ -73,8 +73,6 @@ def doTheQuery(request , project_id):
     alist = ["hola", "mundo"]
     if request.method == "POST":
         query = request.POST["query"]
         name = request.POST["string"]

@@ -101,14 +99,12 @@ def doTheQuery(request , project_id):
     type_id = NodeType.objects.get(name='Document').id
     user_id = User.objects.get(username=request.user).id

     corpus = Node(
         user = request.user,
         parent = parent,
         type = node_type,
         name = name,
     )
     corpus.save()

     tasks = MedlineFetcher()

@@ -132,12 +128,12 @@ def doTheQuery(request , project_id):
     # do the WorkFlow
     try:
         if DEBUG is True:
-            # corpus.workflow() # old times...
-            corpus.workflow__MOV()
+            corpus.workflow() # old times...
+            # corpus.workflow__MOV()
             # corpus.write_everything_to_DB()
         else:
-            # corpus.workflow.apply_async((), countdown=3)
-            corpus.workflow__MOV() # synchronous! because is faaast
+            corpus.workflow.apply_async((), countdown=3)
+            # corpus.workflow__MOV() # synchronous! because is faaast
             # corpus.write_everything_to_DB.apply_async((), countdown=3) # asynchronous
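Note: the DEBUG switch above toggles between running the workflow synchronously and enqueueing it; apply_async((), countdown=3) is Celery's deferred-execution API, which suggests corpus.workflow is a Celery task. A self-contained sketch under that assumption ('proj' and the broker URL are placeholders, not gargantext's configuration):

    from celery import Celery

    app = Celery('proj', broker='redis://localhost:6379/0')

    @app.task
    def workflow(corpus_id):
        print("processing corpus", corpus_id)

    DEBUG = True
    if DEBUG is True:
        workflow(42)                                  # synchronous, blocks the caller
    else:
        workflow.apply_async((42,), countdown=3)      # enqueued, runs in a worker after 3 s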
templates/explorer.html  (view file @ 03ac1095)

@@ -260,8 +260,8 @@
     </div>
+    <div id="topPapers"></div>
+    <!--
     <div id="tab-container-top" class='tab-container'>
         <ul class='etabs'>

@@ -278,6 +278,7 @@
             </div>
         </div>
     </div>
+-->