Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
5d951915
Commit
5d951915
authored
Oct 17, 2015
by
PkSM3
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[UPDATE] corpus-comparison: level up!
parent
b1483b9a
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
94 additions
and
66 deletions
+94
-66
urls.py
gargantext_web/urls.py
+2
-0
views.py
gargantext_web/views.py
+1
-20
views_optimized.py
gargantext_web/views_optimized.py
+82
-4
explorer.html
templates/explorer.html
+9
-42
No files found.
gargantext_web/urls.py
View file @
5d951915
...
...
@@ -77,6 +77,8 @@ urlpatterns = patterns('',
url
(
r'^ngrams$'
,
views
.
ngrams
),
# to be removed
url
(
r'^nodeinfo/(\d+)$'
,
views
.
nodeinfo
),
# to be removed ?
url
(
r'^tfidf/(\d+)/(\w+)$'
,
views_optimized
.
tfidf
),
url
(
r'^api/corpusintersection/(\w+)$'
,
views_optimized
.
getCorpusIntersection
),
url
(
r'^api/userportfolio/project/(\d+)/corpuses$'
,
views_optimized
.
getUserPortfolio
),
url
(
r'^project/(\d+)/corpus/(\d+)/(\w+)/update$'
,
views
.
update_nodes
),
# TODO rest to update corpus and information for progress bar
...
...
gargantext_web/views.py
View file @
5d951915
...
...
@@ -566,33 +566,14 @@ def graph(request, project_id, corpus_id, generic=100, specific=100):
project_type_id
=
cache
.
NodeType
[
'Project'
]
.
id
corpus_type_id
=
cache
.
NodeType
[
'Corpus'
]
.
id
results
=
{}
projs
=
session
.
query
(
Node
)
.
filter
(
Node
.
user_id
==
user_id
,
Node
.
type_id
==
project_type_id
)
.
all
()
for
i
in
projs
:
# print(i.id , i.name)
if
i
.
id
not
in
results
:
results
[
i
.
id
]
=
{}
results
[
i
.
id
][
"proj_name"
]
=
i
.
name
results
[
i
.
id
][
"corpuses"
]
=
[]
corpuses
=
session
.
query
(
Node
)
.
filter
(
Node
.
parent_id
==
i
.
id
,
Node
.
type_id
==
corpus_type_id
)
.
all
()
for
j
in
corpuses
:
if
int
(
j
.
id
)
!=
int
(
corpus_id
):
info
=
{
"id"
:
j
.
id
,
"name"
:
j
.
name
}
results
[
i
.
id
][
"corpuses"
]
.
append
(
info
)
# print("\t",j.id , j.name)
# import pprint
# pprint.pprint(results)
# if specific != None and generic != None :
graphurl
=
"corpus/"
+
str
(
corpus_id
)
+
"/node_link.json"
html
=
t
.
render
(
Context
({
\
'debug'
:
settings
.
DEBUG
,
'user'
:
user
,
\
'date'
:
date
,
\
'corpus'
:
corpus
,
\
'project'
:
project
,
\
'corpusinfo'
:
results
,
\
'graphfile'
:
graphurl
,
\
}))
...
...
gargantext_web/views_optimized.py
View file @
5d951915
...
...
@@ -195,7 +195,7 @@ def tfidf(request, corpus_id, ngram_ids):
"""Takes IDs of corpus and ngram and returns list of relevent documents in json format
according to TFIDF score (order is decreasing).
"""
limit
=
6
limit
=
5
nodes_list
=
[]
# filter input
ngram_ids
=
ngram_ids
.
split
(
'a'
)
...
...
@@ -219,7 +219,7 @@ def tfidf(request, corpus_id, ngram_ids):
# print("\tcorpus_id:",corpus_id)
# convert query result to a list of dicts
for
node
,
score
in
nodes_query
:
print
(
"
\t
corpus:"
,
corpus_id
,
"
\t
"
,
node
.
name
)
#
print("\t corpus:",corpus_id,"\t",node.name)
node_dict
=
{
'id'
:
node
.
id
,
'score'
:
score
,
...
...
@@ -229,6 +229,84 @@ def tfidf(request, corpus_id, ngram_ids):
node_dict
[
key
]
=
node
.
hyperdata
[
key
]
nodes_list
.
append
(
node_dict
)
# print("= = = = = = = = \n")
data
=
json
.
dumps
(
nodes_list
)
return
JsonHttpResponse
(
nodes_list
)
def getCorpusIntersection(request, corpuses_ids):
    """Return the average cooccurrence weight of the visible ngrams in a second corpus.

    The view only does work for a POST request carrying a non-empty
    ``nodeids`` field; that field is parsed with ``ast.literal_eval`` and
    each element is converted to ``int``.

    Parameters:
        request: the incoming HTTP request; ``request.method``,
            ``request.POST`` and ``request.user.id`` are read.
        corpuses_ids: string of integer corpus ids separated by the letter
            ``'a'``. The second id is the corpus to compare against.

    Returns:
        ``JsonHttpResponse(False)`` when the request is not a usable POST,
        when fewer than two corpus ids are given, or when the compared corpus
        has no 'Cooccurrence' node owned by the requesting user. Otherwise a
        ``JsonHttpResponse`` wrapping a dict that maps each ngram id found in
        the cooccurrence rows to the sum of its edge weights divided by its
        degree in the graph built below.
    """
    if request.method != 'POST' or not request.POST.get("nodeids"):
        return JsonHttpResponse(False)

    import ast
    # Here are the visible nodes of the initial semantic map.
    # NOTE(review): literal_eval is fed client-supplied text; malformed input
    # raises ValueError/SyntaxError and is not handled here.
    node_ids = [int(i) for i in ast.literal_eval(request.POST["nodeids"])]

    corpus_ids = [int(i) for i in corpuses_ids.split('a')]
    if len(corpus_ids) < 2:
        # There is no second corpus to compare with (previously an IndexError).
        return JsonHttpResponse(False)
    compared_corpus_id = corpus_ids[1]

    cooc_type_id = cache.NodeType['Cooccurrence'].id
    cooc_row = session.query(Node.id).filter(
        Node.user_id == request.user.id,
        Node.parent_id == compared_corpus_id,
        Node.type_id == cooc_type_id,
    ).first()
    if cooc_row is None:
        # .first() yields None (not an empty sequence) when nothing matches,
        # so the former len(...) == 0 test raised TypeError instead.
        return JsonHttpResponse(False)
    cooc_id = cooc_row[0]

    import networkx as nx
    # Undirected graph: direction does not matter here, the cooccurrences are
    # expected to form a triangular matrix.
    G = nx.Graph()

    # Are there visible nodes in the X-axis of the corpus to compare?
    rows_on_x = session.query(NodeNgramNgram).filter(
        NodeNgramNgram.node_id == cooc_id,
        NodeNgramNgram.ngramx_id.in_(node_ids),
    ).all()
    for ngram in rows_on_x:
        G.add_edge(ngram.ngramx_id, ngram.ngramy_id, weight=ngram.score)

    # Are there visible nodes in the Y-axis of the corpus to compare?
    rows_on_y = session.query(NodeNgramNgram).filter(
        NodeNgramNgram.node_id == cooc_id,
        NodeNgramNgram.ngramy_id.in_(node_ids),
    ).all()
    for ngram in rows_on_y:
        # Keep the weight of an edge that the X-axis pass already inserted.
        if not G.has_edge(ngram.ngramx_id, ngram.ngramy_id):
            G.add_edge(ngram.ngramx_id, ngram.ngramy_id, weight=ngram.score)

    # Sum the weight of every edge onto both of its endpoints.
    # G.edges() is iterable in NetworkX 1.x and 2.x alike, unlike edges_iter().
    weight_sums = {}
    for n1, n2 in G.edges():
        weight = G[n1][n2]["weight"]
        if n1 not in weight_sums:
            weight_sums[n1] = 0
        if n2 not in weight_sums:
            weight_sums[n2] = 0
        weight_sums[n1] += weight
        weight_sums[n2] += weight

    # Getting AVG-COOC of each ngram that exists in the cooc-matrix of the
    # compared corpus. Every key has at least one edge, so degree is >= 1.
    FinalDict = {}
    for node in weight_sums:
        FinalDict[node] = weight_sums[node] / G.degree(node)

    return JsonHttpResponse(FinalDict)
def getUserPortfolio(request, project_id):
    """Return the requesting user's projects with their sufficiently large corpora.

    Parameters:
        request: the incoming HTTP request; only ``request.user.username`` is
            read, to look the user id up in ``cache.User``.
        project_id: captured from the URL but not used by this view.

    Returns:
        A ``JsonHttpResponse`` wrapping a dict keyed by project node id. Each
        value is ``{"proj_name": <project name>, "corpuses": [...]}`` where
        every list item is ``{"id": ..., "name": ..., "c": <child count>}``.
        Only corpora with at least 10 child nodes are listed, and projects
        left with no listed corpus are omitted.
    """
    min_children = 10  # corpora with fewer child nodes are not offered

    user_id = cache.User[request.user.username].id
    project_type_id = cache.NodeType['Project'].id
    corpus_type_id = cache.NodeType['Corpus'].id

    results = {}
    projects = session.query(Node).filter(
        Node.user_id == user_id,
        Node.type_id == project_type_id,
    ).all()
    for project in projects:
        corpora = session.query(Node).filter(
            Node.parent_id == project.id,
            Node.type_id == corpus_type_id,
        ).all()

        listed = []
        for corpus in corpora:
            # Counts every node whose parent is this corpus.
            child_count = session.query(func.count(Node.id)).filter(
                Node.parent_id == corpus.id,
            ).scalar()
            if child_count >= min_children:
                listed.append({
                    "id": corpus.id,
                    "name": corpus.name,
                    "c": child_count,
                })

        # Only register the project once it is known to have something to
        # show, instead of creating the entry and deleting it afterwards.
        if listed:
            results[project.id] = {
                "proj_name": project.name,
                "corpuses": listed,
            }

    return JsonHttpResponse(results)
templates/explorer.html
View file @
5d951915
...
...
@@ -308,9 +308,9 @@
</div>
<
div
id=
"topPapers"
></div
>
<!--
<div id="tab-container-top" class='tab-container'>
<
!-- <div id="topPapers"></div> --
>
<div
id=
"tab-container-top"
class=
'tab-container'
style=
"display: none;"
>
<ul
class=
'etabs'
>
<li
id=
"tabmed"
class=
'tab active'
><a
href=
"#tabs3"
>
Medline Pubs
</a></li>
...
...
@@ -326,7 +326,7 @@
</div>
</div>
</div>
-->
...
...
@@ -409,55 +409,22 @@
<div
class=
"modal-header"
>
<button
type=
"button"
class=
"close"
data-dismiss=
"modal"
aria-hidden=
"true"
>
×
</button>
<h
4
class=
"modal-title"
>
Corpus Comparison
</h4
>
<h
3
class=
"modal-title"
>
Corpus Comparison Tool
</h3
>
</div>
<div
class=
"modal-body form-horizontal"
>
Chose another corpus to compare with:
<div
class=
"form-group"
>
<label
class=
"col-lg-2 control-label"
>
</label>
<div
class=
"col-lg-10"
>
<form
id=
"corpuses_form"
role=
"form"
>
<ul>
{% if corpusinfo %}
{% for k1, v1 in corpusinfo.items %}
{% if v1.corpuses|length > 0 %}
<br><li><a
href=
"/project/{{k1}}/"
>
{{v1.proj_name}}
</a><br>
<ul
style=
"list-style-type: none;"
>
{% for c in v1.corpuses %}
<li>
<div
class=
"radio"
>
<label><input
type=
"radio"
id=
"{{c.id}}"
name=
"optradio"
>
<a
href=
"/project/{{k1}}/corpus/{{c.id}}/"
>
{{c.name}}
</a>
</label>
</div>
</li>
{% endfor %}
</ul>
</li>
{% endif %}
{% endfor %}
{% endif %}
</ul>
</form>
<h4>
Choose one corpus:
</h4>
<div
style=
"color:red;"
id=
"selected_corpus"
></div>
<div
id=
"user_portfolio"
>
</div>
</div>
</div>
<div
class=
"modal-footer"
>
<button
id=
"closecorpuses"
type=
"button"
class=
"btn btn-default"
data-dismiss=
"modal"
>
Close
</button>
<button
type=
"button"
class=
"btn btn-primary"
onclick=
'printCorpuses();'
>
Add Tab
</button>
<button
id=
"add_corpus_tab"
type=
"button"
class=
"btn btn-primary"
disabled
onclick=
'printCorpuses();'
>
Add Tab
</button>
</div>
</div>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment