Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
8fcfc803
Commit
8fcfc803
authored
May 25, 2016
by
delanoe
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'romain-refactoring' into unstable
parents
a01472c3
8ba25bfd
Changes
13
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
817 additions
and
494 deletions
+817
-494
constants.py
gargantext/constants.py
+10
-5
__init__.py
gargantext/util/toolchain/__init__.py
+1
-176
main.py
gargantext/util/toolchain/main.py
+274
-0
metric_specificity.py
gargantext/util/toolchain/metric_specificity.py
+1
-1
metric_tfidf.py
gargantext/util/toolchain/metric_tfidf.py
+2
-2
metrics.py
gargantext/views/api/metrics.py
+42
-0
ngramlists.py
gargantext/views/api/ngramlists.py
+72
-20
nodes.py
gargantext/views/api/nodes.py
+1
-1
urls.py
gargantext/views/api/urls.py
+9
-0
NGrams_dyna_chart_and_table.js
static/lib/gargantext/NGrams_dyna_chart_and_table.js
+372
-275
garganrest.js
static/lib/gargantext/garganrest.js
+9
-4
tables.css
static/lib/gargantext/tables.css
+2
-5
project.html
templates/pages/projects/project.html
+22
-5
No files found.
gargantext/constants.py
View file @
8fcfc803
...
@@ -13,10 +13,12 @@ LISTTYPES = {
...
@@ -13,10 +13,12 @@ LISTTYPES = {
'MAINLIST'
:
UnweightedList
,
'MAINLIST'
:
UnweightedList
,
'MAPLIST'
:
UnweightedList
,
'MAPLIST'
:
UnweightedList
,
'SPECIFICITY'
:
WeightedList
,
'SPECIFICITY'
:
WeightedList
,
'OCCURRENCES'
:
WeightedIndex
,
#
todo replace by
WeightedList
'OCCURRENCES'
:
WeightedIndex
,
#
could be
WeightedList
'COOCCURRENCES'
:
WeightedMatrix
,
'COOCCURRENCES'
:
WeightedMatrix
,
'TFIDF-CORPUS'
:
WeightedIndex
,
# todo split -> WeightedList for ti_rank and WeightedIndex for tfidf
'TFIDF-CORPUS'
:
WeightedIndex
,
'TFIDF-GLOBAL'
:
WeightedIndex
,
# todo split -> WeightedList for ti_rank and WeightedIndex for tfidf
'TFIDF-GLOBAL'
:
WeightedIndex
,
'TIRANK-LOCAL'
:
WeightedIndex
,
# could be WeightedList
'TIRANK-GLOBAL'
:
WeightedIndex
# could be WeightedList
}
}
NODETYPES
=
[
NODETYPES
=
[
...
@@ -40,8 +42,11 @@ NODETYPES = [
...
@@ -40,8 +42,11 @@ NODETYPES = [
'TFIDF-CORPUS'
,
# 13
'TFIDF-CORPUS'
,
# 13
'TFIDF-GLOBAL'
,
# 14
'TFIDF-GLOBAL'
,
# 14
# docs subset
# docs subset
'FAVORITES'
# 15
'FAVORITES'
,
# 15
# TODO add ti RANK
# more scores (sorry!)
'TIRANK-LOCAL'
,
# 16
'TIRANK-GLOBAL'
,
# 17
]
]
INDEXED_HYPERDATA
=
{
INDEXED_HYPERDATA
=
{
...
...
gargantext/util/toolchain/__init__.py
View file @
8fcfc803
from
.main
import
parse_extract_indexhyperdata
from
gargantext.settings
import
DEBUG
from
.parsing
import
parse
from
.ngrams_extraction
import
extract_ngrams
from
.hyperdata_indexing
import
index_hyperdata
# in usual run order
from
.list_stop
import
do_stoplist
from
.ngram_groups
import
compute_groups
from
.metric_tfidf
import
compute_occs
,
compute_tfidf_local
,
compute_ti_ranking
from
.list_main
import
do_mainlist
from
.ngram_coocs
import
compute_coocs
from
.metric_specificity
import
compute_specificity
from
.list_map
import
do_maplist
# TEST
from
.mail_notification
import
notify_owner
from
gargantext.util.db
import
session
from
gargantext.models
import
Node
from
datetime
import
datetime
from
celery
import
shared_task
#@shared_task
def parse_extract(corpus):
    """
    Parse a corpus, then extract its ngrams.

    `corpus` is either a Node or an integer node id; an id is looked up
    in the database first. Progress is reported with print().

    Returns None in every case: when no node matches the id, when
    corpus.status() reports an error after parsing, and on success.
    """
    # retrieve corpus from database from id
    if isinstance(corpus, int):
        corpus_id = corpus
        corpus = session.query(Node).filter(Node.id == corpus_id).first()
        if corpus is None:
            print('NO SUCH CORPUS: #%d' % corpus_id)
            return None

    # apply actions
    print('CORPUS #%d' % corpus.id)
    parse(corpus)

    # was there an error in the process ?
    if corpus.status()['error']:
        # corpus.id (not corpus_id): corpus_id is only bound when the caller
        # passed an int, so the old message crashed for Node arguments
        print("ERROR: aborting parse_extract for corpus #%i" % corpus.id)
        return None

    print('CORPUS #%d: parsed' % corpus.id)
    extract_ngrams(corpus)
    print('CORPUS #%d: extracted ngrams' % corpus.id)
    return None
@shared_task
def parse_extract_indexhyperdata(corpus):
    """
    Run the full toolchain on a corpus, in this order: parsing, ngram
    extraction, hyperdata indexing, creation of the FAVORITES child node,
    then the ngram lists workflow (stoplist, groups, occurrences,
    ti ranking, mainlist, local tfidf, cooccurrences, specificity, maplist).

    `corpus` is either a Node or an integer node id (looked up in the DB).
    Each stage is recorded through corpus.status() and committed; progress
    is also printed. Always returns None.
    """
    # an integer argument is an id: fetch the matching node
    if isinstance(corpus, int):
        wanted_id = corpus
        corpus = session.query(Node).filter(Node.id == wanted_id).first()
        if corpus is None:
            print('NO SUCH CORPUS: #%d' % wanted_id)
            return

    def checkpoint(action, **state):
        # store the state of `action` in the corpus and persist it at once
        corpus.status(action, **state)
        corpus.save_hyperdata()
        session.commit()

    # Instantiate status
    checkpoint('Workflow', progress=1)
    # FIXME: 'Workflow' will still be incomplete when 'Index' and 'Lists' get
    #        stacked into hyperdata['statuses'], but corpus.status() returns
    #        only the 1st incomplete action (it doesn't understand
    #        "subactions")

    # --- parsing and extraction ---
    print('CORPUS #%d' % corpus.id)
    parse(corpus)
    print('CORPUS #%d: parsed' % corpus.id)
    extract_ngrams(corpus)

    # --- indexing ---
    checkpoint('Index', progress=0)
    print('CORPUS #%d: extracted ngrams' % corpus.id)
    index_hyperdata(corpus)
    print('CORPUS #%d: indexed hyperdata' % corpus.id)

    # -> 'favorites' node
    favorites_name = 'favorite docs in "%s"' % corpus.name
    favorites = corpus.add_child(typename='FAVORITES', name=favorites_name)
    session.add(favorites)
    session.commit()
    print('CORPUS #%d: [%s] new favorites node #%i'
          % (corpus.id, t(), favorites.id))

    checkpoint('Index', progress=1, complete=True)

    # -------------------------------
    # temporary ngram lists workflow
    # -------------------------------
    checkpoint('Lists', progress=0)
    print('CORPUS #%d: [%s] starting ngram lists computation'
          % (corpus.id, t()))

    # -> stoplist: filter + write (to Node and NodeNgram)
    stoplist = do_stoplist(corpus)
    print('CORPUS #%d: [%s] new stoplist node #%i'
          % (corpus.id, t(), stoplist))

    # -> write groups to Node and NodeNgramNgram
    grouplist = compute_groups(corpus, stoplist_id=None)
    print('CORPUS #%d: [%s] new grouplist node #%i'
          % (corpus.id, t(), grouplist))

    # -> write occurrences to Node and NodeNodeNgram (todo: NodeNgram)
    occurrences = compute_occs(corpus, groupings_id=grouplist)
    print('CORPUS #%d: [%s] new occs node #%i'
          % (corpus.id, t(), occurrences))

    # -> write cumulated ti_ranking (tfidf ranking vector)
    #    to Node and NodeNodeNgram (todo: NodeNgram)
    ti_ranking = compute_ti_ranking(
        corpus,
        groupings_id=grouplist,
        count_scope="global",
    )
    print('CORPUS #%d: [%s] new ti ranking node #%i'
          % (corpus.id, t(), ti_ranking))

    # -> mainlist: filter + write (to Node and NodeNgram)
    mainlist = do_mainlist(
        corpus,
        ranking_scores_id=ti_ranking,
        stoplist_id=stoplist,
    )
    print('CORPUS #%d: [%s] new mainlist node #%i'
          % (corpus.id, t(), mainlist))

    # -> write local tfidf similarities to Node and NodeNodeNgram
    #    => used for doc <=> ngram association
    local_tfidf = compute_tfidf_local(
        corpus,
        on_list_id=mainlist,
        groupings_id=grouplist,
    )
    print('CORPUS #%d: [%s] new localtfidf node #%i'
          % (corpus.id, t(), local_tfidf))

    # -> cooccurrences on mainlist: the result is passed on directly
    cooc_result = compute_coocs(
        corpus,
        on_list_id=mainlist,
        groupings_id=grouplist,
        just_pass_result=True,
    )
    print('CORPUS #%d: [%s] computed mainlist coocs for specif rank'
          % (corpus.id, t()))

    # -> specificity: compute + write (=> NodeNodeNgram)
    #    no need here for subforms: cooc already counted them in mainform
    specificity = compute_specificity(corpus, cooc_matrix=cooc_result)
    print('CORPUS #%d: [%s] new specificity node #%i'
          % (corpus.id, t(), specificity))

    # -> maplist: compute + write (to Node and NodeNgram)
    maplist = do_maplist(
        corpus,
        mainlist_id=mainlist,
        specificity_id=specificity,
        grouplist_id=grouplist,
    )
    print('CORPUS #%d: [%s] new maplist node #%i'
          % (corpus.id, t(), maplist))

    print('CORPUS #%d: [%s] FINISHED ngram lists computation'
          % (corpus.id, t()))
    checkpoint('Lists', progress=0, complete=True)

    # the owner is only notified when DEBUG is exactly False
    if DEBUG is False:
        print('CORPUS #%d: [%s] FINISHED Sending email notification'
              % (corpus.id, t()))
        notify_owner(corpus)

    checkpoint('Workflow', progress=10, complete=True)
def t():
    """Return the current time (datetime.now()) as 'YYYY-MM-DD_HH:MM:SS'."""
    right_now = datetime.now()
    return "{:%Y-%m-%d_%H:%M:%S}".format(right_now)
gargantext/util/toolchain/main.py
0 → 100644
View file @
8fcfc803
from
gargantext.settings
import
DEBUG
from
.parsing
import
parse
from
.ngrams_extraction
import
extract_ngrams
from
.hyperdata_indexing
import
index_hyperdata
# in usual run order
from
.list_stop
import
do_stoplist
from
.ngram_groups
import
compute_groups
from
.metric_tfidf
import
compute_occs
,
compute_tfidf_local
,
compute_ti_ranking
from
.list_main
import
do_mainlist
from
.ngram_coocs
import
compute_coocs
from
.metric_specificity
import
compute_specificity
from
.list_map
import
do_maplist
# TEST
from
.mail_notification
import
notify_owner
from
gargantext.util.db
import
session
from
gargantext.models
import
Node
from
datetime
import
datetime
from
celery
import
shared_task
#@shared_task
def parse_extract(corpus):
    """
    Parse a corpus, then extract its ngrams.

    `corpus` is either a Node or an integer node id; an id is looked up
    in the database first. Progress is reported with print().

    Returns None in every case: when no node matches the id, when
    corpus.status() reports an error after parsing, and on success.
    """
    # retrieve corpus from database from id
    if isinstance(corpus, int):
        corpus_id = corpus
        corpus = session.query(Node).filter(Node.id == corpus_id).first()
        if corpus is None:
            print('NO SUCH CORPUS: #%d' % corpus_id)
            return None

    # apply actions
    print('CORPUS #%d' % corpus.id)
    parse(corpus)

    # was there an error in the process ?
    if corpus.status()['error']:
        # corpus.id (not corpus_id): corpus_id is only bound when the caller
        # passed an int, so the old message crashed for Node arguments
        print("ERROR: aborting parse_extract for corpus #%i" % corpus.id)
        return None

    print('CORPUS #%d: parsed' % corpus.id)
    extract_ngrams(corpus)
    print('CORPUS #%d: extracted ngrams' % corpus.id)
    return None
@shared_task
def parse_extract_indexhyperdata(corpus):
    """
    Run the full toolchain on a corpus, in this order: parsing, ngram
    extraction, hyperdata indexing, creation of the FAVORITES child node,
    then the ngram lists workflow (stoplist, groups, occurrences,
    ti ranking, mainlist, local tfidf, cooccurrences, specificity, maplist).

    `corpus` is either a Node or an integer node id (looked up in the DB).
    Each stage is recorded through corpus.status() and committed; progress
    is also printed. Always returns None.
    """
    # an integer argument is an id: fetch the matching node
    if isinstance(corpus, int):
        wanted_id = corpus
        corpus = session.query(Node).filter(Node.id == wanted_id).first()
        if corpus is None:
            print('NO SUCH CORPUS: #%d' % wanted_id)
            return

    def checkpoint(action, **state):
        # store the state of `action` in the corpus and persist it at once
        corpus.status(action, **state)
        corpus.save_hyperdata()
        session.commit()

    # Instantiate status
    checkpoint('Workflow', progress=1)
    # FIXME: 'Workflow' will still be incomplete when 'Index' and 'Lists' get
    #        stacked into hyperdata['statuses'], but corpus.status() returns
    #        only the 1st incomplete action (it doesn't understand
    #        "subactions")

    # --- parsing and extraction ---
    print('CORPUS #%d' % corpus.id)
    parse(corpus)
    print('CORPUS #%d: parsed' % corpus.id)
    extract_ngrams(corpus)

    # --- indexing ---
    checkpoint('Index', progress=0)
    print('CORPUS #%d: extracted ngrams' % corpus.id)
    index_hyperdata(corpus)
    print('CORPUS #%d: indexed hyperdata' % corpus.id)

    # -> 'favorites' node
    favorites_name = 'favorite docs in "%s"' % corpus.name
    favorites = corpus.add_child(typename='FAVORITES', name=favorites_name)
    session.add(favorites)
    session.commit()
    print('CORPUS #%d: [%s] new favorites node #%i'
          % (corpus.id, t(), favorites.id))

    checkpoint('Index', progress=1, complete=True)

    # -------------------------------
    # temporary ngram lists workflow
    # -------------------------------
    checkpoint('Lists', progress=0)
    print('CORPUS #%d: [%s] starting ngram lists computation'
          % (corpus.id, t()))

    # -> stoplist: filter + write (to Node and NodeNgram)
    stoplist = do_stoplist(corpus)
    print('CORPUS #%d: [%s] new stoplist node #%i'
          % (corpus.id, t(), stoplist))

    # -> write groups to Node and NodeNgramNgram
    grouplist = compute_groups(corpus, stoplist_id=None)
    print('CORPUS #%d: [%s] new grouplist node #%i'
          % (corpus.id, t(), grouplist))

    # -> write occurrences to Node and NodeNodeNgram
    occurrences = compute_occs(corpus, groupings_id=grouplist)
    print('CORPUS #%d: [%s] new occs node #%i'
          % (corpus.id, t(), occurrences))

    # -> write cumulated ti_ranking (tfidf ranking vector)
    #    to Node and NodeNodeNgram
    ti_ranking = compute_ti_ranking(
        corpus,
        groupings_id=grouplist,
        count_scope="global",
    )
    print('CORPUS #%d: [%s] new ti ranking node #%i'
          % (corpus.id, t(), ti_ranking))

    # -> mainlist: filter + write (to Node and NodeNgram)
    mainlist = do_mainlist(
        corpus,
        ranking_scores_id=ti_ranking,
        stoplist_id=stoplist,
    )
    print('CORPUS #%d: [%s] new mainlist node #%i'
          % (corpus.id, t(), mainlist))

    # -> write local tfidf similarities to Node and NodeNodeNgram
    #    => used for doc <=> ngram association
    local_tfidf = compute_tfidf_local(
        corpus,
        on_list_id=mainlist,
        groupings_id=grouplist,
    )
    print('CORPUS #%d: [%s] new localtfidf node #%i'
          % (corpus.id, t(), local_tfidf))

    # -> cooccurrences on mainlist: the result is passed on directly
    cooc_result = compute_coocs(
        corpus,
        on_list_id=mainlist,
        groupings_id=grouplist,
        just_pass_result=True,
    )
    print('CORPUS #%d: [%s] computed mainlist coocs for specif rank'
          % (corpus.id, t()))

    # -> specificity: compute + write (=> NodeNodeNgram)
    #    no need here for subforms: cooc already counted them in mainform
    specificity = compute_specificity(corpus, cooc_matrix=cooc_result)
    print('CORPUS #%d: [%s] new specificity node #%i'
          % (corpus.id, t(), specificity))

    # -> maplist: compute + write (to Node and NodeNgram)
    maplist = do_maplist(
        corpus,
        mainlist_id=mainlist,
        specificity_id=specificity,
        grouplist_id=grouplist,
    )
    print('CORPUS #%d: [%s] new maplist node #%i'
          % (corpus.id, t(), maplist))

    print('CORPUS #%d: [%s] FINISHED ngram lists computation'
          % (corpus.id, t()))
    checkpoint('Lists', progress=0, complete=True)

    # the owner is only notified when DEBUG is exactly False
    if DEBUG is False:
        print('CORPUS #%d: [%s] FINISHED Sending email notification'
              % (corpus.id, t()))
        notify_owner(corpus)

    checkpoint('Workflow', progress=10, complete=True)
def _first_child_id(corpus, typename):
    """Return the id of the first `typename` child of corpus, or None if there is none."""
    # presumably corpus.children() returns a query whose .first() yields None
    # when nothing matches -- TODO confirm against the Node model
    child = corpus.children(typename).first()
    return None if child is None else child.id


@shared_task
def recount(corpus):
    """
    Recount essential metrics of the toolchain after group modifications.

    ==> updates all scores in terms table
    ==> updates tfidf relationship b/w term and doc

    When groups change, the metrics need to be updated because subforms must be
    added to their new mainform aggregate values:
      - occurrences
      - ndocs
      - ti_rank
      - coocs
      - specificity
      - tfidf

    NB: no new extraction, no list change, just the metrics

    The corpus must already have a GROUPLIST and a MAINLIST child; a missing
    one raises AttributeError, as before. Previous metric nodes are
    overwritten when they exist; otherwise overwrite_id=None is passed to the
    compute_* functions. Always returns None.
    """
    # 1) we'll need the new groups and mainlist as basis
    group_id = corpus.children("GROUPLIST").first().id
    mainlist_id = corpus.children("MAINLIST").first().id

    # 2) and we're going to overwrite the previous metric nodes
    #    (explicit "no such node" check instead of a bare except, so that
    #     real errors are no longer silently swallowed)
    old_occ_id = _first_child_id(corpus, "OCCURRENCES")
    old_tirank_id = _first_child_id(corpus, "TIRANK-GLOBAL")
    old_spec_id = _first_child_id(corpus, "SPECIFICITY")
    old_ltfidf_id = _first_child_id(corpus, "TFIDF-CORPUS")

    # 3) we redo the required toolchain parts
    # -------------------------------------------

    # Instantiate status
    corpus.status('Recounting mini-workflow', progress=1)
    corpus.save_hyperdata()
    session.commit()

    # -> overwrite occurrences (=> NodeNodeNgram)
    occ_id = compute_occs(corpus,
                          groupings_id=group_id,
                          overwrite_id=old_occ_id)
    print('RECOUNT #%d: [%s] updated occs node #%i'
          % (corpus.id, t(), occ_id))

    # -> write cumulated ti_ranking (tfidf ranking vector) (=> NodeNodeNgram)
    tirank_id = compute_ti_ranking(corpus,
                                   groupings_id=group_id,
                                   count_scope="global",
                                   overwrite_id=old_tirank_id)
    print('RECOUNT #%d: [%s] updated ti ranking node #%i'
          % (corpus.id, t(), tirank_id))

    # -> write local tfidf similarities to (=> NodeNodeNgram)
    #    => used for doc <=> ngram association
    ltfidf_id = compute_tfidf_local(corpus,
                                    on_list_id=mainlist_id,
                                    groupings_id=group_id,
                                    overwrite_id=old_ltfidf_id)
    print('RECOUNT #%d: [%s] updated localtfidf node #%i'
          % (corpus.id, t(), ltfidf_id))

    # ------------
    # -> cooccurrences on mainlist: the result is passed on directly
    coocs = compute_coocs(corpus,
                          on_list_id=mainlist_id,
                          groupings_id=group_id,
                          just_pass_result=True)
    print('RECOUNT #%d: [%s] updated mainlist coocs for specif rank'
          % (corpus.id, t()))

    # -> specificity: compute + write (=> NodeNgram)
    spec_id = compute_specificity(corpus,
                                  cooc_matrix=coocs,
                                  overwrite_id=old_spec_id)
    print('RECOUNT #%d: [%s] updated specificity node #%i'
          % (corpus.id, t(), spec_id))

    print('RECOUNT #%d: [%s] FINISHED metric recounts'
          % (corpus.id, t()))

    corpus.status('Recounting mini-workflow', progress=10, complete=True)
    corpus.save_hyperdata()
    session.commit()
def t():
    """Return the current time (datetime.now()) as 'YYYY-MM-DD_HH:MM:SS'."""
    right_now = datetime.now()
    return "{:%Y-%m-%d_%H:%M:%S}".format(right_now)
gargantext/util/toolchain/metric_specificity.py
View file @
8fcfc803
...
@@ -99,7 +99,7 @@ def compute_specificity(corpus, cooc_id=None, cooc_matrix=None, overwrite_id = N
...
@@ -99,7 +99,7 @@ def compute_specificity(corpus, cooc_id=None, cooc_matrix=None, overwrite_id = N
if
overwrite_id
:
if
overwrite_id
:
# overwrite pre-existing id
# overwrite pre-existing id
the_id
=
overwrite_id
the_id
=
overwrite_id
session
.
query
(
NodeN
odeNgram
)
.
filter
(
NodeNodeNgram
.
node1
_id
==
the_id
)
.
delete
()
session
.
query
(
NodeN
gram
)
.
filter
(
NodeNgram
.
node
_id
==
the_id
)
.
delete
()
session
.
commit
()
session
.
commit
()
else
:
else
:
specnode
=
corpus
.
add_child
(
specnode
=
corpus
.
add_child
(
...
...
gargantext/util/toolchain/metric_tfidf.py
View file @
8fcfc803
...
@@ -345,11 +345,11 @@ def compute_ti_ranking(corpus,
...
@@ -345,11 +345,11 @@ def compute_ti_ranking(corpus,
# create the new TFIDF-XXXX node to get an id
# create the new TFIDF-XXXX node to get an id
tir_nd
=
corpus
.
add_child
()
tir_nd
=
corpus
.
add_child
()
if
count_scope
==
"local"
:
if
count_scope
==
"local"
:
tir_nd
.
typename
=
"T
FIDF
-CORPUS"
tir_nd
.
typename
=
"T
IRANK
-CORPUS"
tir_nd
.
name
=
"ti rank (
%
i ngforms in corpus:
%
s)"
%
(
tir_nd
.
name
=
"ti rank (
%
i ngforms in corpus:
%
s)"
%
(
total_ngramforms
,
corpus_id
)
total_ngramforms
,
corpus_id
)
elif
count_scope
==
"global"
:
elif
count_scope
==
"global"
:
tir_nd
.
typename
=
"T
FIDF
-GLOBAL"
tir_nd
.
typename
=
"T
IRANK
-GLOBAL"
tir_nd
.
name
=
"ti rank (
%
i ngforms
%
s in corpora of sourcetype:
%
s)"
%
(
tir_nd
.
name
=
"ti rank (
%
i ngforms
%
s in corpora of sourcetype:
%
s)"
%
(
total_ngramforms
,
total_ngramforms
,
(
"from corpus
%
i"
%
corpus_id
)
if
(
termset_scope
==
"local"
)
else
""
,
(
"from corpus
%
i"
%
corpus_id
)
if
(
termset_scope
==
"local"
)
else
""
,
...
...
gargantext/views/api/metrics.py
0 → 100644
View file @
8fcfc803
from
gargantext.util.db_cache
import
cache
from
gargantext.util.http
import
ValidationException
,
APIView
\
,
HttpResponse
,
JsonHttpResponse
from
gargantext.util.toolchain.main
import
recount
from
datetime
import
datetime
class CorpusMetrics(APIView):
    """API view that recomputes the ngram metrics of one corpus."""

    def patch(self, request, corpusnode_id):
        """
        PATCH triggers recount of metrics for the specified corpus.

        ex PATCH http://localhost:8000/api/metrics/14072
                                                   -----
                                                 corpus_id

        Returns a 401 HttpResponse for unauthenticated users, otherwise a
        JsonHttpResponse with the corpus id and the time the recount took.
        Raises ValidationException when the id matches no cached node.
        """
        print("==> update metrics request on ", corpusnode_id)

        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        try:
            corpus = cache.Node[int(corpusnode_id)]
        except Exception:
            # any lookup/conversion failure means "no such corpus"; unlike a
            # bare except this lets KeyboardInterrupt/SystemExit through
            corpus = None

        if corpus is None:
            raise ValidationException(
                "%s is not a valid corpus node id." % corpusnode_id
            )

        # recount() runs synchronously here: time it for the response
        t_before = datetime.now()
        # =============
        recount(corpus)
        # =============
        t_after = datetime.now()

        return JsonHttpResponse({
            'corpus_id': corpusnode_id,
            'took': "%f s." % (t_after - t_before).total_seconds()
        })
gargantext/views/api/ngramlists.py
View file @
8fcfc803
...
@@ -34,24 +34,36 @@ def _query_list(list_id,
...
@@ -34,24 +34,36 @@ def _query_list(list_id,
"""
"""
if
not
details
:
if
not
details
:
# simple contents
# simple contents
query
=
session
.
query
(
NodeNgram
.
ngram_id
)
query
=
session
.
query
(
NodeNgram
.
ngram_id
)
.
filter
(
NodeNgram
.
node_id
==
list_id
)
else
:
else
:
# detailed contents (terms and some NodeNodeNgram for score)
# detailed contents (terms and some NodeNodeNgram for score)
# NB: score can be undefined (eg ex-subform that now became free)
# ==> we need outerjoin
# and the filter needs to have scoring_metric_id so we do it before
ScoresTable
=
(
session
.
query
(
NodeNodeNgram
.
score
,
NodeNodeNgram
.
ngram_id
)
.
filter
(
NodeNodeNgram
.
node1_id
==
scoring_metric_id
)
.
subquery
()
)
query
=
(
session
query
=
(
session
.
query
(
.
query
(
NodeNgram
.
ngram_id
,
NodeNgram
.
ngram_id
,
Ngram
.
terms
,
Ngram
.
terms
,
NodeNodeNgram
.
score
ScoresTable
.
c
.
score
)
)
.
join
(
Ngram
,
NodeNgram
.
ngram_id
==
Ngram
.
id
)
.
join
(
Ngram
,
NodeNgram
.
ngram_id
==
Ngram
.
id
)
.
join
(
NodeNodeNgram
,
NodeNgram
.
ngram_id
==
NodeNodeNgram
.
ngram_id
)
.
filter
(
NodeNodeNgram
.
node1_id
==
scoring_metric_id
)
.
order_by
(
desc
(
NodeNodeNgram
.
score
))
)
# main filter
# main filter ----------------------
# -----------
.
filter
(
NodeNgram
.
node_id
==
list_id
)
query
=
query
.
filter
(
NodeNgram
.
node_id
==
list_id
)
# scores if possible
.
outerjoin
(
ScoresTable
,
ScoresTable
.
c
.
ngram_id
==
NodeNgram
.
ngram_id
)
.
order_by
(
desc
(
ScoresTable
.
c
.
score
))
)
if
pagination_limit
:
if
pagination_limit
:
query
=
query
.
limit
(
pagination_limit
)
query
=
query
.
limit
(
pagination_limit
)
...
@@ -128,13 +140,18 @@ class GroupChange(APIView):
...
@@ -128,13 +140,18 @@ class GroupChange(APIView):
}
}
Chained effect:
Chained effect:
any previous group under mainformA or B will be overwritten
The DELETE HTTP method also works, with same url
(and simple array in the data)
NB: request.user is also checked for current authentication status
NB: request.user is also checked for current authentication status
"""
"""
def
initial
(
self
,
request
):
def
initial
(
self
,
request
):
"""
"""
Before dispatching to post()
Before dispatching to post()
or delete()
Checks current user authentication to prevent remote DB manipulation
Checks current user authentication to prevent remote DB manipulation
"""
"""
...
@@ -150,28 +167,29 @@ class GroupChange(APIView):
...
@@ -150,28 +167,29 @@ class GroupChange(APIView):
=> removes couples where newly reconnected ngrams where involved
=> removes couples where newly reconnected ngrams where involved
=> adds new couples from GroupsBuffer of terms view
=> adds new couples from GroupsBuffer of terms view
TODO recalculate scores after new groups
TODO see use of util.lists.Translations
TODO see use of util.lists.Translations
TODO benchmark selective delete compared to entire list rewrite
POST data:
<QueryDict: {'1228[]': ['891', '1639']}> => creates 1228 - 891
and 1228 - 1639
request.POST.lists() iterator where each elt is like :('1228[]',['891','1639'])
"""
"""
group_node
=
get_parameters
(
request
)[
'node'
]
group_node
=
get_parameters
(
request
)[
'node'
]
all_
nodes_involved
=
[]
all_
mainforms
=
[]
links
=
[]
links
=
[]
for
(
mainform_key
,
subforms_ids
)
in
request
.
POST
.
lists
():
for
(
mainform_key
,
subforms_ids
)
in
request
.
POST
.
lists
():
mainform_id
=
mainform_key
[:
-
2
]
# remove brackets '543[]' -> '543'
mainform_id
=
mainform_key
[:
-
2
]
# remove brackets '543[]' -> '543'
all_
nodes_involved
.
append
(
mainform_id
)
all_
mainforms
.
append
(
mainform_id
)
for
subform_id
in
subforms_ids
:
for
subform_id
in
subforms_ids
:
links
.
append
((
mainform_id
,
subform_id
))
links
.
append
((
mainform_id
,
subform_id
))
all_nodes_involved
.
append
(
subform_id
)
# remove selectively all groupings with these nodes involved
# remove selectively all groupings with these mainforms
# TODO benchmark
# using IN is correct in this case: list of ids is short and external
# see stackoverflow.com/questions/444475/
old_links
=
(
session
.
query
(
NodeNgramNgram
)
old_links
=
(
session
.
query
(
NodeNgramNgram
)
.
filter
(
NodeNgramNgram
.
node_id
==
group_node
)
.
filter
(
NodeNgramNgram
.
node_id
==
group_node
)
.
filter
(
or_
(
.
filter
(
NodeNgramNgram
.
ngram1_id
.
in_
(
all_mainforms
))
NodeNgramNgram
.
ngram1_id
.
in_
(
all_nodes_involved
),
NodeNgramNgram
.
ngram2_id
.
in_
(
all_nodes_involved
)))
)
)
n_removed
=
old_links
.
delete
(
synchronize_session
=
False
)
n_removed
=
old_links
.
delete
(
synchronize_session
=
False
)
session
.
commit
()
session
.
commit
()
...
@@ -189,6 +207,40 @@ class GroupChange(APIView):
...
@@ -189,6 +207,40 @@ class GroupChange(APIView):
},
200
)
},
200
)
def delete(self, request):
    """
    Remove groups from the group node.

    The data sent is simply a json { 'keys':'["11492","16438"]' }

    ==> every synonym link having one of these ngrams as mainform is
        removed (within the url's groupnode_id)

    NB: At reception here it becomes like:
        <QueryDict: {'keys[]': ['11492', '16438']}>

    Responds with the number of removed links under 'count_removed'.
    """
    # the group node comes from the url parameters
    group_node = get_parameters(request)['node']
    print(request.POST)

    # the mainforms come from the request body
    mainform_ids = request.POST.getlist('keys[]')

    # links of this group node whose first ngram is one of the mainforms
    node_links = session.query(NodeNgramNgram).filter(
        NodeNgramNgram.node_id == group_node
    )
    doomed_links = node_links.filter(
        NodeNgramNgram.ngram1_id.in_(mainform_ids)
    )

    removed_count = doomed_links.delete(synchronize_session=False)
    session.commit()

    payload = {'count_removed': removed_count}
    return JsonHttpResponse(payload, 200)
class
ListChange
(
APIView
):
class
ListChange
(
APIView
):
"""
"""
...
...
gargantext/views/api/nodes.py
View file @
8fcfc803
...
@@ -89,7 +89,7 @@ class NodeListResource(APIView):
...
@@ -89,7 +89,7 @@ class NodeListResource(APIView):
response
=
HttpResponse
(
content_type
=
'text/csv'
)
response
=
HttpResponse
(
content_type
=
'text/csv'
)
response
[
'Content-Disposition'
]
=
'attachment; filename="Gargantext_Corpus.csv"'
response
[
'Content-Disposition'
]
=
'attachment; filename="Gargantext_Corpus.csv"'
writer
=
csv
.
writer
(
response
,
delimiter
=
'
\t
'
)
writer
=
csv
.
writer
(
response
,
delimiter
=
'
\t
'
,
quoting
=
csv
.
QUOTE_MINIMAL
)
keys
=
[
'title'
,
'journal'
keys
=
[
'title'
,
'journal'
,
'publication_year'
,
'publication_month'
,
'publication_day'
,
'publication_year'
,
'publication_month'
,
'publication_day'
...
...
gargantext/views/api/urls.py
View file @
8fcfc803
from
django.conf.urls
import
url
from
django.conf.urls
import
url
from
.
import
nodes
from
.
import
nodes
from
.
import
metrics
from
.
import
ngramlists
from
.
import
ngramlists
from
.
import
analytics
from
.
import
analytics
...
@@ -19,6 +20,14 @@ urlpatterns = [ url(r'^nodes$' , nodes.NodeListResource.as_view()
...
@@ -19,6 +20,14 @@ urlpatterns = [ url(r'^nodes$' , nodes.NodeListResource.as_view()
,
url
(
r'^nodes/(\d+)/favorites$'
,
nodes
.
CorpusFavorites
.
as_view
()
)
,
url
(
r'^nodes/(\d+)/favorites$'
,
nodes
.
CorpusFavorites
.
as_view
()
)
# in these two routes the node is supposed to be a *corpus* node
# in these two routes the node is supposed to be a *corpus* node
,
url
(
r'^metrics/(\d+)$'
,
metrics
.
CorpusMetrics
.
as_view
()
)
# update all metrics for a corpus
# ex: PATCH metrics/123
# \
# corpus id
,
url
(
r'^ngramlists/change$'
,
ngramlists
.
ListChange
.
as_view
()
)
,
url
(
r'^ngramlists/change$'
,
ngramlists
.
ListChange
.
as_view
()
)
# add or remove ngram from a list
# add or remove ngram from a list
# ex: add <=> PUT ngramlists/change?list=42&ngrams=1,2
# ex: add <=> PUT ngramlists/change?list=42&ngrams=1,2
...
...
static/lib/gargantext/NGrams_dyna_chart_and_table.js
View file @
8fcfc803
This diff is collapsed.
Click to expand it.
static/lib/gargantext/garganrest.js
View file @
8fcfc803
...
@@ -69,10 +69,11 @@ var Resource = function(url_path) {
...
@@ -69,10 +69,11 @@ var Resource = function(url_path) {
});
});
};
};
// change an item
// change an item
this
.
change
=
this
.
update
=
function
(
i
tem
,
callback
)
{
this
.
change
=
this
.
update
=
function
(
i
d
,
callback
)
{
$
.
ajax
({
$
.
ajax
({
url
:
url_path
+
'/'
+
i
tem
.
i
d
,
url
:
url_path
+
'/'
+
id
,
type
:
'PATCH'
,
type
:
'PATCH'
,
success
:
callback
success
:
callback
});
});
};
};
...
@@ -84,14 +85,18 @@ var Resource = function(url_path) {
...
@@ -84,14 +85,18 @@ var Resource = function(url_path) {
$
.
ajax
({
$
.
ajax
({
url
:
url_path
+
'/'
+
id
,
url
:
url_path
+
'/'
+
id
,
type
:
'DELETE'
,
type
:
'DELETE'
,
success
:
callback
success
:
callback
});
});
};
};
// add an item
// add an item
this
.
add
=
this
.
append
=
function
(
value
,
callback
)
{
this
.
add
=
this
.
append
=
function
(
value
,
callback
)
{
$
.
ajax
({
$
.
ajax
({
// todo define id
url
:
url_path
+
'/'
+
id
,
url
:
url_path
+
'/'
+
id
,
type
:
'POST'
,
type
:
'POST'
,
success
:
callback
success
:
callback
});
});
};
};
...
@@ -99,12 +104,12 @@ var Resource = function(url_path) {
...
@@ -99,12 +104,12 @@ var Resource = function(url_path) {
var
GarganRest
=
function
(
base_path
,
path_list
)
{
var
GarganRest
=
function
(
base_path
,
path_list
)
{
var
that
=
this
;
var
that
=
this
;
$
.
each
(
path_list
,
function
(
p
,
path
){
$
.
each
(
path_list
,
function
(
i
,
path
){
that
[
path
]
=
new
Resource
(
base_path
+
path
);
that
[
path
]
=
new
Resource
(
base_path
+
path
);
});
});
};
};
garganrest
=
new
GarganRest
(
'/api/'
,
[
'nodes'
]);
garganrest
=
new
GarganRest
(
'/api/'
,
[
'nodes'
,
'metrics'
]);
// var log = function(result){console.log(result);};
// var log = function(result){console.log(result);};
...
...
static/lib/gargantext/tables.css
View file @
8fcfc803
...
@@ -61,6 +61,7 @@ span.note {
...
@@ -61,6 +61,7 @@ span.note {
span
.note.glyphicon
{
span
.note.glyphicon
{
color
:
#555
;
color
:
#555
;
top
:
0
;
}
}
p
.note
{
p
.note
{
...
@@ -129,14 +130,10 @@ tr:hover {
...
@@ -129,14 +130,10 @@ tr:hover {
margin-bottom
:
1em
;
margin-bottom
:
1em
;
}
}
.
old
subform
{
.subform
{
color
:
#777
;
color
:
#777
;
}
}
.usersubform
{
color
:
blue
;
}
.dynatable-record-count
{
.dynatable-record-count
{
font-size
:
0.7em
;
font-size
:
0.7em
;
}
}
...
...
templates/pages/projects/project.html
View file @
8fcfc803
...
@@ -88,13 +88,29 @@
...
@@ -88,13 +88,29 @@
{{corpus.name}}, {{ corpus.count }} documents {{ corpus.status_message }}
{{corpus.name}}, {{ corpus.count }} documents {{ corpus.status_message }}
</a>
</a>
</div>
</div>
<div
class=
"col-md-2 content"
>
<div
class=
"col-md-3 content"
>
<a
href=
"/projects/{{project.id}}/corpora/{{corpus.id}}"
>
<a
href=
"/projects/{{project.id}}/corpora/{{corpus.id}}"
title=
"View the corpus"
>
<button
type=
"button"
class=
"btn btn-default"
aria-label=
"Left Align"
>
<button
type=
"button"
class=
"btn btn-default"
aria-label=
"Left Align"
>
<span
class=
"glyphicon glyphicon-eye-open"
aria-hidden=
"true"
></span>
<span
class=
"glyphicon glyphicon-eye-open"
aria-hidden=
"true"
></span>
</button>
</button>
</a>
</a>
<!-- -->
<button
type=
"button"
class=
"btn btn-default yopla"
data-container=
"body"
data-toggle=
"popover"
data-placement=
"bottom"
data-trigger=
"focus"
data-content=
"
<ul>
<li
onclick="
garganrest.metrics.update({{corpus.id}}, function(){alert('The corpus ({{corpus.name|escapejs}}) was updated')});
">
<a href='#'>Recalculate ngram metrics</a> <br/> (can take a little while)
</li>
</ul>
"
>
<span
class=
"glyphicon glyphicon-dashboard"
aria-hidden=
"true"
title=
'Recalculate ngram scores and similarities'
></span>
</button>
<button
type=
"button"
class=
"btn btn-default"
data-container=
"body"
data-toggle=
"popover"
data-placement=
"bottom"
<button
type=
"button"
class=
"btn btn-default"
data-container=
"body"
data-toggle=
"popover"
data-placement=
"bottom"
data-content=
"
data-content=
"
<ul>
<ul>
...
@@ -103,14 +119,15 @@
...
@@ -103,14 +119,15 @@
garganrest.nodes.delete({{corpus.id}}, function(){$('#corpus_'+{{corpus.id}}).remove()});
garganrest.nodes.delete({{corpus.id}}, function(){$('#corpus_'+{{corpus.id}}).remove()});
$(this).parent().parent().remove();
$(this).parent().parent().remove();
">
">
<a href=
"#"
>Delete this</a>
<a href=
'#'
>Delete this</a>
</li>
</li>
</ul>
</ul>
"
>
"
>
<span
class=
"glyphicon glyphicon-trash"
aria-hidden=
"true"
></span>
<span
class=
"glyphicon glyphicon-trash"
aria-hidden=
"true"
title=
'Delete this corpus'
></span>
</button>
</button>
</div>
</div>
<div
class=
"col-md-
4
content"
>
<div
class=
"col-md-
3
content"
>
{% for state in corpus.hyperdata.statuses %}
{% for state in corpus.hyperdata.statuses %}
{% ifequal state.action "Workflow" %}
{% ifequal state.action "Workflow" %}
{% if state.complete %}
{% if state.complete %}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment