Commit d566e4a8 authored by Romain Loth's avatar Romain Loth

Ngrams table: see & add groups

parent 7c12c1d3
......@@ -4,13 +4,14 @@ API views for advanced operations on ngrams and ngramlists
- retrieve several lists together ("family")
- retrieve detailed list infos (ngram_id, term strings, scores...)
- modify NodeNgram lists (PUT/DEL an ngram to a MAINLIST OR MAPLIST...)
- modify NodeNgramNgram groups (POST a list of groupings like {"767":[209,640],"779":[436,265,385]})
"""
from gargantext.util.http import APIView, get_parameters, JsonHttpResponse,\
ValidationException, Http404
from gargantext.util.db import session, aliased, desc
from gargantext.util.db_cache import cache
from gargantext.models import Ngram, NodeNgram, NodeNodeNgram
from gargantext.util.db import session, aliased, desc, bulk_insert
from gargantext.util.db_cache import cache, or_
from gargantext.models import Ngram, NodeNgram, NodeNodeNgram, NodeNgramNgram
from gargantext.util.lists import UnweightedList, Translations
......@@ -62,6 +63,43 @@ def _query_list(list_id,
def _query_grouped_ngrams(groupings_id, details=False, scoring_metric_id=None):
    """
    Build the query listing the "hidden" ngrams of a group node, i.e. the
    ngram2_id side of its NodeNgramNgram couples.

    Works only for grouplists. The query is returned unexecuted: the caller
    decides how to fetch it (for instance with .all()).

    Parameters:
      - groupings_id: id of the group node whose couples are listed
      - details: if False, each row is just the ngram_id
                 if True, each row is a triple (ngram_id, term, score),
                 sorted by descending score
      - scoring_metric_id: id of a scoring metric node (TFIDF or OCCS),
                           used for the score column and the sorting;
                           only read when details is True
    """
    hidden_id = NodeNgramNgram.ngram2_id

    # simple contents: only the ids, restricted to this group node
    if not details:
        return (session
                .query(hidden_id)
                .filter(NodeNgramNgram.node_id == groupings_id))

    # detailed contents: terms come from Ngram, scores from the
    # NodeNodeNgram rows attached to the scoring metric node
    with_infos = session.query(hidden_id, Ngram.terms, NodeNodeNgram.score)
    with_infos = with_infos.join(Ngram, hidden_id == Ngram.id)
    with_infos = with_infos.join(NodeNodeNgram,
                                 hidden_id == NodeNodeNgram.ngram_id)
    with_infos = with_infos.filter(NodeNodeNgram.node1_id == scoring_metric_id)
    with_infos = with_infos.order_by(desc(NodeNodeNgram.score))

    # main filter
    # -----------
    return with_infos.filter(NodeNgramNgram.node_id == groupings_id)
class List(APIView):
"""
see already available API query api/nodes/<list_id>?fields[]=ngrams
......@@ -69,6 +107,94 @@ class List(APIView):
pass
class GroupChange(APIView):
    """
    Modification of some groups
    (typically new subform nodes under a mainform)

    USAGE EXAMPLE:
        POST HOST/api/ngramlists/groups?node=43
                                        ^^^^^^^
                                 group node to modify

    We use POST HTTP method to send group data with structure like:
        {
            mainform_A: [subform_A1],
            mainform_B: [subform_B1, subform_B2, subform_B3]
            ...
        }

    NB: request.user is also checked for current authentication status
    """

    def initial(self, request):
        """
        Before dispatching to post()

        Checks current user authentication to prevent remote DB manipulation.
        Raises Http404 when the user is not authenticated.
        """
        # can't use return in initial() (although 401 maybe better than 404)
        # can't use @requires_auth because of positional 'self' within class
        if not request.user.is_authenticated():
            raise Http404()

    def post(self, request):
        """
        Rewrites the group node **selectively**

          => removes couples where newly reconnected ngrams were involved
          => adds new couples from GroupsBuffer of terms view

        The group node id is read from the 'node' url parameter and the
        couples from the POST data (one key per mainform, listing subforms).

        Returns a JSON response (status 200) with the keys 'count_removed'
        and 'count_added'.

        TODO see use of util.lists.Translations
        TODO benchmark selective delete compared to entire list rewrite
        """
        group_node = get_parameters(request)['node']
        all_nodes_involved = []
        links = []

        for (mainform_key, subforms_ids) in request.POST.lists():
            # remove brackets '543[]' -> '543', but only when they are
            # really there: a blind [:-2] would silently truncate the id
            # of a key sent without brackets ('543' -> '5')
            if mainform_key.endswith('[]'):
                mainform_id = mainform_key[:-2]
            else:
                mainform_id = mainform_key

            all_nodes_involved.append(mainform_id)
            for subform_id in subforms_ids:
                links.append((mainform_id, subform_id))
                all_nodes_involved.append(subform_id)

        # nothing was posted: nothing to remove nor to add
        # (also avoids sending an empty IN () clause to the database)
        if not all_nodes_involved:
            return JsonHttpResponse({
                'count_removed': 0,
                'count_added': 0,
            }, 200)

        # remove selectively all groupings with these nodes involved
        # TODO benchmark
        old_links = (session.query(NodeNgramNgram)
                     .filter(NodeNgramNgram.node_id == group_node)
                     .filter(or_(
                         NodeNgramNgram.ngram1_id.in_(all_nodes_involved),
                         NodeNgramNgram.ngram2_id.in_(all_nodes_involved)))
                     )

        # Query.delete() returns the number of rows it matched, so no
        # separate count() round-trip is needed before deleting
        n_removed = old_links.delete(synchronize_session='fetch')

        # insert the new couples, all with the same weight of 1.0
        bulk_insert(
            NodeNgramNgram,
            ('node_id', 'ngram1_id', 'ngram2_id', 'weight'),
            ((group_node, mainform, subform, 1.0)
             for (mainform, subform) in links)
        )

        return JsonHttpResponse({
            'count_removed': n_removed,
            'count_added': len(links),
        }, 200)
class ListChange(APIView):
"""
Any ngram action on standard NodeNgram lists (MAIN, MAP, STOP)
......@@ -270,10 +396,17 @@ class ListFamily(APIView):
pagination_limit = glance_limit,
scoring_metric_id= scores_id)
else:
# infos for all ngrams
# infos for all ngrams from mainlist
mainlist_query = _query_list(mainlist_id, details=True,
scoring_metric_id= scores_id)
# infos for grouped ngrams, absent from mainlist
hidden_ngrams_query = _query_grouped_ngrams(groups_id, details=True,
scoring_metric_id= scores_id)
# and for the other lists (stop and map)
# no details needed here, just the member ids
# - maplist ngrams will already be in ngraminfos b/c of mainlist
# - stoplist ngrams will not be shown in detail
for li in other_list_ids:
li_elts = _query_list(other_list_ids[li], details=False
).all()
......@@ -286,11 +419,14 @@ class ListFamily(APIView):
linkinfo = links.groups
# the output form
for ng in mainlist_query.all():
for ng in mainlist_query.all() + hidden_ngrams_query.all():
ng_id = ng[0]
# id => [term, weight]
ngraminfo[ng_id] = ng[1:]
# NB the client js will sort mainlist ngs from hidden ngs after ajax
# using linkinfo (otherwise needs redundant listmembers for main)
return JsonHttpResponse({
'ngraminfos' : ngraminfo,
'listmembers' : listmembers,
......
......@@ -16,6 +16,11 @@ urlpatterns = [
# rm <=> DEL ngramlists/change?list=42&ngrams=1,2
url(r'^ngramlists/change$', ngramlists.ListChange.as_view()),
# modify grouping couples of a group node
# ex: POST ngramlists/groups?node=43
# post data looks like : {"767":[209,640],"779":[436,265,385]}
url(r'^ngramlists/groups$', ngramlists.GroupChange.as_view()),
# get entire combination of lists from a corpus
# (or any combination of lists that go together :
# - a mainlist
......
......@@ -30,21 +30,37 @@ th a {
/* notes under table titles */
th p.note {
color: #ccc;
th .note {
color: #ccc;
}
td .note {
color: #444;
}
span.note {
font-size: 10px;
color: #333;
}
p.note {
font-size: 0.6em;
margin: 1em 0 0 0 ;
}
th p.note > input {
p.note > input {
float: left;
margin: 0 .2em 0 0 ;
}
th p.note > label {
p.note > label {
float: left;
}
.note.greyed {
opacity: 0.2;
}
tr:hover {
cursor: pointer;
font-weight: bold;
......@@ -69,19 +85,34 @@ tr:hover {
cursor: default;
}
/* group box with + */
.group_box {
/* group box row on top of table with + */
#group_box {
font-size: 90%;
border: 1px solid blue;
}
.group_box .header {
font-size: 120%;
#group_flag {
font-size: 0.7em;
margin-top: 3em;
margin-bottom: 1em;
}
#group_box_mainform {
margin-bottom: 0;
line-height: 1 ;
margin-left: .05em ;
}
.group_box .content {
border: 1px solid yellow;
#group_box_content {
line-height: 1 ;
margin-bottom: 1em;
}
#group_flag {
.oldsubform {
color: #777 ;
}
.usersubform {
color: blue ;
}
.dynatable-record-count {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment