Merge branch 'romain-refactoring' into unstable

8fcfc803 · delanoe · a01472c3 · 8ba25bfd · 8fcfc803 · 8fcfc803
Commit 8fcfc803 authored May 25, 2016 by delanoe
13 changed files
--- a/gargantext/constants.py
+++ b/gargantext/constants.py
@@ -13,10 +13,12 @@ LISTTYPES = {
    'MAINLIST'     : UnweightedList,
    'MAPLIST'      : UnweightedList,
    'SPECIFICITY'  : WeightedList,
-    'OCCURRENCES'  : WeightedIndex,   # todo replace by WeightedList
+    'OCCURRENCES'  : WeightedIndex,   # could be WeightedList
    'COOCCURRENCES': WeightedMatrix,
-    'TFIDF-CORPUS' : WeightedIndex,   # todo split -> WeightedList for ti_rank and WeightedIndex for tfidf
-    'TFIDF-GLOBAL' : WeightedIndex,   # todo split -> WeightedList for ti_rank and WeightedIndex for tfidf
+    'TFIDF-CORPUS' : WeightedIndex,
+    'TFIDF-GLOBAL' : WeightedIndex,
+    'TIRANK-LOCAL' : WeightedIndex,   # could be WeightedList
+    'TIRANK-GLOBAL' : WeightedIndex   # could be WeightedList
 }

 NODETYPES = [
@@ -40,8 +42,11 @@ NODETYPES = [
    'TFIDF-CORPUS',          # 13
    'TFIDF-GLOBAL',          # 14
    # docs subset
-    'FAVORITES'              # 15
-    # TODO add ti RANK
+    'FAVORITES',             # 15
+
+    # more scores (sorry!)
+    'TIRANK-LOCAL',          # 16
+    'TIRANK-GLOBAL',         # 17
 ]

 INDEXED_HYPERDATA = {

--- a/gargantext/util/toolchain/__init__.py
+++ b/gargantext/util/toolchain/__init__.py
-
-from gargantext.settings  import DEBUG
-from .parsing             import parse
-from .ngrams_extraction   import extract_ngrams
-from .hyperdata_indexing  import index_hyperdata
-
-# in usual run order
-from .list_stop           import do_stoplist
-from .ngram_groups        import compute_groups
-from .metric_tfidf        import compute_occs, compute_tfidf_local, compute_ti_ranking
-from .list_main           import do_mainlist
-from .ngram_coocs         import compute_coocs
-from .metric_specificity  import compute_specificity
-from .list_map            import do_maplist     # TEST
-from .mail_notification   import notify_owner
-from gargantext.util.db   import session
-from gargantext.models    import Node
-
-from datetime             import datetime
-from celery               import shared_task
-
-#@shared_task
-def parse_extract(corpus):
-    # retrieve corpus from database from id
-    if isinstance(corpus, int):
-        corpus_id = corpus
-        corpus = session.query(Node).filter(Node.id == corpus_id).first()
-        if corpus is None:
-            print('NO SUCH CORPUS: #%d' % corpus_id)
-            return
-    # apply actions
-    print('CORPUS #%d' % (corpus.id))
-    parse(corpus)
-
-    # was there an error in the process ?
-    if corpus.status()['error']:
-        print("ERROR: aborting parse_extract for corpus #%i" % corpus_id)
-        return None
-
-    print('CORPUS #%d: parsed' % (corpus.id))
-    extract_ngrams(corpus)
-    print('CORPUS #%d: extracted ngrams' % (corpus.id))
-
-@shared_task
-def parse_extract_indexhyperdata(corpus):
-    # retrieve corpus from database from id
-    if isinstance(corpus, int):
-        corpus_id = corpus
-        corpus = session.query(Node).filter(Node.id == corpus_id).first()
-        if corpus is None:
-            print('NO SUCH CORPUS: #%d' % corpus_id)
-            return
-    # Instantiate status
-    corpus.status('Workflow', progress=1)
-    corpus.save_hyperdata()
-    session.commit()
-    # FIXME: 'Workflow' will still be uncomplete when 'Index' and 'Lists' will
-    #        get stacked into hyperdata['statuses'], but doing corpus.status()
-    #        will return only the 1st uncomplete action (corpus.status() doesn't
-    #        understand "subactions")
-
-    # apply actions
-    print('CORPUS #%d' % (corpus.id))
-    parse(corpus)
-    print('CORPUS #%d: parsed' % (corpus.id))
-    extract_ngrams(corpus)
-
-    # Preparing Databse
-    # Indexing
-    #
-
-    corpus.status('Index', progress=0)
-    corpus.save_hyperdata()
-    session.commit()
-
-
-    print('CORPUS #%d: extracted ngrams' % (corpus.id))
-    index_hyperdata(corpus)
-    print('CORPUS #%d: indexed hyperdata' % (corpus.id))
-
-    # -> 'favorites' node
-    favs = corpus.add_child(
-            typename='FAVORITES', name='favorite docs in "%s"' % corpus.name
-            )
-    session.add(favs)
-    session.commit()
-    print('CORPUS #%d: [%s] new favorites node #%i' % (corpus.id, t(), favs.id))
-
-
-    corpus.status('Index', progress=1, complete=True)
-    corpus.save_hyperdata()
-    session.commit()
-
-
-    # -------------------------------
-    # temporary ngram lists workflow
-    # -------------------------------
-
-    corpus.status('Lists', progress=0)
-    corpus.save_hyperdata()
-    session.commit()
-
-
-    print('CORPUS #%d: [%s] starting ngram lists computation' % (corpus.id, t()))
-
-    # -> stoplist: filter + write (to Node and NodeNgram)
-    stop_id = do_stoplist(corpus)
-    print('CORPUS #%d: [%s] new stoplist node #%i' % (corpus.id, t(), stop_id))
-
-    # -> write groups to Node and NodeNgramNgram
-    group_id = compute_groups(corpus, stoplist_id = None)
-    print('CORPUS #%d: [%s] new grouplist node #%i' % (corpus.id, t(), group_id))
-
-    # ------------
-    # -> write occurrences to Node and NodeNodeNgram # (todo: NodeNgram)
-    occ_id = compute_occs(corpus, groupings_id = group_id)
-    print('CORPUS #%d: [%s] new occs node #%i' % (corpus.id, t(), occ_id))
-
-    # -> write cumulated ti_ranking (tfidf ranking vector) to Node and NodeNodeNgram (todo: NodeNgram)
-    tirank_id = compute_ti_ranking(corpus,
-                                   groupings_id = group_id,
-                                   count_scope="global")
-    print('CORPUS #%d: [%s] new ti ranking node #%i' % (corpus.id, t(), tirank_id))
-
-    # -> mainlist: filter + write (to Node and NodeNgram)
-    mainlist_id = do_mainlist(corpus,
-                              ranking_scores_id = tirank_id,
-                              stoplist_id = stop_id)
-    print('CORPUS #%d: [%s] new mainlist node #%i' % (corpus.id, t(), mainlist_id))
-
-    # -> write local tfidf similarities to Node and NodeNodeNgram
-    ltfidf_id = compute_tfidf_local(corpus,
-                                    on_list_id=mainlist_id,
-                                    groupings_id = group_id)
-    print('CORPUS #%d: [%s] new localtfidf node #%i' % (corpus.id, t(), ltfidf_id))
-    # => used for doc <=> ngram association
-
-    # ------------
-    # -> cooccurrences on mainlist: compute + write (=> Node and NodeNgramNgram)
-    coocs = compute_coocs(corpus,
-                            on_list_id = mainlist_id,
-                            groupings_id = group_id,
-                            just_pass_result = True)
-    print('CORPUS #%d: [%s] computed mainlist coocs for specif rank' % (corpus.id, t()))
-
-    # -> specificity: compute + write (=> NodeNodeNgram)
-    spec_id = compute_specificity(corpus,cooc_matrix = coocs)
-    # no need here for subforms because cooc already counted them in mainform
-    print('CORPUS #%d: [%s] new specificity node #%i' % (corpus.id, t(), spec_id))
-
-    # maplist: compute + write (to Node and NodeNgram)
-    map_id = do_maplist(corpus,
-                        mainlist_id = mainlist_id,
-                        specificity_id=spec_id,
-                        grouplist_id=group_id
-                        )
-    print('CORPUS #%d: [%s] new maplist node #%i' % (corpus.id, t(), map_id))
-
-    print('CORPUS #%d: [%s] FINISHED ngram lists computation' % (corpus.id, t()))
-
-    corpus.status('Lists', progress=0, complete=True)
-    corpus.save_hyperdata()
-    session.commit()
-
-
-    if DEBUG is False:
-        print('CORPUS #%d: [%s] FINISHED Sending email notification' % (corpus.id, t()))
-        notify_owner(corpus)
-
-    corpus.status('Workflow', progress=10, complete=True)
-    corpus.save_hyperdata()
-    session.commit()
-
-
-def t():
-    return datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
+from .main import parse_extract_indexhyperdata
--- a/gargantext/util/toolchain/main.py
+++ b/gargantext/util/toolchain/main.py
+
+from gargantext.settings  import DEBUG
+from .parsing             import parse
+from .ngrams_extraction   import extract_ngrams
+from .hyperdata_indexing  import index_hyperdata
+
+# in usual run order
+from .list_stop           import do_stoplist
+from .ngram_groups        import compute_groups
+from .metric_tfidf        import compute_occs, compute_tfidf_local, compute_ti_ranking
+from .list_main           import do_mainlist
+from .ngram_coocs         import compute_coocs
+from .metric_specificity  import compute_specificity
+from .list_map            import do_maplist     # TEST
+from .mail_notification   import notify_owner
+from gargantext.util.db   import session
+from gargantext.models    import Node
+
+from datetime             import datetime
+from celery               import shared_task
+
+#@shared_task
+def parse_extract(corpus):
+    """
+    Parse a corpus, then extract its ngrams.
+
+    `corpus` is either a corpus Node or the integer id of one; an id is
+    first resolved through the db session.
+
+    Always returns None. Stops early, after printing a message, when the
+    id matches no node or when corpus.status() reports an error once
+    parsing is done.
+    """
+    # retrieve corpus from database from id
+    if isinstance(corpus, int):
+        corpus_id = corpus
+        corpus = session.query(Node).filter(Node.id == corpus_id).first()
+        if corpus is None:
+            print('NO SUCH CORPUS: #%d' % corpus_id)
+            return
+    # apply actions
+    print('CORPUS #%d' % (corpus.id))
+    parse(corpus)
+
+    # was there an error in the process ?
+    if corpus.status()['error']:
+        # corpus.id and not corpus_id: the latter is only bound when an
+        # integer id was passed in, so it raised NameError for Node arguments
+        print("ERROR: aborting parse_extract for corpus #%i" % corpus.id)
+        return None
+
+    print('CORPUS #%d: parsed' % (corpus.id))
+    extract_ngrams(corpus)
+    print('CORPUS #%d: extracted ngrams' % (corpus.id))
+
+@shared_task
+def parse_extract_indexhyperdata(corpus):
+    """
+    Run the whole toolchain on a corpus: parsing, ngram extraction,
+    hyperdata indexing, then the ngram lists and metrics workflow.
+
+    `corpus` is either a corpus Node or the integer id of one; an id is
+    first resolved through the db session.
+
+    Progress is recorded with corpus.status() under the actions 'Workflow',
+    'Index' and 'Lists'; every status change is followed by
+    save_hyperdata() and a session commit.
+
+    Returns None (also when the given id matches no node).
+    """
+    # retrieve corpus from database from id
+    if isinstance(corpus, int):
+        corpus_id = corpus
+        corpus = session.query(Node).filter(Node.id == corpus_id).first()
+        if corpus is None:
+            print('NO SUCH CORPUS: #%d' % corpus_id)
+            return
+    # Instantiate status
+    corpus.status('Workflow', progress=1)
+    corpus.save_hyperdata()
+    session.commit()
+    # FIXME: 'Workflow' will still be incomplete when 'Index' and 'Lists' will
+    #        get stacked into hyperdata['statuses'], but doing corpus.status()
+    #        will return only the 1st incomplete action (corpus.status() doesn't
+    #        understand "subactions")
+
+    # apply actions
+    print('CORPUS #%d' % (corpus.id))
+    parse(corpus)
+    print('CORPUS #%d: parsed' % (corpus.id))
+    extract_ngrams(corpus)
+
+    # Preparing Database
+    # Indexing
+    #
+
+    corpus.status('Index', progress=0)
+    corpus.save_hyperdata()
+    session.commit()
+
+
+    # NB: this message reports the extract_ngrams() call made before the
+    #     'Index' status was opened
+    print('CORPUS #%d: extracted ngrams' % (corpus.id))
+    index_hyperdata(corpus)
+    print('CORPUS #%d: indexed hyperdata' % (corpus.id))
+
+    # -> 'favorites' node
+    favs = corpus.add_child(
+            typename='FAVORITES', name='favorite docs in "%s"' % corpus.name
+            )
+    session.add(favs)
+    session.commit()
+    print('CORPUS #%d: [%s] new favorites node #%i' % (corpus.id, t(), favs.id))
+
+
+    corpus.status('Index', progress=1, complete=True)
+    corpus.save_hyperdata()
+    session.commit()
+
+
+    # -------------------------------
+    # temporary ngram lists workflow
+    # -------------------------------
+
+    corpus.status('Lists', progress=0)
+    corpus.save_hyperdata()
+    session.commit()
+
+
+    print('CORPUS #%d: [%s] starting ngram lists computation' % (corpus.id, t()))
+
+    # -> stoplist: filter + write (to Node and NodeNgram)
+    stop_id = do_stoplist(corpus)
+    print('CORPUS #%d: [%s] new stoplist node #%i' % (corpus.id, t(), stop_id))
+
+    # -> write groups to Node and NodeNgramNgram
+    group_id = compute_groups(corpus, stoplist_id = None)
+    print('CORPUS #%d: [%s] new grouplist node #%i' % (corpus.id, t(), group_id))
+
+    # ------------
+    # -> write occurrences to Node and NodeNodeNgram
+    occ_id = compute_occs(corpus, groupings_id = group_id)
+    print('CORPUS #%d: [%s] new occs node #%i' % (corpus.id, t(), occ_id))
+
+    # -> write cumulated ti_ranking (tfidf ranking vector) to Node and NodeNodeNgram
+    tirank_id = compute_ti_ranking(corpus,
+                                   groupings_id = group_id,
+                                   count_scope="global")
+    print('CORPUS #%d: [%s] new ti ranking node #%i' % (corpus.id, t(), tirank_id))
+
+    # -> mainlist: filter + write (to Node and NodeNgram)
+    #    (the ti ranking node computed just above provides the ranking scores)
+    mainlist_id = do_mainlist(corpus,
+                              ranking_scores_id = tirank_id,
+                              stoplist_id = stop_id)
+    print('CORPUS #%d: [%s] new mainlist node #%i' % (corpus.id, t(), mainlist_id))
+
+    # -> write local tfidf similarities to Node and NodeNodeNgram
+    ltfidf_id = compute_tfidf_local(corpus,
+                                    on_list_id=mainlist_id,
+                                    groupings_id = group_id)
+    print('CORPUS #%d: [%s] new localtfidf node #%i' % (corpus.id, t(), ltfidf_id))
+    # => used for doc <=> ngram association
+
+    # ------------
+    # -> cooccurrences on mainlist: compute + write (=> Node and NodeNgramNgram)
+    #    (the returned value is handed to compute_specificity() as cooc_matrix)
+    coocs = compute_coocs(corpus,
+                            on_list_id = mainlist_id,
+                            groupings_id = group_id,
+                            just_pass_result = True)
+    print('CORPUS #%d: [%s] computed mainlist coocs for specif rank' % (corpus.id, t()))
+
+    # -> specificity: compute + write (=> NodeNodeNgram)
+    spec_id = compute_specificity(corpus,cooc_matrix = coocs)
+    # no need here for subforms because cooc already counted them in mainform
+    print('CORPUS #%d: [%s] new specificity node #%i' % (corpus.id, t(), spec_id))
+
+    # maplist: compute + write (to Node and NodeNgram)
+    map_id = do_maplist(corpus,
+                        mainlist_id = mainlist_id,
+                        specificity_id=spec_id,
+                        grouplist_id=group_id
+                        )
+    print('CORPUS #%d: [%s] new maplist node #%i' % (corpus.id, t(), map_id))
+
+    print('CORPUS #%d: [%s] FINISHED ngram lists computation' % (corpus.id, t()))
+
+    corpus.status('Lists', progress=0, complete=True)
+    corpus.save_hyperdata()
+    session.commit()
+
+
+    # the owner is only notified when settings.DEBUG is exactly False
+    if DEBUG is False:
+        print('CORPUS #%d: [%s] FINISHED Sending email notification' % (corpus.id, t()))
+        notify_owner(corpus)
+
+    corpus.status('Workflow', progress=10, complete=True)
+    corpus.save_hyperdata()
+    session.commit()
+
+
+
+@shared_task
+def recount(corpus):
+    """
+    Recount essential metrics of the toolchain after group modifications.
+
+    ==> updates all scores in terms table
+    ==> updates tfidf relationship b/w term and doc
+
+    When groups change, the metrics need to be updated because subforms must be
+    added to their new mainform aggregate values:
+         - occurrences
+         - ndocs
+         - ti_rank
+         - coocs
+         - specificity
+         - tfidf
+
+    NB: no new extraction, no list change, just the metrics
+
+    `corpus` is either a corpus Node or the integer id of one (same
+    convention as the other tasks of this module).
+
+    The corpus must already have GROUPLIST and MAINLIST children. Previous
+    OCCURRENCES, TIRANK-GLOBAL, SPECIFICITY and TFIDF-CORPUS children are
+    overwritten when they exist (their id is passed as overwrite_id),
+    otherwise None is passed as overwrite_id.
+
+    Returns None.
+    """
+    # retrieve corpus from database from id
+    if isinstance(corpus, int):
+        corpus_id = corpus
+        corpus = session.query(Node).filter(Node.id == corpus_id).first()
+        if corpus is None:
+            print('NO SUCH CORPUS: #%d' % corpus_id)
+            return
+
+    # 1) we'll need the new groups and mainlist as basis
+    group_id = corpus.children("GROUPLIST").first().id
+    mainlist_id = corpus.children("MAINLIST").first().id
+
+    # 2) and we're going to overwrite the previous metric nodes
+    #    (None when a metric node doesn't exist yet)
+    old_occ_id    = _first_child_id(corpus, "OCCURRENCES")
+    old_tirank_id = _first_child_id(corpus, "TIRANK-GLOBAL")
+    old_spec_id   = _first_child_id(corpus, "SPECIFICITY")
+    old_ltfidf_id = _first_child_id(corpus, "TFIDF-CORPUS")
+
+    # 3) we redo the required toolchain parts
+    # -------------------------------------------
+
+    # Instantiate status
+    corpus.status('Recounting mini-workflow', progress=1)
+    corpus.save_hyperdata()
+    session.commit()
+
+    # -> overwrite occurrences (=> NodeNodeNgram)
+    occ_id = compute_occs(corpus,
+                            groupings_id = group_id,
+                            overwrite_id=old_occ_id)
+    print('RECOUNT #%d: [%s] updated occs node #%i' % (corpus.id, t(), occ_id))
+
+    # -> write cumulated ti_ranking (tfidf ranking vector) (=> NodeNodeNgram)
+    tirank_id = compute_ti_ranking(corpus,
+                                   groupings_id = group_id,
+                                   count_scope="global",
+                                   overwrite_id=old_tirank_id)
+    print('RECOUNT #%d: [%s] updated ti ranking node #%i' % (corpus.id, t(), tirank_id))
+
+    # -> write local tfidf similarities to (=> NodeNodeNgram)
+    ltfidf_id = compute_tfidf_local(corpus,
+                                    on_list_id = mainlist_id,
+                                    groupings_id = group_id,
+                                    overwrite_id = old_ltfidf_id)
+    print('RECOUNT #%d: [%s] updated localtfidf node #%i' % (corpus.id, t(), ltfidf_id))
+    # => used for doc <=> ngram association
+
+    # ------------
+    # -> cooccurrences on mainlist: compute + write (=> NodeNgramNgram)
+    #    (the returned value is handed to compute_specificity() as cooc_matrix)
+    coocs = compute_coocs(corpus,
+                            on_list_id = mainlist_id,
+                            groupings_id = group_id,
+                            just_pass_result = True)
+    print('RECOUNT #%d: [%s] updated mainlist coocs for specif rank' % (corpus.id, t()))
+
+    # -> specificity: compute + write (=> NodeNgram)
+    spec_id = compute_specificity(corpus,cooc_matrix = coocs, overwrite_id = old_spec_id)
+
+
+    print('RECOUNT #%d: [%s] updated specificity node #%i' % (corpus.id, t(), spec_id))
+
+    print('RECOUNT #%d: [%s] FINISHED metric recounts' % (corpus.id, t()))
+
+    corpus.status('Recounting mini-workflow', progress=10, complete=True)
+    corpus.save_hyperdata()
+    session.commit()
+
+
+def _first_child_id(corpus, typename):
+    """
+    Return the id of the first child of `corpus` having type `typename`,
+    or None when the corpus has no such child.
+
+    Replaces a bare try/except around `.first().id`: only the "no such
+    child" case maps to None, any other error is left to propagate.
+    """
+    child = corpus.children(typename).first()
+    if child is None:
+        return None
+    return child.id
+
+
+
+
+
+def t():
+    """Return the current local time as a 'YYYY-MM-DD_HH:MM:SS' stamp (used in log lines)."""
+    now = datetime.now()
+    return now.strftime("%Y-%m-%d_%H:%M:%S")
--- a/gargantext/util/toolchain/metric_specificity.py
+++ b/gargantext/util/toolchain/metric_specificity.py
@@ -99,7 +99,7 @@ def compute_specificity(corpus, cooc_id=None, cooc_matrix=None, overwrite_id = N
    if overwrite_id:
        # overwrite pre-existing id
        the_id = overwrite_id
-        session.query(NodeNodeNgram).filter(NodeNodeNgram.node1_id==the_id).delete()
+        session.query(NodeNgram).filter(NodeNgram.node_id==the_id).delete()
        session.commit()
    else:
        specnode = corpus.add_child(

--- a/gargantext/util/toolchain/metric_tfidf.py
+++ b/gargantext/util/toolchain/metric_tfidf.py
@@ -345,11 +345,11 @@ def compute_ti_ranking(corpus,
        # create the new TFIDF-XXXX node to get an id
        tir_nd = corpus.add_child()
        if count_scope == "local":
-            tir_nd.typename  = "TFIDF-CORPUS"
+            # 'TIRANK-LOCAL' is the name registered in constants.py (LISTTYPES and NODETYPES)
+            tir_nd.typename  = "TIRANK-LOCAL"
            tir_nd.name      = "ti rank (%i ngforms in corpus:%s)" % (
                                     total_ngramforms, corpus_id)
        elif count_scope == "global":
-            tir_nd.typename  = "TFIDF-GLOBAL"
+            tir_nd.typename  = "TIRANK-GLOBAL"
            tir_nd.name      = "ti rank (%i ngforms %s in corpora of sourcetype:%s)" % (
                                       total_ngramforms,
                                       ("from corpus %i" % corpus_id) if (termset_scope == "local") else "" ,

--- a/gargantext/views/api/metrics.py
+++ b/gargantext/views/api/metrics.py
+from gargantext.util.db_cache   import cache
+from gargantext.util.http       import ValidationException, APIView \
+                                     , HttpResponse, JsonHttpResponse
+from gargantext.util.toolchain.main import recount
+from datetime                   import datetime
+
+class CorpusMetrics(APIView):
+    """
+    API view for the metrics of one corpus; only PATCH is implemented here.
+    """
+
+    def patch(self, request, corpusnode_id):
+        """
+        PATCH triggers recount of metrics for the specified corpus.
+
+        ex PATCH http://localhost:8000/api/metrics/14072
+                                                   -----
+                                                 corpus_id
+
+        Responds with status 401 when the user is not authenticated.
+        Raises ValidationException when corpusnode_id doesn't resolve to a
+        node. Otherwise runs recount() synchronously and responds with json
+        holding the corpus id and the elapsed time.
+        """
+        print("==> update metrics request on ", corpusnode_id)
+
+        if not request.user.is_authenticated():
+            # can't use @requires_auth because of positional 'self' within class
+            return HttpResponse('Unauthorized', status=401)
+
+        try:
+            corpus = cache.Node[int(corpusnode_id)]
+        except Exception:
+            # any lookup failure is reported below as an invalid id, but
+            # SystemExit / KeyboardInterrupt are not swallowed anymore
+            corpus = None
+
+        if corpus is None:
+            raise ValidationException("%s is not a valid corpus node id."
+                                        % corpusnode_id)
+
+        t_before = datetime.now()
+        # =============
+        recount(corpus)
+        # =============
+        t_after = datetime.now()
+
+        return JsonHttpResponse({
+            'corpus_id' : corpusnode_id,
+            'took': "%f s." % (t_after - t_before).total_seconds()
+        })
--- a/gargantext/views/api/ngramlists.py
+++ b/gargantext/views/api/ngramlists.py
@@ -34,24 +34,36 @@ def _query_list(list_id,
    """
    if not details:
        # simple contents
-        query = session.query(NodeNgram.ngram_id)
+        query = session.query(NodeNgram.ngram_id).filter(NodeNgram.node_id == list_id)
    else:
        # detailed contents (terms and some NodeNodeNgram for score)
+
+        # NB: score can be undefined (eg ex-subform that now became free)
+        #     ==> we need outerjoin
+        #     and the filter needs to have scoring_metric_id so we do it before
+        ScoresTable = (session
+                        .query(NodeNodeNgram.score, NodeNodeNgram.ngram_id)
+                        .filter(NodeNodeNgram.node1_id == scoring_metric_id)
+                        .subquery()
+                        )
+
        query = (session
                    .query(
                        NodeNgram.ngram_id,
                        Ngram.terms,
-                        NodeNodeNgram.score
+                        ScoresTable.c.score
                     )
                    .join(Ngram, NodeNgram.ngram_id == Ngram.id)
-                    .join(NodeNodeNgram, NodeNgram.ngram_id == NodeNodeNgram.ngram_id)
-                    .filter(NodeNodeNgram.node1_id == scoring_metric_id)
-                    .order_by(desc(NodeNodeNgram.score))
-                )

-    # main filter
-    # -----------
-    query = query.filter(NodeNgram.node_id == list_id)
+                    # main filter ----------------------
+                    .filter(NodeNgram.node_id == list_id)
+
+                    # scores if possible
+                    .outerjoin(ScoresTable,
+                               ScoresTable.c.ngram_id == NodeNgram.ngram_id)
+
+                    .order_by(desc(ScoresTable.c.score))
+                )

    if pagination_limit:
        query = query.limit(pagination_limit)
@@ -128,13 +140,18 @@ class GroupChange(APIView):
     }

    Chained effect:
+        any previous group under mainformA or B will be overwritten
+
+
+    The DELETE HTTP method also works, with same url
+                 (and simple array in the data)

    NB: request.user is also checked for current authentication status
    """

    def initial(self, request):
        """
-        Before dispatching to post()
+        Before dispatching to post() or delete()

        Checks current user authentication to prevent remote DB manipulation
        """
@@ -150,28 +167,29 @@ class GroupChange(APIView):
          => removes couples where newly reconnected ngrams where involved
          => adds new couples from GroupsBuffer of terms view

-        TODO recalculate scores after new groups
        TODO see use of util.lists.Translations
-        TODO benchmark selective delete compared to entire list rewrite
+
+        POST data:
+            <QueryDict: {'1228[]': ['891', '1639']}> => creates 1228 - 891
+                                                            and 1228 - 1639
+        request.POST.lists() iterator where each elt is like :('1228[]',['891','1639'])
        """
        group_node = get_parameters(request)['node']
-        all_nodes_involved = []
+        all_mainforms = []
        links = []

        for (mainform_key, subforms_ids) in request.POST.lists():
            mainform_id = mainform_key[:-2]   # remove brackets '543[]' -> '543'
-            all_nodes_involved.append(mainform_id)
+            all_mainforms.append(mainform_id)
            for subform_id in subforms_ids:
                links.append((mainform_id,subform_id))
-                all_nodes_involved.append(subform_id)

-        # remove selectively all groupings with these nodes involved
-        # TODO benchmark
+        # remove selectively all groupings with these mainforms
+        # using IN is correct in this case: list of ids is short and external
+        # see stackoverflow.com/questions/444475/
        old_links = (session.query(NodeNgramNgram)
                    .filter(NodeNgramNgram.node_id == group_node)
-                    .filter(or_(
-                            NodeNgramNgram.ngram1_id.in_(all_nodes_involved),
-                            NodeNgramNgram.ngram2_id.in_(all_nodes_involved)))
+                    .filter(NodeNgramNgram.ngram1_id.in_(all_mainforms))
                )
        n_removed = old_links.delete(synchronize_session=False)
        session.commit()
@@ -189,6 +207,40 @@ class GroupChange(APIView):
            }, 200)


+    def delete(self, request):
+        """
+        Remove synonym groups from the group node given in the url.
+
+        The body lists mainform ngram ids. It is sent as a json like
+        { 'keys':'["11492","16438"]' } and is received here as:
+                <QueryDict: {'keys[]': ['11492', '16438']}>
+
+        Every link of that group node whose mainform (ngram1_id) is one of
+        the listed ids gets deleted.
+
+        Responds with json {'count_removed': <number of deleted links>}.
+        """
+        # group node: from the url
+        groupnode_id = get_parameters(request)['node']
+
+        print(request.POST)
+
+        # mainforms: from the data in body
+        mainform_ids = request.POST.getlist('keys[]')
+
+        # using IN is fine here: the id list comes from the request
+        doomed_links = session.query(NodeNgramNgram).filter(
+            NodeNgramNgram.node_id == groupnode_id
+        ).filter(
+            NodeNgramNgram.ngram1_id.in_(mainform_ids)
+        )
+        removed_count = doomed_links.delete(synchronize_session=False)
+        session.commit()
+
+        payload = {'count_removed': removed_count}
+        return JsonHttpResponse(payload, 200)
+
+

 class ListChange(APIView):
    """

--- a/gargantext/views/api/nodes.py
+++ b/gargantext/views/api/nodes.py
@@ -89,7 +89,7 @@ class NodeListResource(APIView):
            response = HttpResponse(content_type='text/csv')
            response['Content-Disposition'] = 'attachment; filename="Gargantext_Corpus.csv"'

-            writer = csv.writer(response, delimiter='\t')
+            writer = csv.writer(response, delimiter='\t', quoting=csv.QUOTE_MINIMAL)

            keys =  [ 'title'   , 'journal'
                    , 'publication_year', 'publication_month', 'publication_day'

--- a/gargantext/views/api/urls.py
+++ b/gargantext/views/api/urls.py
 from django.conf.urls import url

 from . import nodes
+from . import metrics
 from . import ngramlists
 from . import analytics

@@ -19,6 +20,14 @@ urlpatterns = [ url(r'^nodes$'                , nodes.NodeListResource.as_view()
              , url(r'^nodes/(\d+)/favorites$', nodes.CorpusFavorites.as_view()      )
              # in these two routes the node is supposed to be a *corpus* node

+
+              , url(r'^metrics/(\d+)$',         metrics.CorpusMetrics.as_view()      )
+                # update all metrics for a corpus
+                #  ex: PATCH metrics/123
+                #                     \
+                #                   corpus id
+
+
              , url(r'^ngramlists/change$', ngramlists.ListChange.as_view()          )
                # add or remove ngram from a list
                #  ex: add <=> PUT ngramlists/change?list=42&ngrams=1,2

--- a/static/lib/gargantext/NGrams_dyna_chart_and_table.js
+++ b/static/lib/gargantext/NGrams_dyna_chart_and_table.js
--- a/static/lib/gargantext/garganrest.js
+++ b/static/lib/gargantext/garganrest.js
@@ -69,10 +69,11 @@ var Resource = function(url_path) {
        });
    };
    // change an item
-    this.change = this.update = function(item, callback) {
+    this.change = this.update = function(id, callback) {
        $.ajax({
-            url: url_path + '/' + item.id,
+            url: url_path + '/' + id,
            type: 'PATCH',
+
            success: callback
        });
    };
@@ -84,14 +85,18 @@ var Resource = function(url_path) {
        $.ajax({
            url: url_path + '/' + id,
            type: 'DELETE',
+
            success: callback
        });
    };
    // add an item
    this.add = this.append = function(value, callback) {
        $.ajax({
+
+            // todo define id
            url: url_path + '/' + id,
            type: 'POST',
+
            success: callback
        });
    };
@@ -99,12 +104,12 @@ var Resource = function(url_path) {

 var GarganRest = function(base_path, path_list) {
    var that = this;
-    $.each(path_list, function(p, path){
+    $.each(path_list, function(i, path){
        that[path] = new Resource(base_path + path);
    });
 };

-garganrest = new GarganRest('/api/', ['nodes']);
+garganrest = new GarganRest('/api/', ['nodes', 'metrics']);


 // var log = function(result){console.log(result);};

--- a/static/lib/gargantext/tables.css
+++ b/static/lib/gargantext/tables.css
@@ -61,6 +61,7 @@ span.note {

 span.note.glyphicon {
  color: #555;
+  top:0;
 }

 p.note {
@@ -129,14 +130,10 @@ tr:hover {
    margin-bottom: 1em;
 }

-.oldsubform {
+.subform {
    color: #777 ;
 }

-.usersubform {
-    color: blue ;
-}
-
 .dynatable-record-count {
  font-size: 0.7em;
 }

--- a/templates/pages/projects/project.html
+++ b/templates/pages/projects/project.html
@@ -88,13 +88,29 @@
                                            {{corpus.name}}, {{ corpus.count }} documents {{ corpus.status_message }}
                                        </a>
                                    </div>
-                                    <div class="col-md-2 content">
-                                        <a href="/projects/{{project.id}}/corpora/{{corpus.id}}">
+                                    <div class="col-md-3 content">
+                                        <a href="/projects/{{project.id}}/corpora/{{corpus.id}}"
+                                        title="View the corpus">
                                        <button type="button" class="btn btn-default" aria-label="Left Align">
                                              <span class="glyphicon glyphicon-eye-open" aria-hidden="true"></span>
                                        </button>

                                        </a>
+                                        <!--  -->
+                                        <button type="button" class="btn btn-default yopla" data-container="body" data-toggle="popover" data-placement="bottom"  data-trigger="focus"
+                                            data-content="
+                                            <ul>
+                                                <li
+                                                onclick=&quot;
+                                                        garganrest.metrics.update({{corpus.id}}, function(){alert('The corpus ({{corpus.name|escapejs}}) was updated')});
+                                                        &quot;>
+                                                    <a href='#'>Recalculate ngram metrics</a> <br/> (can take a little while)
+                                                </li>
+                                            </ul>
+                                            ">
+                                            <span class="glyphicon glyphicon-dashboard" aria-hidden="true"
+                                            title='Recalculate ngram scores and similarities'></span>
+                                        </button>
                                        <button type="button" class="btn btn-default" data-container="body" data-toggle="popover" data-placement="bottom"
                                            data-content="
                                            <ul>
@@ -103,14 +119,15 @@
                                                        garganrest.nodes.delete({{corpus.id}}, function(){$('#corpus_'+{{corpus.id}}).remove()});
                                                        $(this).parent().parent().remove();
                                                    &quot;>
-                                                    <a href=&quot;#&quot;>Delete this</a>
+                                                    <a href='#'>Delete this</a>
                                                </li>
                                            </ul>
                                            ">
-                                            <span class="glyphicon glyphicon-trash" aria-hidden="true"></span>
+                                            <span class="glyphicon glyphicon-trash" aria-hidden="true"
+                                            title='Delete this corpus'></span>
                                        </button>
                                    </div>
-                                    <div class="col-md-4 content">
+                                    <div class="col-md-3 content">
                                        {% for state in corpus.hyperdata.statuses %}
                                                {% ifequal state.action "Workflow" %}
                                                    {% if state.complete %}