Merge remote-tracking branch 'origin/romain-testing' into testing-merge

e42e2b8d · delanoe · 9f252d49 · 1697b72c · e42e2b8d · e42e2b8d
Commit e42e2b8d authored Sep 21, 2016 by delanoe
7 changed files
--- a/annotations/static/annotations/app.css
+++ b/annotations/static/annotations/app.css
@@ -151,12 +151,15 @@
  transition: all 0.25s linear;
 }

-.selection {
+/* this was used for the p or div that *contained* a selection */
+/*.selection {
  color: #aaa;
-}
+}*/
+
+/* this is used for the selected text itself */
 ::selection {
  color: black;
-  background-color: rgba(0, 0, 0, 0.4);
+  background-color: #aaa;
 }

 .noselection {

--- a/annotations/static/annotations/app.js
+++ b/annotations/static/annotations/app.js
@@ -97,6 +97,21 @@
            //             +propToRead+" ("+cache[propToRead]+")")
            params[key] = cache[propToRead]
          }
+          else if (typeof val == "object" && val["fromCacheIfElse"]) {
+            var propToReadIf = val["fromCacheIfElse"][0]
+            var propToReadElse = val["fromCacheIfElse"][1]
+            // console.log("reading from cache: response data property " +
+            //             "if:"+propToReadIf+" ("+cache[propToReadIf]+")"+
+            //             " else:"+propToReadElse+" ("+cache[propToReadElse]+")")
+            var valueIf = cache[propToReadIf]
+            var valueElse = cache[propToReadElse]
+            if (valueIf && valueIf != 'null' && valueIf != '') {
+              params[key] = valueIf
+            }
+            else {
+              params[key] = valueElse
+            }
+          }
      }

      // Now we run the call
@@ -149,8 +164,8 @@
    // -------------------------------------------------------------------------

    // debug
-    // console.log("==> $rootScope <==")
-    // console.log($rootScope)
+    console.log("==> $rootScope <==")
+    console.log($rootScope)
  });

 })(window);
--- a/annotations/static/annotations/highlight.js
+++ b/annotations/static/annotations/highlight.js
--- a/annotations/static/annotations/http.js
+++ b/annotations/static/annotations/http.js
@@ -90,17 +90,20 @@
  * MainApiAddNgramHttpService: Create and index a new ngram
  * ===========================
  * route: PUT api/ngrams?text=mynewngramstring&corpus=corpus_id
-  * ------
+  *
+  * NB it also checks if ngram exists (returns the preexisting id)
+  *    and if it has a mainform/group (via 'testgroup' option)
+  *                                   (useful if we add it to a list afterwards)
  *
  */
  http.factory('MainApiAddNgramHttpService', function($resource) {
    return $resource(
       // adding explicit "http://" b/c this is a cross origin request
      'http://' + window.GARG_ROOT_URL
-                + "/api/ngrams?text=:ngramStr&corpus=:corpusId",
+                + "/api/ngrams?text=:ngramStr&corpus=:corpusId&testgroup",
      {
        ngramStr: '@ngramStr',
-        corpusId: '@corpusId'
+        corpusId: '@corpusId',
      },
      {
        put: {

--- a/annotations/static/annotations/ngramlist.js
+++ b/annotations/static/annotations/ngramlist.js
@@ -141,9 +141,9 @@
            crudCallsToMake = [
                {'service': MainApiAddNgramHttpService, 'action': 'put',
                'params' : {'ngramStr':value, corpusId: $rootScope.corpusId},
-                'dataPropertiesToCache': ['id'] },
+                'dataPropertiesToCache': ['id', 'group'] },
                {'service': MainApiChangeNgramHttpService, 'action': 'put',
-                'params' : {'listId':tgtListId, 'ngramIdList': {'fromCache': 'id'} } }
+                'params' : {'listId':tgtListId, 'ngramIdList': {'fromCacheIfElse': ['group','id']} } }
            ];
            break;

@@ -151,9 +151,9 @@
            crudCallsToMake = [
                {'service': MainApiAddNgramHttpService, 'action': 'put',
                 'params' : {'ngramStr':value, corpusId: $rootScope.corpusId},
-                 'dataPropertiesToCache': ['id'] },
+                 'dataPropertiesToCache': ['id', 'group'] },
                {'service': MainApiChangeNgramHttpService, 'action': 'put',
-                 'params' : {'listId':tgtListId, 'ngramIdList': {'fromCache': 'id'} } }
+                 'params' : {'listId':tgtListId, 'ngramIdList': {'fromCacheIfElse': ['group','id']} } }
            ];
            break;

@@ -161,11 +161,11 @@
            crudCallsToMake = [
                {'service': MainApiAddNgramHttpService, 'action': 'put',
                'params' : {'ngramStr':value, corpusId: $rootScope.corpusId},
-                'dataPropertiesToCache': ['id'] },
+                'dataPropertiesToCache': ['id', 'group'] },
                {'service': MainApiChangeNgramHttpService, 'action': 'put',
-                 'params' : {'listId':$rootScope.listIds.MAINLIST, 'ngramIdList': {'fromCache': 'id'} } },
+                 'params' : {'listId':$rootScope.listIds.MAINLIST, 'ngramIdList': {'fromCacheIfElse': ['group','id']} } },
                {'service': MainApiChangeNgramHttpService, 'action': 'put',
-                'params' : {'listId':tgtListId, 'ngramIdList': {'fromCache': 'id'} } }
+                'params' : {'listId':tgtListId, 'ngramIdList': {'fromCacheIfElse': ['group','id']} } }
            ];
            break;
      }

--- a/gargantext/util/toolchain/ngrams_addition.py
+++ b/gargantext/util/toolchain/ngrams_addition.py
@@ -19,6 +19,7 @@ procedure:

 from gargantext.models   import Ngram, Node, NodeNgram
 from gargantext.util.db  import session, bulk_insert
+from gargantext.util.db  import bulk_insert_ifnotexists # £TODO debug
 from sqlalchemy          import distinct
 from re                  import findall, IGNORECASE

@@ -41,20 +42,13 @@ def index_new_ngrams(ngram_ids, corpus, keys=('title', 'abstract', )):
    @param keys: the hyperdata fields to index
    """

-    # check the ngrams we won't process (those that were already indexed)
-    indexed_ngrams_subquery = (session
-                                .query(distinct(NodeNgram.ngram_id))
-                                .join(Node, Node.id == NodeNgram.node_id)
-                                .filter(Node.parent_id == corpus.id)
-                                .filter(Node.typename == 'DOCUMENT')
-                                .subquery()
-                                )
-
-    # retrieve the ngrams from our list, filtering out the already indexed ones
+    # retrieve *all* the ngrams from our list
+    # (even if some relations may be already indexed
+    #  b/c they were perhaps not extracted in all docs
+    #   => we'll use already_indexed later)
    todo_ngrams = (session
                    .query(Ngram)
                    .filter(Ngram.id.in_(ngram_ids))
-                    .filter(~ Ngram.id.in_(indexed_ngrams_subquery))
                    .all()
                    )

@@ -90,22 +84,49 @@ def index_new_ngrams(ngram_ids, corpus, keys=('title', 'abstract', )):
                    else:
                        node_ngram_to_write[doc.id][ngram.id] += n_occs

+    # debug
+    # print("new node_ngrams before filter:", node_ngram_to_write)
+
+    # check the relations we won't insert (those that were already indexed)
+    # NB costly but currently impossible with bulk_insert_ifnotexists
+    #                                         b/c double uniquekey
+    already_indexed = (session
+                        .query(NodeNgram.node_id, NodeNgram.ngram_id)
+                        .join(Node, Node.id == NodeNgram.node_id)
+                        .filter(Node.parent_id == corpus.id)
+                        .filter(Node.typename == 'DOCUMENT')
+                        .all()
+                        )
+    filter_out = {(nd_id,ng_id) for (nd_id,ng_id) in already_indexed}
+    # POSSIBLE update those that are filtered out if wei_previous != wei
+
    # integrate all at the end
    my_new_rows = []
    add_new_row = my_new_rows.append
    for doc_id in node_ngram_to_write:
        for ngram_id in node_ngram_to_write[doc_id]:
-            wei = node_ngram_to_write[doc_id][ngram_id]
-            add_new_row([doc_id, ngram_id, wei])
+            if (doc_id, ngram_id) not in filter_out:
+                wei = node_ngram_to_write[doc_id][ngram_id]
+                add_new_row([doc_id, ngram_id, wei])

    del node_ngram_to_write

+    # debug
+    # print("new node_ngrams after filter:", my_new_rows)
+
    bulk_insert(
        table = NodeNgram,
        fields = ('node_id', 'ngram_id', 'weight'),
        data = my_new_rows
    )

+    # bulk_insert_ifnotexists(
+    #     model = NodeNgram,
+    #     uniquekey = ('node_id','ngram_id'),        <= currently impossible
+    #     fields = ('node_id', 'ngram_id', 'weight'),
+    #     data = my_new_rows
+    # )
+
    n_added = len(my_new_rows)
    print("index_new_ngrams: added %i new NodeNgram rows" % n_added)


--- a/gargantext/views/api/ngrams.py
+++ b/gargantext/views/api/ngrams.py
@@ -2,8 +2,8 @@ from gargantext.util.http       import ValidationException, APIView \
                                     , get_parameters, JsonHttpResponse\
                                     , HttpResponse
 from gargantext.util.db         import session, func
-from gargantext.util.db_cache   import cache 
-from gargantext.models          import Node, Ngram, NodeNgram
+from gargantext.util.db_cache   import cache
+from gargantext.models          import Node, Ngram, NodeNgram, NodeNgramNgram
 from sqlalchemy.orm             import aliased
 from re                         import findall

@@ -21,7 +21,7 @@ class ApiNgrams(APIView):
        """
        Used for analytics
        ------------------
-        
+
        Get ngram listing + counts in a given scope
        """
        # parameters retrieval and validation
@@ -83,24 +83,30 @@ class ApiNgrams(APIView):

         1 - checks user authentication before any changes

-         2 - adds the ngram to Ngram table in DB
+         2 - checks if ngram is already in Ngram table in DB
+              if yes returns ngram_id and optionally mainform_id
+              otherwise continues
+
+         3 - adds the ngram to Ngram table in DB

-         3 - (if corpus param is present)
+         4 - (if corpus param is present)
             adds the ngram doc counts to NodeNgram table in DB
             (aka "index the ngram" through the docs of the corpus)

-         4 - returns json with:
-             'msg'   => a success msg 
+         5 - returns json with:
+             'msg'   => a success msg
             'text'  => the initial text content
             'term'  => the normalized text content
             'id'    => the new ngram_id
             'count' => the number of docs with the ngram in the corpus
                        (if corpus param is present)
+             'group' => the mainform_id if applicable

        possible inline parameters
        --------------------------
        @param    text=<ngram_string>         [required]
        @param    corpus=<CORPUS_ID>          [optional]
+        @param    testgroup (true if present) [optional, requires corpus]
        """

        # 1 - check user authentication
@@ -122,6 +128,9 @@ class ApiNgrams(APIView):
                                        It requires a "text" parameter,\
                                        for instance /api/ngrams?text=hydrometallurgy')

+        if ('testgroup' in params) and (not ('corpus' in params)):
+            raise ValidationException("'testgroup' param requires 'corpus' param")
+
        # if we have a 'corpus' param (to do the indexing)...
        do_indexation = False
        if 'corpus' in params:
@@ -143,10 +152,33 @@ class ApiNgrams(APIView):
        try:
            log_msg = ""
            ngram_id = None
+            mainform_id = None
+
            preexisting = session.query(Ngram).filter(Ngram.terms==ngram_str).first()
+
            if preexisting is not None:
                ngram_id = preexisting.id
                log_msg += "ngram already existed (id %i)\n" % ngram_id
+
+                # in the context of a corpus we can also check if it has a mainform
+                # (useful if we add the ngram to a list afterwards)
+                if 'testgroup' in params:
+                    groupings_id = (session.query(Node.id)
+                                           .filter(Node.parent_id == corpus_id)
+                                           .filter(Node.typename == 'GROUPLIST')
+                                           .first()
+                                    )
+                    had_mainform = (session.query(NodeNgramNgram.ngram1_id)
+                                          .filter(NodeNgramNgram.node_id == groupings_id)
+                                          .filter(NodeNgramNgram.ngram2_id == preexisting.id)
+                                          .first()
+                                    )
+                    if had_mainform:
+                        mainform_id = had_mainform[0]
+                        log_msg += "ngram had mainform (id %i) in this corpus" % mainform_id
+                    else:
+                        log_msg += "ngram was not in any group for this corpus"
+
            else:
                # 2 - insert into Ngrams
                new_ngram = Ngram(terms=ngram_str, n=ngram_size)
@@ -165,6 +197,7 @@ class ApiNgrams(APIView):
                'text': original_text,
                'term': ngram_str,
                'id' : ngram_id,
+                'group' : mainform_id,
                'count': n_added if do_indexation else 'no corpus provided for indexation'
                }, 200)