Merge remote-tracking branch 'origin/romain-testing' into testing-merge

e42e2b8d · delanoe · 9f252d49 · 1697b72c · e42e2b8d · e42e2b8d
Commit e42e2b8d authored Sep 21, 2016 by delanoe
7 changed files
--- a/annotations/static/annotations/app.css
+++ b/annotations/static/annotations/app.css
@@ -151,12 +151,15 @@
  transition: all 0.25s linear;
 }

-.selection {
+/* this was used for the p or div that *contained* a selection */
+/*.selection {
  color: #aaa;
-}
+}*/
+
+/* this is used for the selected text itself */
 ::selection {
  color: black;
-  background-color: rgba(0, 0, 0, 0.4);
+  background-color: #aaa;
 }

 .noselection {

--- a/annotations/static/annotations/app.js
+++ b/annotations/static/annotations/app.js
@@ -97,6 +97,21 @@
            //             +propToRead+" ("+cache[propToRead]+")")
            params[key] = cache[propToRead]
          }
+          else if (typeof val == "object" && val["fromCacheIfElse"]) {
+            var propToReadIf = val["fromCacheIfElse"][0]
+            var propToReadElse = val["fromCacheIfElse"][1]
+            // console.log("reading from cache: response data property " +
+            //             "if:"+propToReadIf+" ("+cache[propToReadIf]+")"+
+            //             " else:"+propToReadElse+" ("+cache[propToReadElse]+")")
+            var valueIf = cache[propToReadIf]
+            var valueElse = cache[propToReadElse]
+            if (valueIf && valueIf != 'null' && valueIf != '') {
+              params[key] = valueIf
+            }
+            else {
+              params[key] = valueElse
+            }
+          }
      }

      // Now we run the call
@@ -149,8 +164,8 @@
    // -------------------------------------------------------------------------

    // debug
-    // console.log("==> $rootScope <==")
-    // console.log($rootScope)
+    console.log("==> $rootScope <==")
+    console.log($rootScope)
  });

 })(window);
--- a/annotations/static/annotations/highlight.js
+++ b/annotations/static/annotations/highlight.js
@@ -50,6 +50,9 @@

      /*
      * Universal text selection
+      *
+      * "universal" <=> (Chrome, Firefox, IE, Safari, Opera...)
+      *                 cf. quirksmode.org/dom/range_intro.html
      */
      function getSelected() {
          if (window.getSelection) {
@@ -67,19 +70,15 @@
          }
          return false;
      }
-      // we only need one singleton at a time
-      var selection = getSelected();

-      /*
-      * When mouse selection is started, we highlight it
-      */
-      function toggleSelectionHighlight(text) {
-        if (text.trim() !== "" && !$element.hasClass('menu-is-opened')) {
-          $(".text-panel").addClass("selection");
-        } else {
-          $(".text-panel").removeClass("selection");
-        }
-      }
+      // £TODO extend "double click selection" on hyphen words
+      //       and reduce it on apostrophe ones (except firefox)
+      //       cf. stackoverflow.com/a/39005881/2489184
+      //           jsfiddle.net/avvhsruu/
+
+      // we only need one singleton at a time
+      // (<=> is only created once per doc, but value of annotation changes)
+      var selectionObj = getSelected();

      /*
      * Dynamically construct the selection menu scope
@@ -107,10 +106,11 @@
            $scope.selection_text = angular.copy(annotation);

            // debug
+            // console.log("toggleMenu with context:", context) ;
+            // console.log("toggleMenu with annotation: '" + JSON.stringify(annotation) +"'") ;
            // console.log("toggleMenu with \$scope.selection_text: '" + JSON.stringify($scope.selection_text) +"'") ;

            if (angular.isObject(annotation) && !$element.hasClass('menu-is-opened')) {
-
              // existing ngram
              var ngramId = annotation.uuid
              var mainformId = annotation.group
@@ -210,7 +210,7 @@
            }

            // "add" actions for non-existing ngram
-            else if (annotation.trim() !== "" && !$element.hasClass('menu-is-opened')) {
+            else if (annotation.trim() !== "" && ! context) {
              var newNgramText = annotation.trim()
              // new ngram (first call creates then like previous case for list)
              $scope.menuItems.push({
@@ -219,9 +219,9 @@
                      'crudCalls':[
                      {'service': MainApiAddNgramHttpService, 'action': 'put',
                       'params' : {'ngramStr':newNgramText, corpusId: $rootScope.corpusId},
-                       'dataPropertiesToCache': ['id'] },
+                       'dataPropertiesToCache': ['id', 'group'] },
                      {'service': MainApiChangeNgramHttpService, 'action': 'put',
-                       'params' : {'listId':stoplist_id, 'ngramIdList': {'fromCache': 'id'} } }
+                       'params' : {'listId':stoplist_id, 'ngramIdList': {'fromCacheIfElse': ['group','id']} } }
                      ]
                  }) ;
              $scope.menuItems.push({
@@ -230,9 +230,9 @@
                      'crudCalls':[
                      {'service': MainApiAddNgramHttpService, 'action': 'put',
                       'params' : {'ngramStr':newNgramText, corpusId: $rootScope.corpusId},
-                       'dataPropertiesToCache': ['id'] },
+                       'dataPropertiesToCache': ['id', 'group'] },
                      {'service': MainApiChangeNgramHttpService, 'action': 'put',
-                       'params' : {'listId':mainlist_id, 'ngramIdList': {'fromCache': 'id'} } }
+                       'params' : {'listId':mainlist_id, 'ngramIdList': {'fromCacheIfElse': ['group','id']} } }
                      ]
                  }) ;
              $scope.menuItems.push({
@@ -241,23 +241,27 @@
                      'crudCalls':[
                      {'service': MainApiAddNgramHttpService, 'action': 'put',
                       'params' : {'ngramStr':newNgramText, corpusId: $rootScope.corpusId},
-                       'dataPropertiesToCache': ['id'] },
+                       'dataPropertiesToCache': ['id', 'group'] },
                      {'service': MainApiChangeNgramHttpService, 'action': 'put',
-                       'params' : {'listId':mainlist_id, 'ngramIdList': {'fromCache': 'id'} } },
+                       'params' : {'listId':mainlist_id, 'ngramIdList': {'fromCacheIfElse': ['group','id']} } },
                      {'service': MainApiChangeNgramHttpService, 'action': 'put',
-                       'params' : {'listId':maplist_id, 'ngramIdList': {'fromCache': 'id'} } }
+                       'params' : {'listId':maplist_id, 'ngramIdList': {'fromCacheIfElse': ['group','id']} } }
                      ]
                  }) ;

              // show the menu
              $element.fadeIn(50);
              $element.addClass('menu-is-opened');
+              // console.warn("FADE IN menu", $element)
            }
            else {
+              // console.warn("=> else")
+
              // close the menu
              $scope.menuItems = [];
              $element.fadeOut(50);
              $element.removeClass('menu-is-opened');
+              // console.warn("FADE OUT menu", $element)
            }
          });
        });
@@ -283,26 +287,15 @@
      });

      /*
-      * Finish positioning the menu then display the menu
+      * Toggle the menu when clicking on an existing ngram or a free selection
      */
      $(".text-container").mouseup(function(e){
        $(".text-container").unbind("mousemove", positionMenu);
        $rootScope.$emit("positionAnnotationMenu", e.pageX, e.pageY);
-        toggleSelectionHighlight(selection.toString().trim());
-        toggleMenu(null, selection.toString().trim());
-      });

-      /*
-      * Toggle the menu when clicking on an existing ngram keyword
-      *
-      *  £TODO test: apparently this is never used ?
-      *  (superseded by TextSelectionController.onClick)
-      */
-      $(".text-container").delegate(':not("#selection")', "click", function(e) {
-        // if ($(e.target).hasClass("keyword-inline")) return;
        positionMenu(e);
-        toggleSelectionHighlight(selection.toString().trim());
-        toggleMenu(null, selection.toString().trim());
+        // console.warn("calling toggleMenu from *mouseup*")
+        toggleMenu(null, selectionObj.toString().trim());
      });

      $rootScope.$on("positionAnnotationMenu", positionElement);
@@ -322,9 +315,11 @@
        $rootScope.makeChainedCalls(0,   todoCrudCalls,   $rootScope.refresh)
        // syntax: (step_to_run_first,   list_of_steps,     lastCallback)

-        // hide the highlighted text and the menu element
-        $(".text-panel").removeClass("selection");
+        // hide the menu element
        $element.fadeOut(100);
+
+        // the highlighted text hides itself when deselected
+        // (thx to browser and css ::selection)
      };
    }
  ]);
@@ -407,10 +402,68 @@
        var template = templateBegin + templateEnd;
        var templateBeginRegexp = "<span ng-controller='TextSelectionController' ng-click='onClick\(\$event\)' class='keyword-inline'>";

-        var startPattern = "\\b((?:"+templateBeginRegexp+")*";
+        var startPattern = "(\\W|^)((?:"+templateBeginRegexp+")*";
        var middlePattern = "(?:<\/span>)*\\s(?:"+templateBeginRegexp+")*";
        var middlePattern = " ";
-        var endPattern = "(?:<\/span>)*)\\b";
+        var endPattern = "(?:<\/span>)*)(?=\\W|$)";
+
+        // --------------------------------------------------------------------------------
+        // Remarks about /\b/ and /(\W|^)/ and /(?=\W|$)/  etc.
+        //
+        // -----------------
+        // 1) we need to match entire words only
+        //
+        //  ex: "the manifestation manifest".match(/manifest/g)
+        //
+        //      => not good because it would hilight the substr
+        //         inside 2nd word "the manifestation manifest"
+        //                              ^^^^^^^^      ^^^^^^^^
+        //
+        //   so in this situation one usually uses \b (boundary)
+        //
+        //  ex: "the manifestation manifest".match(/\bmanifest\b/g)
+        //
+        //       ok: now only 3rd word is highlighted:
+        //               "the manifestation manifest"
+        //                                  ^^^^^^^^
+        // -----------------
+        //
+        // 2) but we can't really use boundary \b when we have accented chars
+        // ex:
+        //  no accent: "la moitié".match(/la/)         => ["la"]
+        //             "la moitié".match(/\bla\b/)     => ["la"]
+        //
+        //  but      "la moitié".match(/moitié/)     => ["moitié"]
+        //           "la moitié".match(/\bmoitié\b/) => []           <~~~ problem !
+        //
+        // cf. stackoverflow.com/questions/23458872/javascript-regex-word-boundary-b-issue
+        //     stackoverflow.com/questions/2881445/utf-8-word-boundary-regex-in-javascript
+        // -----------------
+        //
+        // 3) normally the typical replacement for \b would be:
+        //      - at start of word:   /(?<=\W|^)/  (lookbehind boundary)
+        //      - at end  of word:    /(?=\W|$)/   (lookahead boundary)
+        //
+        //   ...
+        //    but lookbehind not supported in js !! (sept 2016)
+        //    cf. stackoverflow.com/questions/30118815
+        // -----------------
+        //
+        // 4) so in conclusion we will use this strategy:
+        //
+        //      - at start of word:    /(\W|^)/        (boundary, may capture ' ' or '' into $1)
+        //      - for the html+word:   /<aa>bla</aa>/  (same pattern as before)
+        //      - at end  of word:     /(?=\W|$)/      (lookahead boundary)
+        //      - in replacement:     $1+anchor
+        //
+        //  => This way if $1 was ' ' (or other non word char),
+        //       then we re-add the char that we are replacing,
+        //     and if $1 was '' (beginning of str)
+        //       then we re-add nothing ;) )
+        //
+        // ex: "la moitié".replace(/(\s|^)moitié(?=\s|$)/, '$1hello') => "la hello"
+        //     "moitié la".replace(/(\s|^)moitié(?=\s|$)/, '$1hello') => "hello la"
+        // ---------------------------------------------------------------------------------

        // hash of flags filled in first pass loop : (== did annotation i match ?)
        var isDisplayedIntraText = {};
@@ -453,8 +506,8 @@
          //  var myPattern = new RegExp("\\b"+escapeRegExp(annotation.text)+"\\b", 'igm');
          // previously:
              var words = annotation.text.split(" ").map(escapeRegExp);
-              var myPattern = new RegExp(startPattern + words.join(middlePattern) + endPattern, 'gmi');

+              var myPattern = new RegExp(startPattern + words.join(middlePattern) + endPattern, 'gmi');

          // -------------------------------------------
          // replace in text: matched annots by anchors
@@ -472,6 +525,7 @@
                  // £dbgcount here unnecessary nbMatches (can go straight to ICI)
                  var matches = eltLongtext.match(myPattern)
                  var nbMatches = matches ? eltLongtext.match(myPattern).length : 0
+
                  if (nbMatches > 0) {
                      k += nbMatches ;

@@ -480,7 +534,7 @@
                      l ++ ;
                  // ------------------------------------------------------------
                      // ICI we update each time
-                      textMapping[eltId] = eltLongtext.replace(myPattern, myAnchor);
+                      textMapping[eltId] = eltLongtext.replace(myPattern, "$1"+myAnchor);

                      // ex longtext -- "Background Few previous studies have
                      //                 examined non-wealth-based inequalities etc"

--- a/annotations/static/annotations/http.js
+++ b/annotations/static/annotations/http.js
@@ -90,17 +90,20 @@
  * MainApiAddNgramHttpService: Create and index a new ngram
  * ===========================
  * route: PUT api/ngrams?text=mynewngramstring&corpus=corpus_id
-  * ------
+  *
+  * NB it also checks if ngram exists (returns the preexisting id)
+  *    and if it has a mainform/group (via 'testgroup' option)
+  *                                   (useful if we add it to a list afterwards)
  *
  */
  http.factory('MainApiAddNgramHttpService', function($resource) {
    return $resource(
       // adding explicit "http://" b/c this a cross origin request
      'http://' + window.GARG_ROOT_URL
-                + "/api/ngrams?text=:ngramStr&corpus=:corpusId",
+                + "/api/ngrams?text=:ngramStr&corpus=:corpusId&testgroup",
      {
        ngramStr: '@ngramStr',
-        corpusId: '@corpusId'
+        corpusId: '@corpusId',
      },
      {
        put: {

--- a/annotations/static/annotations/ngramlist.js
+++ b/annotations/static/annotations/ngramlist.js
@@ -141,9 +141,9 @@
            crudCallsToMake = [
                {'service': MainApiAddNgramHttpService, 'action': 'put',
                'params' : {'ngramStr':value, corpusId: $rootScope.corpusId},
-                'dataPropertiesToCache': ['id'] },
+                'dataPropertiesToCache': ['id', 'group'] },
                {'service': MainApiChangeNgramHttpService, 'action': 'put',
-                'params' : {'listId':tgtListId, 'ngramIdList': {'fromCache': 'id'} } }
+                'params' : {'listId':tgtListId, 'ngramIdList': {'fromCacheIfElse': ['group','id']} } }
            ];
            break;

@@ -151,9 +151,9 @@
            crudCallsToMake = [
                {'service': MainApiAddNgramHttpService, 'action': 'put',
                 'params' : {'ngramStr':value, corpusId: $rootScope.corpusId},
-                 'dataPropertiesToCache': ['id'] },
+                 'dataPropertiesToCache': ['id', 'group'] },
                {'service': MainApiChangeNgramHttpService, 'action': 'put',
-                 'params' : {'listId':tgtListId, 'ngramIdList': {'fromCache': 'id'} } }
+                 'params' : {'listId':tgtListId, 'ngramIdList': {'fromCacheIfElse': ['group','id']} } }
            ];
            break;

@@ -161,11 +161,11 @@
            crudCallsToMake = [
                {'service': MainApiAddNgramHttpService, 'action': 'put',
                'params' : {'ngramStr':value, corpusId: $rootScope.corpusId},
-                'dataPropertiesToCache': ['id'] },
+                'dataPropertiesToCache': ['id', 'group'] },
                {'service': MainApiChangeNgramHttpService, 'action': 'put',
-                 'params' : {'listId':$rootScope.listIds.MAINLIST, 'ngramIdList': {'fromCache': 'id'} } },
+                 'params' : {'listId':$rootScope.listIds.MAINLIST, 'ngramIdList': {'fromCacheIfElse': ['group','id']} } },
                {'service': MainApiChangeNgramHttpService, 'action': 'put',
-                'params' : {'listId':tgtListId, 'ngramIdList': {'fromCache': 'id'} } }
+                'params' : {'listId':tgtListId, 'ngramIdList': {'fromCacheIfElse': ['group','id']} } }
            ];
            break;
      }

--- a/gargantext/util/toolchain/ngrams_addition.py
+++ b/gargantext/util/toolchain/ngrams_addition.py
@@ -19,6 +19,7 @@ procedure:

 from gargantext.models   import Ngram, Node, NodeNgram
 from gargantext.util.db  import session, bulk_insert
+from gargantext.util.db  import bulk_insert_ifnotexists # £TODO debug
 from sqlalchemy          import distinct
 from re                  import findall, IGNORECASE

@@ -41,20 +42,13 @@ def index_new_ngrams(ngram_ids, corpus, keys=('title', 'abstract', )):
    @param keys: the hyperdata fields to index
    """

-    # check the ngrams we won't process (those that were already indexed)
-    indexed_ngrams_subquery = (session
-                                .query(distinct(NodeNgram.ngram_id))
-                                .join(Node, Node.id == NodeNgram.node_id)
-                                .filter(Node.parent_id == corpus.id)
-                                .filter(Node.typename == 'DOCUMENT')
-                                .subquery()
-                                )
-
-    # retrieve the ngrams from our list, filtering out the already indexed ones
+    # retrieve *all* the ngrams from our list
+    # (even if some relations may be already indexed
+    #  b/c they were perhaps not extracted in all docs
+    #   => we'll use already_indexed later)
    todo_ngrams = (session
                    .query(Ngram)
                    .filter(Ngram.id.in_(ngram_ids))
-                    .filter(~ Ngram.id.in_(indexed_ngrams_subquery))
                    .all()
                    )

@@ -90,22 +84,49 @@ def index_new_ngrams(ngram_ids, corpus, keys=('title', 'abstract', )):
                    else:
                        node_ngram_to_write[doc.id][ngram.id] += n_occs

+    # debug
+    # print("new node_ngrams before filter:", node_ngram_to_write)
+
+    # check the relations we won't insert (those that were already indexed)
+    # NB costly but currently impossible with bulk_insert_ifnotexists
+    #                                         b/c double uniquekey
+    already_indexed = (session
+                        .query(NodeNgram.node_id, NodeNgram.ngram_id)
+                        .join(Node, Node.id == NodeNgram.node_id)
+                        .filter(Node.parent_id == corpus.id)
+                        .filter(Node.typename == 'DOCUMENT')
+                        .all()
+                        )
+    filter_out = {(nd_id,ng_id) for (nd_id,ng_id) in already_indexed}
+    # POSSIBLE update those that are filtered out if wei_previous != wei
+
    # integrate all at the end
    my_new_rows = []
    add_new_row = my_new_rows.append
    for doc_id in node_ngram_to_write:
        for ngram_id in node_ngram_to_write[doc_id]:
-            wei = node_ngram_to_write[doc_id][ngram_id]
-            add_new_row([doc_id, ngram_id, wei])
+            if (doc_id, ngram_id) not in filter_out:
+                wei = node_ngram_to_write[doc_id][ngram_id]
+                add_new_row([doc_id, ngram_id, wei])

    del node_ngram_to_write

+    # debug
+    # print("new node_ngrams after filter:", my_new_rows)
+
    bulk_insert(
        table = NodeNgram,
        fields = ('node_id', 'ngram_id', 'weight'),
        data = my_new_rows
    )

+    # bulk_insert_ifnotexists(
+    #     model = NodeNgram,
+    #     uniquekey = ('node_id','ngram_id'),        <= currently impossible
+    #     fields = ('node_id', 'ngram_id', 'weight'),
+    #     data = my_new_rows
+    # )
+
    n_added = len(my_new_rows)
    print("index_new_ngrams: added %i new NodeNgram rows" % n_added)


--- a/gargantext/views/api/ngrams.py
+++ b/gargantext/views/api/ngrams.py
@@ -2,8 +2,8 @@ from gargantext.util.http       import ValidationException, APIView \
                                     , get_parameters, JsonHttpResponse\
                                     , HttpResponse
 from gargantext.util.db         import session, func
-from gargantext.util.db_cache   import cache 
-from gargantext.models          import Node, Ngram, NodeNgram
+from gargantext.util.db_cache   import cache
+from gargantext.models          import Node, Ngram, NodeNgram, NodeNgramNgram
 from sqlalchemy.orm             import aliased
 from re                         import findall

@@ -21,7 +21,7 @@ class ApiNgrams(APIView):
        """
        Used for analytics
        ------------------
-        
+
        Get ngram listing + counts in a given scope
        """
        # parameters retrieval and validation
@@ -83,24 +83,30 @@ class ApiNgrams(APIView):

         1 - checks user authentication before any changes

-         2 - adds the ngram to Ngram table in DB
+         2 - checks if the ngram already exists in Ngram table in DB
+              if yes returns ngram_id and optionally mainform_id
+              otherwise continues
+
+         3 - adds the ngram to Ngram table in DB

-         3 - (if corpus param is present)
+         4 - (if corpus param is present)
             adds the ngram doc counts to NodeNgram table in DB
             (aka "index the ngram" throught the docs of the corpus)

-         4 - returns json with:
-             'msg'   => a success msg 
+         5 - returns json with:
+             'msg'   => a success msg
             'text'  => the initial text content
             'term'  => the normalized text content
             'id'    => the new ngram_id
             'count' => the number of docs with the ngram in the corpus
                        (if corpus param is present)
+             'group' => the mainform_id if applicable

        possible inline parameters
        --------------------------
        @param    text=<ngram_string>         [required]
        @param    corpus=<CORPUS_ID>          [optional]
+        @param    testgroup (true if present) [optional, requires corpus]
        """

        # 1 - check user authentication
@@ -122,6 +128,9 @@ class ApiNgrams(APIView):
                                        It requires a "text" parameter,\
                                        for instance /api/ngrams?text=hydrometallurgy')

+        if ('testgroup' in params) and (not ('corpus' in params)):
+            raise ValidationException("'testgroup' param requires 'corpus' param")
+
        # if we have a 'corpus' param (to do the indexing)...
        do_indexation = False
        if 'corpus' in params:
@@ -143,10 +152,33 @@ class ApiNgrams(APIView):
        try:
            log_msg = ""
            ngram_id = None
+            mainform_id = None
+
            preexisting = session.query(Ngram).filter(Ngram.terms==ngram_str).first()
+
            if preexisting is not None:
                ngram_id = preexisting.id
                log_msg += "ngram already existed (id %i)\n" % ngram_id
+
+                # in the context of a corpus we can also check if has mainform
+                # (useful if we add the ngram to a list afterwards)
+                if 'testgroup' in params:
+                    groupings_id = (session.query(Node.id)
+                                           .filter(Node.parent_id == corpus_id)
+                                           .filter(Node.typename == 'GROUPLIST')
+                                           .first()
+                                    )
+                    had_mainform = (session.query(NodeNgramNgram.ngram1_id)
+                                          .filter(NodeNgramNgram.node_id == groupings_id)
+                                          .filter(NodeNgramNgram.ngram2_id == preexisting.id)
+                                          .first()
+                                    )
+                    if had_mainform:
+                        mainform_id = had_mainform[0]
+                        log_msg += "ngram had mainform (id %i) in this corpus" % mainform_id
+                    else:
+                        log_msg += "ngram was not in any group for this corpus"
+
            else:
                # 2 - insert into Ngrams
                new_ngram = Ngram(terms=ngram_str, n=ngram_size)
@@ -165,6 +197,7 @@ class ApiNgrams(APIView):
                'text': original_text,
                'term': ngram_str,
                'id' : ngram_id,
+                'group' : mainform_id,
                'count': n_added if do_indexation else 'no corpus provided for indexation'
                }, 200)