Merge branch 'refactoring' of ssh://delanoe.org:1979/gargantext into c24b-cern

[Merge OK]

Merge branch 'refactoring' of ssh://delanoe.org:1979/gargantext into c24b-cern
[Merge OK]
f4599719 · c24b · 3ba747fa · 5e7c5603 · f4599719 · f4599719
Commit f4599719 authored May 12, 2016 by c24b
13 changed files
--- a/annotations/static/annotations/activelists.js
+++ b/annotations/static/annotations/activelists.js
@@ -10,10 +10,14 @@
      $scope.activeListsChange = function() {
        var selected = $('.selectpicker option:selected').val();
        var newActive = {};
-        $('.selectpicker option:selected').each(function(item, value) {
-          var id = value.id.split("---", 2)[1];
-          newActive[id] = value.value;
+        $('.selectpicker option:selected').each(function(item, opt) {
+          // ex opt:
+          // <option id="list---748" value="MAINLIST">MAINLIST</option>
+          var id = opt.id.split("---", 2)[1];
+          newActive[id] = opt.value;
        });
+
+        // ex: {745: "MAINLIST", 748: "MAPLIST"}
        $rootScope.activeLists = newActive;
      };


--- a/annotations/static/annotations/app.css
+++ b/annotations/static/annotations/app.css
@@ -9,7 +9,8 @@
 .MAPLIST {
  color: black;
  /* green */
-  background-color: rgba(60, 118, 61, .7);
+  background-color: rgba(23, 255, 189, .7);
+  /* background-color: rgba(60, 118, 61, .7); */
  cursor: pointer;
 }


--- a/annotations/static/annotations/highlight.js
+++ b/annotations/static/annotations/highlight.js
@@ -441,15 +441,12 @@
        var middlePattern = " ";
        var endPattern = "(?:<\/span>)*)\\b";

-        console.log("highlight annotations length: " + annotations.length)
+        // map filled in first pass loop : annotation.uuid => annotation (only for annotations that matched)
+        var isDisplayedIntraText = {};

-        var sortedSizeAnnotations = lengthSort(annotations, "text"),
-            extraNgramList = angular.copy($rootScope.extraNgramList);
+        console.log("highlight annotations length: " + annotations.length)

-        // reinitialize an empty list
-        extraNgramList = angular.forEach(extraNgramList, function(name, id) {
-          extraNgramList[id] = [];
-        });
+        var sortedSizeAnnotations = lengthSort(annotations, "text")

        // rl: £dbg counters
        var i = 0 ;
@@ -506,6 +503,9 @@
                  var nbMatches = matches ? eltLongtext.match(myPattern).length : 0
                  if (nbMatches > 0) {
                      k += nbMatches ;
+
+                      // remember that this annotation.text matched
+                      isDisplayedIntraText[annotation.uuid] = annotation
                      l ++ ;
                  // ------------------------------------------------------------
                      // ICI we update each time
@@ -549,81 +549,72 @@

          // highlight anchors as html spans
          // -------------------------------
-          angular.forEach(textMapping, function(text, eltId) {
+          angular.forEach(textMapping, function(textContent, eltId) {
            //   console.log(anchorPattern)
-            if(text) {
+            if(textContent) {
              textMapping[eltId] = replaceAnchorByTemplate(
-                  text,
+                  textContent,
                  annotation,
                  template,
                  anchorPattern);
            }
          });
-
-          // rloth: for now let's show *all* ngrams of the active list
-          //        in the left side
-          extraNgramList[annotation.list_id] = extraNgramList[annotation.list_id].concat(annotation);
        });

+        // let's show just the ngrams that matched
+        //        in the left side
+        var sortedDisplayedKeys = Object.keys(isDisplayedIntraText).sort()
+                                                        // sorts the uuid keys as strings (lexicographic, not numeric)

+        // fill ngramsInPanel: push each matched annotation into the array of its list_id
+        angular.forEach(sortedDisplayedKeys, function(id) {
+          var the_annot = isDisplayedIntraText[id] ;
+          var the_list_id = the_annot.list_id ;
+          $rootScope.ngramsInPanel[the_list_id].push(the_annot)
+        });

+        // debug
+        //console.warn("$rootScope.ngramsInPanel :")
+        //console.warn($rootScope.ngramsInPanel)

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        // update extraNgramList
-        $rootScope.extraNgramList = angular.forEach(extraNgramList, function(name, id) {
-          extraNgramList[id] = lengthSort(extraNgramList[id], 'text');
-        });
        // return the object of element ID with the corresponding HTML
        return textMapping;
      }

+
+      /*
+      * main refresh
+      */
      $rootScope.refreshDisplay = function() {
        console.log("annotations.highlight.refreshDisplay()")
        if ($rootScope.annotations === undefined) return;
        if ($rootScope.activeLists === undefined) return;
        if (_.keys($rootScope.activeLists).length === 0) return;

-        // initialize extraNgramList
-        var extraNgramList = {};
-        $rootScope.extraNgramList = angular.forEach($rootScope.activeLists, function(name, id) {
-          this[id] = [];
-        }, extraNgramList);
-        $rootScope.extraNgramList = extraNgramList;
+        // initialize ngramsInPanel
+        // ------------------------
+        //  $rootScope.ngramsInPanel = {
+        //    activelist1_id : [
+        //            annotation_a,
+        //            annotation_b,
+        //            annotation_c
+        //    ] ,
+        //    activelist2_id : [
+        //            annotation_x,
+        //            annotation_y,
+        //            annotation_z
+        //    ] ,
+        //      ....
+        //    }
+        //
+        var ngramsInPanel = {};
+        $rootScope.ngramsInPanel = angular.forEach($rootScope.activeLists, function(name, list_id) {
+          this[list_id] = [];
+        }, ngramsInPanel);
+        $rootScope.ngramsInPanel = ngramsInPanel;

        /*
-        * Transform text into HTML with higlighted ngrams
+        * Transform text into HTML with highlighted ngrams via compileNgramsHtml
        */
        var result = compileNgramsHtml(
          $rootScope.annotations,
@@ -643,6 +634,8 @@
          angular.element(elt).replaceWith($compile(elt)($rootScope.$new(true)));
        });
      }
+
+
      /*
      * Listen changes on the ngram data
      */

--- a/annotations/static/annotations/ngramlist.js
+++ b/annotations/static/annotations/ngramlist.js
@@ -30,7 +30,7 @@
              // (then used for left-side flatlist AND inline annots)
              $rootScope.annotations = data[$rootScope.corpusId.toString()][$rootScope.docId.toString()];
              // TODO £NEW : lookup obj[list_id][term_text] = {terminfo}
-              // $rootScope.lookup = 
+              // $rootScope.lookup =
              $rootScope.refreshDisplay();
            },
            function(data) {
@@ -49,7 +49,7 @@
  annotationsAppNgramList.controller('NgramListPaginationController',
    ['$scope', '$rootScope', function ($scope, $rootScope) {

-    $rootScope.$watchCollection('extraNgramList', function (newValue, oldValue) {
+    $rootScope.$watchCollection('ngramsInPanel', function (newValue, oldValue) {
      $scope.currentListPage = 0;
      $scope.pageSize = 15;

@@ -62,8 +62,8 @@
      };

      $scope.totalListPages = function(listId) {
-        if ($rootScope.extraNgramList[listId] === undefined) return 0;
-        return Math.ceil($rootScope.extraNgramList[listId].length / $scope.pageSize);
+        if ($rootScope.ngramsInPanel[listId] === undefined) return 0;
+        return Math.ceil($rootScope.ngramsInPanel[listId].length / $scope.pageSize);
      };
    });
  }]);
@@ -93,12 +93,12 @@

      var value = angular.element(inputEltId).val().trim();
      if (value === "") return;
-      
+
      // £TEST locally check if already in annotations NodeNgrams ------
-      
+
      // $rootScope.annotations = array of ngram objects like:
      // {"list_id":805,"occurrences":2,"uuid":9386,"text":"petit échantillon"}
-      
+
      console.log('looking for "' + value + '" in list:' + listId)
      var already_in_list = false ;
      angular.forEach($rootScope.annotations, function(annot,i) {
@@ -112,7 +112,7 @@
      );
      if (already_in_list) { return ; }
      // ---------------------------------------------------------------
-      
+
      // will check if there's a preexisting ngramId for this value
      // TODO: if maplist => also add to miam
      NgramHttpService.post(
@@ -136,11 +136,11 @@
              },
              function(data) {
                $rootScope.annotations = data[$rootScope.corpusId.toString()][$rootScope.docId.toString()];
-              
+
                // TODO £NEW : lookup obj[list_id][term_text] = {terminfo}
-                // $rootScope.lookup = 
-                
-                
+                // $rootScope.lookup =
+
+
                $rootScope.refreshDisplay();
              },
              function(data) {

--- a/annotations/templates/annotations/main.html
+++ b/annotations/templates/annotations/main.html
@@ -32,12 +32,12 @@
            </ul>
            <div class="tab-content">
              <div ng-controller="NgramListPaginationController" ng-repeat="(listId, listName) in activeLists" ng-class="{active: $first == true}" class="tab-pane" id="tab-{[{listId}]}">
-                <div ng-if="extraNgramList[listId].length == 0" class="alert alert-info" role="alert">
+                <div ng-if="ngramsInPanel[listId].length == 0" class="alert alert-info" role="alert">
                  Input any keyword you want to link to this article and the list named '{[{listName}]}'
                </div>

                <ul class="list-group words-list clearfix">
-                  <li ng-repeat="keyword in extraNgramList[listId] | startFrom:currentListPage * pageSize | limitTo:pageSize" class="keyword-group-item">
+                  <li ng-repeat="keyword in ngramsInPanel[listId] | startFrom:currentListPage * pageSize | limitTo:pageSize" class="keyword-group-item">
                    <div ng-controller="NgramController" keyword-template class="keyword-container"></div>
                  </li>
                </ul>

--- a/annotations/views.py
+++ b/annotations/views.py
@@ -14,7 +14,7 @@ from rest_framework.authentication import SessionAuthentication, BasicAuthentica

 # 2016-03-24: refactoring, new paths
 from gargantext.models.ngrams import Node, NodeNgram, Ngram
-from gargantext.util.db       import session
+from gargantext.util.db       import session, aliased
 from gargantext.util.db_cache import cache
 from gargantext.util.http     import requires_auth

@@ -47,16 +47,25 @@ class NgramList(APIView):
        doc_ngram_list = []
        lists = {}

+        corpus_nod = cache.Node[corpus_id]
+        doc_nod = cache.Node[doc_id]
+        scores_nod = corpus_nod.children(typename="OCCURRENCES").first()
+
        for list_type in ['MAINLIST', 'MAPLIST', 'STOPLIST']:
-            corpus_nod = cache.Node[corpus_id]
            list_nod = corpus_nod.children(typename=list_type).first()
            list_id = list_nod.id
            lists["%s" % list_id] = list_type

+            ListsTable = aliased(NodeNgram)
+
+            # doc_nod.ngrams iff we just need the occurrences in the doc (otherwise do manually)
+            q = doc_nod.ngrams.join(ListsTable).filter(ListsTable.node_id == list_id)
+
            # add to results
-            doc_ngram_list += [(obj.id, obj.terms, w, list_id) for (w,obj) in list_nod.ngrams.all()]
+            doc_ngram_list += [(obj.id, obj.terms, w, list_id) for (w,obj) in q.all()]

-        print("annotations.views.NgramList.doc_ngram_list: ", doc_ngram_list)
+        # debug
+        # print("annotations.views.NgramList.doc_ngram_list: ", doc_ngram_list)
        data = { '%s' % corpus_id : {
            '%s' % doc_id :
                [

--- a/gargantext/constants.py
+++ b/gargantext/constants.py
@@ -212,17 +212,17 @@ RESOURCETYPES = [


 # linguistic extraction parameters ---------------------------------------------
-DEFAULT_TFIDF_CUTOFF_RATIO      = .45        # MAINLIST maximum terms in %
+DEFAULT_TFIDF_CUTOFF_RATIO      = .75        # MAINLIST maximum terms in %

-DEFAULT_TFIDF_HARD_LIMIT        = 750        # MAINLIST maximum terms abs
+DEFAULT_TFIDF_HARD_LIMIT        = 5000       # MAINLIST maximum terms abs
                                             # (makes COOCS larger ~ O(N²) /!\)

 DEFAULT_COOC_THRESHOLD          = 2          # inclusive minimum for COOCS coefs
                                             # (makes COOCS more sparse)

-DEFAULT_MAPLIST_MAX             = 300        # MAPLIST maximum terms
+DEFAULT_MAPLIST_MAX             = 350        # MAPLIST maximum terms

-DEFAULT_MAPLIST_MONOGRAMS_RATIO = .5         # part of monograms in MAPLIST
+DEFAULT_MAPLIST_MONOGRAMS_RATIO = .15         # part of monograms in MAPLIST

 DEFAULT_MAX_NGRAM_LEN           = 7          # limit used after POStagging rule
                                             # (initial ngrams number is a power law of this /!\)

--- a/gargantext/util/parsers/CSV.py
+++ b/gargantext/util/parsers/CSV.py
@@ -124,7 +124,8 @@ class CSVParser(Parser):
                for columnum in range( Coords["column"],len(tokens) ):
                    data = tokens[columnum]
                    RecordDict[ Headers_Int2Str[columnum] ] = data
-                hyperdata_list.append( RecordDict )
+                if len(RecordDict.keys())>0:
+                    hyperdata_list.append( RecordDict )
        # # = = = = [ / Reading the whole CSV and saving ] = = = = #

        return hyperdata_list
--- a/gargantext/util/toolchain/list_stop.py
+++ b/gargantext/util/toolchain/list_stop.py
@@ -27,10 +27,10 @@ def is_stop_word(ngram, stop_words=None):
            # , "(.*)(\.)(.*)"         trop fort (enlève les sigles !)
            , "(.*)(\,)(.*)"
            , "(.*)(< ?/?p ?>)(.*)"       # marques de paragraphes
-            , "(.*)(study)(.*)"
+            , "(.*)(study|elsevier)(.*)"
            , "(.*)\b(xx|xi|xv)\b(.*)"
            , "(.*)(result)(.*)"
-            , "(.*)(année|nombre|moitié)(.*)"
+            , "(.*)(year|année|nombre|moitié)(.*)"
            , "(.*)(temps)(.*)"
            , "(.*)(%)(.*)"
            , "(.*)(\{)(.*)"

--- a/gargantext/views/api/nodes.py
+++ b/gargantext/views/api/nodes.py
@@ -84,9 +84,10 @@ class NodeListResource(APIView):
            response = HttpResponse(content_type='text/csv')
            response['Content-Disposition'] = 'attachment; filename="Gargantext_Corpus.csv"'

-            writer = csv.writer(response)
+            writer = csv.writer(response, delimiter='\t')

-            keys =  [ 'title'   , 'journal', 'publication_date'
+            keys =  [ 'title'   , 'journal'
+                    , 'publication_year', 'publication_month', 'publication_day'
                    , 'abstract', 'authors']

            writer.writerow(keys)

--- a/static/js/gargantext/Docs_dyna_chart_and_table.js
+++ b/static/js/gargantext/Docs_dyna_chart_and_table.js
@@ -170,7 +170,7 @@ function toggleFavstatus (rec_id) {
    var myHttpAction = statusBefore ? 'DELETE' : 'PUT'

    $.ajax({
-      url: 'http://localhost:8000/api/nodes/'+corpus_id+'/favorites?docs='+doc_id,
+      url: window.location.origin + '/api/nodes/'+corpus_id+'/favorites?docs='+doc_id,
      type: myHttpAction,
      beforeSend: function(xhr) {
        xhr.setRequestHeader("X-CSRFToken", getCookie("csrftoken"));
@@ -600,7 +600,7 @@ $.ajax({
  success: function(maindata){
      // unfortunately favorites info is in a separate request (other nodes)
      $.ajax({
-        url: 'http://localhost:8000/api/nodes/'+corpus_id+'/favorites',
+        url: window.location.origin + '/api/nodes/'+corpus_id+'/favorites',
        success: function(favdata){
          // initialize favs lookup
          for (var i in favdata['favdocs']) {

--- a/templates/pages/main/about.html
+++ b/templates/pages/main/about.html
@@ -41,7 +41,7 @@
                            <li>
                                Version 3.0.0
                                <ul>
-                                    <li>[NAME] Blue Jasmine</li>
+                                    <li>[NAME] Blue Jasmin</li>
                                    <li>[CODE] Refactored</li>
                                    <li>[DATABASE] New schema</li>
                                </ul>

--- a/templates/pages/menu.html
+++ b/templates/pages/menu.html
@@ -151,15 +151,17 @@
                            <br>
                            <br>
                                <div class="row">
-                                    <div class="col-md-3">
-                                            <h3>
-                                                <a href="/projects/{{project.id}}">
-                                                    <span class="glyphicon glyphicon-book" aria-hidden="true"></span>
-                                                    {{ project.name | truncatechars:15}}
-                                                </a>
-                                            </h3>
+                                    <h3>
+                                        <a href="/projects/{{project.id}}">
+                                            <span class="glyphicon glyphicon-book" aria-hidden="true"></span>
+                                            {{ project.name | truncatechars:50}}
+                                        </a>
+                                    </h3>
+                                </div>
+                                <div class="row">
+                                    <div class="col-md-1">
                                    </div>
-                                    <div class="col-md-5">
+                                    <div class="col-md-6">
                                            <h3>
                                                <span class="glyphicon glyphicon-cd" aria-hidden="true"></span>
                                                {{ resourcename | truncatechars:20 }}
@@ -167,23 +169,20 @@
                                            <h3>
                                                <span class="glyphicon glyphicon-file" aria-hidden="true"></span>
                                                {{ corpus.name | truncatechars:20 }}
-                                                <a class="btn btn-default" role="button" href="/api/nodes?parent_id={{corpus.id}}&types[]=DOCUMENT&pagination_limit=100000&formated=csv">
+                                                <a class="btn btn-primary" role="button" href="/api/nodes?parent_id={{corpus.id}}&types[]=DOCUMENT&pagination_limit=100000&formated=csv">
                                                    <span class="glyphicon glyphicon-download" aria-hidden="true"></span>
                                                </a>
                                            </h3>
                                    
                                    </div>
-                                    <div class="col-md-4">
+                                    <div class="col-md-5">
                                            <h3>
                                                <span class="glyphicon glyphicon-calendar" aria-hidden="true"></span> {{ corpus.date }}
                                            </h3>
                                            <h3>
                                                <span class="glyphicon glyphicon-user" aria-hidden="true"></span>
-                                                Author(s): 
+                                                Author(s): {{ user.username | truncatechars:15}} 
                                            </h3>
-                                            <h4>
-                                                {{ user.username | truncatechars:15}} 
-                                            </h4>
                                    </div>
                                </div>
                        </div>