Commit 5c07a501 authored by delanoe

Merge branch 'refactoring-rom' into refactoring

parents 8cc92549 35c3de76
ANNOTATIONS
===========
2016-01

## Routines for manipulating ngrams within the lists

#### Overall path of a chosen action

1. angular: ngramlist.js (user input) or highlight.js (user menu controller)
2. angular: http.js configuration object, e.g. `{ 'action': 'post', 'listId': miamlist_id, .. }` (see the sketch after this list)
3. AJAX POST/DELETE
4. "local API" (=> annotations.views)
5. DB insert/delete

Note:
In Elias's annotations code there is a "local API" that relays client actions to the server.
=> the wiring is configured for angular in annotations/static/annotations/app.js, which starts its own main on the window, takes its parameters from the URL and keeps itself isolated from django
=> the upstream routes are defined for django in annotations.urls and mirrored for angular in http.js
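A minimal sketch of how steps 2 and 3 fit together, written against the routes documented further down; the module name, the 'post'/'delete' action set and the exact parameter names are assumptions, not a copy of the real http.js:
```
// Sketch only (not the project's http.js): the step-2 configuration
// object becomes the URL parameters of an Angular $resource whose
// template follows the route annotations/lists/<listId>/ngrams/<ngramId>.
var sketch = angular.module('annotationsHttpSketch', ['ngResource']);

sketch.factory('NgramHttpService', ['$resource', function ($resource) {
  return $resource('annotations/lists/:listId/ngrams/:ngramId', {}, {
    'post':   { method: 'POST'   },   // step 3: AJAX POST   (create an ngram)
    'delete': { method: 'DELETE' }    // step 3: AJAX DELETE (remove an ngram)
  });
}]);

// Step 1 -> 2, from a controller: the object literal below is the
// "configuration object" of step 2.
//   NgramHttpService.post({ 'listId': miamlist_id, 'ngramId': 'create' },
//                         { 'text': 'some term' });
```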
#### Example: the AJAX step for a deletion

`curl -XDELETE http://localhost:8000/annotations/lists/7129/ngrams/4497`

handled via annotations.views.NgramEdit.as_view()

#### Adding an ngram
```
curl -XPOST http://localhost:8000/annotations/lists/1866/ngrams/create \
     -H "Content-Type: application/json" \
     -d '{"text":"yooooooooo"}' > response_to_ngrams_create.html
```
## Client-side interaction points (GUI)

Add an ngram via the dialog box:
- controller:
  ngramlist.annotationsAppNgramList.controller('NgramInputController')
- effect:
  1. NgramHttpService.post()

Add an ngram via "select new" + menu:
- controller:
  highlight.annotationsAppHighlight.controller('TextSelectionMenuController')
  1. toggleMenu (sets action = X)
  2. onMenuClick
- effect:
  1. NgramHttpService[action] (see the sketch below)
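A compressed sketch of the second flow; only the two documented steps (toggleMenu storing the action, onMenuClick calling NgramHttpService[action]) come from the notes above, while the scope fields and parameter names are illustrative assumptions:
```
// Illustrative sketch -- the real controller is TextSelectionMenuController
// in highlight.js; assumes $scope and NgramHttpService have been injected.
// The 'pending' field and parameter names are made up here.

// 1. toggleMenu: remember which action the menu entry stands for
$scope.toggleMenu = function (listId, action) {      // action: 'post' or 'delete'
  $scope.pending = { listId: listId, verb: action };
};

// 2. onMenuClick: fire the remembered action on the selected text
$scope.onMenuClick = function (selectedText, ngramId) {
  var p = $scope.pending;
  NgramHttpService[p.verb](                          // -> POST or DELETE on annotations/lists/...
    { 'listId': p.listId, 'ngramId': ngramId || 'create' },
    { 'text': selectedText },
    function onSuccess(data) { console.log('ok', data); },
    function onError(data)   { console.error('ngram action failed', data); }
  );
};
```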
@@ -24,18 +24,21 @@
});
});
+ // FIXME: couldn't we use `lists` directly
+ //        instead of copying it into allListsSelect?
$rootScope.$watchCollection('lists', function (newValue, oldValue) {
if (newValue === undefined) return;
// reformat lists to allListsSelect
var allListsSelect = [];
- console.log($rootScope.lists)
+ // console.log($rootScope.lists)
angular.forEach($rootScope.lists, function(value, key) {
this.push({
'id': key,
'label': value
});
- // initialize activeLists with the MiamList by default
+ // initialize activeLists with the MAPLIST by default
- if (value == 'MAINLIST') {
+ if (value == 'MAPLIST') {
$rootScope.activeLists = {};
$rootScope.activeLists[key] = value;
}
@@ -45,7 +48,7 @@
$timeout(function() {
$('.selectpicker').selectpicker();
- $('.selectpicker').selectpicker('val', ['MAINLIST']);
+ $('.selectpicker').selectpicker('val', ['MAPLIST']);
});
});
......
@@ -6,14 +6,23 @@
*/
+ .MAPLIST {
+   color: black;
+   /* green */
+   background-color: rgba(60, 118, 61, .7);
+   cursor: pointer;
+ }
.MAINLIST {
  color: black;
- background-color: rgba(60, 118, 61, 0.5);
+ /* background-color: rgba(60, 118, 61, 0.5); */
+ background-color: orange;
  cursor: pointer;
}
.STOPLIST {
  color: black;
+ /* grey */
  background-color: rgba(169, 68, 66, 0.2);
  cursor: pointer;
}
......
@@ -8,6 +8,8 @@
// dataLoading = flag used to show the wait indicator
$scope.dataLoading = true ;
+ console.log("annotations.document.DocController.DocumentHttpService.get():before")
$rootScope.documentResource = DocumentHttpService.get(
{'docId': $rootScope.docId},
@@ -21,6 +23,7 @@
$rootScope.docId = data.id;
$rootScope.full_text = data.full_text;
$rootScope.abstract_text = data.abstract_text;
+ console.log("annotations.document.DocController.getannotations")
// GET the annotations
NgramListHttpService.get(
{
......
@@ -8,7 +8,28 @@
$httpProvider.defaults.xsrfCookieName = 'csrftoken';
}]);
/*
- * Read Document
+ * DocumentHttpService: Read Document
+ * ===================
+ *
+ * route: annotations/documents/@d_id
+ * ------
+ *
+ * example:
+ * --------
+ * {
+ *   "id": 556,
+ *   "publication_date": "01/01/66",
+ *   "title": "Megalithic astronomy: Indications in standing stones",
+ *   "abstract_text": "An account is given of a number of surveys of
+ *                     stone circles, alignments, etc., found in Britain.
+ *                     The geometry of the rings is discussed in so far
+ *                     as it affects the determination of the azimuths
+ *                     to outliers and other circles.",
+ *   "full_text": null,
+ *   "journal": "Vistas in Astronomy",
+ *   "authors": "A. Thom"
+ * }
+ *
 */
http.factory('DocumentHttpService', function($resource) {
return $resource(
@@ -26,7 +47,28 @@
});
/*
- * Read all Ngrams
+ * NgramListHttpService: Read all Ngrams
+ * =====================
+ *
+ * route: annotations/corpora/@c_id/documents/@d_id
+ * ------
+ *
+ * json return format:
+ * -------------------
+ * corpus_id : {
+ *   lists: {(list_id:name)+}
+ *   doc_id : [ngrams_objects]+,
+ * }
+ *
+ * example:
+ * --------
+ * "554": {
+ *   "lists": { "558": "StopList", "564": "MiamList", "565": "MapList" }
+ *   "556": [{ "uuid": 2368, "occurrences": 1.0, "text": "idea", "list_id": 564 },
+ *           { "uuid": 5031, "occurrences": 1.0, "text": "indications", "list_id": 564},
+ *           { "uuid": 5015, "occurrences": 3.0, "text": "star", "list_id": 565 },
+ *           ... ],
+ * }
 */
http.factory('NgramListHttpService', function ($resource) {
return $resource(
@@ -45,7 +87,21 @@
});
/*
- * Create, modify or delete 1 Ngram
+ * NgramHttpService: Create, modify or delete 1 Ngram
+ * =================
+ *
+ * TODO REACTIVATE IN urls.py
+ *
+ * if new ngram:
+ *   -> ngram_id will be "create"
+ *   -> route: annotations/lists/@node_id/ngrams/create
+ *   -> will land on views.NgramCreate
+ *
+ * else:
+ *   -> ngram_id is a real ngram id
+ *   -> route: annotations/lists/@node_id/ngrams/@ngram_id
+ *   -> will land on views.NgramEdit
+ *
 */
http.factory('NgramHttpService', function ($resource) {
return $resource(
......
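A usage sketch of the three services documented above, purely illustrative (the `corpusId` parameter name and the literal IDs are assumptions taken from the example payloads, not guaranteed by the real API):
```
// Sketch only: assumes DocumentHttpService, NgramListHttpService and
// NgramHttpService have been injected into the current controller.
DocumentHttpService.get({ 'docId': 556 }, function (doc) {
  console.log(doc.title + ' -- ' + doc.journal);
});

NgramListHttpService.get({ 'corpusId': 554, 'docId': 556 }, function (data) {
  var lists  = data['554']['lists'];   // e.g. { "558": "StopList", "564": "MiamList", "565": "MapList" }
  var ngrams = data['554']['556'];     // flat array of ngram objects
  console.log(ngrams.length + ' ngrams across ' + Object.keys(lists).length + ' lists');
});

NgramHttpService.post(
  { 'listId': 565, 'ngramId': 'create' },   // 565 = MapList in the example above
  { 'text': 'standing stones' },
  function onSuccess()   { console.log('ngram created'); },
  function onError(data) { console.error('creation failed', data); });
```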
@@ -24,7 +24,13 @@
'docId': $rootScope.docId
},
function(data) {
+ // $rootScope.annotations
+ // ----------------------
+ // is the union of all lists, one being later "active"
+ // (then used for left-side flatlist AND inline annots)
$rootScope.annotations = data[$rootScope.corpusId.toString()][$rootScope.docId.toString()];
+ // TODO £NEW : lookup obj[list_id][term_text] = {terminfo}
+ // $rootScope.lookup =
$rootScope.refreshDisplay();
},
function(data) {
@@ -87,7 +93,28 @@
var value = angular.element(inputEltId).val().trim();
if (value === "") return;
+ // £TEST locally check if already in annotations NodeNgrams ------
+ // $rootScope.annotations = array of ngram objects like:
+ //   {"list_id":805,"occurrences":2,"uuid":9386,"text":"petit échantillon"}
+ console.log('looking for "' + value + '" in list:' + listId)
+ var already_in_list = false ;
+ angular.forEach($rootScope.annotations, function(annot,i) {
+   // console.log(i + ' => ' + annot.text + ',' + annot.list_id) ;
+   if (value == annot.text && listId == annot.list_id) {
+     console.log('the term "' + value + '" was already present in list')
+     // no creation
+     already_in_list = true ;
+   }
+ }
+ );
+ if (already_in_list) { return ; }
+ // ---------------------------------------------------------------
+ // will check if there's a preexisting ngramId for this value
+ // TODO: if maplist => also add to miam
NgramHttpService.post(
{
'listId': listId,
@@ -97,6 +124,7 @@
'text': value
},
function(data) {
+ console.warn("refresh attempt");
// on success
if (data) {
angular.element(inputEltId).val("");
@@ -108,6 +136,11 @@
},
function(data) {
$rootScope.annotations = data[$rootScope.corpusId.toString()][$rootScope.docId.toString()];
+ // TODO £NEW : lookup obj[list_id][term_text] = {terminfo}
+ // $rootScope.lookup =
$rootScope.refreshDisplay();
},
function(data) {
......
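The `TODO £NEW` notes above describe a lookup object keyed by list_id and then by term text. A hypothetical helper showing what that could look like, built from the flat annotations array (not part of the commit):
```
// Hypothetical helper for the "lookup obj[list_id][term_text]" TODO.
// Input: the flat array kept in $rootScope.annotations, e.g.
//   [{ "list_id": 805, "occurrences": 2, "uuid": 9386, "text": "petit échantillon" }, ...]
function buildLookup(annotations) {
  var lookup = {};
  annotations.forEach(function (annot) {
    if (!lookup[annot.list_id]) lookup[annot.list_id] = {};
    lookup[annot.list_id][annot.text] = annot;   // the terminfo object
  });
  return lookup;
}

// With such a lookup, the linear "already_in_list" scan above becomes a
// constant-time test:
//   var already_in_list = lookup[listId] && (value in lookup[listId]);
```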
@@ -55,10 +55,14 @@
</div>
</div>
</div>
- <div>
+ <div class="list-selector">
<h5>Select lists</h5>
<select class="selectpicker" multiple ng-change="activeListsChange()" ng-model="lists" ng-controller="ActiveListsController">
- <option ng-repeat="item in allListsSelect" id="list---{[{item.id}]}" ng-disabled="{[{ item.label == 'MAINLIST' }]}">{[{item.label}]}</option>
+ <option ng-repeat="item in allListsSelect" id="list---{[{item.id}]}">{[{item.label}]}</option>
+ <!-- to disallow unchecking MapList add this into <option> element: ng-disabled="{[{ item.label == 'MapList' }]}" -->
</select>
</div>
</div>
......
@@ -10,6 +10,10 @@ urlpatterns = [
# publication_date
# abstract_text,full_text
url(r'^documents/(?P<doc_id>[0-9]+)$', views.Document.as_view()), # document view
+ # GET:
+ # was : lists ∩ document (ngram_ids intersection if connected to list node_id and doc node_id)
+ # fixed 2016-01: just lists (because document doesn't get updated by POST create cf. ngram.lists.DocNgram filter commented)
url(r'^corpora/(?P<corpus_id>[0-9]+)/documents/(?P<doc_id>[0-9]+)$', views.NgramList.as_view()), # the list associated with an ngram
+ # 2016-03-24: refactoring, deactivated NgramEdit and NgramCreate
......
@@ -34,33 +34,51 @@ def main(request, project_id, corpus_id, document_id):
}, context_instance=RequestContext(request))
class NgramList(APIView):
- """Read and Write Annotations"""
+ """Read the lists of ngrams (terms) that will become annotations"""
renderer_classes = (JSONRenderer,)
def get(self, request, corpus_id, doc_id):
    """Get All for a doc id"""
    corpus_id = int(corpus_id)
    doc_id = int(doc_id)
+   # our results: ngrams for the corpus_id (ignoring doc_id for the moment)
+   doc_ngram_list = []
    lists = {}
-   for list_type in ['MAINLIST']:
+   for list_type in ['MAINLIST', 'MAPLIST', 'STOPLIST']:
        corpus_nod = cache.Node[corpus_id]
        list_nod = corpus_nod.children(typename=list_type).first()
        list_id = list_nod.id
        lists["%s" % list_id] = list_type
-       # ngrams for the corpus_id (ignoring doc_id for the moment):
-       doc_ngram_list = [(obj.id, obj.terms, w) for (w,obj) in list_nod.ngrams.all()]
+       # add to results
+       doc_ngram_list += [(obj.id, obj.terms, w, list_id) for (w,obj) in list_nod.ngrams.all()]
+   print("annotations.views.NgramList.doc_ngram_list: ", doc_ngram_list)
    data = { '%s' % corpus_id : {
-       '%s' % doc_id : [
-           {
-               'uuid': ngram_id,
-               'text': ngram_text,
-               'occurrences': ngram_occurrences,
-               'list_id': list_id,
-           }
-           for ngram_id, ngram_text, ngram_occurrences in doc_ngram_list],
+       '%s' % doc_id :
+           [
+               {'uuid': ngram_id,
+                'text': ngram_text,
+                'occurrences': ngram_occurrences,
+                'list_id': list_id,}
+               for (ngram_id,ngram_text,ngram_occurrences,list_id) in doc_ngram_list
+           ],
        'lists': lists
    }}
+   # alternative transmission format for the "annotations", grouped by list then ngram_id
+   # { 'corpus_id' : {
+   #     list_id_stop: {term_stop1: {term_data}, term_stop2: {term_data}..},
+   #     list_id_miam: {term_miam1: {term_data}, term_miam2: {term_data}..},
+   #     list_id_map: {term_map1: {term_data}, term_map2: {term_data}..},
+   #   }
+   #   'lists' : {"list_id" : "list_type" ... }
+   # }
+   # NB 3rd possibility: the uniqueness of ngram_text could also allow us to use it
+   #    as key and could enhance lookup later (frequent checks if term exists)
    return Response(data)
......
@@ -85,19 +85,19 @@ INDEXED_HYPERDATA = {
, 'convert_from_db': str
},
- 'text':
-     { 'id' : 7
-     , 'type' : str
-     , 'convert_to_db' : str
-     , 'convert_from_db': str
-     },
-
- 'page':
-     { 'id' : 8
-     , 'type' : int
-     , 'convert_to_db' : int
-     , 'convert_from_db': int
-     },
+ # 'text':
+ #     { 'id' : 7
+ #     , 'type' : str
+ #     , 'convert_to_db' : str
+ #     , 'convert_from_db': str
+ #     },
+ #
+ # 'page':
+ #     { 'id' : 8
+ #     , 'type' : int
+ #     , 'convert_to_db' : int
+ #     , 'convert_from_db': int
+ #     },
}
@@ -121,38 +121,47 @@ from gargantext.util.parsers import \
EuropressParser, RISParser, PubmedParser, ISIParser, CSVParser, ISTexParser
RESOURCETYPES = [
+ # type 0
{ 'name': 'Europress (English)',
  'parser': EuropressParser,
  'default_language': 'en',
},
+ # type 1
{ 'name': 'Europress (French)',
  'parser': EuropressParser,
  'default_language': 'fr',
},
+ # type 2
{ 'name': 'Jstor (RIS format)',
  'parser': RISParser,
  'default_language': 'en',
},
+ # type 3
{ 'name': 'Pubmed (XML format)',
  'parser': PubmedParser,
  'default_language': 'en',
},
+ # type 4
{ 'name': 'Scopus (RIS format)',
  'parser': RISParser,
  'default_language': 'en',
},
+ # type 5
{ 'name': 'Web of Science (ISI format)',
  'parser': ISIParser,
  'default_language': 'fr',
},
+ # type 6
{ 'name': 'Zotero (RIS format)',
  'parser': RISParser,
  'default_language': 'en',
},
+ # type 7
{ 'name': 'CSV',
  'parser': CSVParser,
  'default_language': 'en',
},
+ # type 8
{ 'name': 'ISTex',
  'parser': ISTexParser,
  'default_language': 'en',
@@ -165,7 +174,7 @@ DEFAULT_TFIDF_CUTOFF_RATIO = .45 # MAINLIST maximum terms in %
DEFAULT_TFIDF_HARD_LIMIT = 750 # MAINLIST maximum terms abs
# (makes COOCS larger ~ O(N²) /!\)
- DEFAULT_COOC_THRESHOLD = 3 # inclusive minimum for COOCS coefs
+ DEFAULT_COOC_THRESHOLD = 2 # inclusive minimum for COOCS coefs
# (makes COOCS more sparse)
DEFAULT_MAPLIST_MAX = 300 # MAPLIST maximum terms
@@ -206,4 +215,3 @@ BATCH_NGRAMSEXTRACTION_SIZE = 1024
# Scrapers config
QUERY_SIZE_N_MAX = 1000
- QUERY_SIZE_N_DEFAULT = 1000
@@ -18,11 +18,11 @@ class CSVParser(Parser):
return Freqs
- def parse(self, filename):
+ def parse(self, filebuf):
    print("CSV: parsing (assuming UTF-8 and LF line endings)")
-   contents = filename.read().decode("UTF-8").split("\n")
+   contents = filebuf.read().decode("UTF-8").split("\n")
    sample_size = 10
    sample_contents = contents[0:sample_size]
......
- from lxml import etree
from ._Parser import Parser
from datetime import datetime
from io import BytesIO
@@ -6,10 +5,10 @@ import json
class ISTexParser(Parser):
- def parse(self, thefile):
+ def parse(self, filebuf):
-     json_data=open(thefile,"r")
-     data = json.load(json_data)
-     json_data.close()
+     contents = filebuf.read().decode("UTF-8")
+     data = json.loads(contents)
+     filebuf.close()
      json_docs = data["hits"]
      hyperdata_list = []
      hyperdata_path = {
......
@@ -145,5 +145,6 @@ class Parser:
try:
    file.seek(0)
except:pass
+ # debug: print(self.parse) # do we have correct parser ?
for hyperdata in self.parse(file):
    yield self.format_hyperdata(hyperdata)
@@ -3,6 +3,8 @@ For initial ngram groups via stemming
Example:
- groups['copper engrav'] = {'copper engraving':3, 'coppers engraver':1...}
- groups['post'] = {'poste':3, 'poster':5, 'postés':2...}
+ TODO use groups for aggregated occurrences/coocs counts !
"""
from gargantext.models import Node, NodeNgramNgram
@@ -25,8 +27,9 @@ def prepare_stemmers(corpus):
'__unknown__' : SnowballStemmer("english")
}
for lgiso2 in corpus.hyperdata['languages'].keys():
-     lgname = languages[lgiso2].name.lower()
-     stemmers_by_lg[lgiso2] = SnowballStemmer(lgname)
+     if (lgiso2 != '__skipped__'):
+         lgname = languages[lgiso2].name.lower()
+         stemmers_by_lg[lgiso2] = SnowballStemmer(lgname)
return stemmers_by_lg
def compute_groups(corpus, stoplist_id = None, overwrite_id = None):
......
@@ -45,15 +45,27 @@ def extract_ngrams(corpus, keys=('title', 'abstract', )):
ngrams_data = set()
# extract ngrams
resource_type_index = corpus.resources()[0]['type']
resource_type = RESOURCETYPES[resource_type_index]
default_language_iso2 = resource_type['default_language']
for documents_count, document in enumerate(corpus.children('DOCUMENT')):
    # get ngrams extractor for the current document
    language_iso2 = document.hyperdata.get('language_iso2', default_language_iso2)
    try:
+       # this looks for a parser in constants.LANGUAGES
        ngramsextractor = ngramsextractors[language_iso2]
    except KeyError:
-       print('Unrecognized language: `%s`' % (language_iso2, ))
+       # skip document
+       print('Unsupported language: `%s`' % (language_iso2, ))
+       # and remember that for later processes (eg stemming)
+       document.hyperdata['__skipped__'] = 'ngrams_extraction'
+       document.save_hyperdata()
+       session.commit()
+       if language_iso2 in corpus.hyperdata['languages']:
+           skipped_lg_infos = corpus.hyperdata['languages'].pop(language_iso2)
+           corpus.hyperdata['languages']['__skipped__'][language_iso2] = skipped_lg_infos
+           corpus.save_hyperdata()
+           session.commit()
        continue
    # extract ngrams on each of the considered keys
    for key in keys:
......
@@ -49,8 +49,10 @@ def parse(corpus):
documents_count += 1
# update info about the resource
resource['extracted'] = True
- # add a corpus-level info about languages
+ # add a corpus-level info about languages...
corpus.hyperdata['languages'] = observed_languages
+ # ...with a special key inside for skipped languages at ngrams_extraction
+ corpus.hyperdata['languages']['__skipped__'] = {}
# commit all changes
corpus.status('parsing', progress=documents_count, complete=True)
corpus.save_hyperdata()
......
@@ -148,6 +148,7 @@ class GroupChange(APIView):
=> removes couples where newly reconnected ngrams were involved
=> adds new couples from GroupsBuffer of terms view
+ TODO recalculate scores after new groups
TODO see use of util.lists.Translations
TODO benchmark selective delete compared to entire list rewrite
"""
@@ -155,10 +156,6 @@ class GroupChange(APIView):
all_nodes_involved = []
links = []
- print([i for i in request.POST.lists()])
- pass
for (mainform_key, subforms_ids) in request.POST.lists():
    mainform_id = mainform_key[:-2] # remove brackets '543[]' -> '543'
    all_nodes_involved.append(mainform_id)
@@ -414,10 +411,19 @@ class ListFamily(APIView):
links = Translations(groups_id)
linkinfo = links.groups
- # the output form
- for ng in mainlist_query.all() + hidden_ngrams_query.all():
+ # list of ngrams which need detailed info
+ ngrams_which_need_detailed_info = []
+ if "head" in parameters:
+     # head triggered simplified form: just the top of the mainlist
+     # TODO add maplist membership
+     ngrams_which_need_detailed_info = mainlist_query.all()
+ else:
+     ngrams_which_need_detailed_info = mainlist_query.all() + hidden_ngrams_query.all()
+ # the output form of details is:
+ #   ngraminfo[id] => [term, weight]
+ for ng in ngrams_which_need_detailed_info:
      ng_id = ng[0]
-     # id => [term, weight]
      ngraminfo[ng_id] = ng[1:]
# NB the client js will sort mainlist ngs from hidden ngs after ajax
......
@@ -7,15 +7,18 @@ urlpatterns = [ url(r'^nodes$' , nodes.NodeListResource.as_view()
, url(r'^nodes/(\d+)$' , nodes.NodeResource.as_view() )
, url(r'^nodes/(\d+)/facets$' , nodes.CorpusFacet.as_view() )
, url(r'^nodes/(\d+)/having$' , nodes.NodeListHaving.as_view() )
# get a list of ngram_ids or ngram_infos by list_id
# url(r'^ngramlists/(\d+)$', ngramlists.List.as_view()),
+ , url(r'^ngramlists/change$', ngramlists.ListChange.as_view() )
+ # add or remove ngram from a list
+ # ex: add <=> PUT ngramlists/change?list=42&ngrams=1,2
+ #     rm <=> DEL ngramlists/change?list=42&ngrams=1,2
, url(r'^ngramlists/groups$', ngramlists.GroupChange.as_view())
# modify grouping couples of a group node
# ex: POST ngramlists/groups?node=43
# post data looks like : {"767":[209,640],"779":[436,265,385]}"
, url(r'^ngramlists/family$' , ngramlists.ListFamily.as_view())
# entire combination of lists from a corpus
@@ -24,5 +27,4 @@ urlpatterns = [ url(r'^nodes$' , nodes.NodeListResource.as_view()
# - an optional stoplist
# - an optional maplist
# - an optional grouplist
]
@@ -2,6 +2,7 @@
from time import sleep
import datetime
import threading
+ from traceback import print_tb
#from gargantext.settings import MEDIA_ROOT, BASE_DIR
from django.shortcuts import redirect
@@ -111,7 +112,7 @@ def save(request , project_id):
user_id = request.user.id,
parent_id = project_id,
typename = 'CORPUS',
- hyperdata = { "action" : "Scraping data"
+ hyperdata = { "action" : "Scrapping data"
, "language_id" : None
}
)
@@ -137,9 +138,9 @@ def save(request , project_id):
for filename in tasks.firstResults:
    if filename!=False:
        # add the uploaded resource to the corpus
-       # add the uploaded resource to the corpus
-       corpus.add_resource( type = 3
+       corpus.add_resource(
+           type = 8 # cf. constants.RESOURCETYPES
            , path = filename
        )
        dwnldsOK+=1
@@ -152,14 +153,17 @@ def save(request , project_id):
except Exception as error:
    print('WORKFLOW ERROR')
    print(error)
+   try:
+       print_tb(error.__traceback__)
+   except:
+       pass
+   # IMPORTANT ---------------------------------
+   # sanitize session after interrupted transact
+   session.rollback()
+   # --------------------------------------------
    sleep(1)
return HttpResponseRedirect('/projects/' + str(project_id))
data = [query_string,query,N]
return JsonHttpResponse(data)
@@ -12,6 +12,7 @@ import json
import datetime
from os import path
import threading
+ from traceback import print_tb
#from gargantext.settings import MEDIA_ROOT, BASE_DIR
from django.shortcuts import redirect
@@ -159,10 +160,16 @@ def save( request , project_id ) :
except Exception as error:
    print('WORKFLOW ERROR')
    print(error)
+   try:
+       print_tb(error.__traceback__)
+   except:
+       pass
+   # IMPORTANT ---------------------------------
+   # sanitize session after interrupted transact
+   session.rollback()
+   # --------------------------------------------
    sleep(1)
return HttpResponseRedirect('/projects/' + str(project_id))
data = alist
return JsonHttpResponse(data)
@@ -22,6 +22,19 @@ th a {
font-size: 0.7em;
}
+ #dynatable-query-search-my-ajax-table {
+   min-width: 20em;
+ }
+ .dynatable-search {
+   margin-left: 2em;
+   font-size: 16px;
+ }
+ .dynatable-per-page-label {
+   margin-left: 2em;
+   font-size: 16px;
+ }
#corpusdisplayer {
width:200px;
margin:0 auto;
@@ -60,8 +73,8 @@ p.note > label {
float: left;
}
- .note.greyed {
-   opacity: 0.2;
+ .greyed {
+   opacity: 0.3;
}
tr:hover {
@@ -77,9 +90,11 @@ tr:hover {
.delete {
color:red;
opacity: 0.8;
+ text-decoration: line-through;
}
.keep {
color:green;
+ font-style: italic;
}
.group {
......
@@ -591,6 +591,7 @@ function getTopPapers(type){
if(pub["title"]) {
var gquery = "https://searx.laquadrature.net/?categories=general&q="+pub["title"].replace(" "+"+")
+ // ex url_elems = ["http:", "", "localhost:8000", "projects", "1", "corpora", "2690", "explorer?field1=ngrams&field2=ngrams&distance=conditional&bridgeness=5"]
var url_elems = window.location.href.split("/")
var url_mainIDs = {}
for(var i=0; i<url_elems.length; i++) {
@@ -598,7 +599,10 @@ function getTopPapers(type){
url_mainIDs[url_elems[i-1]] = Number(url_elems[i]);
}
}
- var getpubAPI = window.location.origin+'/project/'+url_mainIDs["project"]+'/corpus/'+ url_mainIDs["corpus"] + '/document/'+pub["id"]
+ // ex url_mainIDs = {projects: 1, corpora: 2690}
+ // link to matching document
+ var getpubAPI = window.location.origin+'/projects/'+url_mainIDs["projects"]+'/corpora/'+ url_mainIDs["corpora"] + '/documents/'+pub["id"]
var ifjournal="",ifauthors="",ifkeywords="",ifdate="",iftitle="";
......
@@ -33,15 +33,9 @@ var latest,oldest;
var TheBuffer = false
function Push2Buffer( NewVal ) {
- console.log( " = = = = = = = = " )
- console.log( "Push2Buffer()" )
- console.log( "\t"+NewVal )
if ( TheBuffer == false) {
if( ! NewVal ) {
- // var limits = [ new Date( oldest[0],oldest[1],oldest[2] ) , new Date( latest[0],latest[1],latest[2] ) ];
- var limits = [new Date(oldest[0],oldest[1]-1,oldest[2]), new Date(latest[0],latest[1]-1,latest[2] ) ];
- limits[0] = new Date(limits[0].setDate(limits[0].getDate()-1) );
- limits[1] = new Date(limits[1].setDate(limits[1].getDate()+1) );
+ var limits = [ oldest , latest ];
NewVal = limits;
}
console.log( " - - - - - - " )
@@ -57,9 +51,7 @@ function Push2Buffer( NewVal ) {
var past = TheBuffer[0]+"_"+TheBuffer[1]
if( ! NewVal ) {
- var limits = [new Date(oldest[0],oldest[1]-1,oldest[2]), new Date(latest[0],latest[1]-1,latest[2] ) ];
- limits[0] = new Date(limits[0].setDate(limits[0].getDate()-1) );
- limits[1] = new Date(limits[1].setDate(limits[1].getDate()+1) );
+ var limits = [ oldest , latest ];
NewVal = limits;
}
var now = NewVal[0]+"_"+NewVal[1]
@@ -101,18 +93,19 @@ function Final_UpdateTable( action ) {
var dataini = TheBuffer[0];
var datafin = TheBuffer[1];
pr("show me the pubs of the selected period")
- console.log( TheBuffer )
+ // console.log( TheBuffer )
pr("\tfrom ["+dataini+"] to ["+datafin+"]")
TimeRange = []
- console.log(dataini, datafin)
+ // console.log("dataini, datafin")
+ // console.log(dataini, datafin)
$.each(AjaxRecords, function(i, node) {
if (node.date >= dataini && node.date >= dataini) {
// pr( AjaxRecords[i].date+" : "+AjaxRecords[i].id )
TimeRange.push(node);
}
});
- console.log(TimeRange)
+ // console.log(TimeRange)
MyTable = $('#my-ajax-table').dynatable({
dataset: {
@@ -307,6 +300,7 @@ function Main_test( Data , SearchFilter ) {
var t1 = get_node_date(AjaxRecords.slice(-1)[0]);
oldest = t0;
latest = t1;
+ console.log('t0, t1')
console.log(t0, t1)
TheBuffer = [t0, t1];
......
@@ -58,10 +58,25 @@
<p id="corpusdisplayer" onclick='Final_UpdateTable("click")' class="btn btn-primary btn-lg">
Close term list
</p>
+ <!-- <button title='run test function' onclick="doATest()">
+ TEST
+ </button> -->
</a>
+ <!-- see in javascript function queries.functions['my_state_filter'] -->
+ <div class="pull-left" style="margin-top:1.85em;">
+ Filter:
+ <select id="picklistmenu" name="my_state_filter">
+ <option value='reset' selected="selected" >All terms</option>
+ <option value='0'>Mainlist only</option>
+ <option value='1'>Maplist only</option>
+ <option value='2'>Stoplist only</option>
+ </select>
+ </div>
</h4>
</div>
<div id="terms_table" class="panel-collapse collapse in no-transition" role="tabpanel">
<div class="panel-body">
<div id="div-table">
@@ -78,9 +93,9 @@
</div> <!-- /div panel -->
</div> <!-- /row with the dynatable panels -->
</div>
<!-- /jumbotron
<button id="ImportList" onclick="GetUserPortfolio();" class="btn btn-warning">
Import a Corpus-List
</button>
@@ -90,6 +105,11 @@
+ <!--
+ # stub to import a list (aka orange button)
+ <button id="ImportList" onclick="GetUserPortfolio();" class="btn btn-warning">Import a Corpus-List</button>
+ -->
<script type="text/javascript" src="{% static "js/jquery/jquery.min.js" %}"></script>
<script type="text/javascript" src="{% static "js/bootstrap/bootstrap.min.js" %}"></script>
......
@@ -32,7 +32,7 @@
</center>
</div>
</div>
<div class="row">
<div id="monthly-volume-chart"></div>
</div>
......