Merge remote-tracking branch 'origin/romain-testing' into testing

4af0bade · delanoe · 0112af6d · 4af0bade · 4af0bade · 4af0bade
Commit 4af0bade authored Sep 30, 2016 by delanoe
14 changed files
--- a/annotations/static/annotations/document.js
+++ b/annotations/static/annotations/document.js
@@ -15,7 +15,7 @@
        {'docId': $rootScope.docId},
        function(data, responseHeaders) {
          $scope.authors = data.authors;
-          $scope.journal = data.journal;
+          $scope.source = data.source;
          $scope.publication_date = data.publication_date;
          //$scope.current_page_number = data.current_page_number;
          //$scope.last_page_number = data.last_page_number;
@@ -23,25 +23,34 @@
          $rootScope.docId = data.id;
          $rootScope.full_text = data.full_text;
          $rootScope.abstract_text = data.abstract_text;
-          console.log("annotations.document.DocController.getannotations")
-          // GET the annotationss
-          NgramListHttpService.get(
-            {
-              'corpusId': $rootScope.corpusId,
-              'docId': $rootScope.docId
-            },
-            function(data) {
-              $rootScope.annotations = data[$rootScope.corpusId.toString()][$rootScope.docId.toString()];
-              // eg id => 'MAPLIST'
-              $rootScope.lists = data[$rootScope.corpusId.toString()].lists;
-              // inverted 'MAPLIST' => id
-              $rootScope.listIds = _.invert($rootScope.lists)
-              $scope.dataLoading = false ;
-            },
-            function(data) {
-              console.error("unable to get the list of ngrams");
-            }
-          );
+          $rootScope.workflow_finished = data.corpus_status['complete'] ;
+
+          console.log("workflow status", $rootScope.workflow_finished)
+
+          if ($scope.workflow_finished) {
+            console.log("annotations.document.DocController.getannotations")
+            // GET the annotations
+            NgramListHttpService.get(
+              {
+                'corpusId': $rootScope.corpusId,
+                'docId': $rootScope.docId
+              },
+              function(data) {
+                $rootScope.annotations = data[$rootScope.corpusId.toString()][$rootScope.docId.toString()];
+                // eg id => 'MAPLIST'
+                $rootScope.lists = data[$rootScope.corpusId.toString()].lists;
+                // inverted 'MAPLIST' => id
+                $rootScope.listIds = _.invert($rootScope.lists)
+                $scope.dataLoading = false ;
+              },
+              function(data) {
+                console.error("unable to get the list of ngrams");
+              }
+            );
+          }
+          else {
+            $scope.dataLoading = false ;
+          }

      });


--- a/annotations/templates/annotations/main.html
+++ b/annotations/templates/annotations/main.html
@@ -24,7 +24,7 @@
    <div id="annotationsApp" ng-cloak>
      <div class="container-fluid">
        <div class="row-fluid main-panel" ng-controller="NGramHighlightController">
-          <div class="col-md-4 col-xs-4 tabbable words-panel">
+          <div ng-if="workflow_finished" class="col-md-4 col-xs-4 tabbable words-panel">
              <div class="list-selector">
                <h5>Select highlighted list(s)
                <select class="selectpicker" multiple ng-change="activeListsChange()" ng-model="lists" ng-controller="ActiveListsController">
@@ -89,7 +89,7 @@
            </div>
            <div class="row-fluid">
              <ul class="list-group clearfix">
-                <li class="list-group-item small"><span class="badge">journal</span>{[{journal}]}</li>
+                <li class="list-group-item small"><span class="badge">source</span>{[{source}]}</li>
                <li class="list-group-item small"><span class="badge">authors</span>{[{authors}]}</li>
                <li class="list-group-item small"><span class="badge">date</span>{[{publication_date}]}</li>
              </ul>
@@ -108,12 +108,14 @@
                <span class="badge">abstract</span>
            </div>
            <p id="abstract-text" class="text-container">
+                {[{abstract_text}]}
                <div ng-if="abstract_text == null" class="alert alert-info small" role="alert">Empty abstract text</div>
            </p>
            <div ng-if="full_text != null">
                <span class="badge">full article</span>
            </div>
            <p id="full-text" class="text-container">
+                {[{full_text}]}
                <div ng-if="full_text == null" class="alert alert-info small" role="alert">Empty full text</div>
            </p>
          </div>

--- a/annotations/views.py
+++ b/annotations/views.py
@@ -172,8 +172,9 @@ class Document(APIView):

    def get(self, request, doc_id):
        """Document by ID"""
-        # implicit global session
        node = session.query(Node).filter(Node.id == doc_id).first()
        if node is None:
            raise APIException('This node does not exist', 404)
+        corpus = session.query(Node).filter(Node.id == node.parent_id).first()  # only once node is known to exist
+        corpus_workflow_status = corpus.hyperdata['statuses'][0]

@@ -185,9 +186,10 @@ class Document(APIView):
            pub_date = node.hyperdata.get('publication_date')

        data = {
+            'corpus_status': corpus_workflow_status,
            'title': node.hyperdata.get('title'),
            'authors': node.hyperdata.get('authors'),
-            'journal': node.hyperdata.get('journal'),
+            'source': node.hyperdata.get('source'),
            'publication_date': pub_date,
            'full_text': node.hyperdata.get('full_text'),
            'abstract_text': node.hyperdata.get('abstract'),

--- a/gargantext/constants.py
+++ b/gargantext/constants.py
@@ -382,6 +382,9 @@ BATCH_NGRAMSEXTRACTION_SIZE = 3000   # how many new node-ngram relations before
 QUERY_SIZE_N_MAX     = 1000
 QUERY_SIZE_N_DEFAULT = 1000

+# Refresh corpora workflow status for project view's progressbar
+PROJECT_VIEW_REFRESH_INTERVAL  = 3000     # 1st refresh in ms (then increasing arithmetically)
+PROJECT_VIEW_MAX_REFRESH_ATTEMPTS = 10    # how many times before we give up

 # ------------------------------------------------------------------------------
 # Graph <=> nodes API parameters

--- a/gargantext/util/db_cache.py
+++ b/gargantext/util/db_cache.py
@@ -46,6 +46,9 @@ class ModelCache(dict):
 class Cache:

    def __getattr__(self, key):
+        '''
+        lazy init of new modelcaches: self.Node, self.User...
+        '''
        try:
            model = getattr(models, key)
        except AttributeError:
@@ -54,4 +57,15 @@ class Cache:
        setattr(self, key, modelcache)
        return modelcache

+
+    def clean_all(self):
+        '''
+        re-init any existing modelcaches: each one found on this Cache
+        is replaced by a new ModelCache built on the same model
+        '''
+        # list(): iterate a snapshot of the names while attributes get rebound
+        for modelname in list(self.__dict__):
+            stale_modelcache = getattr(self, modelname)
+            setattr(self, modelname, ModelCache(stale_modelcache._model))
+
 cache = Cache()
--- a/gargantext/util/http.py
+++ b/gargantext/util/http.py
@@ -32,6 +32,10 @@ def requires_auth(func):
            from gargantext.util.db import session
            session.rollback()
            print("=== session rollback ok!")
+            # re init the global cache (it must still have detached instances)
+            from gargantext.util.db_cache import cache
+            cache.clean_all()
+            print("=== cache reinit ok!")
            # and relogin for safety
            url = '/auth/login/?next=%s' % urlencode(request.path)
            return redirect(url)

--- a/gargantext/util/toolchain/ngrams_addition.py
+++ b/gargantext/util/toolchain/ngrams_addition.py
@@ -23,6 +23,8 @@ from gargantext.util.db  import bulk_insert_ifnotexists # £TODO debug
 from sqlalchemy          import distinct
 from re                  import findall, IGNORECASE

+from gargantext.util.toolchain.main import t   # timer
+
 # TODO from gargantext.constants import LIST_OF_KEYS_TO_INDEX = title, abstract

 def index_new_ngrams(ngram_ids, corpus, keys=('title', 'abstract', )):
@@ -40,6 +42,8 @@ def index_new_ngrams(ngram_ids, corpus, keys=('title', 'abstract', )):
    @param corpus: the CORPUS node

    @param keys: the hyperdata fields to index
+
+    # FIXME too slow: index_new_ngrams should be faster via tsvector on DB
    """

    # retrieve *all* the ngrams from our list
@@ -56,7 +60,11 @@ def index_new_ngrams(ngram_ids, corpus, keys=('title', 'abstract', )):
    node_ngram_to_write = {}

    # loop throught the docs and their text fields
-    for doc in corpus.children('DOCUMENT'):
+    for (i, doc) in enumerate(corpus.children('DOCUMENT')):
+
+        if (i % 100 == 0):
+            print('CORPUS #%d: [%s] ngrams_addition: doc %i' % (corpus.id, t(), i))
+            print()

        # a new empty counting subdict
        node_ngram_to_write[doc.id] = {}

--- a/gargantext/views/api/ngramlists.py
+++ b/gargantext/views/api/ngramlists.py
@@ -373,6 +373,10 @@ class ListChange(APIView):
    def put(self, request):
        """
        Adds one or more ngrams to a list.
+
+        NB: we assume ngram_ids don't contain subforms !!
+            (this assumption is not checked here because it would be
+             slow: if you want to add a subform, send the mainform's id)
        """
        # union of items ----------------------------
        new_list = self.base_list + self.change_list

--- a/gargantext/views/api/nodes.py
+++ b/gargantext/views/api/nodes.py
@@ -17,7 +17,11 @@ _node_available_fields = ['id', 'parent_id', 'name', 'typename', 'hyperdata', 'n
 _node_default_fields = ['id', 'parent_id', 'name', 'typename']
 _node_available_types = NODETYPES

-#_hyperdata_available_fields = ['title', 'resourcetype']
+_hyperdata_available_fields = ['title', 'source', 'abstract', 'statuses',
+                               'language_name', 'language_iso3','language_iso2','language_id',
+                               'publication_date',
+                               'publication_year','publication_month', 'publication_day',
+                               'publication_hour','publication_minute','publication_second']
 #_node_available_formats = ['json', 'csv', 'bibex']


@@ -32,24 +36,38 @@ def _query_nodes(request, node_id=None):
    # parameters validation
    # fixme: this validation does not allow custom keys in url (eg '?name=' for rename action)
    parameters = get_parameters(request)
+
    parameters = validate(parameters, {'type': dict, 'items': {
        'formated': {'type': str, 'required' : False, 'default': 'json'},
-#        'hyperdata': {'type': list, 'default' : _hyperdata_available_fields, 'items': {
-#            'type': str, 'range' : _node_available_fields,
-#        }},
+
        'pagination_limit': {'type': int, 'default': 10},
        'pagination_offset': {'type': int, 'default': 0},
        'fields': {'type': list, 'default': _node_default_fields, 'items': {
            'type': str, 'range': _node_available_fields,
        }},
+        # choice of hyperdata fields
+        'hyperdata_filter': {'type': list, 'required':False,
+            'items': {
+                'type': str, 'range': _hyperdata_available_fields,
+            }},
        # optional filtering parameters
        'types': {'type': list, 'required': False, 'items': {
            'type': str, 'range': _node_available_types,
        }},
        'parent_id': {'type': int, 'required': False},
    }})
+
+    # debug
+    # print('PARAMS', parameters)
+
+    # additional validation for hyperdata_filter
+    if (('hyperdata_filter' in parameters)
+        and (not ('hyperdata' in parameters['fields']))):
+        raise ValidationException("Using the hyperdata_filter filter requires fields[]=hyperdata")
+
    # start the query
    query = user.nodes()
+
    # filter by id
    if node_id is not None:
        query = query.filter(Node.id == node_id)
@@ -63,6 +81,7 @@ def _query_nodes(request, node_id=None):
    count = query.count()
    # order
    query = query.order_by(Node.hyperdata['publication_date'], Node.id)
+
    # paginate the query
    if parameters['pagination_limit'] == -1:
        query = query[parameters['pagination_offset']:]
@@ -72,8 +91,54 @@ def _query_nodes(request, node_id=None):
            parameters['pagination_limit']
        ]
    # return the result!
+    # (the receiver function does the filtering of fields and hyperdata_filter)
    return parameters, query, count

+
+def _filter_node_fields(node, parameters):
+    """
+    Filters the properties of a Node object before sending them to response
+
+    @node: the Node object whose properties are read
+    @parameters: a dict coming from get_parameters: it must contain
+                 a 'fields' key and may contain a 'hyperdata_filter' key
+    @return: a dict {field: value} with one entry per requested field
+
+    Usually the parameters dict looks like this :
+               {'fields': ['parent_id', 'id', 'name', 'typename', 'hyperdata'],
+                'hyperdata_filter': ['title'], 'parent_id': '55054',
+                'types': ['DOCUMENT'], 'pagination_limit': '15'}
+
+    History:
+        1) this used to be single line:
+           res = {field: getattr(node, field) for field in parameters['fields']}
+
+        2) it was in both NodeResource.get() and NodeListResource.get()
+
+        3) it's now expanded to add support for parameters['hyperdata_filter']
+            - if absent, entire hyperdata is considered as one field (as before)
+            - if present, only the listed hyperdata subfields are picked (new)
+    """
+    # FIXME all this filtering
+    #       could be done in rawsql
+    #       (in _query_nodes)
+
+    filter_hyperdata = 'hyperdata_filter' in parameters
+    result = {}
+    for field in parameters['fields']:
+        # hyperdata if needs to be filtered
+        if field == 'hyperdata' and filter_hyperdata:
+            result['hyperdata'] = {
+                hfield: node.hyperdata[hfield]
+                for hfield in parameters['hyperdata_filter']
+                if hfield in node.hyperdata
+            }
+        # normal field or entire hyperdata
+        else:
+            result[field] = getattr(node, field)
+
+    return result
+
 class Status(APIView):
    '''API endpoint that represent the current status of the node'''
    renderer_classes = (JSONRenderer, BrowsableAPIRenderer)
@@ -84,17 +149,27 @@ class Status(APIView):
            return HttpResponse('Unauthorized', status=401)

        user = cache.User[request.user.id]
-        check_rights(request, node_id)
+        # check_rights(request, node_id)
+        # check_rights is commented out because the filter on user_id below does the job
+
        node = session.query(Node).filter(Node.id == node_id, Node.user_id== user.id).first()
        if node is None:
            return Response({"detail":"Node not Found for this user"}, status=HTTP_404_NOT_FOUND)
        else:
-            context = format_response(node, [n for n in node.children()])
+
+            # FIXME using the more generic strategy ---------------------------
+            # context = format_response(node, [n for n in node.children()])
+            # or perhaps ? context = format_response(None, [node])
+            # -----------------------------------------------------------------
+
+            # using a more direct strategy
+            context = {}
            try:
-                context["status"] = node.hyperdata["statuses"]
+               context["statuses"] = node.hyperdata["statuses"]
            except KeyError:
-                context["status"] = None
+               context["statuses"] = None
            return Response(context)
+
    def post(self, request, data):
        '''create a new status for node'''
        if not request.user.is_authenticated():
@@ -102,17 +177,17 @@ class Status(APIView):
            return HttpResponse('Unauthorized', status=401)

        raise NotImplementedError
-    
+
    def put(self, request, data):
        '''update status for node'''
-        
+
        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        user = cache.User[request.user.id]
-        check_rights(request, node_id)
-        node = session.query(Node).filter(Node.id == node_id).first()
+        # check_rights(request, node_id)
+        node = session.query(Node).filter(Node.id == node_id, Node.user_id== user.id).first()

        raise NotImplementedError

@@ -126,8 +201,8 @@ class Status(APIView):
            return HttpResponse('Unauthorized', status=401)

        user = cache.User[request.user.id]
-        check_rights(request, node_id)
-        node = session.query(Node).filter(Node.id == node_id).first()
+        # check_rights(request, node_id)
+        node = session.query(Node).filter(Node.id == node_id, Node.user_id == user.id).first()
        if node is None:
            return Response({"detail":"Node not Found"}, status=HTTP_404_NOT_FOUND)
        node.hyperdata["status"] = []
@@ -150,16 +225,22 @@ class NodeListResource(APIView):
        parameters, query, count = _query_nodes(request)

        if parameters['formated'] == 'json':
+            records_array = []
+            add_record = records_array.append
+
+            # FIXME filter in rawsql in _query_nodes
+            for node in query:
+                add_record(_filter_node_fields(node, parameters))
+
            return JsonHttpResponse({
                'parameters': parameters,
                'count': count,
-                'records': [
-                    { field: getattr(node, field) for field in parameters['fields'] }
-                    for node in query
-                ]
+                'records': records_array
            })

        elif parameters['formated'] == 'csv':
+            # TODO add support for fields and hyperdata_filter
+
            response = HttpResponse(content_type='text/csv')
            response['Content-Disposition'] = 'attachment; filename="Gargantext_Corpus.csv"'

@@ -305,9 +386,8 @@ class NodeResource(APIView):
        if not len(query):
            raise Http404()
        node = query[0]
-        return JsonHttpResponse({
-            field: getattr(node, field) for field in parameters['fields']
-        })
+
+        return JsonHttpResponse(_filter_node_fields(node, parameters))

    # contains a check on user.id (within _query_nodes)
    def delete(self, request, node_id):
@@ -415,11 +495,11 @@ class CorpusFavorites(APIView):
        (will test if docs 53 and 54 are among the favorites of corpus 2)
        (returns the intersection of fav docs with [53,54])
        """
-        
+
        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)
-        
+
        fav_node = self._get_fav_node(corpus_id)

        req_params = validate(

--- a/gargantext/views/pages/projects.py
+++ b/gargantext/views/pages/projects.py
@@ -54,6 +54,9 @@ def overview(request):
            # projects owned by the user's contacts
            'common_users': (contact for contact, projects in contacts_projects),
            'common_projects': sum((projects for contact, projects in contacts_projects), []),
+            # status refreshing params (when active workflows)
+            'status_refresh_initial_interval': PROJECT_VIEW_REFRESH_INTERVAL,
+            'status_refresh_max_attempts': PROJECT_VIEW_MAX_REFRESH_ATTEMPTS,
        },
    )


--- a/static/lib/gargantext/Docs_dyna_chart_and_table.js
+++ b/static/lib/gargantext/Docs_dyna_chart_and_table.js
@@ -592,7 +592,7 @@ function Main_test(Data) {
          // by default we always decide to search in the title
          matchInTexts = [record.rawtitle]

-          // if box is checked we'll also search in the abstracts
+          // if box is checked we'll also search in the abstracts (todo: via ajax)
          if (doAbstractsSearch) {
              if (typeof record.hyperdata.abstract !== 'undefined') {
                  matchInTexts.push(record.hyperdata.abstract)
@@ -630,9 +630,23 @@ function Main_test(Data) {
    MyTable.data('dynatable').sorts.functions["signatureSort"] = makeAlphaSortFunctionOnProperty('signature')
    MyTable.data('dynatable').sorts.functions["sourceSort"] = function sourceSort (rec1,rec2, attr, direction) {
        // like rawtitle but nested property
-        if (direction == 1) return rec1.hyperdata.source.localeCompare(rec2.hyperdata.source)
-        else                return rec2.hyperdata.source.localeCompare(rec1.hyperdata.source)
-    }
+        var cmp = 0   // declared locally: it used to leak as an implicit global
+        if (rec1.hyperdata && rec1.hyperdata.source
+            && rec2.hyperdata && rec2.hyperdata.source) {
+            // the alphabetic sort
+            if (direction == 1) return rec1.hyperdata.source.localeCompare(rec2.hyperdata.source)
+            else                return rec2.hyperdata.source.localeCompare(rec1.hyperdata.source)
+        }
+        else if (rec1.hyperdata && rec1.hyperdata.source) {
+            cmp = direction
+        }
+        else if (rec2.hyperdata && rec2.hyperdata.source) {
+            cmp = -direction
+        }
+        // neither record has a source: fall back on the records' RecDict entries
+        if (cmp == 0)       cmp = RecDict[rec1.id] < RecDict[rec2.id] ? -1 : 1
+        return cmp   // the result was previously dropped (comparator returned undefined)
+      }

    // hook on page change
    MyTable.bind('dynatable:page:set', tidyAfterPageSet)
@@ -736,9 +750,20 @@ function makeAlphaSortFunctionOnProperty(property) {
    return function (rec1,rec2, attr, direction) {
        var cmp = null

-        // the alphabetic sort
-        if (direction == 1) cmp = rec1[property].localeCompare(rec2[property])
-        else                cmp = rec2[property].localeCompare(rec1[property])
+        if (rec1[property] && rec2[property]) {
+            // the alphabetic sort
+            if (direction == 1) cmp = rec1[property].localeCompare(rec2[property])
+            else                cmp = rec2[property].localeCompare(rec1[property])
+        }
+        else if (rec1[property]) {
+            cmp = direction
+        }
+        else if (rec2[property]) {
+            cmp = -direction
+        }
+        else {
+          cmp = 0
+        }

        // second level sorting on key = id in records array
        // (this one volontarily not reversable by direction
@@ -768,7 +793,10 @@ function tidyAfterPageSet() {
 $.ajax({
  url: '/api/nodes?types[]=DOCUMENT&pagination_limit=-1&parent_id='
        + corpus_id
-        +'&fields[]=parent_id&fields[]=id&fields[]=name&fields[]=typename&fields[]=hyperdata',
+        +'&fields[]=parent_id&fields[]=id&fields[]=name&fields[]=typename&fields[]=hyperdata'
+        // +'&hyperdata_filter[]=title&hyperdata_filter[]=source&hyperdata_filter[]=language_iso2'
+        +'&hyperdata_filter[]=title&hyperdata_filter[]=source&hyperdata_filter[]=language_iso2&hyperdata_filter[]=abstract'
+        +'&hyperdata_filter[]=publication_year&hyperdata_filter[]=publication_month&hyperdata_filter[]=publication_day',
  success: function(maindata){
      // unfortunately favorites info is in a separate request (other nodes)
      $.ajax({
@@ -838,6 +866,10 @@ $.ajax({
                                 rec.hyperdata.publication_day
                                )

+              // and a bool property for remote search results
+              // (will be updated by ajax)
+              rec.matched_remote_search = false      // TODO use it
+
          }

          AjaxRecords = maindata.records; // backup-ing in global variable!

--- a/templates/pages/corpora/terms.html
+++ b/templates/pages/corpora/terms.html
@@ -131,7 +131,7 @@ em {
                          &nbsp;
                          <!-- save/import button -->
                          <button id="ImportListOrSaveAll" class="btn btn-warning" style="font-size:120%"
-                                   onclick="$('#csvimport').modal('show');">
+                                   onclick="$('#csvimport').modal('show'); document.getElementById('importsubmit').disabled = false ;">
                              <b>Import a Termlist</b>
                          </button>
                      </div>
@@ -437,13 +437,22 @@ function listmergeCsvPost(theFile){
                 // reload after 3s
                 setTimeout("location.reload(true)", 3000);
                 },
-              error: function(result) {
-                  my_html  = '<h3 style="color:red">Error</h3>'
-                  my_html += "<p class='note'>please correct your CSV file and retry</p>"
-                  my_html += "<p>"+ result.responseJSON['err']+"</p>"
+              error: function(result, t) {
+                  if (t != 'timeout') {
+                    my_html  = '<h3 style="color:red">Error</h3>'
+                    my_html += "<p class='note'>please correct your CSV file and retry</p>"
+                    my_html += "<p>"+ ((result.responseJSON || {})['err'] || '') +"</p>"  // responseJSON is unset for non-JSON replies
+                  }
+                  else {
+                    my_html  = '<h3 style="color:red">Timeout</h3>'
+                    my_html += "<p>The CSV import timed out.</p>"
+                    my_html += "<p>(This bug is currently being fixed. <br/>The import and indexation are now continuing in background on our servers. Results will show in a few minutes.)</p>"
+                    document.getElementById('importsubmit').disabled = true
+                  }
+                  $('#formanswer').html(my_html);
+                  console.error(result);
+                 },
+              timeout: 15000  // 15s
            });
    }
 };

--- a/templates/pages/corpora/titles.html
+++ b/templates/pages/corpora/titles.html
@@ -21,12 +21,12 @@
    <div class="col-md-3 col-md-offset-2">
        <div id="monthly-move-chart">
            <center>
-                   Select a time range in the chart with blue bars to zoom in
-                <p align="center">
+              Select a time range in the chart with blue bars to zoom in
+              <p align="center">
                <!--<a class="btn btn-xs btn-default" role="button" href="/chart/corpus/{{ corpus.id }}/data.csv">Save</a>-->
-                <a class="btn btn-xs btn-default" href="javascript:volumeChart.filterAll();dc.redrawAll();">Reset</a></p>
-
-                <div class="clearfix"></div>
+                <a class="btn btn-xs btn-default" href="javascript:volumeChart.filterAll();dc.redrawAll();">Reset</a>
+              </p>
+              <div class="clearfix"></div>
            </center>
        </div>

@@ -68,10 +68,14 @@
                    <span style="font-size:70%;">
                    <span class="glyphicon glyphicon-filter" aria-hidden="true"></span>
                      <!-- Used by the #doubleSearch associated function -->
-                      <input title="Search in Titles" id="searchTI" name="searchTI" type="checkbox" checked onclick="return false">TI&nbsp;
-                    <span class="glyphicon glyphicon-filter" aria-hidden="true"></span>
-                      <input title="Search in Abstracts" id="searchAB" name="searchAB" type="checkbox">AB
-                    </span>&nbsp;&nbsp;
+                      <input title="Search in Titles" id="searchTI" name="searchTI" type="checkbox" checked onclick="return false">
+                        TI&nbsp;
+                      </input>
+                      <span class="glyphicon glyphicon-filter" aria-hidden="true"></span>
+                      <input title="Search in Abstracts" id="searchAB" name="searchAB" type="checkbox">
+                        AB&nbsp;&nbsp;
+                      </input>
+                    </span>
                    <span class="glyphicon glyphicon-filter" aria-hidden="true"></span>
                    <select id="docFilter" name="docFilter">
                        <option value="filter_all">All</option>

--- a/templates/pages/projects/project.html
+++ b/templates/pages/projects/project.html
@@ -88,28 +88,28 @@
                    {{ key }}
            </h2>
                    {% for corpus in corpora %}
-                        <div id="corpus_{{corpus.id}}">
+                        <div id="corpus_{{corpus.id}}" class="corpusElt">
                            <div class="row">
                                <h4>
                                    <div class="col-md-1 content"></div>
                                    <div class="col-md-5 content">
                                        <a href="/projects/{{project.id}}/corpora/{{corpus.id}}">
                                            <span class="glyphicon glyphicon-file" aria-hidden="true"></span>
-                                            {{corpus.name}}, {{ corpus.count }} documents {{ corpus.status_message }}
+                                            {{corpus.name}}, <span id="corpus_{{corpus.id}}_ndocs">{{ corpus.count }} documents </span>
+                                                              <span id="corpus_{{corpus.id}}_msg">{{ corpus.status_message }}</span>
                                        </a>
                                    </div>
                                    <div class="col-md-3 content"  id="corpus_{{corpus.id}}_tools">
                                        {% for state in corpus.hyperdata.statuses %}
                                            {% ifequal state.action "Workflow" %}
-                                                {% if state.complete %}
-
-                                                    <a href="/projects/{{project.id}}/corpora/{{corpus.id}}" title="View the corpus">
+                                                    <a class="{% if not state.complete %}hidden{% endif %}"
+                                                       href="/projects/{{project.id}}/corpora/{{corpus.id}}" title="View the corpus">
                                                        <button type="button" class="btn btn-default" aria-label="Left Align">
                                                              <span class="glyphicon glyphicon-eye-open" aria-hidden="true"></span>
                                                        </button>
                                                    </a>

-                                                    <button type="button" class="btn btn-default" data-container="body" data-toggle="popover" data-placement="bottom"  data-trigger="focus"
+                                                    <button type="button" class="btn btn-default {% if not state.complete %}hidden{% endif %}" data-container="body" data-toggle="popover" data-placement="bottom"  data-trigger="focus"
                                                        data-content="
                                                        <ul>
                                                            <li
@@ -121,7 +121,6 @@
                                                        <span class="glyphicon glyphicon-repeat" aria-hidden="true"
                                                        title='Recalculate ngram scores and similarities'></span>
                                                    </button>
-                                                {% endif %}

                                                    <!-- TODO: delete non seulement si state.complete mais aussi si state.error -->
                                                    <button type="button" class="btn btn-default" data-container="body" data-toggle="popover" data-placement="bottom"
@@ -129,7 +128,14 @@
                                                        <ul>
                                                            <li
                                                            onclick=&quot;
-                                                                    garganrest.nodes.delete({{corpus.id}}, function(){$('#corpus_'+{{corpus.id}}).remove()});
+                                                                    trashedIds[{{corpus.id}}] = true ;
+                                                                    garganrest.nodes.delete(
+                                                                        {{corpus.id}},
+                                                                        function(){
+                                                                            $('#corpus_'+{{corpus.id}}).remove()
+                                                                            delete trashedIds[{{corpus.id}}]
+                                                                          }
+                                                                        );
                                                                    $(this).parent().parent().remove();
                                                                &quot;>
                                                                <a href='#'>Delete this</a>
@@ -146,7 +152,9 @@
                                        {% for state in corpus.hyperdata.statuses %}
                                                {% ifequal state.action "Workflow" %}
                                                    {% if state.complete %}
-                                                        <span class="glyphicon glyphicon-ok" aria-hidden="true"></span>
+                                                        <span id="corpus_{{corpus.id}}_status_ok"
+                                                              class="glyphicon glyphicon-ok"
+                                                              aria-hidden="true"></span>

                                                    {% else %}
                                                        {% if state.error %}
@@ -172,7 +180,8 @@
                                                                                            active
                                                                                        {% endif %}
                                                                                     "
-                                                                                role="progressbar" aria-valuenow="45" aria-valuemin="0" aria-valuemax="100" style="width: 20%">
+                                                                                role="progressbar" aria-valuenow="45" aria-valuemin="0" aria-valuemax="100"
+                                                                                id="corpus_{{corpus.id}}_status_{{state.action}}" style="width: 20%">
                                                                              <span>
                                                                                  {{ state.action }}
                                                                                        {% if not state.complete %}
@@ -305,6 +314,33 @@
                </div><!-- /.modal -->
                    <script type="text/javascript" src="{% static "lib/jquery/1.11.2/jquery-ui.js" %}"></script>
                    <script type="text/javascript">
+
+                        var corporaDivs = document.getElementsByClassName('corpusElt')
+
+                        // all corpora ids ======================================
+
+                        var corporaIds = []
+
+                        // for corpora ids whose delete is pending
+                        var trashedIds = {}
+
+                        for (var i = 0 ; i < corporaDivs.length ; i++) {
+                          // ex: corpus_1198
+                          divId = corporaDivs[i].id
+                          if (divId) {
+                            var corpusId = divId.match(/[0-9]+$/).pop()
+                            corporaIds.push(corpusId)
+                          }
+                        }
+
+                        var activeCorporaIds = testActiveCorpora()
+
+                        if (activeCorporaIds.length) {
+                          // initial checks if page reloaded with active corpora
+                          keepCheckingProjectStatus()
+                        }
+
+                        // cookie ajax helper ==================================
                        function getCookie(name) {
                            var cookieValue = null;
                            if (document.cookie && document.cookie != '') {
@@ -321,6 +357,9 @@
                            return cookieValue;
                        }

+                        // =====================================================
+                        // search queries and crawling =========================
+                        // =====================================================
                        var thequeries = [] ;

                        // load the template's value for N scan size
@@ -426,6 +465,9 @@
                                }

                            }
+
+                            // schedule periodic checks of status of active corpora
+                            keepCheckingProjectStatus()
                        }

                        function getGlobalResults(value){
@@ -772,9 +814,6 @@

                        }

-
-
-
                            {% if donut %}
                            // Morris Donut Chart
                            Morris.Donut({
@@ -793,10 +832,218 @@
                            });
                            {% endif %}

+
+                        // =====================================================
+
                        $('#wait').on('hidden.bs.modal', function (e) {
                          // reload page when dismiss the info box
                          window.location.reload()
                        })
+
+
+                        // =====================================================
+                        // corpus-status checking ==============================
+                        // =====================================================
+
+                        // ------------------------------
+                        // 1) helper progressbar function
+                        // -------------------------------
+                        function updateCorpusProgressbar(crid, statuses, the_status_url) {
+                          if (statuses && statuses[0]) {
+                            // 0 is status of whole WORKFLOW
+                            var statusW = statuses[0]
+                            if (statusW.complete) {
+                              // show checkbox
+                              $('#corpus_'+crid+'_status').html(
+                                  '<span id="corpus_'+crid+'_status_ok" '
+                                  + ' class="glyphicon glyphicon-ok"></span>'
+                              )
+
+                              // show all tools
+                              var cTools = document.getElementById('corpus_'+crid+'_tools').children
+                              for (var i in cTools) {
+                                var elt = cTools[i]
+                                if (elt.classList) {
+                                  elt.classList.remove("hidden")
+                                }
+                              }
+                            }
+                            // workflow incomplete: we check each action in [1,4]
+                            else {
+                              var subStatuses = statuses.slice(1,5)
+                              // console.warn(subStatuses)
+                              for (var j in subStatuses) {
+                                var stepOk = subStatuses[j]['complete']
+                                var stepError = subStatuses[j]['error']
+
+                                // stepName parmi 'Docs','Ngrams','Index','Lists'
+                                var stepName = subStatuses[j]['action']
+
+                                // debug
+                                // console.warn(stepName)
+
+                                var pgbarId = 'corpus_'+crid+'_status_'+stepName
+
+                                // if error
+                                if (stepError && stepError != 'null') {
+                                  $('#corpus_'+crid+'_status').html(
+                                    '<p class="workflow_error">'
+                                    + 'Error in corpus parsing at step '
+                                    + j +' (' + stepName + ')'
+                                    + JSON.stringify(stepError) +
+                                    + ' <a href="https://www.iscpif.fr/gargantext-feedback-and-bug-report/">'
+                                    +'(bug report here)'
+                                    +'</a></p>'
+                                  )
+                                }
+                                // normal cases: update progressbar ------------
+                                else {
+                                  var progressbar = document.getElementById(pgbarId)
+
+                                  if (progressbar) {
+                                    console.log('updating '+pgbarId, "stepOk:", stepOk)
+
+                                    // A: done
+                                    if (stepOk || stepOk == "true") {
+                                      // make progressbar segment green
+                                      if (progressbar.className
+                                          && progressbar.className.match('active')) {
+                                        progressbar.classList.remove("active")
+                                        progressbar.classList.add("progress-bar-success")
+                                      }
+                                      // remove message if any
+                                      document.getElementById('corpus_'+crid+'_msg').innerHTML = ""
+                                      // for docs parsing, also update nDocs
+                                      if (stepName == "Docs" && stepOk) {
+                                        var stepProgress = subStatuses[j]['progress']
+                                        document.getElementById('corpus_'+crid+'_ndocs')
+                                                .innerHTML = (stepProgress-1) + " documents"
+                                      }
+                                    }
+                                    // B: active
+                                    else {
+                                      progressbar.classList.add("active")
+                                    }
+                                  }
+                                  // C: new action => new bar segment
+                                  else {
+                                    console.log('creating '+pgbarId)
+
+                                    barSegmentHtml = '<div class="progress-bar progress-bar-striped'
+                                    barSegmentHtml += (stepOk ? ' progress-bar-success' : ' active') + '"'
+                                    barSegmentHtml += 'role="progressbar" aria-valuenow="45" aria-valuemin="0" aria-valuemax="100"'
+                                    barSegmentHtml += 'id="corpus_'+crid+'_status_'+stepName+'" style="width: 20%">'
+                                    barSegmentHtml +=  '<span>'+stepName
+                                    barSegmentHtml += '</span></div>'
+
+                                    $('#corpus_'+crid+'_status > .progress')
+                                      .append(barSegmentHtml)
+                                  }
+                                }
+                                // ---------------------------------------------
+                              } // end detailed check
+                            }
+                          } // end if statuses array
+                          else {
+                            console.error("Wrong status API return format "
+                                          + "for url" + the_status_url)
+                          }
+                          return null
+                        } // end function
+
+                        // ---------------------------------------------------
+                        // 2 - main status check function on activeCorporaIds
+                        // ---------------------------------------------------
+                        function updateCorporaStates(someCorporaIds) {
+                          for (var i in someCorporaIds) {
+                            // !careful with closure, async function & loop on i
+                            // cf stackoverflow.com/a/21819961/2489184
+                            (function(i) {
+                              var myCrid = someCorporaIds[i]
+                              var the_status_url = "/api/nodes/"+myCrid+"/status?format=json"
+                              // iterate ajax checks
+                              $.ajax({
+                                type: 'GET',
+                                url: the_status_url,
+                                success: function(data) {
+                                  var statuses = data['statuses']
+                                  // console.warn("running callback for corpus id:" + myCrid)
+                                  updateCorpusProgressbar(myCrid, statuses, the_status_url)
+                                },
+                                error: function(data, s) {
+                                  if (trashedIds[myCrid]) {
+                                    return null
+                                  }
+                                  else {
+                                    console.warn("status GET: ajax err (s="+s+")")
+                                    console.log(data)
+                                  }
+                                },
+                                beforeSend: function(xhr) {
+                                  xhr.setRequestHeader("X-CSRFToken", getCookie("csrftoken"));
+                                }
+                              }) // ajax: did 1 corpus
+                            })(i)
+                          } // did all corpora
+                        }
+
+                        // -----------------------------------------------------
+                        // 3 - for testing on client side which need refresh
+                        // -----------------------------------------------------
+                        function testActiveCorpora() {
+                          var activeCorporaIds = []
+                          for (var i in corporaIds) {
+                            var crid = corporaIds[i]
+                            if ((! document.getElementById('corpus_'+crid+'_status_ok'))
+                                && (! trashedIds[crid])) {
+                              activeCorporaIds.push(crid)
+                            }
+                          }
+                          return activeCorporaIds
+                        }
+
+                        // -----------------------------------------------------
+                        // 4 - running the check at regular intervals until done
+                        // -----------------------------------------------------
+
+                        var nChecks = 0
+                        var currentJob = null
+                        function keepCheckingProjectStatus() {
+                          console.log("checking status", nChecks)
+                          nChecks ++
+
+                          // local check for active corpora
+                          var newActiveCorporaIds = testActiveCorpora()
+
+                          if (newActiveCorporaIds.length) {
+                            // start remote calls
+                            updateCorporaStates(newActiveCorporaIds)
+                            if (nChecks > {{status_refresh_max_attempts}}) {
+                              // we abandon after 5 checks
+                              console.warn("stopping status checks for corpora:",
+                                            newActiveCorporaIds)
+                              nChecks = 0
+                              return null
+                            }
+                            else {
+                              // decreasing intervals (preserving DB while "loosing interest")
+                              var nextTime = nChecks * {{status_refresh_initial_interval}}
+                              // schedule next check
+                              currentJob = setTimeout(keepCheckingProjectStatus, nextTime)
+                              console.log("next status check in", nextTime/1000, "s" )
+                              return false
+                            }
+                          }
+                          else {
+                            console.info("OK, all corpora ingestion complete")
+                            nChecks = 0
+                            return true
+                          }
+                        }
+
+                        function stopCheckingProjectStatus() {
+                          clearTimeout(currentJob)
+                        }
                    </script>