Merge remote-tracking branch 'gargantext.org/simon-unstable-notebook' into unstable-merge

e7ac6426 · Alexandre Delanoë · dae0243d · 30c1dbdc · e7ac6426 · e7ac6426
Commit e7ac6426 authored Aug 30, 2017 by Alexandre Delanoë
12 changed files
--- a/gargantext/models/base.py
+++ b/gargantext/models/base.py
 from sqlalchemy.schema import Column, ForeignKey, UniqueConstraint, Index
-from sqlalchemy.orm import relationship
+from sqlalchemy.orm import relationship, validates
 from sqlalchemy.types import TypeDecorator, \
                             Integer, Float, Boolean, DateTime, String, Text
 from sqlalchemy.dialects.postgresql import JSONB, DOUBLE_PRECISION as Double
@@ -7,6 +7,7 @@ from sqlalchemy.ext.mutable import MutableDict, MutableList
 from sqlalchemy.ext.declarative import declarative_base

 __all__ = ["Column", "ForeignKey", "UniqueConstraint", "relationship",
+           "validates", "ValidatorMixin",
           "Integer", "Float", "Boolean", "DateTime", "String", "Text",
           "TypeDecorator",
           "JSONB", "Double",
@@ -18,6 +19,25 @@ __all__ = ["Column", "ForeignKey", "UniqueConstraint", "relationship",
 # all tables handled by Alembic migration scripts.
 Base = declarative_base()

+
 # To be used by tables already handled by Django ORM, such as User model. We
 # separate them in order to keep those out of Alembic sight.
 DjangoBase = declarative_base()
+
+
+class ValidatorMixin(object):
+    def enforce_length(self, key, value):
+        """Truncate a string according to its column length
+
+        Usage example:
+
+        .. code-block:: python
+
+            @validates('some_column')
+            def validate_some_column(self, key, value):
+                self.enforce_length(key, value)
+        """
+        max_len = getattr(self.__class__, key).prop.columns[0].type.length
+        if value and len(value) > max_len:
+            return value[:max_len]
+        return value
--- a/gargantext/models/nodes.py
+++ b/gargantext/models/nodes.py
@@ -9,7 +9,7 @@ from datetime import datetime

 from .base import Base, Column, ForeignKey, relationship, TypeDecorator, Index, \
                  Integer, Float, String, DateTime, JSONB, \
-                  MutableList, MutableDict
+                  MutableList, MutableDict, validates, ValidatorMixin
 from .users import User

 __all__ = ['Node', 'NodeNode', 'CorpusNode']
@@ -26,7 +26,7 @@ class NodeType(TypeDecorator):
        return NODETYPES[typeindex]


-class Node(Base):
+class Node(ValidatorMixin, Base):
    """This model can fit many purposes:

    myFirstCorpus = session.query(CorpusNode).first()
@@ -112,6 +112,10 @@ class Node(Base):
               'user_id={0.user_id}, parent_id={0.parent_id}, ' \
               'name={0.name!r}, date={0.date})>'.format(self)

+    @validates('name')
+    def validate_name(self, key, value):
+        return self.enforce_length(key, value)
+
    @property
    def ngrams(self):
        """Pseudo-attribute allowing to retrieve a node's ngrams.

--- a/gargantext/util/http.py
+++ b/gargantext/util/http.py
@@ -73,7 +73,8 @@ from rest_framework.views import APIView
 from gargantext.util.json import json_encoder
 def JsonHttpResponse(data, status=200):
+    # Build an HttpResponse declared as 'application/json; charset=utf-8'.
+    # A str is sent as-is, only UTF-8 encoded (it is not serialized again);
+    # any other value goes through json_encoder.encode().
    return HttpResponse(
-        content      = json_encoder.encode(data),
+        content      = data.encode('utf-8') if isinstance(data, str) else \
+                       json_encoder.encode(data),
        content_type = 'application/json; charset=utf-8',
        status       = status
    )

--- a/install/notebook/gargantext_notebook.py
+++ b/install/notebook/gargantext_notebook.py
+#!/usr/bin/env python
 """
   Gargantext Software Copyright (c) 2016-2017 CNRS ISC-PIF -
 http://iscpif.fr
@@ -6,45 +7,29 @@ http://gitlab.iscpif.fr/humanities/gargantext/blob/stable/LICENSE )
    - In France : a CECILL variant affero compliant
    - GNU aGPLV3 for all other countries
 """
-#!/usr/bin/env python
-import sys
+
 import os
+import django

+os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'gargantext.settings')
+django.setup()

-# Django settings
-dirname = os.path.dirname(os.path.realpath(__file__))
-os.environ.setdefault("DJANGO_SETTINGS_MODULE", "gargantext.settings")
+from gargantext.constants import QUERY_SIZE_N_MAX, get_resource, get_resource_by_name
+from gargantext.models import ProjectNode, DocumentNode, UserNode, User
+from gargantext.util.db import session, get_engine
+from collections import Counter
+import importlib
+from django.http import Http404

-# initialize Django application
-from django.core.wsgi import get_wsgi_application
-application = get_wsgi_application()

-from gargantext.util.toolchain.main import parse_extract_indexhyperdata
-from gargantext.util.db import *
-from gargantext.models import Node
-from gargantext.util.toolchain.main import parse_extract_indexhyperdata
-from nltk.tokenize import wordpunct_tokenize
+class NotebookError(Exception):
+    pass

-from gargantext.models import *
-from nltk.tokenize import word_tokenize
-import nltk as nltk
-from statistics import mean
-from math import log
-from collections import defaultdict
-import matplotlib.pyplot as plt
-import numpy as np
-import datetime
-
-from collections import Counter
-from langdetect import detect as detect_lang

 def documents(corpus_id):
-    return (session.query(Node).filter( Node.parent_id==corpus_id
-                                  , Node.typename=="DOCUMENT"
-                                  )
-        # .order_by(Node.hyperdata['publication_date'])
-        .all()
-        )
+    return (session.query(DocumentNode).filter_by(parent_id=corpus_id)
+                  #.order_by(Node.hyperdata['publication_date'])
+                   .all())


 #import seaborn as sns
@@ -56,11 +41,14 @@ def chart(docs, field):
    frame1 = pd.DataFrame(year_publis, columns=['Date', 'DateValue'], index=frame0.Date)
    return frame1

+
 from gargantext.util.crawlers.HAL import HalCrawler
+
 def scan_hal(request):
    # Instantiate a HalCrawler and return whatever its scan_results()
    # gives back for `request`.
    hal = HalCrawler()
    return hal.scan_results(request)

+
 def scan_gargantext(corpus_id, lang, request):
    connection = get_engine().connect()
    # TODO add some sugar the request (ideally request should be the same for hal and garg)
@@ -77,47 +65,117 @@ def myProject_fromUrl(url):
    myProject :: String -> Project
    """
    project_id = url.split("/")[4]
-    project = session.query(Node).filter(Node.id == project_id).first()
+    project = session.query(ProjectNode).get(project_id)
    return project


-def newCorpus(project, resourceName=11, name="Machine learning", query="LSTM"):
-    print("Corpus \"%s\" in project \"%s\" created" % (name, project.name))
+def newCorpus(project, source, name=None, query=None):
+    error = False

-    corpus = project.add_child(name="Corpus name", typename='CORPUS')
-    corpus.hyperdata["resources"] = [{"extracted" : "true", "type" : 11}]
-    corpus.hyperdata["statuses"]  = [{"action" : "notebook", "complete" : "true"}]
-    # [TODO] Add informations needed to get buttons on the Project view.
-    session.add(corpus)
-    session.commit()
+    if name is None:
+        name = query

-    hal = HalCrawler()
-    max_result = hal.scan_results(query)
-    paging = 100 
-    for page in range(0, max_result, paging):
-        print("%s documents downloaded / %s." % (str( paging * (page +1)), str(max_result) ))
-        docs = (hal._get(query, fromPage=page, count=paging)
-                     .get("response", {})
-                      .get("docs", [])
-               )
-            
-        from gargantext.util.parsers.HAL import HalParser
-        # [TODO] fix boilerplate for docs here
-        new_docs = HalParser(docs)._parse(docs)
-        
-        for doc in new_docs:
-            new_doc = (corpus.add_child( name      = doc["title"][:255]
-                                       , typename  = 'DOCUMENT')
-                      )
-            new_doc["hyperdata"] = doc
-            session.add(new_doc)
-            session.commit()
+    if not isinstance(project, ProjectNode):
+        error = "a valid project"
+    if not isinstance(source, int) and not isinstance(source, str):
+        error = "a valid source identifier: id or name"
+    elif not isinstance(query, str):
+        error = "a valid query"
+    elif not isinstance(name, str):
+        error = "a valid name"

-    print("Extracting the ngrams")
-    parse_extract_indexhyperdata(corpus)
+    if error:
+        raise NotebookError("Please provide %s." % error)

-    print("Corpus is ready to explore:")
-    print("http://imt.gargantext.org/projects/%s/corpora/%s/" % (project.id, corpus.id))
+    resource = get_resource(source) if isinstance(source, int) else \
+               get_resource_by_name(source)

-    return corpus
+    moissonneur_name = get_moissonneur_name(resource) if resource else \
+                       source.lower()
+
+    try:
+        moissonneur = get_moissonneur(moissonneur_name)
+    except ImportError:
+        raise NotebookError("Invalid source identifier: %r" % source)
+
+    return run_moissonneur(moissonneur, project, name, query)
+
+
+def get_moissonneur_name(ident):
+    """ Return moissonneur module name from RESOURCETYPE or crawler name """
+
+    # Does it quacks like a RESOURCETYPE ?
+    if hasattr(ident, 'get'):
+        ident = ident.get('crawler')

+    # Extract name from crawler class name, otherwise assume ident is already
+    # a moissonneur name.
+    if isinstance(ident, str) and ident.endswith('Crawler'):
+        return ident[:-len('Crawler')].lower()
+
+
+def get_moissonneur(name):
+    """ Return moissonneur module from its name """
+    if not isinstance(name, str) or not name.islower():
+        raise NotebookError("Invalid moissonneur name: %r" % name)
+
+    module = importlib.import_module('moissonneurs.%s' % name)
+    module.name = name
+
+    return module
+
+
+def run_moissonneur(moissonneur, project, name, query):
+    """ Run moissonneur and return resulting corpus """
+
+    # XXX Uber-kludge with gory details. Spaghetti rulezzzzz!
+    class Dummy(object):
+        pass
+
+    request = Dummy()
+    request.method = 'POST'
+    request.path = 'nowhere'
+    request.META = {}
+    # XXX 'string' only have effect on moissonneurs.pubmed; its value is added
+    #     when processing request client-side, take a deep breath and see
+    #     templates/projects/project.html for more details.
+    request.POST = {'string': name,
+                    'query': query,
+                    'N': QUERY_SIZE_N_MAX}
+    request.user = Dummy()
+    request.user.id = project.user_id
+    request.user.is_authenticated = lambda: True
+
+    if moissonneur.name == 'istex':
+        # Replace ALL spaces by plus signs
+        request.POST['query'] = '+'.join(filter(None, query.split(' ')))
+
+    try:
+        import json
+
+        r = moissonneur.query(request)
+        raw_json = r.content.decode('utf-8')
+        data = json.loads(raw_json)
+
+        if moissonneur.name == 'pubmed':
+            count = sum(x['count'] for x in data)
+            request.POST['query'] = raw_json
+        elif moissonneur.name == 'istex':
+            count = data.get('total', 0)
+        else:
+            count = data.get('results_nb', 0)
+
+        if count > 0:
+            corpus = moissonneur.save(request, project.id, return_corpus=True)
+        else:
+            return None
+
+    except (ValueError, Http404) as e:
+        raise e
+
+    # Sometimes strange things happens...
+    if corpus.name != name:
+        corpus.name = name
+        session.commit()
+
+    return corpus
--- a/moissonneurs/cern.py
+++ b/moissonneurs/cern.py
@@ -30,7 +30,7 @@ def query( request):
            #ids = crawlerbot.get_ids(query)
            return JsonHttpResponse({"results_nb":crawlerbot.results_nb})

-def save(request, project_id):
+def save(request, project_id, return_corpus=False):
    '''save'''
    if request.method == "POST":

@@ -101,6 +101,9 @@ def save(request, project_id):
            session.rollback()
            # --------------------------------------------

+        if return_corpus:
+            return corpus
+
        return render(
            template_name = 'pages/projects/wait.html',
            request = request,

--- a/moissonneurs/hal.py
+++ b/moissonneurs/hal.py
@@ -33,7 +33,7 @@ def query( request):
            print(results)
            return JsonHttpResponse({"results_nb":crawlerbot.results_nb})

-def save(request, project_id):
+def save(request, project_id, return_corpus=False):
    '''save'''
    if request.method == "POST":

@@ -103,6 +103,9 @@ def save(request, project_id):
            session.rollback()
            # --------------------------------------------

+        if return_corpus:
+            return corpus
+
        return render(
            template_name = 'pages/projects/wait.html',
            request = request,

--- a/moissonneurs/isidore.py
+++ b/moissonneurs/isidore.py
@@ -29,7 +29,7 @@ def query( request):
            #ids = crawlerbot.get_ids(query)
            return JsonHttpResponse({"results_nb":crawlerbot.results_nb})

-def save(request, project_id):
+def save(request, project_id, return_corpus=False):
    '''save'''
    if request.method == "POST":

@@ -100,6 +100,9 @@ def save(request, project_id):
            session.rollback()
            # --------------------------------------------

+        if return_corpus:
+            return corpus
+
        return render(
            template_name = 'pages/projects/wait.html',
            request = request,

--- a/moissonneurs/istex.py
+++ b/moissonneurs/istex.py
@@ -52,7 +52,7 @@ def query( request ):



-def save(request , project_id):
+def save(request , project_id, return_corpus=False):
    print("testISTEX:")
    print(request.method)
    alist = ["bar","foo"]
@@ -171,6 +171,9 @@ def save(request , project_id):
            session.rollback()
            # --------------------------------------------

+        if return_corpus:
+            return corpus
+
        return render(
            template_name = 'pages/projects/wait.html',
            request = request,

--- a/moissonneurs/multivac.py
+++ b/moissonneurs/multivac.py
@@ -33,7 +33,7 @@ def query( request):
            print(results)
            return JsonHttpResponse({"results_nb":crawlerbot.results_nb})

-def save(request, project_id):
+def save(request, project_id, return_corpus=False):
    '''save'''
    if request.method == "POST":

@@ -104,6 +104,9 @@ def save(request, project_id):
            session.rollback()
            # --------------------------------------------

+        if return_corpus:
+            return corpus
+
        return render(
            template_name = 'pages/projects/wait.html',
            request = request,

--- a/moissonneurs/pubmed.py
+++ b/moissonneurs/pubmed.py
@@ -69,7 +69,7 @@ def query( request ):
    return JsonHttpResponse(data)


-def save( request , project_id ) :
+def save( request , project_id, return_corpus=False ) :
    # implicit global session
    # do we have a valid project id?
    try:
@@ -164,6 +164,10 @@ def save( request , project_id ) :
            session.rollback()
            # --------------------------------------------
        sleep(1)
+
+        if return_corpus:
+            return corpus
+
        return HttpResponseRedirect('/projects/' + str(project_id))

    data = alist

--- a/notebooks/AdvancedTutorial.ipynb
+++ b/notebooks/AdvancedTutorial.ipynb
@@ -2,10 +2,7 @@
 "cells": [
  {
   "cell_type": "markdown",
-   "metadata": {
-    "deletable": true,
-    "editable": true
-   },
+   "metadata": {},
   "source": [
    "# Advanced Gargantext Tutorial (Python)"
   ]
@@ -13,9 +10,7 @@
  {
   "cell_type": "code",
   "execution_count": 1,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
@@ -25,11 +20,7 @@
  {
   "cell_type": "code",
   "execution_count": 2,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "# First import the library Gargantext Notebook\n",
@@ -41,17 +32,19 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {
-    "collapsed": false
-   },
+   "execution_count": 5,
+   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Corpus \"Machine learning\" in project \"Tests\" created\n",
-      "100 documents downloaded / 105.\n",
+      "105\n",
+      "LSTM 1000\n",
+      "Downloading page 0 to 100 results\n",
+      "Downloading page 100 to 100 results\n",
+      "CORPUS #17058\n",
+      "PARSING\n",
      "Loading available PARSERS:\n",
      "\t- EuropresseParser\n",
      "\t- RISParser\n",
@@ -64,85 +57,64 @@
      "\t- CernParser\n",
      "\t- MultivacParser\n",
      "\t- HalParser\n",
-      "\t- IsidoreParser\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Exception ignored in: <bound method Parser.__del__ of <gargantext.util.parsers.HAL.HalParser object at 0x7f8d1d70ad30>>\n",
-      "Traceback (most recent call last):\n",
-      "  File \"/srv/gargantext/gargantext/util/parsers/_Parser.py\", line 24, in __del__\n",
-      "    self._file.close()\n",
-      "AttributeError: 'list' object has no attribute 'close'\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "10100 documents downloaded / 105.\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Exception ignored in: <bound method Parser.__del__ of <gargantext.util.parsers.HAL.HalParser object at 0x7f8d1d6f1e10>>\n",
-      "Traceback (most recent call last):\n",
-      "  File \"/srv/gargantext/gargantext/util/parsers/_Parser.py\", line 24, in __del__\n",
-      "    self._file.close()\n",
-      "AttributeError: 'list' object has no attribute 'close'\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Extracting the ngrams\n",
-      "CORPUS #300990\n",
-      "CORPUS #300990: parsed 105\n"
-     ]
-    },
-    {
-     "ename": "KeyError",
-     "evalue": "'languages'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
-      "\u001b[0;32m<ipython-input-3-c08deaa02bf2>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0mproject\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmyProject_fromUrl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"http://imt.gargantext.org/projects/300535\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mcorpus\u001b[0m  \u001b[0;34m=\u001b[0m \u001b[0mnewCorpus\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mproject\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"Machine learning\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mquery\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"LSTM\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
-      "\u001b[0;32m/srv/gargantext/gargantext_notebook.py\u001b[0m in \u001b[0;36mnewCorpus\u001b[0;34m(project, resourceName, name, query)\u001b[0m\n\u001b[1;32m    115\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    116\u001b[0m     \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Extracting the ngrams\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 117\u001b[0;31m     \u001b[0mparse_extract_indexhyperdata\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcorpus\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    118\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    119\u001b[0m     \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Corpus is ready to explore:\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/home/alexandre/local/logiciels/python/env/3.5_20170123/lib/python3.5/site-packages/celery/local.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *a, **kw)\u001b[0m\n\u001b[1;32m    186\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    187\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 188\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_current_object\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    189\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    190\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__len__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/home/alexandre/local/logiciels/python/env/3.5_20170123/lib/python3.5/site-packages/celery/app/task.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    426\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__self__\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    427\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__self__\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 428\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    429\u001b[0m         \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    430\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpop_request\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/srv/gargantext/gargantext/util/toolchain/main.py\u001b[0m in \u001b[0;36mparse_extract_indexhyperdata\u001b[0;34m(corpus)\u001b[0m\n\u001b[1;32m     71\u001b[0m     \u001b[0mdocs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcorpus\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mchildren\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"DOCUMENT\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcount\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     72\u001b[0m     \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'CORPUS #%d: parsed %d'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mcorpus\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mid\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdocs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 73\u001b[0;31m     \u001b[0mextract_ngrams\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcorpus\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     74\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     75\u001b[0m     \u001b[0;31m# Preparing Databse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/srv/gargantext/gargantext/util/toolchain/ngrams_extraction.py\u001b[0m in \u001b[0;36mextract_ngrams\u001b[0;34m(corpus, keys, do_subngrams)\u001b[0m\n\u001b[1;32m    151\u001b[0m         \u001b[0mcorpus\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstatus\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Ngrams'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merror\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merror\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    152\u001b[0m         \u001b[0mcorpus\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave_hyperdata\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 153\u001b[0;31m         \u001b[0;32mraise\u001b[0m \u001b[0merror\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    154\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    155\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/srv/gargantext/gargantext/util/toolchain/ngrams_extraction.py\u001b[0m in \u001b[0;36mextract_ngrams\u001b[0;34m(corpus, keys, do_subngrams)\u001b[0m\n\u001b[1;32m     54\u001b[0m         \u001b[0;31m#load available taggers for default langage of plateform\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     55\u001b[0m         \u001b[0;31m#print(LANGUAGES.keys())\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 56\u001b[0;31m         tagger_bots = {lang: load_tagger(lang) for lang in corpus.hyperdata[\"languages\"] \\\n\u001b[0m\u001b[1;32m     57\u001b[0m                                 if lang != \"__unknown__\"}\n\u001b[1;32m     58\u001b[0m         \u001b[0mtagger_bots\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"__unknown__\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mload_tagger\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"en\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;31mKeyError\u001b[0m: 'languages'"
+      "\t- IsidoreParser\n",
+      "0 docs skipped\n",
+      "105 parsed\n",
+      "#MAIN language of the CORPUS __unknown__\n",
+      "CORPUS #17058: parsed 105\n",
+      "INTEGRATE\n",
+      "INTEGRATE\n",
+      "CORPUS #17058: extracted ngrams\n",
+      "CORPUS #17058: indexed hyperdata\n",
+      "CORPUS #17058: [2017-08-11_11:21:18] new favorites node #17164\n",
+      "CORPUS #17058: [2017-08-11_11:21:18] starting ngram lists computation\n",
+      "CORPUS #17058: [2017-08-11_11:21:18] new stoplist node #17165\n",
+      "# STEMMERS LOADED {'__unknown__': <nltk.stem.snowball.SnowballStemmer object at 0x7fb811588e48>}\n",
+      "#SUPPORTED STEMMERS LANGS []\n",
+      "CORPUS #17058: [2017-08-11_11:21:20] new grouplist node #17166\n",
+      "CORPUS #17058: [2017-08-11_11:21:20] new occs node #17167\n",
+      "compute_ti_ranking\n",
+      "2017-08-11_11:21:20 : Starting Query tf_nd_query\n",
+      "2017-08-11_11:21:21 : End Query tf_nd_quer\n",
+      "2017-08-11_11:21:21 : tfidfsum\n",
+      "CORPUS #17058: [2017-08-11_11:21:21] new ti ranking node #17168\n",
+      "MAINLIST: keeping 2908 ngrams out of 3878\n",
+      "CORPUS #17058: [2017-08-11_11:21:21] new mainlist node #17169\n",
+      "Compute TFIDF local\n",
+      "CORPUS #17058: [2017-08-11_11:21:22] new localtfidf node #17170\n",
+      "COOCS: NEW matrix shape [220x807]\n",
+      "CORPUS #17058: [2017-08-11_11:21:23] computed mainlist coocs for specif rank\n",
+      "SPECIFICITY: computing on 154 ngrams\n",
+      "CORPUS #17058: [2017-08-11_11:21:23] new spec-clusion node #17171\n",
+      "CORPUS #17058: [2017-08-11_11:21:23] new gen-clusion node #17172\n",
+      "MAPLIST quotas: {'topgen': {'multigrams': 168, 'monograms': 42}, 'topspec': {'multigrams': 112, 'monograms': 28}}\n",
+      "MAPLIST: top_spec_monograms = 28\n",
+      "MAPLIST: top_spec_multigrams = 41\n",
+      "MAPLIST: top_gen_monograms = 42\n",
+      "MAPLIST: top_gen_multigrams = 0\n",
+      "MAPLIST: kept 111 ngrams in total \n",
+      "CORPUS #17058: [2017-08-11_11:21:23] new maplist node #17173\n",
+      "CORPUS #17058: [2017-08-11_11:21:23] FINISHED ngram lists computation\n"
     ]
    }
   ],
   "source": [
-    "project = myProject_fromUrl(\"http://imt.gargantext.org/projects/300535\")\n",
-    "corpus  = newCorpus(project, name=\"Machine learning\", query=\"LSTM\")"
+    "#project = myProject_fromUrl(\"http://imt.gargantext.org/projects/300535\")\n",
+    "project = myProject_fromUrl(\"http://localhost:8000/projects/2\")\n",
+    "corpus  = newCorpus(project, source=\"hal\", name=\"Machine learning\", query=\"LSTM\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "session.query(Node.hyperdata[\"\"])"
@@ -151,27 +123,21 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
@@ -181,9 +147,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "for doc in new_docs:\n",
@@ -218,11 +182,7 @@
  {
   "cell_type": "code",
   "execution_count": 4,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
+   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
@@ -244,11 +204,7 @@
  {
   "cell_type": "code",
   "execution_count": 5,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "# To get all the documents:\n",
@@ -258,11 +214,7 @@
  {
   "cell_type": "code",
   "execution_count": 6,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
+   "metadata": {},
   "outputs": [
    {
     "data": {
@@ -284,11 +236,7 @@
  {
   "cell_type": "code",
   "execution_count": 7,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
+   "metadata": {},
   "outputs": [
    {
     "data": {
@@ -309,11 +257,7 @@
  {
   "cell_type": "code",
   "execution_count": 8,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
+   "metadata": {},
   "outputs": [
    {
     "data": {
@@ -334,11 +278,7 @@
  {
   "cell_type": "code",
   "execution_count": 9,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
+   "metadata": {},
   "outputs": [
    {
     "data": {
@@ -360,9 +300,7 @@
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
+    "collapsed": true
   },
   "outputs": [],
   "source": [
@@ -373,11 +311,7 @@
  {
   "cell_type": "code",
   "execution_count": 11,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
+   "metadata": {},
   "outputs": [
    {
     "data": {
@@ -406,10 +340,7 @@
  },
  {
   "cell_type": "markdown",
-   "metadata": {
-    "deletable": true,
-    "editable": true
-   },
+   "metadata": {},
   "source": [
    "## Title\n",
    "\n",
@@ -422,19 +353,14 @@
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
+    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
-   "metadata": {
-    "deletable": true,
-    "editable": true
-   },
+   "metadata": {},
   "source": [
    "# Lang Cleaning tools"
   ]
@@ -442,11 +368,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "detect_lang(\"Ceci est une phrase en français.\")"
@@ -455,11 +377,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "detect_lang(\"This is an english sentence.\")"
@@ -468,11 +386,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "chart(docs, \"language_iso2\").plot.bar()"
@@ -481,11 +395,7 @@
  {
   "cell_type": "code",
   "execution_count": 14,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
+   "metadata": {},
   "outputs": [
    {
     "data": {
@@ -516,9 +426,7 @@
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
+    "collapsed": true
   },
   "outputs": [],
   "source": [
@@ -534,11 +442,7 @@
  {
   "cell_type": "code",
   "execution_count": 13,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
+   "metadata": {},
   "outputs": [
    {
     "data": {
@@ -558,11 +462,7 @@
  {
   "cell_type": "code",
   "execution_count": 15,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
+   "metadata": {},
   "outputs": [
    {
     "data": {
@@ -606,9 +506,7 @@
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
+    "collapsed": true
   },
   "outputs": [],
   "source": [
@@ -617,10 +515,7 @@
  },
  {
   "cell_type": "markdown",
-   "metadata": {
-    "deletable": true,
-    "editable": true
-   },
+   "metadata": {},
   "source": [
    "# Measures IMT Tools"
   ]
@@ -628,11 +523,7 @@
  {
   "cell_type": "code",
   "execution_count": 16,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
+   "metadata": {},
   "outputs": [
    {
     "ename": "ConnectionError",
@@ -680,11 +571,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "# Request syntax\n",
@@ -701,11 +588,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "# Forces / Faiblesses de l'IMT\n",
@@ -720,9 +603,7 @@
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
+    "collapsed": true
   },
   "outputs": [],
   "source": [
@@ -734,11 +615,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "# Then chart it to see your strenght and weakness!\n",
@@ -749,9 +626,7 @@
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
+    "collapsed": true
   },
   "outputs": [],
   "source": []
@@ -760,9 +635,7 @@
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
+    "collapsed": true
   },
   "outputs": [],
   "source": []
@@ -771,9 +644,7 @@
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
+    "collapsed": true
   },
   "outputs": [],
   "source": []
@@ -782,9 +653,7 @@
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
+    "collapsed": true
   },
   "outputs": [],
   "source": []
@@ -793,9 +662,7 @@
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
+    "collapsed": true
   },
   "outputs": [],
   "source": []
@@ -804,19 +671,14 @@
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
+    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
-   "metadata": {
-    "deletable": true,
-    "editable": true
-   },
+   "metadata": {},
   "source": [
    "# Graph generation"
   ]
@@ -825,9 +687,7 @@
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
+    "collapsed": true
   },
   "outputs": [],
   "source": [
@@ -838,9 +698,7 @@
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
+    "collapsed": true
   },
   "outputs": [],
   "source": [
@@ -849,10 +707,7 @@
  },
  {
   "cell_type": "markdown",
-   "metadata": {
-    "deletable": true,
-    "editable": true
-   },
+   "metadata": {},
   "source": [
    "# List Management"
   ]
@@ -861,9 +716,7 @@
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
+    "collapsed": true
   },
   "outputs": [],
   "source": [
@@ -874,9 +727,7 @@
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
+    "collapsed": true
   },
   "outputs": [],
   "source": [
@@ -900,7 +751,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.5.3rc1"
+   "version": "3.5.3"
  }
 },
 "nbformat": 4,

--- a/templates/pages/projects/project.html
+++ b/templates/pages/projects/project.html
@@ -532,7 +532,7 @@
                                        $("#submit_thing").html("Process a {{ query_size }} sample!")

                                        thequeries = data
-                                        var N=0,k=0;
+                                        var N=0;

                                        for(var i in thequeries) N += thequeries[i].count
                                        if( N>0) {
@@ -571,12 +571,11 @@
                                        $("#submit_thing").html("Process a {{ query_size }} sample!")

                                        thequeries = data
-                                        var N=data.length,k=0;
-                                        // for(var i in thequeries) N += thequeries[i].count
-                                        if( N>1) {
-                                            var total = JSON.parse(data).total
-                                            console.log("N: "+total)
-                                            $("#theresults").html("<i> <b>"+pubmedquery+"</b>: "+total+" publications.</i><br>")
+                                        var N = data.total;
+
+                                        if (N > 0) {
+                                            console.log("N: "+N)
+                                            $("#theresults").html("<i> <b>"+pubmedquery+"</b>: "+N+" publications.</i><br>")
                                            $('#submit_thing').prop('disabled', false);
                                        } else {
                                            $("#theresults").html("<i>  <b>"+data[0]+"</b></i><br>")