Commit 29942ecd authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FIX]

parents 32de2d3f 742b8194
......@@ -2,6 +2,11 @@
* Guided Tour
* Sources form: crawler highlighting
## Version 3.0.8.1
* WOS parser date FIX
* EUROPRESS parser author/text article FIX
* Backend: each project now has its user node as parent
## Version 3.0.7
* Alembic implemented to manage database migrations
......
......@@ -6,6 +6,21 @@ Keep in mind that Alembic only handles SQLAlchemy models: tables created from
Django ORM must be kept out of Alembic's sight. See the [alembic:exclude] section in
alembic.ini.
To bootstrap Alembic on an already existing gargantext database, see
below: TELL ALEMBIC TO NOT START FROM SCRATCH.
USUAL WORKFLOW WITH ALEMBIC
1. Make changes to models in gargantext/models
2. Autogenerate revision (see below GENERATE A REVISION)
3. Manually check and edit revision file in alembic/versions
4. Commit alembic revision (it should never be reverted)
5. Commit changes in models (it can be reverted if needed)
To create, drop or modify views, schemas, roles, stored procedures, triggers or
policies, see below: REPLACEABLE OBJECTS.
TELL ALEMBIC TO NOT START FROM SCRATCH
......@@ -29,25 +44,76 @@ DOWNGRADE TO INITIAL DATABASE STATE
alembic downgrade base
GENERATE A NEW REVISION
GENERATE A REVISION
alembic revision -m "Message for this migration"
alembic revision --autogenerate -m "Message for this migration"
# A migration script is then created in alembic/versions directory. For
# example alembic/versions/3adcc9a56557_message_for_this_migration.py
# where 3adcc9a56557 is the revision id generated by Alembic.
#
# Alembic should generate a script reflecting changes already made in
# models or database. However it is always a good idea to check it and edit
# it manually: Alembic is not always accurate and can't see every alteration.
# It should work with basic changes such as model or column creation. See
# http://alembic.zzzcomputing.com/en/latest/autogenerate.html#what-does-autogenerate-detect-and-what-does-it-not-detect
GENERATE AN EMPTY REVISION
alembic revision -m "Message for this migration"
# This script must be edited to write the migration itself, mainly
# in `upgrade` and `downgrade` functions. See Alembic documentation for
# further details.
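A minimal sketch of such an edited empty revision (revision ids and table are
hypothetical):

    """Add a cache table

    Revision ID: 0123456789ab
    Revises: ba9876543210
    """
    from alembic import op
    import sqlalchemy as sa

    revision = '0123456789ab'
    down_revision = 'ba9876543210'
    branch_labels = None
    depends_on = None

    def upgrade():
        # apply the change
        op.create_table('cache',
                        sa.Column('key', sa.String(255), primary_key=True),
                        sa.Column('value', sa.Text))

    def downgrade():
        # revert exactly what upgrade() does
        op.drop_table('cache')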
GENERATE A REVISION FROM CURRENT STATE
REPLACEABLE OBJECTS
alembic revision --autogenerate -m "Message for this migration"
There is no specific way to handle views, schemas, roles, stored procedures,
triggers or policies with Alembic. To ease revisions of such objects, avoid
boilerplate code and too many op.execute calls, we use an enhanced version of the
ReplaceableObject recipe (see Alembic documentation).
# Alembic should generate a script reflecting changes already made in
# database. However it is always a good idea to check it and edit it
# manually: Alembic is not always accurate and can't see every alteration.
# It should work with basic changes such as model or column creation. See
# http://alembic.zzzcomputing.com/en/latest/autogenerate.html#what-does-autogenerate-detect-and-what-does-it-not-detect
To create, drop or modify such an object, make a ReplaceableObject
instance, then use the create_*, drop_* or replace_* methods of alembic.op,
as sketched below.
Conversion between ReplaceableObject and SQL is implemented in
gargantext/util/alembic.py.
* Views: create_view(ReplaceableObject(<name>, <query>))
* Roles: create_role(ReplaceableObject(<name>, <options>))
* Schemas: create_schema(ReplaceableObject(<name>))
* Stored procedures: create_sp(ReplaceableObject(<name(arguments)>, <body>))
* Triggers: create_trigger(ReplaceableObject(<name>, <when>, <table>, <body>))
* Policies: create_policy(ReplaceableObject(<name>, <table>, <body>))
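For instance, a view could be managed as follows (hypothetical view, a sketch
in the same spirit as the stored procedure example below):

    recent_nodes_view = ReplaceableObject(
        "recent_nodes",
        "SELECT * FROM nodes WHERE date > now() - interval '7 days'")

    def upgrade():
        op.create_view(recent_nodes_view)

    def downgrade():
        op.drop_view(recent_nodes_view)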
Here is an example with a stored procedure:
...
from gargantext.util.alembic import ReplaceableObject
revision = '08230100f512'
...
my_function_sp = ReplaceableObject(
"my_function()", "RETURNS integer AS $$ SELECT 42 $$ LANGUAGE sql")
def upgrade():
op.create_sp(my_function_sp)
def downgrade():
op.drop_sp(my_function_sp)
To modify this stored procedure in a later revision:
...
from gargantext.util.alembic import ReplaceableObject
my_function_sp = ReplaceableObject(
"my_function()", "RETURNS integer AS $$ SELECT 43 $$ LANGUAGE sql")
def upgrade():
op.replace_sp(my_function_sp, replaces="08230100f512.my_function_sp")
def downgrade():
op.replace_sp(my_function_sp, replace_with="08230100f512.my_function_sp")
......@@ -18,7 +18,8 @@ from gargantext import settings, models
# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config
config.set_main_option("sqlalchemy.url", settings.DATABASES['default']['URL'])
config.set_main_option("sqlalchemy.url",
settings.DATABASES['default']['SECRET_URL'])
# Interpret the config file for Python logging.
# This line sets up loggers basically.
......@@ -52,6 +53,14 @@ def include_object(obj, name, typ, reflected, compare_to):
return True
context_opts = dict(
target_metadata=target_metadata,
include_object=include_object,
compare_server_default=True,
compare_type=True,
)
def run_migrations_offline():
"""Run migrations in 'offline' mode.
......@@ -65,9 +74,7 @@ def run_migrations_offline():
"""
url = config.get_main_option("sqlalchemy.url")
context.configure(
url=url, target_metadata=target_metadata, literal_binds=True,
include_object=include_object)
context.configure(url=url, literal_binds=True, **context_opts)
with context.begin_transaction():
context.run_migrations()
......@@ -86,11 +93,7 @@ def run_migrations_online():
poolclass=pool.NullPool)
with connectable.connect() as connection:
context.configure(
connection=connection,
target_metadata=target_metadata,
include_object=include_object
)
context.configure(connection=connection, **context_opts)
with context.begin_transaction():
context.run_migrations()
......
"""Fix bug in title_abstract indexation
Revision ID: 159a5154362b
Revises: 73112a361617
Create Date: 2017-09-18 18:00:26.055335
"""
from alembic import op
import sqlalchemy as sa
from gargantext.util.alembic import ReplaceableObject
# revision identifiers, used by Alembic.
revision = '159a5154362b'
down_revision = '73112a361617'
branch_labels = None
depends_on = None
title_abstract_insert = ReplaceableObject(
'title_abstract_insert',
'BEFORE INSERT',
'nodes',
"""FOR EACH ROW
WHEN (NEW.hyperdata::text <> '{}'::text)
EXECUTE PROCEDURE title_abstract_update_trigger()"""
)
title_abstract_update = ReplaceableObject(
'title_abstract_update',
'BEFORE UPDATE OF hyperdata',
'nodes',
"""FOR EACH ROW
WHEN ((OLD.hyperdata ->> 'title', OLD.hyperdata ->> 'abstract')
IS DISTINCT FROM
(NEW.hyperdata ->> 'title', NEW.hyperdata ->> 'abstract'))
EXECUTE PROCEDURE title_abstract_update_trigger()"""
)
def upgrade():
op.replace_trigger(title_abstract_insert, replaces="73112a361617.title_abstract_insert")
op.replace_trigger(title_abstract_update, replaces="73112a361617.title_abstract_update")
# Manually re-build index
op.execute("UPDATE nodes SET title_abstract = to_tsvector('english', (hyperdata ->> 'title') || ' ' || (hyperdata ->> 'abstract')) WHERE typename=4")
def downgrade():
# Won't unfix the bug!
pass
"""Add english fulltext index on Nodes.hyperdata for abstract and title
Revision ID: 1fb4405b59e1
Revises: bedce47c9e34
Create Date: 2017-09-13 16:31:36.926692
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy_utils.types import TSVectorType
from gargantext.util.alembic import ReplaceableObject
# revision identifiers, used by Alembic.
revision = '1fb4405b59e1'
down_revision = 'bedce47c9e34'
branch_labels = None
depends_on = None
title_abstract_update_trigger = ReplaceableObject(
'title_abstract_update_trigger()',
"""
RETURNS trigger AS $$
begin
new.title_abstract := to_tsvector('english', (new.hyperdata ->> 'title') || ' ' || (new.hyperdata ->> 'abstract'));
return new;
end
$$ LANGUAGE plpgsql;
"""
)
title_abstract_update = ReplaceableObject(
'title_abstract_update',
'BEFORE INSERT OR UPDATE',
'nodes',
'FOR EACH ROW EXECUTE PROCEDURE title_abstract_update_trigger()'
)
def upgrade():
op.add_column('nodes', sa.Column('title_abstract', TSVectorType))
op.create_sp(title_abstract_update_trigger)
op.create_trigger(title_abstract_update)
# Initialize index with already existing data
op.execute('UPDATE nodes SET hyperdata = hyperdata')
def downgrade():
op.drop_trigger(title_abstract_update)
op.drop_sp(title_abstract_update_trigger)
op.drop_column('nodes', 'title_abstract')
"""Optimize title_abstract indexation
Revision ID: 73112a361617
Revises: 1fb4405b59e1
Create Date: 2017-09-15 14:14:51.737963
"""
from alembic import op
import sqlalchemy as sa
from gargantext.util.alembic import ReplaceableObject
# revision identifiers, used by Alembic.
revision = '73112a361617'
down_revision = '1fb4405b59e1'
branch_labels = None
depends_on = None
title_abstract_insert = ReplaceableObject(
'title_abstract_insert',
'AFTER INSERT',
'nodes',
"""FOR EACH ROW
WHEN (NEW.hyperdata::text <> '{}'::text)
EXECUTE PROCEDURE title_abstract_update_trigger()"""
)
title_abstract_update = ReplaceableObject(
'title_abstract_update',
'AFTER UPDATE OF hyperdata',
'nodes',
"""FOR EACH ROW
WHEN ((OLD.hyperdata ->> 'title', OLD.hyperdata ->> 'abstract')
IS DISTINCT FROM
(NEW.hyperdata ->> 'title', NEW.hyperdata ->> 'abstract'))
EXECUTE PROCEDURE title_abstract_update_trigger()"""
)
def upgrade():
op.replace_trigger(title_abstract_update, replaces="1fb4405b59e1.title_abstract_update")
op.create_trigger(title_abstract_insert)
def downgrade():
op.drop_trigger(title_abstract_insert)
op.replace_trigger(title_abstract_update, replace_with="1fb4405b59e1.title_abstract_update")
"""Add server side sensible defaults for nodes
Revision ID: 73304ae9f1fb
Revises: 159a5154362b
Create Date: 2017-10-05 14:17:58.326646
"""
from alembic import op
import sqlalchemy as sa
import gargantext
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = '73304ae9f1fb'
down_revision = '159a5154362b'
branch_labels = None
depends_on = None
def upgrade():
op.alter_column('nodes', 'date',
existing_type=postgresql.TIMESTAMP(timezone=True),
server_default=sa.text('CURRENT_TIMESTAMP'),
nullable=False)
op.alter_column('nodes', 'hyperdata',
existing_type=postgresql.JSONB(astext_type=sa.Text()),
server_default=sa.text("'{}'::jsonb"),
nullable=False)
op.alter_column('nodes', 'name',
existing_type=sa.VARCHAR(length=255),
server_default='',
nullable=False)
op.alter_column('nodes', 'typename',
existing_type=sa.INTEGER(),
nullable=False)
op.alter_column('nodes', 'user_id',
existing_type=sa.INTEGER(),
nullable=False)
def downgrade():
op.alter_column('nodes', 'user_id',
existing_type=sa.INTEGER(),
nullable=True)
op.alter_column('nodes', 'typename',
existing_type=sa.INTEGER(),
nullable=True)
op.alter_column('nodes', 'name',
existing_type=sa.VARCHAR(length=255),
server_default=None,
nullable=True)
op.alter_column('nodes', 'hyperdata',
existing_type=postgresql.JSONB(astext_type=sa.Text()),
server_default=None,
nullable=True)
op.alter_column('nodes', 'date',
existing_type=postgresql.TIMESTAMP(timezone=True),
server_default=None,
nullable=True)
......@@ -7,6 +7,12 @@
$httpProvider.defaults.xsrfHeaderName = 'X-CSRFToken';
$httpProvider.defaults.xsrfCookieName = 'csrftoken';
}]);
function url(path) {
// adding explicit "http[s]://" -- for cross origin requests
return location.protocol + '//' + window.GARG_ROOT_URL + path;
}
/*
* DocumentHttpService: Read Document
* ===================
......@@ -98,9 +104,7 @@
*/
http.factory('MainApiAddNgramHttpService', function($resource) {
return $resource(
// adding explicit "http://" b/c this a cross origin request
'http://' + window.GARG_ROOT_URL
+ "/api/ngrams?text=:ngramStr&corpus=:corpusId&testgroup",
url("/api/ngrams?text=:ngramStr&corpus=:corpusId&testgroup"),
{
ngramStr: '@ngramStr',
corpusId: '@corpusId',
......@@ -131,9 +135,7 @@
http.factory('MainApiChangeNgramHttpService', function($resource) {
return $resource(
// adding explicit "http://" b/c this a cross origin request
'http://' + window.GARG_ROOT_URL
+ "/api/ngramlists/change?list=:listId&ngrams=:ngramIdList",
url("/api/ngramlists/change?list=:listId&ngrams=:ngramIdList"),
{
listId: '@listId',
ngramIdList: '@ngramIdList' // list in str form (sep=","): "12,25,30"
......@@ -171,8 +173,7 @@
*/
http.factory('MainApiFavoritesHttpService', function($resource) {
return $resource(
// adding explicit "http://" b/c this a cross origin request
'http://' + window.GARG_ROOT_URL + "/api/nodes/:corpusId/favorites?docs=:docId",
url("/api/nodes/:corpusId/favorites?docs=:docId"),
{
corpusId: '@corpusId',
docId: '@docId'
......
......@@ -89,9 +89,9 @@
</div>
<div class="row-fluid">
<ul class="list-group clearfix">
<li class="list-group-item small"><span class="badge">source</span>{[{source}]}</li>
<li class="list-group-item small"><span class="badge">authors</span>{[{authors}]}</li>
<li class="list-group-item small"><span class="badge">date</span>{[{publication_date}]}</li>
<li class="list-group-item small"><span class="badge">source</span>{[{source || '&nbsp;'}]}</li>
<li class="list-group-item small"><span class="badge">authors</span>{[{authors || '&nbsp;'}]}</li>
<li class="list-group-item small"><span class="badge">date</span>{[{publication_date || '&nbsp;'}]}</li>
</ul>
</div>
......
......@@ -2,8 +2,6 @@
[uwsgi]
# uwsgi --vacuum --socket monsite/mysite.sock --wsgi-file monsite/wsgi.py --chmod-socket=666 --home=/srv/alexandre.delanoe/env --chdir=/var/www/www/alexandre/monsite --env
env = DJANGO_SETTINGS_MODULE=gargantext.settings
#module = django.core.handlers.wsgi:WSGIHandler()
......@@ -44,7 +42,7 @@ touch-reload = /tmp/gargantext.reload
# respawn processes stuck for longer than the harakiri timeout (in seconds)
harakiri = 120
harakiri = 1200
post-buffering=8192
# limit the project to 128 MB
......@@ -55,7 +53,18 @@ max-requests = 5000
# background the process & log
#daemonize = /var/log/uwsgi/gargantext.log
uid = 1000
gid = 1000
daemonize = /var/log/gargantext/uwsgi/@(exec://date +%%Y-%%m-%%d_%%H%%M).log
log-reopen = true
#uid = 1000
#gid = 1000
#
show-config=true
disable-logging=false
logfile-chmod=644
#logfile-chown=false
log-maxsize=500000000
##logto=%(chdir)logs/uwsgi_access.log
#logger = longquery file:%(chdir)logs/uwsgi_long.log
#log-route = longquery msec
#
from sqlalchemy.schema import Column, ForeignKey, UniqueConstraint, Index
from sqlalchemy.orm import relationship, validates
from sqlalchemy.types import TypeDecorator, \
Integer, Float, Boolean, DateTime, String, Text
Integer, REAL, Boolean, DateTime, String, Text
from sqlalchemy_utils.types import TSVectorType
from sqlalchemy.dialects.postgresql import JSONB, DOUBLE_PRECISION as Double
from sqlalchemy.ext.mutable import MutableDict, MutableList
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import text
__all__ = ["Column", "ForeignKey", "UniqueConstraint", "relationship",
__all__ = ["Column", "ForeignKey", "UniqueConstraint", "Index", "relationship",
"text",
"validates", "ValidatorMixin",
"Integer", "Float", "Boolean", "DateTime", "String", "Text",
"TSVectorType",
"TypeDecorator",
"JSONB", "Double",
"MutableDict", "MutableList",
......@@ -25,6 +29,16 @@ Base = declarative_base()
DjangoBase = declarative_base()
class Float(REAL):
"""Reflect exact REAL type for PostgreSQL in order to avoid confusion
within Alembic type comparison"""
def __init__(self, *args, **kwargs):
if kwargs.get('precision') == 24:
kwargs.pop('precision')
super(Float, self).__init__(*args, **kwargs)
class ValidatorMixin(object):
def enforce_length(self, key, value):
"""Truncate a string according to its column length
......
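The Float override above can be sanity-checked like this (a sketch, assuming
gargantext.models.base is importable and a PostgreSQL dialect):

    from sqlalchemy.dialects import postgresql
    from gargantext.models.base import Float

    # precision=24 is dropped, so the type compiles to plain REAL --
    # exactly what PostgreSQL reflects back, which keeps Alembic's
    # compare_type from reporting a spurious type change
    print(Float(precision=24).compile(dialect=postgresql.dialect()))  # REAL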
......@@ -2,14 +2,11 @@ from gargantext.util.db import session
from gargantext.util.files import upload
from gargantext.constants import *
# Uncomment to make column full text searchable
#from sqlalchemy_utils.types import TSVectorType
from datetime import datetime
from .base import Base, Column, ForeignKey, relationship, TypeDecorator, Index, \
Integer, Float, String, DateTime, JSONB, \
MutableList, MutableDict, validates, ValidatorMixin
Integer, Float, String, DateTime, JSONB, TSVectorType, \
MutableList, MutableDict, validates, ValidatorMixin, text
from .users import User
__all__ = ['Node', 'NodeNode', 'CorpusNode']
......@@ -47,7 +44,7 @@ class Node(ValidatorMixin, Base):
>>> session.query(Node).filter_by(typename='USER').first() # doctest: +ELLIPSIS
<UserNode(...)>
But beware, there are some caveats with bulk queries. In this case typename
But beware, there are some pitfalls with bulk queries. In this case typename
MUST be specified manually.
>>> session.query(UserNode).delete() # doctest: +SKIP
......@@ -60,28 +57,33 @@ class Node(ValidatorMixin, Base):
Index('nodes_user_id_typename_parent_id_idx', 'user_id', 'typename', 'parent_id'),
Index('nodes_hyperdata_idx', 'hyperdata', postgresql_using='gin'))
# TODO
# create INDEX full_text_idx on nodes using gin(to_tsvector('english', hyperdata ->> 'abstract' || 'title'));
id = Column(Integer, primary_key=True)
typename = Column(NodeType, index=True)
typename = Column(NodeType, index=True, nullable=False)
__mapper_args__ = { 'polymorphic_on': typename }
# foreign keys
user_id = Column(Integer, ForeignKey(User.id, ondelete='CASCADE'))
user_id = Column(Integer, ForeignKey(User.id, ondelete='CASCADE'),
nullable=False)
user = relationship(User)
parent_id = Column(Integer, ForeignKey('nodes.id', ondelete='CASCADE'))
parent = relationship('Node', remote_side=[id])
name = Column(String(255))
date = Column(DateTime(timezone=True), default=datetime.now)
name = Column(String(255), nullable=False, server_default='')
date = Column(DateTime(timezone=True), nullable=False,
server_default=text('CURRENT_TIMESTAMP'))
hyperdata = Column(JSONB, default=dict, nullable=False,
server_default=text("'{}'::jsonb"))
hyperdata = Column(JSONB, default=dict)
# metadata (see https://bashelton.com/2014/03/updating-postgresql-json-fields-via-sqlalchemy/)
# To make search possible uncomment the line below
#search_vector = Column(TSVectorType('hyperdata'))
# Create a TSVECTOR column to use fulltext search feature of PostgreSQL.
# We need to create a trigger to update this column on update and insert,
# it's created in alembic/version/1fb4405b59e1_add_english_fulltext_index_on_nodes_.py
#
# To use this column: session.query(DocumentNode) \
# .filter(Node.title_abstract.match('keyword'))
title_abstract = Column(TSVectorType(regconfig='english'))
def __new__(cls, *args, **kwargs):
if cls is Node and kwargs.get('typename'):
......
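A short usage sketch for the title_abstract column introduced above
(hypothetical keyword):

    from gargantext.util.db import session
    from gargantext.models import DocumentNode, Node

    # match() compiles to the PostgreSQL @@ to_tsquery(...) fulltext operator
    docs = (session.query(DocumentNode)
                   .filter(Node.title_abstract.match('climate'))
                   .all())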
"""
Django settings for gargantext project.
Generated by 'django-admin startproject' using Django 1.9.2.
For more information on this file, see
https://docs.djangoproject.com/en/1.9/topics/settings/
For the full list of settings and their values, see
https://docs.djangoproject.com/en/1.9/ref/settings/
"""
import os
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/1.9/howto/deployment/checklist/
# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = '!%ktkh981)piil1%t5r0g4$^0=uvdafk!=f2x8djxy7_gq(n5%'
# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True
MAINTENANCE = False
BASE_URL = "testing.gargantext.org"
ALLOWED_HOSTS = ["localhost", ".gargantext.org", ".iscpif.fr",]
# Asynchronous tasks
import djcelery
djcelery.setup_loader()
BROKER_URL = 'amqp://guest:guest@localhost:5672/'
CELERY_ACCEPT_CONTENT = ['pickle', 'json', 'msgpack', 'yaml']
CELERY_TIMEZONE = 'Europe/Paris'
CELERYBEAT_SCHEDULER = 'djcelery.schedulers.DatabaseScheduler'
CELERY_IMPORTS = (
"gargantext.util.toolchain",
"gargantext.util.crawlers",
"graph.graph",
"moissonneurs.pubmed",
"moissonneurs.istex",
"gargantext.util.ngramlists_tools",
)
# garg's custom unittests runner (adapted to our db models)
TEST_RUNNER = 'unittests.framework.GargTestRunner'
# Application definition
INSTALLED_APPS = [
'django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
'rest_framework',
'djcelery',
'annotations',
'graph',
'moissonneurs',
'gargantext',
]
MIDDLEWARE_CLASSES = [
'django.middleware.security.SecurityMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.auth.middleware.SessionAuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
]
ROOT_URLCONF = 'gargantext.urls'
TEMPLATES = [
{
'BACKEND': 'django.template.backends.django.DjangoTemplates',
'DIRS': [
os.path.join(BASE_DIR, 'templates'),
#'./templates'
],
'APP_DIRS': True,
'OPTIONS': {
'context_processors': [
'django.template.context_processors.debug',
'django.template.context_processors.request',
'django.contrib.auth.context_processors.auth',
'django.contrib.messages.context_processors.messages',
],
},
},
]
WSGI_APPLICATION = 'gargantext.wsgi.application'
# http://getblimp.github.io/django-rest-framework-jwt/#additional-settings
REST_FRAMEWORK = {
'DEFAULT_PERMISSION_CLASSES': (
'rest_framework.permissions.IsAuthenticated',
),
'DEFAULT_AUTHENTICATION_CLASSES': (
'rest_framework_jwt.authentication.JSONWebTokenAuthentication',
'rest_framework.authentication.SessionAuthentication',
'rest_framework.authentication.BasicAuthentication',
),
}
JWT_AUTH = {
'JWT_VERIFY_EXPIRATION': False,
'JWT_SECRET_KEY': SECRET_KEY,
'JWT_AUTH_HEADER_PREFIX': 'Bearer',
}
# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/1.9/howto/static-files/
STATIC_ROOT = '/srv/gargantext_static/'
STATIC_URL = '/static/'
STATICFILES_DIRS = (
os.path.join(BASE_DIR, 'static'),
)
STATICFILES_FINDERS = (
'django.contrib.staticfiles.finders.AppDirectoriesFinder',
'django.contrib.staticfiles.finders.FileSystemFinder',
)
MEDIA_ROOT = '/srv/gargantext_media'
#MEDIA_ROOT = os.path.join(PROJECT_PATH, 'media')
MEDIA_URL = '/media/'
# Database
# https://docs.djangoproject.com/en/1.9/ref/settings/#databases
DATABASES = {
'default': {
'ENGINE': 'django.db.backends.postgresql_psycopg2',
'NAME': 'gargandb',
'USER': 'gargantua',
'PASSWORD': 'C8kdcUrAQy66U',
'HOST': '127.0.0.1',
'PORT': '5432',
'TEST': {
'NAME': 'test_gargandb',
},
}
}
DATABASES['default']['SECRET_URL'] = \
'postgresql+psycopg2://{USER}:{PASSWORD}@{HOST}:{PORT}/{NAME}'.format(
**DATABASES['default']
)
# Password validation
# https://docs.djangoproject.com/en/1.9/ref/settings/#auth-password-validators
AUTH_PASSWORD_VALIDATORS = [
{
'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
},
]
API_TOKENS = {
"CERN": {
"APIKEY":'',
"APISECRET":'',
},
"MULTIVAC": {
"APIKEY": ""
}
}
# Internationalization
# https://docs.djangoproject.com/en/1.9/topics/i18n/
LANGUAGE_CODE = 'en-us'
TIME_ZONE = None
USE_I18N = True
USE_L10N = True
USE_TZ = True
# BOOL Interpreter
BOOL_TOOLS_PATH="/srv/gargantext/gargantext/util/crawlers/sparql"
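For reference, with the values above DATABASES['default']['SECRET_URL']
resolves to a DSN of the form (password elided):

    postgresql+psycopg2://gargantua:<PASSWORD>@127.0.0.1:5432/gargandb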
......@@ -7,6 +7,8 @@ the name of "ReplaceableObject" class.
This recipe is directly borrowed from Alembic documentation, see
http://alembic.zzzcomputing.com/en/latest/cookbook.html#replaceable-objects
**2017-10-09** ReversibleOp.define has been added to reduce boilerplate code.
"""
from alembic.operations import Operations, MigrateOperation
......@@ -16,9 +18,9 @@ __all__ = ['ReplaceableObject']
class ReplaceableObject(object):
def __init__(self, name, sqltext):
def __init__(self, name, *args):
self.name = name
self.sqltext = sqltext
self.args = args
class ReversibleOp(MigrateOperation):
......@@ -58,38 +60,41 @@ class ReversibleOp(MigrateOperation):
operations.invoke(drop_old)
operations.invoke(create_new)
@classmethod
def define(cls, name, cname=None, register=Operations.register_operation):
def create(self):
return CreateOp(self.target)
@Operations.register_operation("create_view", "invoke_for_target")
@Operations.register_operation("replace_view", "replace")
class CreateViewOp(ReversibleOp):
def reverse(self):
return DropViewOp(self.target)
def drop(self):
return DropOp(self.target)
name = name.lower()
cname = cname or name.capitalize()
@Operations.register_operation("drop_view", "invoke_for_target")
class DropViewOp(ReversibleOp):
def reverse(self):
return CreateViewOp(self.view)
CreateOp = type('Create%sOp' % cname, (ReversibleOp,), {'reverse': drop})
DropOp = type('Drop%sOp' % cname, (ReversibleOp,), {'reverse': create})
CreateOp = register('create_' + name, 'invoke_for_target')(CreateOp)
CreateOp = register('replace_' + name, 'replace')(CreateOp)
@Operations.register_operation("create_sp", "invoke_for_target")
@Operations.register_operation("replace_sp", "replace")
class CreateSPOp(ReversibleOp):
def reverse(self):
return DropSPOp(self.target)
DropOp = register('drop_' + name, 'invoke_for_target')(DropOp)
return (CreateOp, DropOp)
@Operations.register_operation("drop_sp", "invoke_for_target")
class DropSPOp(ReversibleOp):
def reverse(self):
return CreateSPOp(self.target)
CreateViewOp, DropViewOp = ReversibleOp.define('view')
CreateRoleOp, DropRoleOp = ReversibleOp.define('role')
CreateSchemaOp, DropSchemaOp = ReversibleOp.define('schema')
CreateSPOp, DropSPOp = ReversibleOp.define('sp', 'SP')
CreateTriggerOp, DropTriggerOp = ReversibleOp.define('trigger')
CreatePolicyOp, DropPolicyOp = ReversibleOp.define('policy')
@Operations.implementation_for(CreateViewOp)
def create_view(operations, operation):
operations.execute("CREATE VIEW %s AS %s" % (
operation.target.name,
operation.target.sqltext
operation.target.args[0]
))
......@@ -98,11 +103,37 @@ def drop_view(operations, operation):
operations.execute("DROP VIEW %s" % operation.target.name)
@Operations.implementation_for(CreateRoleOp)
def create_role(operations, operation):
args = operation.target.args
operations.execute(
"CREATE ROLE %s WITH %s" % (
operation.target.name,
args[0] if len(args) else 'NOLOGIN'
)
)
@Operations.implementation_for(DropRoleOp)
def drop_role(operations, operation):
operations.execute("DROP ROLE %s" % operation.target.name)
@Operations.implementation_for(CreateSchemaOp)
def create_schema(operations, operation):
operations.execute("CREATE SCHEMA %s" % operation.target.name)
@Operations.implementation_for(DropSchemaOp)
def drop_schema(operations, operation):
operations.execute("DROP SCHEMA %s" % operation.target.name)
@Operations.implementation_for(CreateSPOp)
def create_sp(operations, operation):
operations.execute(
"CREATE FUNCTION %s %s" % (
operation.target.name, operation.target.sqltext
operation.target.name, operation.target.args[0]
)
)
......@@ -110,3 +141,44 @@ def create_sp(operations, operation):
@Operations.implementation_for(DropSPOp)
def drop_sp(operations, operation):
operations.execute("DROP FUNCTION %s" % operation.target.name)
@Operations.implementation_for(CreateTriggerOp)
def create_trigger(operations, operation):
args = operation.target.args
operations.execute(
"CREATE TRIGGER %s %s ON %s %s" % (
operation.target.name, args[0], args[1], args[2]
)
)
@Operations.implementation_for(DropTriggerOp)
def drop_trigger(operations, operation):
operations.execute(
"DROP TRIGGER %s ON %s" % (
operation.target.name,
operation.target.args[1]
)
)
@Operations.implementation_for(CreatePolicyOp)
def create_policy(operations, operation):
operations.execute(
"CREATE POLICY %s ON %s %s" % (
operation.target.name,
operation.target.args[0],
operation.target.args[1],
)
)
@Operations.implementation_for(DropPolicyOp)
def drop_policy(operations, operation):
operations.execute(
"DROP POLICY %s ON %s" % (
operation.target.name,
operation.target.args[0],
)
)
......@@ -2,6 +2,7 @@ import os
from gargantext.settings import MEDIA_ROOT
from datetime import MINYEAR
from dateutil.parser import parse as parse_datetime_flexible
from django.utils.dateparse import parse_datetime
from django.utils.timezone import datetime as _datetime, utc as UTC, now as utcnow
......@@ -19,7 +20,8 @@ class datetime(_datetime):
@staticmethod
def parse(s):
dt = parse_datetime(s)
dt = parse_datetime(s) or \
parse_datetime_flexible(s, default=datetime(MINYEAR, 1, 1))
return dt.astimezone(UTC) if dt.tzinfo else dt.replace(tzinfo=UTC)
......
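A quick sketch of the intended fallback behavior (assuming gargantext.util
re-exports this datetime class; outputs shown as comments):

    from gargantext.util import datetime

    # ISO-like strings are handled by Django's strict parser
    datetime.parse('2017-09-18 18:00:26')  # -> 2017-09-18 18:00:26+00:00

    # fuzzy strings fall through to dateutil, with missing parts
    # defaulting to MINYEAR-01-01
    datetime.parse('June 2017')            # -> 2017-06-01 00:00:00+00:00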
......@@ -10,7 +10,7 @@ from sqlalchemy import delete
def get_engine():
from sqlalchemy import create_engine
return create_engine( settings.DATABASES['default']['URL']
return create_engine( settings.DATABASES['default']['SECRET_URL']
, use_native_hstore = True
, json_serializer = json_dumps
, pool_size=20, max_overflow=0
......@@ -25,6 +25,7 @@ session = scoped_session(sessionmaker(bind=engine))
########################################################################
from sqlalchemy.orm import aliased
from sqlalchemy import func, desc
from sqlalchemy.sql.expression import case
########################################################################
# bulk insertions
......
......@@ -14,17 +14,6 @@ _members = [
'picture' : 'david.jpg',
'role':'principal investigator'},
{ 'first_name' : 'Samuel', 'last_name' : 'Castillo J.',
'mail' : 'kaisleanATgmail.com',
'website' : 'http://www.pksm3.droppages.com',
'picture' : 'samuel.jpg',
'role' : 'developer'},
{ 'first_name' : 'Maziyar', 'last_name' : 'Panahi',
'mail' : '',
'website' : 'http://iscpif.fr',
'picture' : 'maziyar.jpg',
'role' : 'developer'},
{ 'first_name' : 'Alexandre', 'last_name' : 'Delanoë',
'mail' : 'alexandre+gargantextATdelanoe.org',
......@@ -59,12 +48,19 @@ _membersPast = [
'mail' : '',
'website' : 'https://github.com/elishowk',
'picture' : '', 'role' : 'developer'},
{ 'first_name' : 'Samuel', 'last_name' : 'Castillo J.',
'mail' : 'kaisleanATgmail.com',
'website' : 'http://www.pksm3.droppages.com',
'picture' : 'samuel.jpg',
'role' : 'developer'},
]
_institutions = [
{ 'name' : 'Mines ParisTech', 'website' : 'http://mines-paristech.fr', 'picture' : 'mines.png', 'funds':''},
#{ 'name' : 'Institut Pasteur', 'website' : 'http://www.pasteur.fr', 'picture' : 'pasteur.png', 'funds':''},
{ 'name' : 'Institut Mines Telecom', 'website' : 'https://www.imt.fr', 'picture' : 'IMT.jpg', 'funds':''},
{ 'name' : 'EHESS', 'website' : 'http://www.ehess.fr', 'picture' : 'ehess.png', 'funds':''},
{ 'name' : 'Mines ParisTech', 'website' : 'http://mines-paristech.fr', 'picture' : 'mines.png', 'funds':''},
#{ 'name' : '', 'website' : '', 'picture' : '', 'funds':''},
# copy paste the line above and write your informations please
]
......@@ -76,10 +72,11 @@ _labs = [
]
_grants = [
{ 'name' : 'Institut Mines Telecom', 'website' : 'https://www.imt.fr', 'picture' : 'IMT.jpg', 'funds':''},
{ 'name' : 'Forccast', 'website' : 'http://forccast.hypotheses.org/', 'picture' : 'forccast.png', 'funds':''},
{ 'name' : 'Mastodons', 'website' : 'http://www.cnrs.fr/mi/spip.php?article53&lang=fr', 'picture' : 'mastodons.png', 'funds':''},
#{ 'name' : 'ADEME', 'website' : 'http://www.ademe.fr', 'picture' : 'ademe.png', 'funds':''},
{ 'name' : 'ADEME', 'website' : 'http://www.ademe.fr', 'picture' : 'ademe.png', 'funds':''},
{ 'name' : 'Institut Pasteur', 'website' : 'http://www.pasteur.fr', 'picture' : 'pasteur.png', 'funds':''},
{ 'name' : 'Scoap 3', 'website' : 'https://scoap3.org/', 'picture' : 'cern.png', 'funds':''},
#{ 'name' : '', 'website' : '', 'picture' : '', 'funds':''},
# copy paste the line above and write your informations please
]
......
......@@ -8,7 +8,7 @@ Tools to work with ngramlists (MAINLIST, MAPLIST, STOPLIST)
"""
from gargantext.util.group_tools import query_groups, group_union
from gargantext.util.db import session, bulk_insert_ifnotexists
from gargantext.util.db import session, bulk_insert_ifnotexists, desc
from gargantext.models import Ngram, NodeNgram, NodeNodeNgram, \
NodeNgramNgram, Node
......
from ._Parser import Parser
# from ..NgramsExtractors import *
import sys
import csv
csv.field_size_limit(sys.maxsize)
import numpy as np
import pandas
import io
class CSVParser(Parser):
DELIMITERS = ", \t;|:"
ENCODING = "utf-8"
def detect_delimiter(self, lines, sample_size=10):
sample = lines[:sample_size]
def open(self, file):
f = super(CSVParser, self).open(file)
# Compute frequency of each delimiter on each input line
delimiters_freqs = {
d: [line.count(d) for line in sample]
for d in self.DELIMITERS
}
if isinstance(file, str) and file.endswith('.zip'):
return f
# Select delimiters with a standard deviation of zero, ie. delimiters
# for which we have the same number of fields on each line
selected_delimiters = [
(d, np.sum(freqs))
for d, freqs in delimiters_freqs.items()
if any(freqs) and np.std(freqs) == 0
]
return io.TextIOWrapper(f, encoding=self.ENCODING)
if selected_delimiters:
# Choose the delimiter with highest frequency amongst selected ones
sorted_delimiters = sorted(selected_delimiters, key=lambda x: x[1])
return sorted_delimiters[-1][0]
def parse(self, filebuf):
print("CSV: parsing (assuming UTF-8 and LF line endings)")
contents = filebuf.read().decode("UTF-8").split("\n")
# Filter out empty lines
contents = [line for line in contents if line.strip()]
# Delimiter auto-detection
delimiter = self.detect_delimiter(contents, sample_size=10)
if delimiter is None:
raise ValueError("CSV: couldn't detect delimiter, bug or malformed data")
print("CSV: selected delimiter: %r" % delimiter)
# Parse CSV
reader = csv.reader(contents, delimiter=delimiter)
# Get first not empty row and its fields (ie. header row), or (0, [])
first_row, headers = \
next(((i, fields) for i, fields in enumerate(reader) if any(fields)),
(0, []))
# Get first not empty column of the first row, or 0
first_col = next((i for i, field in enumerate(headers) if field), 0)
# Strip out potential empty fields in headers
headers = headers[first_col:]
def parse(self, fp=None):
fp = fp or self._file
df = pandas.read_csv(fp, dtype=object, engine='python',
skip_blank_lines=True, sep=None,
na_values=[], keep_default_na=False)
# Return a generator of dictionaries with column labels as keys,
# filtering out empty rows
for i, fields in enumerate(reader):
for i, fields in enumerate(df.itertuples(index=False)):
if i % 500 == 0:
print("CSV: parsing row #%s..." % (i+1))
if any(fields):
yield dict(zip(headers, fields[first_col:]))
# See https://docs.python.org/3/library/collections.html#collections.somenamedtuple._asdict
yield fields._asdict()
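A usage sketch of the pandas-based parser (import path and column name are
hypothetical):

    from gargantext.util.parsers.CSV import CSVParser

    # sep=None with the python engine lets pandas sniff the delimiter,
    # replacing the old hand-rolled detect_delimiter logic
    for row in CSVParser('documents.csv').parse():
        print(row.get('title'))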
......@@ -81,8 +81,9 @@ class EuropresseParser(Parser):
# "./header/div/p[@class='titreArticleVisu grandTitre']"
#
# title_xpath (more generic path)
title_xpath = "./header//*[contains(@class,'titreArticle')]"
text_xpath = "./section/div[@class='DocText']//p"
title_xpath = "./header//*[contains(@class,'titreArticleVisu rdp__articletitle')]"
authors_xpath = "./header//*[contains(@class,'docAuthors')]"
text_xpath = "./section/div[@class='DocText clearfix']//p"
entire_header_xpath = "./header"
# diagnosed during date retrieval and used for rubrique
......@@ -144,6 +145,15 @@ class EuropresseParser(Parser):
yield(hyperdata)
continue
# Authors
# --------
try:
authors = scrap_text(html_article.xpath(authors_xpath))
hyperdata['authors'] = '; '.join([author for author in authors])
except:
pass
# FULLTEXT
# --------
......@@ -154,6 +164,7 @@ class EuropresseParser(Parser):
except:
pass
# PUBLICATIONNAME
# ----------------
try:
......
......@@ -12,12 +12,12 @@ import json
class HalParser(Parser):
def _parse(self, json_docs):
hyperdata_list = []
hyperdata_path = { "id" : "isbn_s"
, "title" : "en_title_s"
, "abstract" : "en_abstract_s"
hyperdata_path = { "id" : "docid"
, "title" : ["en_title_s", "title_s"]
, "abstract" : ["en_abstract_s", "abstract_s"]
, "source" : "journalTitle_s"
, "url" : "uri_s"
, "authors" : "authFullName_s"
......@@ -29,8 +29,8 @@ class HalParser(Parser):
, "instStructId_i" : "instStructId_i"
, "deptStructId_i" : "deptStructId_i"
, "labStructId_i" : "labStructId_i"
, "rteamStructId_i" : "rteamStructId_i"
, "docType_s" : "docType_s"
, "rteamStructId_i" : "rteamStructId_i"
, "docType_s" : "docType_s"
}
uris = set()
......@@ -38,29 +38,32 @@ class HalParser(Parser):
for doc in json_docs:
hyperdata = {}
for key, path in hyperdata_path.items():
field = doc.get(path, "NOT FOUND")
if isinstance(field, list):
hyperdata[key] = ", ".join(map(lambda x: str(x), field))
else:
hyperdata[key] = str(field)
# A path can be a field name or a sequence of field names
if isinstance(path, (list, tuple)):
# Get first non-empty value of fields in path sequence, or None
field = next((x for x in (doc.get(p) for p in path) if x), None)
else:
# Get field value
field = doc.get(path)
if field is None:
field = "NOT FOUND"
if isinstance(field, list):
hyperdata[key] = ", ".join(map(str, field))
else:
hyperdata[key] = str(field)
if hyperdata["url"] in uris:
print("Document already parsed")
else:
uris.add(hyperdata["url"])
# hyperdata["authors"] = ", ".join(
# [ p.get("person", {})
# .get("name" , "")
#
# for p in doc.get("hasauthor", [])
# ]
# )
#
maybeDate = doc.get("submittedDate_s", None)
maybeDate = doc.get("submittedDate_s", None)
if maybeDate is not None:
date = datetime.strptime(maybeDate, "%Y-%m-%d %H:%M:%S")
else:
......@@ -70,9 +73,9 @@ class HalParser(Parser):
hyperdata["publication_year"] = str(date.year)
hyperdata["publication_month"] = str(date.month)
hyperdata["publication_day"] = str(date.day)
hyperdata_list.append(hyperdata)
return hyperdata_list
def parse(self, filebuf):
......
import re
from .RIS import RISParser
......@@ -12,8 +14,39 @@ class ISIParser(RISParser):
"DI": {"type": "hyperdata", "key": "doi"},
"SO": {"type": "hyperdata", "key": "source"},
"PY": {"type": "hyperdata", "key": "publication_year"},
"PD": {"type": "hyperdata", "key": "publication_month"},
"PD": {"type": "hyperdata", "key": "publication_date_to_parse"},
"LA": {"type": "hyperdata", "key": "language_fullname"},
"AB": {"type": "hyperdata", "key": "abstract", "separator": " "},
"WC": {"type": "hyperdata", "key": "fields"},
}
_year = re.compile(r'\b\d{4}\b')
_season = re.compile(r'\b(SPR|SUM|FAL|WIN)\b', re.I)
_month_interval = re.compile(r'\b([A-Z]{3})-([A-Z]{3})\b', re.I)
_day_interval = re.compile(r'\b(\d{1,2})-(\d{1,2})\b')
def _preprocess_PD(self, PD, PY):
# Add a year to date if applicable
if PY and self._year.search(PY) and not self._year.search(PD):
PD = PY + " " + PD
# Drop season if any
PD = self._season.sub('', PD).strip()
# If a month interval is present, keep only the first month
PD = self._month_interval.sub(r'\1', PD)
# If a day interval is present, keep only the first day
PD = self._day_interval.sub(r'\1', PD)
return PD
def parse(self, file):
PD = self._parameters["PD"]["key"]
PY = self._parameters["PY"]["key"]
for entry in super().parse(file):
if PD in entry:
entry[PD] = self._preprocess_PD(entry[PD], entry[PY])
yield entry
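To illustrate _preprocess_PD, a small check (a sketch; __new__ skips the
file-opening __init__ since only the class-level regexes are needed):

    p = ISIParser.__new__(ISIParser)
    p._preprocess_PD('JAN-FEB', '2017')    # -> '2017 JAN'
    p._preprocess_PD('WIN 2016', '2016')   # -> '2016'
    p._preprocess_PD('SEP 12-14', '2015')  # -> '2015 SEP 12'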
......@@ -14,12 +14,12 @@ class PubmedParser(Parser):
"language_iso3" : 'MedlineCitation/Article/Language',
"doi" : 'PubmedData/ArticleIdList/ArticleId[@type=doi]',
"realdate_full_" : 'MedlineCitation/Article/Journal/JournalIssue/PubDate/MedlineDate',
"realdate_year_" : 'MedlineCitation/Article/Journal/JournalIssue/PubDate/Year',
"realdate_month_" : 'MedlineCitation/Article/Journal/JournalIssue/PubDate/Month',
"realdate_day_" : 'MedlineCitation/Article/Journal/JournalIssue/PubDate/Day',
"publication_year" : 'MedlineCitation/DateCreated/Year',
"publication_month" : 'MedlineCitation/DateCreated/Month',
"publication_day" : 'MedlineCitation/DateCreated/Day',
"realdate_year_" : 'PubmedData/History/PubMedPubDate/Year',
"realdate_month_" : 'PubmedData/History/PubMedPubDate/Month',
"realdate_day_" : 'PubmedData/History/PubMedPubDate/Day',
"publication_year" : 'MedlineCitation/Article/ArticleDate/Year',
"publication_month" : 'MedlineCitation/Article/ArticleDate/Month',
"publication_day" : 'MedlineCitation/Article/ArticleDate/Day',
"authors" : 'MedlineCitation/Article/AuthorList',
}
......
......@@ -3,10 +3,7 @@ import zipfile
import re
import dateparser as date_parser
from gargantext.util.languages import languages
from gargantext.util import datetime, convert_to_datetime, MINYEAR
DEFAULT_DATE = datetime(MINYEAR, 1, 1)
from gargantext.util import datetime, convert_to_datetime
class Parser:
......@@ -14,15 +11,14 @@ class Parser:
"""
def __init__(self, file):
if isinstance(file, str):
self._file = open(file, 'rb')
else:
self._file = file
self._file = self.open(file)
def __del__(self):
if hasattr(self, '_file'):
self._file.close()
def open(self, file):
return open(file, 'rb') if isinstance(file, str) else file
def detect_encoding(self, string):
"""Useful method to detect the encoding of a document.
......@@ -47,10 +43,7 @@ class Parser:
if date_string is not None:
date_string = re.sub(r'\/\/+(\w*|\d*)', '', date_string)
try:
hyperdata['publication_date'] = dateutil.parser.parse(
date_string,
default=DEFAULT_DATE
)
hyperdata['publication_date'] = datetime.parse(date_string)
except Exception as error:
print(error, 'Date not parsed for:', date_string)
hyperdata['publication_date'] = datetime.now()
......@@ -93,6 +86,9 @@ class Parser:
print("WARNING: Date unknown at _Parser level, using now()")
hyperdata['publication_date'] = datetime.now()
# XXX Handling prefixes is most likely useless: there seems to be only
# one prefix, which is "publication" (as in "publication_date").
# ...then parse all the "date" fields, to parse it into separate elements
prefixes = [key[:-5] for key in hyperdata.keys() if key[-5:] == "_date"]
for prefix in prefixes:
......@@ -165,11 +161,10 @@ class Parser:
file = self._file
# if the file is a ZIP archive, recurse on each of its files...
if zipfile.is_zipfile(file):
zipArchive = zipfile.ZipFile(file)
for filename in zipArchive.namelist():
f = zipArchive.open(filename, 'r')
yield from self.__iter__(f)
f.close()
with zipfile.ZipFile(file) as zf:
for filename in zf.namelist():
with zf.open(filename) as df, self.open(df) as f:
yield from self.__iter__(f)
# ...otherwise, let's parse it directly!
else:
try:
......
......@@ -61,12 +61,14 @@ def nodes(parent=None, group_by='typename', order_by='typename', has_child='chec
def node_show(node, prefix='', maxlen=60):
if node.children > 0 or node.cnt == 1:
node_id = '<{}> '.format(node.id)
name = node.name[:maxlen] + '..' if len(node.name) > maxlen else node.name
label = Fore.CYAN + name + Fore.RESET
label = node_id + Fore.CYAN + name + Fore.RESET
else:
label = Fore.MAGENTA + str(node.cnt) + Fore.RESET
print(prefix, '%s%s %s' % (Fore.GREEN, node.typename, label), sep='')
typename = Fore.GREEN + node.typename + Fore.RESET
print(prefix, '%s %s' % (typename, label), sep='')
def tree_show(node, pos=FIRST|LAST, level=0, prefix='', maxlen=60, compact=True):
......
......@@ -6,7 +6,7 @@ from gargantext.settings import BASE_URL
drafts = {
drafts = {
'workflowEnd' : '''
Bonjour,
votre analyse sur Gargantext vient de se terminer.
......@@ -42,18 +42,33 @@ drafts = {
''',
'recountDone': '''
Bonjour,
le recalcul que vous avez lancé est terminé.
Vous pouvez accéder à votre corpus intitulé
\"%s\"
à l'adresse:
http://%s/projects/%d/corpora/%d
}
Nous restons à votre disposition pour tout complément d'information.
Cordialement
--
L'équipe de Gargantext (CNRS)
'''
}
def notification(corpus,draft):
def notification(corpus, draft, subject='Update'):
user = session.query(User).filter(User.id == corpus.user_id).first()
message = draft % (corpus.name, BASE_URL, corpus.parent_id, corpus.id)
if user.email != "" :
send_mail('[Gargantext] Update'
send_mail('[Gargantext] %s' % subject
, message
, 'contact@gargantext.org'
, [user.email], fail_silently=False )
......@@ -63,11 +78,12 @@ def notification(corpus,draft):
def notify_owner(corpus):
notification(corpus, drafts['workflowEnd'])
notification(corpus, drafts['workflowEnd'], 'Corpus updated')
def notify_listMerged(corpus):
notification(corpus, drafts['listMerged'])
notification(corpus, drafts['listMerged'], 'List merged')
def notify_recount(corpus):
notification(corpus, drafts['recountDone'], 'Recount done')
......@@ -13,7 +13,7 @@ from .ngram_coocs import compute_coocs
#from .ngram_coocs_old_sqlalchemy_version import compute_coocs
from .metric_specgen import compute_specgen
from .list_map import do_maplist
from .mail_notification import notify_owner
from .mail_notification import notify_owner, notify_recount
from gargantext.util.db import session
from gargantext.models import Node
......@@ -62,12 +62,12 @@ def parse_extract_indexhyperdata(corpus):
# apply actions
print('CORPUS #%d' % (corpus.id))
corpus.status('Docs', progress=1)
corpus.save_hyperdata()
session.commit()
parse(corpus)
docs = corpus.children("DOCUMENT").count()
print('CORPUS #%d: parsed %d' % (corpus.id, docs))
extract_ngrams(corpus)
......@@ -242,6 +242,19 @@ def recount(corpus_id):
corpus.save_hyperdata()
session.commit()
# START OF KLUDGE...
from gargantext.models import NodeNgram, DocumentNode
from .ngrams_addition import index_new_ngrams
maplist_id = corpus.children("MAPLIST").first().id
ngram_ids = session.query(NodeNgram.ngram_id.distinct())
indexed_ngrams = ngram_ids.join(DocumentNode).filter(DocumentNode.parent_id==corpus.id)
not_indexed_ngrams = ngram_ids.filter(NodeNgram.node_id==maplist_id,
~NodeNgram.ngram_id.in_(indexed_ngrams))
not_indexed_ngrams = [x[0] for x in not_indexed_ngrams]
added = index_new_ngrams(not_indexed_ngrams, corpus)
print('RECOUNT #%d: [%s] indexed %s ngrams' % (corpus.id, t(), added))
# ...END OF KLUDGE
# -> overwrite occurrences (=> NodeNodeNgram)
occ_id = compute_occs(corpus,
groupings_id = group_id,
......@@ -286,5 +299,10 @@ def recount(corpus_id):
corpus.save_hyperdata()
session.commit()
if not DEBUG:
print('RECOUNT #%d: [%s] FINISHED Sending email notification' % (corpus.id, t()))
notify_recount(corpus)
def t():
return datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
......@@ -23,7 +23,7 @@ from datetime import datetime
def t():
return datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
def compute_occs(corpus, overwrite_id = None, groupings_id = None,):
def compute_occs(corpus, overwrite_id = None, groupings_id = None, year=None, start=None, end=None, interactiv=False):
"""
Calculates sum of occs per ngram (or per mainform if groups) within corpus
(used as info in the ngrams table view)
......@@ -61,6 +61,8 @@ def compute_occs(corpus, overwrite_id = None, groupings_id = None,):
.group_by(NodeNgram.ngram_id)
)
if year is not None:
occs_q = occs_q.filter(Node.hyperdata["publication_year"].astext == str(year))
# difficult case: with groups
# ------------
......@@ -108,6 +110,10 @@ def compute_occs(corpus, overwrite_id = None, groupings_id = None,):
# for the sum
.group_by("counted_form")
)
if year is not None:
occs_q = occs_q.filter(Node.hyperdata["publication_year"].astext == str(year))
#print(str(occs_q.all()))
occ_sums = occs_q.all()
......@@ -134,13 +140,17 @@ def compute_occs(corpus, overwrite_id = None, groupings_id = None,):
# £TODO make it NodeNgram instead NodeNodeNgram ! and rebase :/
# (idem ti_ranking)
bulk_insert(
NodeNodeNgram,
('node1_id' , 'node2_id', 'ngram_id', 'score'),
((the_id, corpus.id, res[0], res[1]) for res in occ_sums)
)
return the_id
if interactiv is False :
bulk_insert(
NodeNodeNgram,
('node1_id' , 'node2_id', 'ngram_id', 'score'),
((the_id, corpus.id, res[0], res[1]) for res in occ_sums)
)
return the_id
else :
return [(res[0], res[1]) for res in occ_sums]
def compute_ti_ranking(corpus,
......
......@@ -20,6 +20,7 @@ def compute_coocs( corpus,
stoplist_id = None,
start = None,
end = None,
year = None,
symmetry_filter = False,
diagonal_filter = True):
"""
......@@ -97,14 +98,21 @@ def compute_coocs( corpus,
WHERE
n.typename = {nodetype_id}
AND n.parent_id = {corpus_id}
""".format( nodetype_id = NODETYPES.index('DOCUMENT')
, corpus_id=corpus.id
)
if year :
cooc_filter_sql += """
AND n.hyperdata -> 'publication_year' = '{year}'
""".format( year=str(year))
cooc_filter_sql += """
GROUP BY 1,2
-- ==
-- GROUP BY ngA, ngB
)
""".format( nodetype_id = NODETYPES.index('DOCUMENT')
, corpus_id=corpus.id
)
"""
# 3) taking the cooccurrences of ngram x2
ngram_filter_A_sql += """
-- STEP 1: X axis of the matrix
......
......@@ -56,18 +56,15 @@ def extract_ngrams(corpus, keys=DEFAULT_INDEX_FIELDS, do_subngrams = DEFAULT_IND
tagger_bots = {lang: load_tagger(lang) for lang in corpus.hyperdata["languages"] \
if lang != "__unknown__"}
tagger_bots["__unknown__"] = load_tagger("en")
# print("#TAGGERS LOADED: ", tagger_bots)
print("#TAGGERS LOADED: ", tagger_bots)
supported_taggers_lang = tagger_bots.keys()
# print("#SUPPORTED TAGGER LANGS", supported_taggers_lang)
print("#SUPPORTED TAGGER LANGS", list(supported_taggers_lang))
for documents_count, document in enumerate(corpus.children('DOCUMENT')):
#load only the docs that have passed the parsing without error
if document.id not in corpus.hyperdata["skipped_docs"]:
if 'language_iso2' in document.hyperdata:
language_iso2 = document.hyperdata['language_iso2']
else:
language_iso2 = "__unknown__"
language_iso2 = document.hyperdata.get('language_iso2', '__unknown__')
# debug
# print(language_iso2)
......
......@@ -5,6 +5,7 @@ from collections import defaultdict
from gargantext.util.toolchain import *
import copy
from gargantext.util.db import session
from gargantext.models import UserNode
class ProjectList(APIView):
'''API endpoint that represent a list of projects owned by a user'''
......@@ -36,10 +37,16 @@ class ProjectList(APIView):
return Response({"detail":"Project with this name already exists", "url":"/projects/%s" %str(project.id)}, status = HTTP_409_CONFLICT)
else:
user_node = session.query(UserNode).filter_by(user_id=request.user.id).one_or_none()
if user_node is None:
print("??? Can't find UserNode for %r to create ProjectNode with name %r ???" % (request.user, name))
new_project = Node(
user_id = request.user.id,
typename = 'PROJECT',
name = name,
parent_id = user_node and user_node.id,
)
session.add(new_project)
......
......@@ -230,6 +230,7 @@ def countCooccurrences( corpus_id=None , cooc_id=None
session.commit()
#data = cooc2graph(coocNode.id, cooc, distance=distance, bridgeness=bridgeness)
#return data
else:
return cooc
return(coocNode.id, cooc)
......@@ -70,15 +70,12 @@ drafts = {
Your feedback will be valuable for further development
of the platform, do not hesitate to contact us and
to contribute!
to contribute on our forum:
https://discourse.iscpif.fr/c/gargantext
If you want to access your old corpuses,
access codes remain valid until June 30
2017 midnight:
http://old.gargantext.org
We remain at your disposal for any further information.
We remain at your disposal for any further information and you will be
kept updated if you subscribe to our mailing-list:
https://phplist.iscpif.fr/?p=subscribe&id=4
With our best regards,
--
......@@ -127,15 +124,14 @@ drafts = {
Vos retours seront précieux pour poursuivre le développement
de la plateforme, n'hésitez pas à nous contacter et
contribuer!
contribuer sur notre forum:
https://discourse.iscpif.fr/c/gargantext
Si vous souhaitez accéder à vos anciens corpus, vos anciens
codes d'accès restent valides à cette adresse jusqu'au 30 juin
2017 minuit:
Nous restons à votre disposition pour tout complément
d'information et vous serez informés de l'évolution des
services en vous inscrivant à la liste:
https://phplist.iscpif.fr/?p=subscribe&id=4
http://old.gargantext.org
Nous restons à votre disposition pour tout complément d'information.
Cordialement
--
L'équipe de Gargantext (CNRS)
......@@ -195,9 +191,14 @@ drafts = {
Vos retours seront précieux pour poursuivre le développement
de la plateforme, n'hésitez pas à nous contacter et
contribuer!
contribuer sur notre forum:
https://discourse.iscpif.fr/c/gargantext
Nous restons à votre disposition pour tout complément
d'information et vous serez informés de l'évolution des
services en vous inscrivant à la liste:
https://phplist.iscpif.fr/?p=subscribe&id=4
Nous restons à votre disposition pour tout complément d'information.
Cordialement
--
L'équipe de Gargantext (CNRS)
......
......@@ -28,11 +28,13 @@ RandomWords==0.1.12
ujson==1.35
umalqurra==0.2 # arabic calendars (?? why use ??)
networkx==1.11
pandas==0.18.0
pandas==0.21.0
six==1.10.0
lxml==3.5.0
requests-futures==0.9.7
bs4==0.0.1
requests==2.10.0
alembic>=0.9.2
# SQLAlchemy-Searchable==0.10.4
SQLAlchemy==1.1.14
SQLAlchemy-Searchable==0.10.4
SQLAlchemy-Utils==0.32.16
......@@ -106,17 +106,17 @@ RUN apt-get update && apt-get install -y \
libblas-dev \
liblapack-dev
USER notebooks
RUN cd /home/notebooks \
&& curl -sSL https://get.haskellstack.org/ | sh \
&& stack setup \
&& git clone https://github.com/gibiansky/IHaskell \
&& . /env_3-5/bin/activate \
&& cd IHaskell \
&& stack install gtk2hs-buildtools \
&& stack install --fast \
&& /root/.local/bin/ihaskell install --stack
#USER notebooks
#
#RUN cd /home/notebooks \
# && curl -sSL https://get.haskellstack.org/ | sh \
# && stack setup \
# && git clone https://github.com/gibiansky/IHaskell \
# && . /env_3-5/bin/activate \
# && cd IHaskell \
# && stack install gtk2hs-buildtools \
# && stack install --fast \
# && /root/.local/bin/ihaskell install --stack
#
......@@ -15,12 +15,17 @@ os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'gargantext.settings')
django.setup()
from gargantext.constants import QUERY_SIZE_N_MAX, get_resource, get_resource_by_name
from gargantext.models import ProjectNode, DocumentNode, UserNode, User
from gargantext.util.db import session, get_engine
from gargantext.models import (Node, ProjectNode, DocumentNode,
Ngram, NodeNgram, NodeNgramNgram, NodeNodeNgram)
from gargantext.util.db import session, get_engine, func, aliased, case
from collections import Counter
import importlib
from django.http import Http404
# Import these so they are available to notebook users
from langdetect import detect as detect_lang
from gargantext.models import UserNode, User
import functools
class NotebookError(Exception):
pass
......@@ -35,8 +40,11 @@ def documents(corpus_id):
#import seaborn as sns
import pandas as pd
def countByField(docs, field):
return list(Counter([doc.hyperdata[field] for doc in docs]).items())
def chart(docs, field):
year_publis = list(Counter([doc.hyperdata[field] for doc in docs]).items())
year_publis = countByField(docs, field)
frame0 = pd.DataFrame(year_publis, columns=['Date', 'DateValue'])
frame1 = pd.DataFrame(year_publis, columns=['Date', 'DateValue'], index=frame0.Date)
return frame1
......@@ -49,15 +57,35 @@ def scan_hal(request):
return hal.scan_results(request)
def scan_gargantext(corpus_id, lang, request):
connection = get_engine().connect()
# TODO add some sugar to the request (ideally the request should be the same for hal and garg)
query = """select count(n.id) from nodes n
where to_tsvector('%s', hyperdata ->> 'abstract' || 'title')
@@ to_tsquery('%s')
AND n.parent_id = %s;""" % (lang, request, corpus_id)
return [i for i in connection.execute(query)][0][0]
connection.close()
def _search_docs(corpus_id, request, fast=False):
q = session.query(DocumentNode).filter_by(parent_id=corpus_id)
# Search ngram <request> in hyperdata <field>
H = lambda field, request: Node.hyperdata[field].astext.op('~*')(request)
if not fast:
# Only match <request> starting and ending with word boundary
# Sequence of spaces will match any sequence of spaces
request = r'\s+'.join(filter(None, r'\m{}\M'.format(request).split(' ')))
return q.filter(Node.title_abstract.match(request)) if fast else \
q.filter(H('title', request) | H('abstract', request))
def scan_gargantext(corpus_id, request, fast=False, documents=False):
query = _search_docs(corpus_id, request, fast)
if documents:
return query.all()
return query.with_entities(func.count(DocumentNode.id.distinct())).one()[0]
def scan_gargantext_and_delete(corpus_id, request, fast=False):
r = _search_docs(corpus_id, request, fast).delete(synchronize_session='fetch')
session.commit()
return r
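A usage sketch for the search helpers above (corpus id and query are
hypothetical):

    # count documents of corpus 42 matching "water" in title or abstract;
    # fast=True goes through the title_abstract fulltext index instead of
    # the slower regexp scan
    n = scan_gargantext(42, 'water', fast=True)

    # or fetch the matching documents themselves
    docs = scan_gargantext(42, 'water', documents=True)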
def myProject_fromUrl(url):
......@@ -179,3 +207,80 @@ def run_moissonneur(moissonneur, project, name, query):
session.commit()
return corpus
ALL_LIST_TYPES = ['main', 'map', 'stop']
def _ngrams(corpus_id, list_types, entities):
list_types = (list_types,) if isinstance(list_types, str) else list_types
list_typenames = [
'{}LIST'.format(t.upper()) for t in list_types if t in ALL_LIST_TYPES]
# `Node` is our list, ie. MAINLIST and/or MAPLIST and/or STOPLIST
return (session.query(*entities)
.select_from(Ngram)
.filter(NodeNgram.ngram_id==Ngram.id,
NodeNgram.node_id==Node.id,
Node.parent_id==corpus_id,
Node.typename.in_(list_typenames)))
def corpus_list(corpus_id, list_types=ALL_LIST_TYPES, with_synonyms=False,
with_count=False):
# Link between a GROUPLIST, a normal form (ngram1), and a synonym (ngram2)
NNN = NodeNgramNgram
# Get the list type from the Node type -- as in CSV export
list_type = (case([(Node.typename=='MAINLIST', 'main'),
(Node.typename=='MAPLIST', 'map'),
(Node.typename=='STOPLIST', 'stop')])
.label('type'))
# We will retrieve each ngram as the following tuple:
entities = (list_type, Ngram.terms.label('ng'))
if with_count:
entities += (Ngram.id.label('id'),)
# First, get ngrams from wanted lists
ngrams = _ngrams(corpus_id, list_types, entities)
# Secondly, exclude "synonyms" (grouped ngrams that are not normal forms).
# We have to exclude synonyms first because data is inconsistent and some
# of them can be both in GROUPLIST and in MAIN/MAP/STOP lists. We want to
# take synonyms from GROUPLIST only -- see below.
Groups = aliased(Node, name='groups')
query = (ngrams.outerjoin(Groups, (Groups.parent_id==corpus_id) & (Groups.typename=='GROUPLIST'))
.outerjoin(NNN, (NNN.node_id==Groups.id) & (NNN.ngram2_id==Ngram.id))
.filter(NNN.ngram1_id==None))
# If `with_synonyms` is True, add them from GROUPLIST: this is the reliable
# source for them
if with_synonyms:
Synonym = aliased(Ngram)
ent = (list_type, Synonym.terms.label('ng'), Synonym.id.label('id'))
synonyms = (ngrams.with_entities(*ent)
.filter(NNN.ngram1_id==Ngram.id,
NNN.ngram2_id==Synonym.id,
NNN.node_id==Groups.id,
Groups.parent_id==corpus_id,
Groups.typename=='GROUPLIST'))
query = query.union(synonyms)
# Again, data is inconsistent: MAINLIST may intersect with MAPLIST and
# we don't want that
if 'main' in list_types and 'map' not in list_types:
# Exclude MAPLIST ngrams from MAINLIST
query = query.except_(_ngrams(corpus_id, 'map', entities))
if with_count:
N = query.subquery()
return (session.query(N.c.type, N.c.ng, NodeNodeNgram.score)
.join(Node, (Node.parent_id==corpus_id) & (Node.typename=='OCCURRENCES'))
.outerjoin(NodeNodeNgram, (NodeNodeNgram.ngram_id==N.c.id) &
(NodeNodeNgram.node1_id==Node.id) &
(NodeNodeNgram.node2_id==corpus_id)))
# Return found ngrams sorted by list type, and then alphabetically
return query.order_by('type', 'ng')
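A usage sketch (hypothetical corpus id):

    # map-list terms with occurrence counts, as (type, term, score) rows
    for list_type, term, score in corpus_list(42, 'map', with_count=True):
        print(list_type, term, score)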
......@@ -42,3 +42,7 @@ ipython==5.2.0
ipython-genutils==0.1.0
ipywidgets
matplotlib==2.0.2
alembic>=0.9.2
SQLAlchemy==1.1.14
SQLAlchemy-Searchable==0.10.4
SQLAlchemy-Utils==0.32.16
#!/bin/bash
FILE="/var/log/gargantext/uwsgi/$(date +%Y%m%d-%H:%M:%S).log"
#touch /var/log/gargantext/uwsgi/$FILE && sudo
sudo uwsgi gargantext.ini --logto $FILE
# Script to start uwsgi
uwsgi /srv/gargantext/gargantext.ini
echo "To reload UWSGI: touch /tmp/gargantext.reload"
......@@ -30,7 +30,7 @@
</a>
<a class="btn btn-success btn-lg" target="blank" href="https://iscpif.fr/gargantext/your-first-map/" title="Fill the form to sign up">
<a class="btn btn-success btn-lg" target="blank" href="https://iscpif.fr/gargantext/" title="Fill the form to sign up">
<span class="glyphicon glyphicon-hand-right" aria-hidden="true"></span>
Documentation
</a>
......
......@@ -368,7 +368,7 @@
<p>
Gargantext
<span class="glyphicon glyphicon-registration-mark" aria-hidden="true"></span>
, version 3.0.7,
, version 3.0.8.1,
<a href="http://www.cnrs.fr" target="blank" title="Institution that enables this project.">
Copyrights
<span class="glyphicon glyphicon-copyright-mark" aria-hidden="true"></span>
......
......@@ -203,6 +203,7 @@
// do something…
resetStatusForm("#createForm");
})
return false;
})
......
......@@ -1170,10 +1170,10 @@
// REST and callback
garganrest.metrics.update(corpusId, function(){
statusDiv.innerHTML = '<div class="statusinfo">Corpus updated</div>'
statusDiv.innerHTML = '<div class="statusinfo">Recount is started, please wait, you will be sent a notification email.</div>'
// revert visual
setTimeout(function(){ statusDiv.innerHTML = previousStatus }, 2000);
//setTimeout(function(){ statusDiv.innerHTML = previousStatus }, 2000);
})
}
......