[MERGE] from unstable to testing.

8aa028fe · Administrator · 376b1304 · 26261382 · 8aa028fe · 8aa028fe
Commit 8aa028fe authored Apr 01, 2015 by Administrator
53 changed files
--- a/analysis/InterUnion.py
+++ b/analysis/InterUnion.py
+import networkx as nx
+from itertools import combinations
+
+class Utils:
+
+	def __init__(self):
+		self.G = nx.Graph()
+
+	def unique(self,a):
+		""" return the list with duplicate elements removed """
+		return list(set(a))
+
+	def intersect(self,a, b):
+		""" return the intersection of two lists """
+		return list(set(a) & set(b))
+
+	def union(self,a, b):
+		""" return the union of two lists """
+		return list(set(a) | set(b))
+
+	def addCompleteSubGraph(self,terms):
+		G=self.G
+		# <addnode> #
+		for i in terms:
+			G.add_node(i)
+		# </addnode> #
+
+		# <addedge> #
+		edges = combinations(terms, 2)
+		for n in edges:
+			n1=n[0]
+			n2=n[1]
+			one=float(1)
+			if G.has_edge(n1,n2):
+				G[n1][n2]['weight']+=one
+			else: G.add_edge(n1,n2,weight=one)
+		self.G = G
\ No newline at end of file
--- a/analysis/functions.py
+++ b/analysis/functions.py
--- a/analysis/tfidf.py
+++ b/analysis/tfidf.py
@@ -60,6 +60,7 @@ def tfidf(corpus, document, ngram):
                .filter(NodeNgram.ngram_id == ngram.id)\
                .count()
        
+        # print("\t\t\t","occs:",occurrences_of_ngram," || ngramsbydoc:",ngrams_by_document," || TF = occ/ngramsbydoc:",term_frequency," |||||| x:",xx," || y:",yy," || IDF = log(x/y):",log(xx/yy))
        inverse_document_frequency= log(xx/yy)

        # result = tf * idf

--- a/api/__init__.py
+++ b/api/__init__.py
--- a/api/analyses/__init__.py
+++ b/api/analyses/__init__.py
--- a/api/crawls/__init__.py
+++ b/api/crawls/__init__.py
--- a/api/lists/__init__.py
+++ b/api/lists/__init__.py
--- a/api/nodes/__init__.py
+++ b/api/nodes/__init__.py
--- a/gargantext_web/api.py
+++ b/gargantext_web/api.py
 from django.http import HttpResponseNotFound, HttpResponse, Http404
 from django.core.exceptions import PermissionDenied, SuspiciousOperation
 from django.core.exceptions import ValidationError
+from django.core.urlresolvers import reverse

 from django.db.models import Avg, Max, Min, Count, Sum
 # from node.models import Language, ResourceType, Resource
@@ -10,8 +11,9 @@ from sqlalchemy import text, distinct
 from sqlalchemy.sql import func
 from sqlalchemy.orm import aliased

+from gargantext_web.views import move_to_trash
 from .db import *
-
+from node import models

 def DebugHttpResponse(data):
    return HttpResponse('<html><body style="background:#000;color:#FFF"><pre>%s</pre></body></html>' % (str(data), ))
@@ -45,10 +47,14 @@ _ngrams_order_columns = {
 }


+
+from rest_framework.authentication import SessionAuthentication, BasicAuthentication
+from rest_framework.permissions import IsAuthenticated
 from rest_framework.views import APIView
 from rest_framework.response import Response
 from rest_framework.exceptions import APIException as _APIException

+
 class APIException(_APIException):
    def __init__(self, message, code=500):
        self.status_code = code
@@ -82,7 +88,7 @@ class NodesChildrenNgrams(APIView):
    def get(self, request, node_id):
        # query ngrams
        ParentNode = aliased(Node)
-        ngrams_query = (Ngram
+        ngrams_query = (session
            .query(Ngram.terms, func.count().label('count'))
            # .query(Ngram.id, Ngram.terms, func.count().label('count'))
            .join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
@@ -128,7 +134,7 @@ class NodesChildrenDuplicates(APIView):
            raise APIException('Missing GET parameter: "keys"', 400)
        keys = request.GET['keys'].split(',')
        # metadata retrieval
-        metadata_query = (Metadata
+        metadata_query = (session
            .query(Metadata)
            .filter(Metadata.name.in_(keys))
        )
@@ -187,6 +193,7 @@ class NodesChildrenDuplicates(APIView):
        # get the minimum ID for each of the nodes sharing the same metadata
        kept_node_ids_query = self._fetch_duplicates(request, node_id, [func.min(Node.id).label('id')], 0)
        kept_node_ids = [kept_node.id for kept_node in kept_node_ids_query]
+        # TODO with new orm
        duplicate_nodes =  models.Node.objects.filter( parent_id=node_id ).exclude(id__in=kept_node_ids)
        # # delete the stuff
        # delete_query = (session
@@ -197,7 +204,7 @@ class NodesChildrenDuplicates(APIView):
        count = len(duplicate_nodes)
        for node in duplicate_nodes:
            print("deleting node ",node.id)
-            node.delete()
+            move_to_trash(node.id)
        # print(delete_query)
        # # delete_query.delete(synchronize_session=True)
        # session.flush()
@@ -213,7 +220,7 @@ class NodesChildrenMetatadata(APIView):
        
        # query metadata keys
        ParentNode = aliased(Node)
-        metadata_query = (Metadata
+        metadata_query = (session
            .query(Metadata)
            .join(Node_Metadata, Node_Metadata.metadata_id == Metadata.id)
            .join(Node, Node.id == Node_Metadata.node_id)
@@ -233,7 +240,7 @@ class NodesChildrenMetatadata(APIView):
            values_to = None
            if metadata.type != 'text':
                value_column = getattr(Node_Metadata, 'value_' + metadata.type)
-                node_metadata_query = (Node_Metadata
+                node_metadata_query = (session
                    .query(value_column)
                    .join(Node, Node.id == Node_Metadata.node_id)
                    .filter(Node.parent_id == node_id)
@@ -381,9 +388,9 @@ class NodesChildrenQueries(APIView):
        for field_name in fields_names:
            split_field_name = field_name.split('.')
            if split_field_name[0] == 'metadata':
-                metadata = Metadata.query(Metadata).filter(Metadata.name == split_field_name[1]).first()
+                metadata = session.query(Metadata).filter(Metadata.name == split_field_name[1]).first()
                if metadata is None:
-                    metadata_query = Metadata.query(Metadata.name).order_by(Metadata.name)
+                    metadata_query = session.query(Metadata.name).order_by(Metadata.name)
                    metadata_names = [metadata.name for metadata in metadata_query.all()]
                    raise APIException('Invalid key for "%s" in parameter "field", should be one of the following values: "%s". "%s" was found instead' % (field[0], '", "'.join(metadata_names), field[1]), 400)
                # check or create Node_Metadata alias; join if necessary
@@ -422,7 +429,7 @@ class NodesChildrenQueries(APIView):
            )

        # starting the query!
-        document_type_id = NodeType.query(NodeType.id).filter(NodeType.name == 'Document').scalar()
+        document_type_id = cache.NodeType['Document'].id ##session.query(NodeType.id).filter(NodeType.name == 'Document').scalar()
        query = (session
            .query(*fields_list)
            .select_from(Node)
@@ -451,9 +458,9 @@ class NodesChildrenQueries(APIView):
            # 
            if field[0] == 'metadata':
                # which metadata?
-                metadata = Metadata.query(Metadata).filter(Metadata.name == field[1]).first()
+                metadata = session.query(Metadata).filter(Metadata.name == field[1]).first()
                if metadata is None:
-                    metadata_query = Metadata.query(Metadata.name).order_by(Metadata.name)
+                    metadata_query = session.query(Metadata.name).order_by(Metadata.name)
                    metadata_names = [metadata.name for metadata in metadata_query.all()]
                    raise APIException('Invalid key for "%s" in parameter "field", should be one of the following values: "%s". "%s" was found instead' % (field[0], '", "'.join(metadata_names), field[1]), 400)                
                # check or create Node_Metadata alias; join if necessary
@@ -475,7 +482,7 @@ class NodesChildrenQueries(APIView):
                ))
            elif field[0] == 'ngrams': 
                query = query.filter(
-                    Node.id.in_(Node_Metadata
+                    Node.id.in_(session
                        .query(Node_Ngram.node_id)
                        .filter(Node_Ngram.ngram_id == Ngram.id)
                        .filter(operator(
@@ -549,11 +556,13 @@ class NodesChildrenQueries(APIView):


 class NodesList(APIView):
+    authentication_classes = (SessionAuthentication, BasicAuthentication)

    def get(self, request):
-        query = (Node
+        print("user id : " + str(request.user))
+        query = (session
            .query(Node.id, Node.name, NodeType.name.label('type'))
-            .filter(Node.user_id == request.session._session_cache['_auth_user_id'])
+            .filter(Node.user_id == int(request.user.id))
            .join(NodeType)
        )
        if 'type' in request.GET:
@@ -576,8 +585,11 @@ class Nodes(APIView):
        return JsonHttpResponse({
            'id': node.id,
            'name': node.name,
+            'parent_id': node.parent_id,
+            'type': cache.NodeType[node.type_id].name,
            # 'type': node.type__name,
-            'metadata': dict(node.metadata),
+            #'metadata': dict(node.metadata),
+            'metadata': node.metadata,
        })

    # deleting node by id
@@ -585,13 +597,19 @@ class Nodes(APIView):
    # it should take the subnodes into account as well,
     # for better consistency...
    def delete(self, request, node_id):
-        node = models.Node.objects.filter(id = node_id)
-        msgres = ""
+        
+        user = request.user
+        node = session.query(Node).filter(Node.id == node_id).first()
+        
+        msgres = str()
+        
        try:
-            node.delete()
-            msgres = node_id+" deleted!"
-        except:
-            msgres ="error deleting: "+node_id
+            
+            move_to_trash(node_id)
+            msgres = node_id+" moved to Trash"
+        
+        except Exception as error:
+            msgres ="error deleting : " + node_id + str(error)

        return JsonHttpResponse({
            'deleted': msgres,
@@ -605,9 +623,9 @@ class CorpusController:
            corpus_id = int(corpus_id)
        except:
            raise ValidationError('Corpora are identified by an integer.', 400)
-        corpusQuery = Node.objects.filter(id = corpus_id)
+        corpusQuery = session.query(Node).filter(Node.id == corpus_id).first()
        # print(str(corpusQuery))
-        # raise Http404("C'est toujours ça de pris.")
+        # raise Http404("404 error.")
        if not corpusQuery:
            raise Http404("No such corpus: %d" % (corpus_id, ))
        corpus = corpusQuery.first()
@@ -626,7 +644,7 @@ class CorpusController:

        # build query
        ParentNode = aliased(Node)
-        query = (Ngram
+        query = (session
            .query(Ngram.terms, func.count('*'))
            .join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
            .join(Node, Node.id == Node_Ngram.node_id)

--- a/gargantext_web/db.py
+++ b/gargantext_web/db.py
-from node import models
 from gargantext_web import settings
+from node import models
+
+
+__all__ = ['literalquery', 'session', 'cache', 'Session', 'bulk_insert', 'engine', 'get_cursor']
+
+
+# initialize sqlalchemy
+
+from sqlalchemy.orm import Session, mapper
+from sqlalchemy.ext.automap import automap_base

+from sqlalchemy import create_engine, MetaData, Table, Column, ForeignKey
+from sqlalchemy.types import Integer, String, DateTime
+from sqlalchemy.dialects.postgresql import JSON

-__all__ = ['literalquery', 'session', 'cache']
+engine = create_engine('postgresql+psycopg2://{USER}:{PASSWORD}@{HOST}/{NAME}'.format(
+    **settings.DATABASES['default']
+))
+Base = automap_base()

+Base.prepare(engine, reflect=True)
+
+# model representation
+
+def model_repr(modelname):
+    def _repr(obj):
+        result = '<' + modelname
+        isfirst = True
+        for key, value in obj.__dict__.items():
+            if key[0] != '_':
+                value = repr(value)
+                if len(value) > 64:
+                    value = value[:30] + '....' + value[-30:]
+                if isfirst:
+                    isfirst = False
+                else:
+                    result += ','
+                result += ' ' + key + '=' + value
+        result += '>'
+        return result
+    return _repr

 # map the Django models found in node.models to SQLAlchemy models

 for model_name, model in models.__dict__.items():
-    if hasattr(model, 'sa'):
-        globals()[model_name] = model.sa
-        __all__.append(model_name)
+    if hasattr(model, '_meta') :
+        table_name = model._meta.db_table
+        if hasattr(Base.classes, table_name):
+            sqla_model = getattr(Base.classes, table_name)
+            setattr(sqla_model, '__repr__', model_repr(model_name))
+            globals()[model_name] = sqla_model
+            __all__.append(model_name)
+

 NodeNgram = Node_Ngram
+NodeResource = Node_Resource


 # debugging tool, to translate SQLAlchemy queries to string
@@ -61,16 +103,17 @@ def literalquery(statement, dialect=None):

 # SQLAlchemy session management

-def get_sessionmaker():
-    from django.db import connections
-    from sqlalchemy.orm import sessionmaker
+def get_engine():
    from sqlalchemy import create_engine
-    alias = 'default'
-    connection = connections[alias]
    url = 'postgresql+psycopg2://{USER}:{PASSWORD}@{HOST}/{NAME}'.format(
        **settings.DATABASES['default']
    )
-    engine = create_engine(url, use_native_hstore=True)
+    return create_engine(url, use_native_hstore=True)
+
+engine = get_engine()
+
+def get_sessionmaker():
+    from sqlalchemy.orm import sessionmaker
    return sessionmaker(bind=engine)

 Session = get_sessionmaker()
@@ -84,7 +127,7 @@ from sqlalchemy import or_
 class ModelCache(dict):

    def __init__(self, model, preload=False):
-        self._model = model.sa
+        self._model = globals()[model.__name__]
        self._columns_names = [column.name for column in model._meta.fields if column.unique]
        self._columns = [getattr(self._model, column_name) for column_name in self._columns_names]
        self._columns_validators = []
@@ -92,20 +135,16 @@ class ModelCache(dict):
            self.preload()

    def __missing__(self, key):
-        for column in self._columns:
-            conditions = []
-            try:
-                formatted_key = column.type.python_type(key)
-                conditions.append(column == key)
-            except ValueError:
-                pass
-        if formatted_key in self:
-            self[key] = self[formatted_key]
-        else:
-            element = session.query(self._model).filter(or_(*conditions)).first()
-            if element is None:
-                raise KeyError
-            self[key] = element
+        #print(key)
+        conditions = [
+            (column == str(key))
+            for column in self._columns
+            if column.type.python_type == str or key.__class__ == column.type.python_type
+        ]
+        element = session.query(self._model).filter(or_(*conditions)).first()
+        if element is None:
+            raise KeyError
+        self[key] = element
        return element

    def preload(self):
@@ -115,7 +154,7 @@ class ModelCache(dict):
                key = getattr(element, column_name)
                self[key] = element

-class Cache:
+class Cache():

    def __getattr__(self, key):
        try:
@@ -127,3 +166,50 @@ class Cache:
        return modelcache

 cache = Cache()
+
+
+# Insert many elements at once
+
+import psycopg2
+
+def get_cursor():
+    db_settings = settings.DATABASES['default']
+    db = psycopg2.connect(**{
+        'database': db_settings['NAME'],
+        'user':     db_settings['USER'],
+        'password': db_settings['PASSWORD'],
+        'host':     db_settings['HOST'],
+    })
+    return db, db.cursor()
+
+class bulk_insert:
+
+    def __init__(self, table, keys, data, cursor=None):
+        # prepare the iterator
+        self.iter = iter(data)
+        # template
+        self.template = '%s' + (len(keys) - 1) * '\t%s' + '\n'
+        # prepare the cursor
+        if cursor is None:
+            db, cursor = get_cursor()
+            mustcommit = True
+        else:
+            mustcommit = False
+        # insert data
+        if not isinstance(table, str):
+            table = table.__table__.name
+        cursor.copy_from(self, table, columns=keys)
+        # commit if necessary
+        if mustcommit:
+            db.commit()
+
+    def read(self, size=None):
+        try:
+            return self.template % tuple(
+                str(x).replace('\r', '').replace('\n', '\\n').replace('\t', '\\t') for x in next(self.iter)
+            )
+        except StopIteration:
+            return ''
+
+    readline = read
+
--- a/gargantext_web/home.py
+++ b/gargantext_web/home.py
+
+import random
+import random_words
+from math import pi
+
+
+
+def paragraph_lorem(size_target=450):
+    '''
+    Function that returns paragraph with false latin language.
+    size_target is the number of random words that will be given.
+    '''
+    
+    lorem = random_words.LoremIpsum()
+    
+    sentences_list = lorem.get_sentences_list(sentences=5)
+    paragraph_size = 0
+    
+    while paragraph_size < size_target :
+        sentences_list.append(lorem.get_sentence())
+        paragraph = ' '.join(sentences_list)
+        paragraph_size = len(paragraph)
+    
+    return(paragraph)
+
+
+def paragraph_gargantua(size_target=500):
+    '''
+    Function that returns paragraph with chapter titles of Gargantua.
+    size_target is the number of random words that will be given.
+    '''
+    
+    paragraph = list()
+    paragraph_size = 0
+    chapter_number = 1
+    
+    while paragraph_size < size_target and chapter_number < 6:
+        chapitre = open('/srv/gargantext/static/docs/gargantua_book/gargantua_chapter_' + str(chapter_number) + '.txt', 'r')
+        paragraph.append(random.choice(chapitre.readlines()).strip())
+        chapitre.close()
+        paragraph_size = len(' '.join(paragraph))
+        chapter_number += 1
+    
+    return(' '.join(paragraph))
+
+
+def random_letter(mot, size_min=5):
+    '''
+    
+    Functions that randomize order letters of a 
+    word which size is greater that size_min.
+
+    '''
+    if len(mot) > size_min:
+        
+        size = round(len(mot) / pi)
+        
+        first_letters = mot[:size]
+        last_letters  = mot[-size:]
+        
+        others_letters = list(mot[size:-size])
+        random.shuffle(others_letters)
+        
+        mot_list = list()
+        mot_list.append(first_letters)
+        
+        for letter in others_letters:
+            mot_list.append(letter)
+            
+        mot_list.append(last_letters)
+        
+        return(''.join(mot_list))  
+        
+    else:
+        return(mot)
+
+
+tutoriel = """Il paraît que l'ordre des lettres dans un mot n'a pas d'importance. La première et la dernière lettre doivent être à la bonne place. Le reste peut être dans un désordre total et on peut toujours lire sans problème. On ne lit donc pas chaque lettre en elle-même, mais le mot comme un tout. Un changement de référentiel et nous transposons ce résultat au texte lui-même: l'ordre des mots est faiblement important comparé au contexte du texte qui, lui, est compté"""
+
+
+def paragraph_tutoreil(tutoriel=tutoriel):
+    '''
+    Functions that returns paragraph of words with words with
+    randomized letters.
+    '''
+    paragraph = ' '.join([ random_letter(mot) for mot in tutoriel.split(" ")]) \
+            + ": comptexter avec Gargantext."
+    return(paragraph)
+
--- a/gargantext_web/settings.py
+++ b/gargantext_web/settings.py
@@ -63,12 +63,11 @@ INSTALLED_APPS = (
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'django_extensions',
-    'south',
+    'django_pg',
    'cte_tree',
    'node',
    'ngram',
    'scrap_pubmed',
-    'django_hstore',
    'djcelery',
    'aldjemy',
    'rest_framework',
@@ -83,6 +82,16 @@ MIDDLEWARE_CLASSES = (
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
 )

+REST_SESSION_LOGIN = False
+REST_FRAMEWORK = {
+    'DEFAULT_AUTHENTICATION_CLASSES': (
+        'rest_framework.authentication.TokenAuthentication',
+        'rest_framework.authentication.SessionAuthentication',
+    ),
+   'DEFAULT_PERMISSION_CLASSES': (
+        'rest_framework.permissions.AllowAny',
+    ),
+}

 WSGI_APPLICATION = 'wsgi.application'


--- a/gargantext_web/urls.py
+++ b/gargantext_web/urls.py
@@ -3,7 +3,7 @@ from django.conf.urls import patterns, include, url
 from django.contrib import admin
 from django.contrib.auth.views import login

-from gargantext_web import views
+from gargantext_web import views, views_optimized

 import gargantext_web.api
 import scrap_pubmed.views as pubmedscrapper
@@ -20,22 +20,23 @@ urlpatterns = patterns('',

    url(r'^auth/$', views.login_user),
    url(r'^auth/logout/$', views.logout_user),
+    
+    # Dynamic CSS
    url(r'^img/logo.svg$', views.logo),
    url(r'^css/bootstrap.css$', views.css),
    
    # User Home view
-    url(r'^$', views.home),
+    url(r'^$', views.home_view),
    url(r'^about/', views.get_about),
    url(r'^maintenance/', views.get_maintenance),
    
    # Project Management
    url(r'^projects/$', views.projects),
-    url(r'^project/(\d+)/delete/$', views.delete_project),
-    url(r'^project/(\d+)/$', views.project),
+    url(r'^project/(\d+)/$', views_optimized.project),
+    url(r'^delete/(\d+)$', views.delete_node), # => api.node('id' = id, children = 'True', copies = False)
    
    # Corpus management
    url(r'^project/(\d+)/corpus/(\d+)/$', views.corpus),
-    url(r'^project/(\d+)/corpus/(\d+)/delete/$', views.delete_corpus),
    url(r'^project/(\d+)/corpus/(\d+)/corpus.csv$', views.corpus_csv),
    url(r'^project/(\d+)/corpus/(tests_mvc_listdocuments+)/corpus.tests_mvc_listdocuments$', views.corpus_csv),
    
@@ -47,16 +48,19 @@ urlpatterns = patterns('',
    url(r'^project/(\d+)/corpus/(\d+)/matrix$', views.matrix),
    
    # Data management
-    url(r'^chart/corpus/(\d+)/data.csv$', views.send_csv),
-    url(r'^corpus/(\d+)/node_link.json$', views.node_link),
-    url(r'^corpus/(\d+)/adjacency.json$', views.adjacency),
-    url(r'^api/tfidf/(\d+)/(\w+)$', views.tfidf),
+    url(r'^chart/corpus/(\d+)/data.csv$', views.send_csv),  # => api.node.children('type' : 'data', 'format' : 'csv')
+    url(r'^corpus/(\d+)/node_link.json$', views.node_link), # => api.analysis('type': 'node_link', 'format' : 'json')
+    url(r'^corpus/(\d+)/adjacency.json$', views.adjacency), # => api.analysis('type': 'adjacency', 'format' : 'json')
+    
+    url(r'^api/tfidf/(\d+)/(\w+)$', views_optimized.tfidf),
+    # url(r'^api/tfidf/(\d+)/(\w+)$', views.tfidf),
+    url(r'^api/tfidf2/(\d+)/(\w+)$', views.tfidf2),

    # Data management
-    url(r'^api$', gargantext_web.api.Root),
+    #url(r'^api$', gargantext_web.api.Root), # = ?
    url(r'^api/nodes$', gargantext_web.api.NodesList.as_view()),
    url(r'^api/nodes/(\d+)$', gargantext_web.api.Nodes.as_view()),
-    url(r'^api/nodes/(\d+)/children/ngrams$', gargantext_web.api.NodesChildrenNgrams.as_view()),
+    url(r'^api/nodes/(\d+)/children/ngrams$', gargantext_web.api.NodesChildrenNgrams.as_view()),  # => repeated children ?
    url(r'^api/nodes/(\d+)/children/metadata$', gargantext_web.api.NodesChildrenMetatadata.as_view()),
    url(r'^api/nodes/(\d+)/children/queries$', gargantext_web.api.NodesChildrenQueries.as_view()),
    url(r'^api/nodes/(\d+)/children/duplicates$', gargantext_web.api.NodesChildrenDuplicates.as_view()),
@@ -66,12 +70,13 @@ urlpatterns = patterns('',

    url(r'^api/nodes/(\d+)/ngrams$', gargantext_web.api.CorpusController.ngrams),

-    url(r'^ngrams$', views.ngrams),
-    url(r'^nodeinfo/(\d+)$', views.nodeinfo),
+    # Provisory tests
+    url(r'^ngrams$', views.ngrams),  # to be removed 
+    url(r'^nodeinfo/(\d+)$', views.nodeinfo), # to be removed ?
    url(r'^tests/mvc$', views.tests_mvc),
    url(r'^tests/mvc-listdocuments$', views.tests_mvc_listdocuments),

-    url(r'^tests/istextquery$', pubmedscrapper.getGlobalStatsISTEXT),
+    url(r'^tests/istextquery$', pubmedscrapper.getGlobalStatsISTEXT), # api/query?type=istext ?
    url(r'^tests/pubmedquery$', pubmedscrapper.getGlobalStats),
    url(r'^tests/project/(\d+)/pubmedquery/go$', pubmedscrapper.doTheQuery),
    url(r'^tests/project/(\d+)/ISTEXquery/go$', pubmedscrapper.testISTEX)
@@ -90,3 +95,15 @@ if settings.DEBUG:
        }),
 )

+# Maintenance mode: when settings.MAINTENANCE is set, urlpatterns is replaced
+# by a reduced table.
+if settings.MAINTENANCE:
+    urlpatterns = patterns('',
+    # logo and stylesheet views stay reachable
+    url(r'^img/logo.svg$', views.logo),
+    url(r'^css/bootstrap.css$', views.css),
+    
+    # home and about pages stay reachable
+    url(r'^$', views.home_view),
+    url(r'^about/', views.get_about),
+    
+    # catch-all: every other URL is routed to the maintenance view
+    url(r'^.*', views.get_maintenance),
+    )
+
+
--- a/gargantext_web/views.py
+++ b/gargantext_web/views.py
--- a/gargantext_web/views_optimized.py
+++ b/gargantext_web/views_optimized.py
+from django.shortcuts import redirect
+from django.shortcuts import render
+from django.http import Http404, HttpResponse, HttpResponseRedirect, HttpResponseForbidden
+
+from sqlalchemy import func, and_, or_
+from sqlalchemy.orm import aliased
+
+from collections import defaultdict
+from datetime import datetime
+from threading import Thread
+
+from node.admin import CustomForm
+from gargantext_web.db import *
+from gargantext_web.settings import DEBUG, MEDIA_ROOT
+from gargantext_web.api import JsonHttpResponse
+import json
+
+from parsing.corpustools import add_resource, parse_resources, extract_ngrams, compute_tfidf
+
+
+def project(request, project_id):
+
+    # SQLAlchemy session
+    session = Session()
+
+    # do we have a valid project id?
+    try:
+        project_id = int(project_id)
+    except ValueError:
+        raise Http404()
+    
+    # do we have a valid project?
+    project = (session
+        .query(Node)
+        .filter(Node.id == project_id)
+        .filter(Node.type_id == cache.NodeType['Project'].id)
+    ).first()
+    if project is None:
+        raise Http404()
+
+    # do we have a valid user?
+    user = request.user
+    if not user.is_authenticated():
+        return redirect('/login/?next=%s' % request.path)
+    if project.user_id != user.id:
+        return HttpResponseForbidden()
+
+    # Let's find out about the children nodes of the project
+    ChildrenNode = aliased(Node)
+    # This query is giving you the wrong number of docs from the pubmedquerier (x 5)
+    #  ... sqlalchemy.func by Resource.type_id is the guilty
+    # ISSUE L51
+    corpus_query = (session
+        .query(Node.id, Node.name, func.count(ChildrenNode.id))
+        #.query(Node.id, Node.name, Resource.type_id, func.count(ChildrenNode.id))
+        #.join(Node_Resource, Node_Resource.node_id == Node.id)
+        #.join(Resource, Resource.id == Node_Resource.resource_id)
+        .filter(Node.parent_id == project.id)
+        .filter(Node.type_id == cache.NodeType['Corpus'].id)
+        .filter(and_(ChildrenNode.parent_id  == Node.id, ChildrenNode.type_id  == cache.NodeType['Document'].id))
+        .group_by(Node.id, Node.name)
+        .order_by(Node.name)
+        .all()
+    )
+    corpora_by_resourcetype = defaultdict(list)
+    documents_count_by_resourcetype = defaultdict(int)
+    corpora_count = 0
+    corpusID_dict = {}
+    for corpus_id, corpus_name, document_count in corpus_query:
+        
+        # Not optimized GOTO ISSUE L51
+        resource_type_id = (session.query(Resource.type_id)
+                                   .join(Node_Resource, Node_Resource.resource_id == Resource.id)
+                                   .join(Node, Node.id == Node_Resource.node_id )
+                                   .filter(Node.id==corpus_id)
+                                   .first())[0]
+        
+        if not corpus_id in corpusID_dict:
+            if resource_type_id is None:
+                resourcetype_name = '(no resource)'
+            else:
+                resourcetype = cache.ResourceType[resource_type_id]
+                resourcetype_name = resourcetype.name
+            corpora_by_resourcetype[resourcetype_name].append({
+                'id': corpus_id,
+                'name': corpus_name,
+                'count': document_count,
+            })
+            documents_count_by_resourcetype[resourcetype_name] += document_count
+            corpora_count += 1
+            corpusID_dict[corpus_id]=True
+
+    # do the donut
+    total_documents_count = sum(documents_count_by_resourcetype.values())
+    donut = [
+        {   'source': key, 
+            'count': value,
+            'part' : round(value * 100 / total_documents_count) if total_documents_count else 0,
+        }
+        for key, value in documents_count_by_resourcetype.items()
+    ]
+
+    # deal with the form
+    if request.method == 'POST':
+        # form validation
+        form = CustomForm(request.POST, request.FILES)
+        if form.is_valid():
+            
+            # extract information from the form
+            name = form.cleaned_data['name']
+            thefile = form.cleaned_data['file']
+            resourcetype = cache.ResourceType[form.cleaned_data['type']]
+            
+            # which default language shall be used?
+            if resourcetype.name == "europress_french":
+                language_id = cache.Language['fr'].id
+            elif resourcetype.name == "europress_english":
+                language_id = cache.Language['en'].id
+            else:
+                language_id = None
+            
+            # corpus node instanciation as a Django model
+            corpus = Node(
+                name = name,
+                user_id = request.user.id,
+                parent_id = project_id,
+                type_id = cache.NodeType['Corpus'].id,
+                language_id = language_id,
+            )
+            session.add(corpus)
+            session.commit()
+            # save the uploaded file
+            filepath = '%s/corpora/%s/%s' % (MEDIA_ROOT, request.user.username, thefile._name)
+            f = open(filepath, 'wb')
+            f.write(thefile.read())
+            f.close()
+            # add the uploaded resource to the corpus
+            add_resource(corpus,
+                user_id = request.user.id,
+                type_id = resourcetype.id,
+                file = filepath,
+            )
+            # let's start the workflow
+            try:
+                def apply_workflow(corpus):
+                    parse_resources(corpus)
+                    extract_ngrams(corpus, ['title'])
+                    compute_tfidf(corpus)
+                if DEBUG:
+                    apply_workflow(corpus)
+                else:
+                    thread = Thread(target=apply_workflow, args=(corpus, ), daemon=True)
+                    thread.start()
+            except Exception as error:
+                print('WORKFLOW ERROR')
+                print(error)
+            # redirect to the main project page
+            return HttpResponseRedirect('/project/' + str(project_id))
+        else:
+            print('ERROR: BAD FORM')
+    else:
+        form = CustomForm()
+
+    # HTML output
+    return render(request, 'project.html', {
+        'form'          : form,
+        'user'          : user,
+        'date'          : datetime.now(),
+        'project'       : project,
+        'donut'         : donut,
+        'list_corpora'  : dict(corpora_by_resourcetype),
+        'whitelists'    : '',
+        'blacklists'    : '',
+        'cooclists'     : '',
+        'number'        : corpora_count,
+    })
+
+def tfidf(request, corpus_id, ngram_ids):
+    """Takes IDs of corpus and ngram and returns list of relevent documents in json format
+    according to TFIDF score (order is decreasing).
+    """
+    limit=6
+    nodes_list = []
+    # filter input
+    ngram_ids = ngram_ids.split('a')
+    ngram_ids = [int(i) for i in ngram_ids]
+    # request data
+    nodes_query = (session
+        .query(Node, func.sum(NodeNodeNgram.score))
+        .join(NodeNodeNgram, NodeNodeNgram.nodey_id == Node.id)
+        .filter(NodeNodeNgram.nodex_id == corpus_id)
+        .filter(NodeNodeNgram.ngram_id.in_(ngram_ids))
+        .group_by(Node)
+        .order_by(func.sum(NodeNodeNgram.score).desc())
+        .limit(limit)
+    )
+    # convert query result to a list of dicts
+    for node, score in nodes_query:
+        node_dict = {
+            'id': node.id,
+            'score': score,
+        }
+        for key in ('title', 'publication_date', 'journal', 'authors', 'fields'):
+            if key in node.metadata:
+                node_dict[key] = node.metadata[key]
+        nodes_list.append(node_dict)
+
+    data = json.dumps(nodes_list) 
+    return JsonHttpResponse(data)
--- a/init/graph.pdf
+++ b/init/graph.pdf
--- a/init/hstore2jsonb.sql
+++ b/init/hstore2jsonb.sql
+
+
+-- Make node_node.date default to the current date.
+ALTER TABLE ONLY node_node ALTER COLUMN date SET DEFAULT CURRENT_DATE ;
+
+-- Lift the NOT NULL constraint and the default of node_node.metadata before
+-- changing its type (presumably because the existing default cannot be cast
+-- to the new type automatically -- TODO confirm).
+ALTER TABLE ONLY node_node ALTER COLUMN metadata DROP NOT NULL ;
+
+ALTER TABLE ONLY node_node ALTER COLUMN metadata DROP DEFAULT ;
+
+-- Convert the column to JSONB, passing the existing values through hstore_to_json().
+ALTER TABLE ONLY node_node ALTER COLUMN metadata TYPE JSONB USING hstore_to_json(metadata)::jsonb ;
+
+-- Put a default (empty JSON object) and the NOT NULL constraint back.
+ALTER TABLE ONLY node_node ALTER COLUMN metadata SET DEFAULT '{}'::jsonb ;
+
+ALTER TABLE ONLY node_node ALTER COLUMN metadata SET NOT NULL ;
+
--- a/init/init.py
+++ b/init/init.py
@@ -7,7 +7,7 @@
 #NodeType.objects.all().delete()


-from node.models import Node, NodeType, Project, Corpus, Document, Ngram, Node_Ngram, User, Language, ResourceType
+from node.models import *


 import pycountry
@@ -31,14 +31,8 @@ except:
    me = User(username='pksm3')
    me.save()

-
-try:
-    typeProject = NodeType.objects.get(name='Root')
-except Exception as error:
-    print(error)
-    typeProject = NodeType(name='Root')
-    typeProject.save()  
-
+for node_type in ['Trash', 'Root', ]:
+    NodeType.objects.get_or_create(name=node_type)

 try:
    typeProject = NodeType.objects.get(name='Project')
@@ -141,13 +135,7 @@ except Exception as error:
 #Node.objects.all().delete()


-# In[9]:

-try:
-    project = Node.objects.get(name='Bees project')
-except:
-    project = Node(name='Bees project', type=typeProject, user=me)
-    project.save()

 try:
    stem = Node.objects.get(name='Stem')
@@ -158,3 +146,17 @@ except:



+
+from gargantext_web.db import *
+
+# Populate the table behind the Tag model from the part-of-speech label file:
+# one "NAME<TAB>description" entry per line.
+with open("part_of_speech_labels.txt", 'r') as f:
+    for line in f:
+        line = line.strip()
+        # tolerate blank lines (e.g. an empty last line)
+        if not line:
+            continue
+        # split on the first tab only, so a description may itself contain tabs
+        name, description = line.split('\t', 1)
+        session.add(Tag(name=name, description=description))
+# a single commit once every tag has been added
+session.commit()
+
+
--- a/init/init.sh
+++ b/init/init.sh
@@ -4,6 +4,11 @@ psql -d gargandb -f init.sql

 sleep 2
 ../manage.py syncdb
+psql -d gargandb -f init2.sql
+

 sleep 2
-../manage.py shell < init.py
+#../manage.py shell < init.py
+../manage.py shell < init_gargantext.py
+
+#psql -d gargandb -f hstore2jsonb.sql
--- a/init/init_gargantext.py
+++ b/init/init_gargantext.py
+# Without this, we couldn't use the Django environment
+
+import os
+os.environ.setdefault("DJANGO_SETTINGS_MODULE", "gargantext_web.settings")
+os.environ.setdefault("DJANGO_HSTORE_GLOBAL_REGISTER", "False")
+
+# We're gonna use all the models!
+
+# Django models
+from node import models
+
+# SQLA models
+from gargantext_web.db import *
+
+# Reset: all data
+#
+#tables_to_empty = [
+#    Node,
+#    Node_Metadata,
+#    Metadata,
+#    NodeType,
+#    ResourceType,
+#    Resource,
+#]
+#for table in tables_to_empty:
+#    print('Empty table "%s"...' % (table._meta.db_table, ))
+#    table.objects.all().delete()
+
+
+# Integration: metadata types
+
+# One Metadata row is saved per entry below: key = metadata name,
+# value = the name of its type.
+print('Initialize metadata...')
+metadata = {
+    'publication_date': 'datetime',
+    'authors': 'string',
+    'language_fullname': 'string',
+    'abstract': 'text',
+    'title': 'string',
+    'source': 'string',
+    'volume': 'string',
+    'text': 'text',
+    'page': 'string',
+    'doi': 'string',
+    'journal': 'string',
+}
+# The loop variable is deliberately not called `type`: at module level that
+# would rebind the builtin for the rest of the script.
+for name, metadata_type in metadata.items():
+    models.Metadata(name=name, type=metadata_type).save()
+
+
+# Integration: languages
+
+# The Language table is emptied, then refilled from pycountry. Only languages
+# that carry an `alpha2` code are stored; 'en' and 'fr' are flagged as
+# implemented.
+#
+# The Django model is referenced explicitly as `models.Language`, like every
+# other Django ORM call in this script, instead of relying on whatever
+# `from gargantext_web.db import *` binds to the bare name `Language`.
+print('Initialize languages...')
+import pycountry
+models.Language.objects.all().delete()
+for language in pycountry.languages:
+    if 'alpha2' in language.__dict__:
+        models.Language(
+            iso2 = language.alpha2,
+            iso3 = language.bibliographic,
+            fullname = language.name,
+            implemented = 1 if language.alpha2 in ['en', 'fr'] else 0,
+        ).save()
+
+english = models.Language.objects.get(iso2='en')
+french  = models.Language.objects.get(iso2='fr')
+
+
+# Integration: node types
+#
+# Created first: the users/nodes section below looks the 'Root', 'Stem' and
+# 'Lem' types up through `cache.NodeType`, so they must already exist.
+
+print('Initialize node types...')
+
+node_types = [
+        'Root', 'Trash',
+        'Project', 'Corpus', 'Document',
+        'Stem', 'Lem', 'Tfidf',
+        'Synonym',
+        'MiamList', 'StopList',
+        'Cooccurrence', 'WhiteList', 'BlackList'
+        ]
+
+for node_type in node_types:
+    models.NodeType.objects.get_or_create(name=node_type)
+
+
+# Integration: users
+
+print('Initialize users...')
+# get_or_create() returns an (object, created) pair; unpack it so that `me`
+# and `gargantua` are the user objects themselves.
+me, _created = models.User.objects.get_or_create(username='alexandre')
+gargantua, _created = models.User.objects.get_or_create(username='gargantua')
+
+# The root node is flushed before its children are built, so that its primary
+# key is available for their parent_id.
+node_root = Node(user_id=gargantua.id, type_id=cache.NodeType['Root'].id, name='Root')
+session.add(node_root)
+session.flush()
+
+node_stem = Node(user_id=gargantua.id, type_id=cache.NodeType['Stem'].id, name='Stem', parent_id=node_root.id)
+node_lem = Node(user_id=gargantua.id, type_id=cache.NodeType['Lem'].id, name='Lem', parent_id=node_root.id)
+
+session.add(node_stem)
+session.add(node_lem)
+session.commit()
+
+
+# Integration: resource types
+
+print('Initialize resource...')
+
+resources = [
+        'pubmed', 'isi', 'ris', 'europress_french', 'europress_english']
+
+for resource in resources:
+    models.ResourceType.objects.get_or_create(name=resource)
+
+
+
+# TODO 
+# here some tests
+# add a new project and some corpora to test it
+
+
+# Integration: project
+#
+#print('Initialize project...')
+#try:
+#    project = Node.objects.get(name='Bees project')
+#except:
+#    project = Node(name='Bees project', type=typeProject, user=me)
+#    project.save()
+#
+
+# Integration: corpus
+
+#print('Initialize corpus...')
+#try:
+#    corpus_pubmed = Node.objects.get(name='PubMed corpus')
+#except:
+#    corpus_pubmed = Node(parent=project, name='PubMed corpus', type=typeCorpus, user=me)
+#    corpus_pubmed.save()
+#
+#print('Initialize resource...')
+#corpus_pubmed.add_resource(
+#    # file='./data_samples/pubmed.zip',
+#    #file='./data_samples/pubmed_2013-04-01_HoneyBeesBeeBees.xml',
+#    file='/srv/gargantext_lib/data_samples/pubmed.xml',
+#    type=typePubmed,
+#    user=me
+#)
+#
+#for resource in corpus_pubmed.get_resources():
+#    print('Resource #%d - %s - %s' % (resource.id, resource.digest, resource.file))
+#    
+## print('Parse corpus #%d...' % (corpus_pubmed.id, ))
+# corpus_pubmed.parse_resources(verbose=True)
+# print('Extract corpus #%d...' % (corpus_pubmed.id, ))
+# corpus_pubmed.children.all().extract_ngrams(['title',])
+# print('Parsed corpus #%d.' % (corpus_pubmed.id, ))
+
+
+
+
+
+# Populate the table behind the Tag model from the part-of-speech label file:
+# one "NAME<TAB>description" entry per line.
+with open("part_of_speech_labels.txt", 'r') as f:
+    for line in f:
+        line = line.strip()
+        # tolerate blank lines (e.g. an empty last line)
+        if not line:
+            continue
+        # split on the first tab only, so a description may itself contain tabs
+        name, description = line.split('\t', 1)
+        session.add(Tag(name=name, description=description))
+# a single commit once every tag has been added
+session.commit()
+
+
+exit()
--- a/init/part_of_speech_labels.txt
+++ b/init/part_of_speech_labels.txt
+CC	Coordinating conjunction
+CD	Cardinal number
+DT	Determiner
+EX	Existential there
+FW	Foreign word
+IN	Preposition or subordinating conjunction
+JJ	Adjective
+JJR	Adjective, comparative
+JJS	Adjective, superlative
+LS	List item marker
+MD	Modal
+NN	Noun, singular or mass
+NNS	Noun, plural
+NNP	Proper noun, singular
+NNPS	Proper noun, plural
+PDT	Predeterminer
+POS	Possessive ending
+PRP	Personal pronoun
+PRP$	Possessive pronoun
+RB	Adverb
+RBR	Adverb, comparative
+RBS	Adverb, superlative
+RP	Particle
+SYM	Symbol
+TO	to
+UH	Interjection
+VB	Verb, base form
+VBD	Verb, past tense
+VBG	Verb, gerund or present participle
+VBN	Verb, past participle
+VBP	Verb, non3rd person singular present
+VBZ	Verb, 3rd person singular present
+WDT	Whdeterminer
+WP	Whpronoun
+WP$	Possessive whpronoun
+WRB	Whadverb
+NGRA	Ngram
--- a/init/requirements.txt
+++ b/init/requirements.txt
 Cython==0.20.2
-Django==1.6.6
+Django==1.6.11
 Jinja2==2.7.3
 MarkupSafe==0.23
 Pillow==2.5.3
 Pygments==1.6
+RandomWords==0.1.12
 SQLAlchemy==0.9.8
 South==1.0
 aldjemy==0.3.10
 amqp==1.4.6
 anyjson==0.3.3
+bibtexparser==0.6.0
 billiard==3.3.0.18
 celery==3.1.15
 certifi==14.05.14
@@ -23,17 +25,22 @@ django-cte-trees==0.9.2
 django-extensions==1.4.0
 django-grappelli==2.5.3
 django-hstore==1.3.1
+django-maintenance==0.1
 django-mptt==0.6.1
 django-nested-inlines==0.1
+django-pgfields==1.4.4
+django-pgjson==0.2.2
+django-pgjsonb==0.0.10
 django-treebeard==2.0
 djangorestframework==3.0.0
+gensim==0.10.3
 graphviz==0.4
 ipython==2.2.0
 kombu==3.0.23
-lxml==3.3.6
-matplotlib==1.4.0
+lxml==3.4.1
+#matplotlib==1.4.0
 networkx==1.9
-nltk==3.0a4
+#nltk==3.0a4
 nose==1.3.4
 numpy==1.8.2
 pandas==0.14.1
@@ -44,13 +51,16 @@ pycparser==2.10
 pydot2==1.0.33
 pyparsing==2.0.2
 python-dateutil==2.2
+python-igraph==0.7
 pytz==2014.7
 pyzmq==14.3.1
 readline==6.2.4.1
 redis==2.10.3
 scikit-learn==0.15.1
 scipy==0.14.0
+simplerandom==0.12.1
 six==1.7.3
 sympy==0.7.5
 tornado==4.0.1
 uWSGI==2.0.7
+ujson==1.33
--- a/init_cooc.py
+++ b/init_cooc.py
-# Without this, we couldn't use the Django environment
-
-import os
-os.environ.setdefault("DJANGO_SETTINGS_MODULE", "gargantext_web.settings")
-os.environ.setdefault("DJANGO_HSTORE_GLOBAL_REGISTER", "False")
-
-
-# We're gonna use all the models!
-
-from node.models import User, NodeType, Node
-
-
-user = User.objects.get(username = 'contro2015.lait')
-
-# Reset: all data
-
-try:
-    typeDoc     = NodeType.objects.get(name='Cooccurrence')
-except Exception as error:
-    print(error)
-
-Node.objects.filter(user=user, type=typeDoc).all().delete()
-
-exit()
--- a/init_gargantext.py
+++ b/init_gargantext.py
-# Without this, we couldn't use the Django environment
-
-import os
-os.environ.setdefault("DJANGO_SETTINGS_MODULE", "gargantext_web.settings")
-os.environ.setdefault("DJANGO_HSTORE_GLOBAL_REGISTER", "False")
-
-
-# We're gonna use all the models!
-
-from node.models import *
-
-
-# Reset: all data
-
-tables_to_empty = [
-    Node,
-    Node_Metadata,
-    Metadata,
-    NodeType,
-    ResourceType,
-    Resource,
-]
-for table in tables_to_empty:
-    print('Empty table "%s"...' % (table._meta.db_table, ))
-    table.objects.all().delete()
-
-
-# Integration: metadata types
-
-print('Initialize metadata...')
-metadata = {
-    'publication_date': 'datetime',
-    'authors': 'string',
-    'language_fullname': 'string',
-    'abstract': 'text',
-    'title': 'string',
-    'source': 'string',
-    'volume': 'string',
-    'text': 'text',
-    'page': 'string',
-    'doi': 'string',
-    'journal': 'string',
-}
-for name, type in metadata.items():
-    Metadata(name=name, type=type).save()
-
-
-# Integration: languages
-
-print('Initialize languages...')
-import pycountry
-Language.objects.all().delete()
-for language in pycountry.languages:
-    if 'alpha2' in language.__dict__:
-        Language(
-            iso2 = language.alpha2,
-            iso3 = language.bibliographic,
-            fullname = language.name,
-            implemented = 1 if language.alpha2 in ['en', 'fr'] else 0,
-        ).save()
-
-english = Language.objects.get(iso2='en')
-french  = Language.objects.get(iso2='fr')
-
-
-# Integration: users
-
-print('Initialize users...')
-try:
-    me = User.objects.get(username='alexandre')
-except:
-    me = User(username='alexandre')
-    me.save()
-
-
-# Integration: node types
-
-print('Initialize node types...')
-
-try:
-    typeProject = NodeType.objects.get(name='Root')
-except Exception as error:
-    print(error)
-    typeProject = NodeType(name='Root')
-    typeProject.save()  
-
-try:
-    typeProject = NodeType.objects.get(name='Project')
-except Exception as error:
-    print(error)
-    typeProject = NodeType(name='Project')
-    typeProject.save()  
-
-try:
-    typeCorpus  = NodeType.objects.get(name='Corpus')
-except Exception as error:
-    print(error)
-    typeCorpus  = NodeType(name='Corpus')
-    typeCorpus.save()
-    
-try:
-    typeDoc     = NodeType.objects.get(name='Document')
-except Exception as error:
-    print(error)
-    typeDoc     = NodeType(name='Document')
-    typeDoc.save()
-
-try:
-    typeStem     = NodeType.objects.get(name='Stem')
-except Exception as error:
-    print(error)
-    typeStem     = NodeType(name='Stem')
-    typeStem.save()
-
-try:
-    typeTfidf     = NodeType.objects.get(name='Tfidf')
-except Exception as error:
-    print(error)
-    typeTfidf     = NodeType(name='Tfidf')
-    typeTfidf.save()
-
-try:
-    typeDoc     = NodeType.objects.get(name='WhiteList')
-except Exception as error:
-    print(error)
-    typeDoc     = NodeType(name='WhiteList')
-    typeDoc.save()
-
-try:
-    typeDoc     = NodeType.objects.get(name='BlackList')
-except Exception as error:
-    print(error)
-    typeDoc     = NodeType(name='BlackList')
-    typeDoc.save()
-
-try:
-    typeDoc     = NodeType.objects.get(name='Synonyme')
-except Exception as error:
-    print(error)
-    typeDoc     = NodeType(name='Synonyme')
-    typeDoc.save()
-
-try:
-    typeDoc     = NodeType.objects.get(name='Cooccurrence')
-except Exception as error:
-    print(error)
-    typeDoc     = NodeType(name='Cooccurrence')
-    typeDoc.save()
-
-
-
-# Integration: resource types
-
-print('Initialize resource...')
-try:
-    typePubmed      = ResourceType.objects.get(name='pubmed')
-    typeIsi         = ResourceType.objects.get(name='isi')
-    typeRis         = ResourceType.objects.get(name='ris')
-    typePresseFr    = ResourceType.objects.get(name='europress_french')
-    typePresseEn    = ResourceType.objects.get(name='europress_english')
-
-except Exception as error:
-    print(error)
-    
-    typePubmed = ResourceType(name='pubmed')
-    typePubmed.save()  
-    
-    typeIsi    = ResourceType(name='isi')
-    typeIsi.save()
-    
-    typeRis    = ResourceType(name='ris')
-    typeRis.save()
-    
-    typePresseFr = ResourceType(name='europress_french')
-    typePresseFr.save()
-    
-    typePresseEn = ResourceType(name='europress_english')
-    typePresseEn.save()
-
-# Integration Node Stem
-try:
-    stem = Node.objects.get(name='Stem')
-except:
-    stem = Node(name='Stem', type=typeStem, user=me)
-    stem.save()
-
-
-
-# Integration: project
-
-print('Initialize project...')
-try:
-    project = Node.objects.get(name='Bees project')
-except:
-    project = Node(name='Bees project', type=typeProject, user=me)
-    project.save()
-
-
-# Integration: corpus
-
-print('Initialize corpus...')
-try:
-    corpus_pubmed = Node.objects.get(name='PubMed corpus')
-except:
-    corpus_pubmed = Node(parent=project, name='PubMed corpus', type=typeCorpus, user=me)
-    corpus_pubmed.save()
-
-print('Initialize resource...')
-corpus_pubmed.add_resource(
-    # file='./data_samples/pubmed.zip',
-    #file='./data_samples/pubmed_2013-04-01_HoneyBeesBeeBees.xml',
-    file='/srv/gargantext_lib/data_samples/pubmed.xml',
-    type=typePubmed,
-    user=me
-)
-
-for resource in corpus_pubmed.get_resources():
-    print('Resource #%d - %s - %s' % (resource.id, resource.digest, resource.file))
-    
-# print('Parse corpus #%d...' % (corpus_pubmed.id, ))
-# corpus_pubmed.parse_resources(verbose=True)
-# print('Extract corpus #%d...' % (corpus_pubmed.id, ))
-# corpus_pubmed.children.all().extract_ngrams(['title',])
-# print('Parsed corpus #%d.' % (corpus_pubmed.id, ))
-
-exit()
--- a/node/admin.py
+++ b/node/admin.py
@@ -117,23 +117,16 @@ class CustomForm(forms.Form):
    """
    def clean_file(self):
        file_ = self.cleaned_data.get('file')
-        from datetime import datetime
-        file_.name = str(datetime.now().microsecond)
        # #Filename length
        # if len(file_.name)>30:
        #     from datetime import datetime
        #     file_.name = str(datetime.now().microsecond)
        #     # raise forms.ValidationError(_('Come on dude, name too long. Now is:'+file_.name))
-        # #File size
-        # if len(file_)>104857600:
-        #     raise forms.ValidationError(_('File to heavy! (<100MB).'))
-        ## File type:
-        # if file_.content_type == "application/zip":
-        #     raise forms.ValidationError(_('We need a zip pls.'))
+        #File size
+        if len(file_)>1024 ** 3:
+            raise forms.ValidationError(_('File too heavy! (>1GB).'))
        return file_

-
-
 class CorpusForm(ModelForm):
    #parent = ModelChoiceField(EmptyQuerySet)
    def __init__(self, *args, **kwargs):
@@ -155,14 +148,14 @@ class CorpusAdmin(NodeAdmin):

 ######################################################################

-class DocumentForm(ModelForm):
-    parent = ModelChoiceField(Node.objects.filter(user_id=1, type_id=3))
-
-class DocumentAdmin(NodeAdmin):
-    _parent_nodetype_name = 'Corpus'
-    _nodetype_name = 'Document'
-    form = DocumentForm
+#class DocumentForm(ModelForm):
+#    parent = ModelChoiceField(Node.objects.filter(user_id=1, type_id=3))

+#class DocumentAdmin(NodeAdmin):
+#    _parent_nodetype_name = 'Corpus'
+#    _nodetype_name = 'Document'
+#    form = DocumentForm
+#
 class LanguageAdmin(admin.ModelAdmin):
    
    def get_queryset(self, request):
@@ -178,7 +171,7 @@ admin.site.register(Language, LanguageAdmin)
 admin.site.register(NodeType)
 admin.site.register(Project, ProjectAdmin)
 admin.site.register(Corpus, CorpusAdmin)
-admin.site.register(Document, DocumentAdmin)
+admin.site.register(Document)#, DocumentAdmin)


 admin.site.register(Node_Resource)

--- a/node/models.py
+++ b/node/models.py
--- a/parsing/FileParsers/EuropressFileParser.py
+++ b/parsing/FileParsers/EuropressFileParser.py
@@ -41,18 +41,47 @@ class EuropressFileParser(FileParser):
            html = etree.fromstring(contents, html_parser)
            
            try:
+                
+                format_europresse = 50
                html_articles = html.xpath('/html/body/table/tbody')
+
                if len(html_articles) < 1:
                    html_articles = html.xpath('/html/body/table')
+                    
+                    if len(html_articles) < 1:
+                        format_europresse = 1
+                        html_articles = html.xpath('//div[@id="docContain"]')
            except Exception as error:
                print(error)
            
+            if format_europresse == 50:
+                name_xpath = "./tr/td/span[@class = 'DocPublicationName']"
+                header_xpath = "//span[@class = 'DocHeader']"
+                title_xpath = "string(./tr/td/span[@class = 'TitreArticleVisu'])"
+                text_xpath  = "./tr/td/descendant-or-self::*[not(self::span[@class='DocHeader'])]/text()"
+            elif format_europresse == 1:
+                name_xpath = "//span[@class = 'DocPublicationName']"
+                header_xpath = "//span[@class = 'DocHeader']"
+                title_xpath = "string(//div[@class = 'titreArticleVisu'])"
+                text_xpath  = "./descendant::*[\
+                        not(\
+                           self::div[@class='Doc-SourceText'] \
+                        or self::span[@class='DocHeader'] \
+                        or self::span[@class='DocPublicationName'] \
+                        or self::span[@id='docNameVisu'] \
+                        or self::span[@class='DocHeader'] \
+                        or self::div[@class='titreArticleVisu'] \
+                        or self::span[@id='docNameContType'] \
+                        or descendant-or-self::span[@id='ucPubliC_lblCertificatIssuedTo'] \
+                        or descendant-or-self::span[@id='ucPubliC_lblEndDate'] \
+                        or self::td[@class='txtCertificat'] \
+                        )]/text()"
+                doi_xpath  = "//span[@id='ucPubliC_lblNodoc']/text()"
+                

-        except:
-            return []
+        except Exception as error:
+            print(error)

-        # initialize the list of metadata
-        metadata_list = []
        # parse all the articles, one by one
        try:
            for html_article in html_articles:
@@ -60,19 +89,20 @@ class EuropressFileParser(FileParser):
                metadata = {}
                
                if len(html_article):
-                    for name in html_article.xpath("./tr/td/span[@class = 'DocPublicationName']"):
+                    for name in html_article.xpath(name_xpath):
                        if name.text is not None:
                            format_journal = re.compile('(.*), (.*)', re.UNICODE)
                            test_journal = format_journal.match(name.text)
                            if test_journal is not None:
-                                metadata['source'] = test_journal.group(1)
+                                metadata['journal'] = test_journal.group(1)
                                metadata['volume'] = test_journal.group(2)
                            else:
-                                metadata['source'] = name.text.encode(codif)
+                                metadata['journal'] = name.text.encode(codif)

-                    for header in html_article.xpath("./tr/td/span[@class = 'DocHeader']"):
+                    for header in html_article.xpath(header_xpath):
                        try:
                            text = header.text
+                            #print("header", text)
                        except Exception as error:
                            print(error)

@@ -138,8 +168,8 @@ class EuropressFileParser(FileParser):
                        if test_page is not None:
                            metadata['page'] = test_page.group(1).encode(codif)

-                    metadata['title'] = html_article.xpath("string(./tr/td/span[@class = 'TitreArticleVisu'])").encode(codif)
-                    metadata['text']  = html_article.xpath("./tr/td/descendant-or-self::*[not(self::span[@class='DocHeader'])]/text()")
+                    metadata['title'] = html_article.xpath(title_xpath).encode(codif)
+                    metadata['abstract']  = html_article.xpath(text_xpath)
                   
                    line = 0
                    br_tag = 10
@@ -185,32 +215,36 @@ class EuropressFileParser(FileParser):
                    metadata['publication_year']  = metadata['publication_date'].strftime('%Y')
                    metadata['publication_month'] = metadata['publication_date'].strftime('%m')
                    metadata['publication_day']  = metadata['publication_date'].strftime('%d')
-                    metadata['publication_date'] = ""
+                    metadata.pop('publication_date')
+                    
+                    if len(metadata['abstract'])>0 and format_europresse == 50: 
+                        metadata['doi'] = str(metadata['abstract'][-9])
+                        metadata['abstract'].pop()
+# Here add separator for paragraphs
+                        metadata['abstract'] = str(' '.join(metadata['abstract']))
+                        metadata['abstract'] = str(re.sub('Tous droits réservés.*$', '', metadata['abstract']))
+                    elif format_europresse == 1:
+                        metadata['doi'] = ' '.join(html_article.xpath(doi_xpath))
+                        metadata['abstract'] = metadata['abstract'][:-9]
+# Here add separator for paragraphs
+                        metadata['abstract'] = str(' '.join(metadata['abstract']))
+
+                    else: 
+                        metadata['doi'] = "not found"
+                    
+                    metadata['length_words'] = len(metadata['abstract'].split(' '))
+                    metadata['length_letters'] = len(metadata['abstract'])
                    
-                    if len(metadata['text'])>0: 
-                        metadata['doi'] = str(metadata['text'][-9])
-                        metadata['text'].pop()
-                        metadata['text'] = str(' '.join(metadata['text']))
-                        metadata['text'] = str(re.sub('Tous droits réservés.*$', '', metadata['text']))
-
-                    else: metadata['doi'] = "not found"
-
                    metadata['bdd']  = u'europresse'
                    metadata['url']  = u''
                    
                  #metadata_str = {}
                    for key, value in metadata.items():
                        metadata[key] = value.decode() if isinstance(value, bytes) else value
-                    metadata_list.append(metadata)
+                    yield metadata
                    count += 1
-        
+            file.close()
+
        except Exception as error:
            print(error)
            pass
-
-#       from pprint import pprint
-#       pprint(metadata_list)
-#       return []
-        return metadata_list
-
-
--- a/parsing/FileParsers/FileParser.py
+++ b/parsing/FileParsers/FileParser.py
@@ -103,15 +103,21 @@ class FileParser:
            zipArchive = zipfile.ZipFile(file)
            for filename in zipArchive.namelist():
                try:
-                    metadata_list += self.parse(zipArchive.open(filename, "r"))
+                    f = zipArchive.open(filename, 'r')
+                    metadata_list += self.parse(f)
+                    f.close()
                except Exception as error:
                    print(error)
        # ...otherwise, let's parse it directly!
        else:
            try:
-                metadata_list += self._parse(file)
+                for metadata in self._parse(file):
+                    metadata_list.append(self.format_metadata(metadata))
+                if hasattr(file, 'close'):
+                    file.close()
            except Exception as error:
                print(error)
        # return the list of formatted metadata
-        return map(self.format_metadata, metadata_list)
+        return metadata_list
+

--- a/parsing/FileParsers/PubmedFileParser.py
+++ b/parsing/FileParsers/PubmedFileParser.py
@@ -25,6 +25,7 @@ class PubmedFileParser(FileParser):
            metadata_path = {
                "journal"           : 'MedlineCitation/Article/Journal/Title',
                "title"             : 'MedlineCitation/Article/ArticleTitle',
+                "abstract"          : 'MedlineCitation/Article/Abstract/AbstractText',
                "title2"            : 'MedlineCitation/Article/VernacularTitle',
                "language_iso3"     : 'MedlineCitation/Article/Language',
                "doi"               : 'PubmedData/ArticleIdList/ArticleId[@type=doi]',
@@ -101,7 +102,6 @@ class PubmedFileParser(FileParser):
                if "realdate_day_" in metadata: metadata.pop("realdate_day_")
                if "title2" in metadata: metadata.pop("title2")
                
-                # print(metadata)
                metadata_list.append(metadata)
        # return the list of metadata
        return metadata_list
--- a/parsing/FileParsers/RisFileParser.py
+++ b/parsing/FileParsers/RisFileParser.py
@@ -17,42 +17,34 @@ class RisFileParser(FileParser):
    }

    def _parse(self, file):
-        metadata_list = []
        metadata = {}
        last_key = None
        last_values = []
+        # browse every line of the file
        for line in file:
            if len(line) > 2:
+                # extract the parameter key
                parameter_key = line[:2]
-#                print(parameter_key)
                if parameter_key != b'  ' and parameter_key != last_key:
                    if last_key in self._parameters:
+                        # translate the parameter key
                        parameter = self._parameters[last_key]
                        if parameter["type"] == "metadata":
                            separator = parameter["separator"] if "separator" in parameter else ""
                            metadata[parameter["key"]] = separator.join(last_values)
                        elif parameter["type"] == "delimiter":
-                            #language = self._languages_fullname[metadata["language"].lower()]
-                            #print(metadata)
-                            try:
-                                #print("append")
-                                if 'language_fullname' not in metadata.keys():
-                                    if 'language_iso3' not in metadata.keys():
-                                        if 'language_iso2' not in metadata.keys():
-                                            metadata['language_iso2'] = 'en'
-                                metadata_list.append(metadata)
-                                metadata = {}
-                                #print("append succeeded")
-                            except:
-                                pass
+                            if 'language_fullname' not in metadata.keys():
+                                if 'language_iso3' not in metadata.keys():
+                                    if 'language_iso2' not in metadata.keys():
+                                        metadata['language_iso2'] = 'en'
+                            yield metadata
+                            metadata = {}
                    last_key = parameter_key
                    last_values = []
                try:
                    last_values.append(line[3:-1].decode())
                except Exception as error:
                    print(error)
-                    pass
-        #print(len(metadata_list))
-        #print(metadata_list)
-
-        return metadata_list
+        # if a metadata object is left in memory, yield it as well
+        if metadata:
+            yield metadata
--- a/parsing/NgramsExtractors/NgramsExtractor.py
+++ b/parsing/NgramsExtractors/NgramsExtractor.py
-from ..Taggers import Tagger
+from ..Taggers import TurboTagger
 import nltk


@@ -13,12 +13,13 @@ class NgramsExtractor:
        self.start()
        self._label = "NP"
        self._rule = self._label + ": " + rule
+        self._grammar = nltk.RegexpParser(self._rule)
        
    def __del__(self):
        self.stop()
        
    def start(self):
-        self.tagger = Tagger()
+        self.tagger = TurboTagger()
        
    def stop(self):
        pass
@@ -29,19 +30,8 @@ class NgramsExtractor:
    """
    def extract_ngrams(self, contents):
        tagged_ngrams = self.tagger.tag_text(contents)
-        if len(tagged_ngrams)==0: return []
-        
-        grammar = nltk.RegexpParser(self._rule)
-        result = []
-        # try:
-        grammar_parsed = grammar.parse(tagged_ngrams)
-        for subtree in grammar_parsed.subtrees():
-            if subtree.label() == self._label:
-                result.append(subtree.leaves())
-        # except Exception as e:
-        #     print("Problem while parsing rule '%s'" % (self._rule, ))
-        #     print(e)
-        return result
-        
-        
-    
+        if len(tagged_ngrams):
+            grammar_parsed = self._grammar.parse(tagged_ngrams)
+            for subtree in grammar_parsed.subtrees():
+                if subtree.label() == self._label:
+                    yield subtree.leaves()
--- a/parsing/NgramsExtractors/__init__.py
+++ b/parsing/NgramsExtractors/__init__.py
 from .FrenchNgramsExtractor import FrenchNgramsExtractor
 from .TurboNgramsExtractor import TurboNgramsExtractor as EnglishNgramsExtractor
-# from parsing.NgramsExtractors.EnglishNgramsExtractor import EnglishNgramsExtractor
+# from .EnglishNgramsExtractor import EnglishNgramsExtractor
 from .NgramsExtractor import NgramsExtractor

--- a/parsing/Taggers/Tagger.py
+++ b/parsing/Taggers/Tagger.py
@@ -58,9 +58,11 @@ class Tagger:
        if single:
            self.tagging_end()
        return []
-        
+    
+
    """Send a text to be tagged.
    """
+    # Not used right now
    def tag_text(self, text):
        tokens_tags = []
        self.tagging_start()
@@ -69,4 +71,3 @@ class Tagger:
            tokens_tags += self.tag_tokens(tokens, False)
        self.tagging_end()
        return tokens_tags
-
--- a/parsing/Taggers/nlpserver/client.py
+++ b/parsing/Taggers/nlpserver/client.py
@@ -9,15 +9,24 @@ from .settings import implemented_methods
 class NLPClient:

    def __init__(self):
-        self._socket = socket.socket(*server_type_client)
-        self._socket.connect((server_host, server_port))
+        self._socket = None
        for method_name in dir(self):
            if method_name[0] != '_':
                if method_name.upper() not in implemented_methods:
                    setattr(self, method_name, self._notimplemented)

    def __del__(self):
-        self._socket.close()
+        self._disconnect()
+
+    def _connect(self):
+        """(Re)open the connection to the NLP server.
+
+        Any socket kept from a previous call is closed first. The new socket
+        is created from `server_type_client`, stored in `self._socket`, and
+        then connected to (server_host, server_port).
+        """
+        self._disconnect()
+        sock = socket.socket(*server_type_client)
+        # stored before connecting, so a failed connect still leaves the
+        # socket reachable for _disconnect()
+        self._socket = sock
+        sock.connect((server_host, server_port))
+
+    def _disconnect(self):
+        """Close the current socket, if there is one, and forget it.
+
+        Does nothing when `self._socket` is None, so it can be called
+        repeatedly.
+        """
+        if self._socket is None:
+            return
+        self._socket.close()
+        self._socket = None

    def _notimplemented(self, *args, **kwargs):
        raise NotImplementedError(
@@ -51,7 +60,7 @@ class NLPClient:
        data += language + '\n'
        data += re.sub(r'\n+', '\n', text)
        data += '\n\n'
-        self.__init__()
+        self._connect()
        self._socket.sendall(data.encode())
        sentence = []
        if keys is None:
@@ -73,7 +82,6 @@ class NLPClient:
                    continue
                values = line.split('\t')
                sentence.append(dict(zip(keys, line.split('\t'))))
-        self.__del__()

    def tokenize(self, text, language='english', asdict=False):
        keys = ('token', ) if asdict else None

--- a/parsing/Taggers/nlpserver/settings.py
+++ b/parsing/Taggers/nlpserver/settings.py
@@ -4,7 +4,7 @@ import socketserver

 # Server parameters
 server_host = 'localhost'
-server_port = 1234
+server_port = 7777
 server_type_server = socketserver.TCPServer
 server_type_client = socket.AF_INET, socket.SOCK_STREAM
 server_timeout = 2.0

--- a/parsing/corpustools.py
+++ b/parsing/corpustools.py
--- a/scrap_pubmed/MedlineFetcherDavid2015.py
+++ b/scrap_pubmed/MedlineFetcherDavid2015.py
@@ -40,7 +40,7 @@ class MedlineFetcher:

        "Get number of results for query 'query' in variable 'count'"
        "Get also 'queryKey' and 'webEnv', which are used by function 'medlineEfetch'"
-        print(query)
+        # print(query)
        origQuery = query
        query = query.replace(' ', '%20')
            
@@ -79,7 +79,7 @@ class MedlineFetcher:

        queryNoSpace = query.replace(' ', '') # No space in directory and file names, avoids stupid errors
        
-        print ("LOG::TIME: ",'medlineEfetchRAW :Query "' , query , '"\t:\t' , count , ' results')
+        # print ("LOG::TIME: ",'medlineEfetchRAW :Query "' , query , '"\t:\t' , count , ' results')

        retstart = 0
        eFetch = '%s/efetch.fcgi?email=youremail@example.org&rettype=%s&retmode=xml&retstart=%s&retmax=%s&db=%s&query_key=%s&WebEnv=%s' %(self.pubMedEutilsURL, self.reportType, retstart, retmax, self.pubMedDB, queryKey, webEnv)
@@ -94,7 +94,7 @@ class MedlineFetcher:
    def downloadFile(self, item):
        url = item[0]
        filename = item[1]
-        print("\tin test_downloadFile:")
+        # print("\tin test_downloadFile:")
        # print(url,filename)
        data = urlopen(url)
        f = codecs.open(filename, "w" ,encoding='utf-8')
@@ -110,7 +110,7 @@ class MedlineFetcher:
    def test_downloadFile(self, item):
        url = item[0]
        filename = item[1]
-        print("\tin downloadFile:")
+        # print("\tin downloadFile:")
        data = urlopen(url)
        return data

@@ -119,7 +119,7 @@ class MedlineFetcher:
        # time.sleep(1) # pretend to do some lengthy work.
        returnvalue = self.medlineEsearch(item)
        with self.lock:
-            print(threading.current_thread().name, item)
+            # print(threading.current_thread().name, item)
            return returnvalue

    # The worker thread pulls an item from the queue and processes it
@@ -160,13 +160,13 @@ class MedlineFetcher:

        N = 0

-        print ("MedlineFetcher::serialFetcher :")
+        # print ("MedlineFetcher::serialFetcher :")
        thequeries = []
        globalresults = []
        for i in range(yearsNumber):
            year = str(2015 - i)
-            print ('YEAR ' + year)
-            print ('---------\n')
+            # print ('YEAR ' + year)
+            # print ('---------\n')
            pubmedquery = str(year) + '[dp] '+query
            self.q.put( pubmedquery ) #put task in the queue
        
@@ -196,5 +196,6 @@ class MedlineFetcher:
            retmax_forthisyear = int(round(globalLimit*proportion))
            query["retmax"] = retmax_forthisyear
            if query["retmax"]==0: query["retmax"]+=1
+            print(query["string"],"\t[",k,">",query["retmax"],"]")

        return thequeries
--- a/scrap_pubmed/views.py
+++ b/scrap_pubmed/views.py
-from django.shortcuts import redirect
-from django.shortcuts import render

-from django.http import Http404, HttpResponse, HttpResponseRedirect
 from django.template.loader import get_template
 from django.template import Context
 from django.contrib.auth.models import User, Group

 from scrap_pubmed.MedlineFetcherDavid2015 import MedlineFetcher

-from gargantext_web.api import JsonHttpResponse
 from urllib.request import urlopen, urlretrieve
 import json

-from gargantext_web.settings import MEDIA_ROOT
 # from datetime import datetime
 import time
 import datetime
@@ -21,9 +16,23 @@ import threading
 from django.core.files import File
 from gargantext_web.settings import DEBUG

-from node.models import Language, ResourceType, Resource, \
-        Node, NodeType, Node_Resource, Project, Corpus, \
-        Ngram, Node_Ngram, NodeNgramNgram, NodeNodeNgram
+
+from django.shortcuts import redirect
+from django.shortcuts import render
+from django.http import Http404, HttpResponse, HttpResponseRedirect, HttpResponseForbidden
+
+from sqlalchemy import func
+from sqlalchemy.orm import aliased
+
+from collections import defaultdict
+import threading
+
+from node.admin import CustomForm
+from gargantext_web.db import *
+from gargantext_web.settings import DEBUG, MEDIA_ROOT
+from gargantext_web.api import JsonHttpResponse
+
+from parsing.corpustools import add_resource, parse_resources, extract_ngrams, compute_tfidf


 def getGlobalStats(request ):
@@ -31,7 +40,7 @@ def getGlobalStats(request ):
 	alist = ["bar","foo"]

 	if request.method == "POST":
-		N = 100
+		N = 1000
 		query = request.POST["query"]
 		print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" query =", query )
 		print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" N =", N )
@@ -72,9 +81,34 @@ def getGlobalStatsISTEXT(request ):
 def doTheQuery(request , project_id):
 	alist = ["hola","mundo"]

-	if request.method == "POST":
+	# SQLAlchemy session
+	session = Session()
+
+	# do we have a valid project id?
+	try:
+		project_id = int(project_id)
+	except ValueError:
+		raise Http404()
+
+	# do we have a valid project?
+	project = (session
+		.query(Node)
+		.filter(Node.id == project_id)
+		.filter(Node.type_id == cache.NodeType['Project'].id)
+	).first()
+
+	if project is None:
+		raise Http404()

-		
+	# do we have a valid user?
+	user = request.user
+	if not user.is_authenticated():
+		return redirect('/login/?next=%s' % request.path)
+	if project.user_id != user.id:
+		return HttpResponseForbidden()
+
+
+	if request.method == "POST":
 		query = request.POST["query"]
 		name = request.POST["string"]

@@ -86,30 +120,26 @@ def doTheQuery(request , project_id):
 			urlreqs.append( instancia.medlineEfetchRAW( yearquery ) )
 		alist = ["tudo fixe" , "tudo bem"]

-		"""
-		urlreqs: List of urls to query.
-		- Then, to each url in urlreqs you do:
-			eFetchResult = urlopen(url)
-			eFetchResult.read()  # this will output the XML... normally you write this to a XML-file.
-		"""
-
-		thefile = "how we do this here?"
-		resource_type = ResourceType.objects.get(name="pubmed" )
-
-		parent      = Node.objects.get(id=project_id)
-		node_type   = NodeType.objects.get(name='Corpus')
-		type_id = NodeType.objects.get(name='Document').id
-		user_id = User.objects.get( username=request.user ).id
-
+		resourcetype = cache.ResourceType["pubmed"]

+		# corpus node instantiation; persisted via the SQLAlchemy session below
 		corpus = Node(
-			user=request.user,
-			parent=parent,
-			type=node_type,
-			name=name,
+			name = name,
+			user_id = request.user.id,
+			parent_id = project_id,
+			type_id = cache.NodeType['Corpus'].id,
+			language_id = None,
 		)
+		session.add(corpus)
+		session.commit()
+
+		# """
+		# urlreqs: List of urls to query.
+		# - Then, to each url in urlreqs you do:
+		# 	eFetchResult = urlopen(url)
+		# 	eFetchResult.read()  # this will output the XML... normally you write this to a XML-file.
+		# """

-		corpus.save()

 		tasks = MedlineFetcher()
 		for i in range(8):
@@ -124,24 +154,30 @@ def doTheQuery(request , project_id):
 		dwnldsOK = 0
 		for filename in tasks.firstResults:
 			if filename!=False:
-				corpus.add_resource( user=request.user, type=resource_type, file=filename )
+				# add the uploaded resource to the corpus
+				add_resource(corpus,
+					user_id = request.user.id,
+					type_id = resourcetype.id,
+					file = filename,
+				)
 				dwnldsOK+=1
 			
 		if dwnldsOK == 0: return JsonHttpResponse(["fail"])

-		# do the WorkFlow
 		try:
-			if DEBUG is True:
-				corpus.workflow()
-				# corpus.workflow__MOV()
+			def apply_workflow(corpus):
+				parse_resources(corpus)
+				extract_ngrams(corpus, ['title'])
+				compute_tfidf(corpus)
+			if DEBUG:
+				apply_workflow(corpus)
 			else:
-				corpus.workflow.apply_async((), countdown=3)
-
-			return JsonHttpResponse(["workflow","finished"])
+				thread = threading.Thread(target=apply_workflow, args=(corpus, ), daemon=True)
+				thread.start()
 		except Exception as error:
+			print('WORKFLOW ERROR')
 			print(error)
-
-		return JsonHttpResponse(["workflow","finished","outside the try-except"])
+		return HttpResponseRedirect('/project/' + str(project_id))

 	data = alist
 	return JsonHttpResponse(data)
@@ -164,59 +200,59 @@ def testISTEX(request , project_id):
 		print(query_string , query , N)


-		urlreqs = []
-		pagesize = 50
-		tasks = MedlineFetcher()
-		chunks = list(tasks.chunks(range(N), pagesize))
-		for k in chunks:
-			if (k[0]+pagesize)>N: pagesize = N-k[0]
-			urlreqs.append("http://api.istex.fr/document/?q="+query_string+"&output=*&"+"from="+str(k[0])+"&size="+str(pagesize))
-		print(urlreqs)
-
-		urlreqs = ["http://localhost/374255" , "http://localhost/374278" ]
-		print(urlreqs)
-
-		resource_type = ResourceType.objects.get(name="istext" )
-
-		parent      = Node.objects.get(id=project_id)
-		node_type   = NodeType.objects.get(name='Corpus')
-		type_id = NodeType.objects.get(name='Document').id
-		user_id = User.objects.get( username=request.user ).id
-
-		corpus = Node(
-			user=request.user,
-			parent=parent,
-			type=node_type,
-			name=query,
-		)
-
-		corpus.save()
-
-		# configuring your queue with the event
-		for i in range(8):
-			t = threading.Thread(target=tasks.worker2) #thing to do
-			t.daemon = True  # thread dies when main thread (only non-daemon thread) exits.
-			t.start()
-		for url in urlreqs:
-			filename = MEDIA_ROOT + '/corpora/%s/%s' % (request.user, str(datetime.now().microsecond))
-			tasks.q.put( [url , filename]) #put a task in th queue
-		tasks.q.join() # wait until everything is finished
-		for filename in tasks.firstResults:
-			corpus.add_resource( user=request.user, type=resource_type, file=filename )
-
-
-		corpus.save()
-		print("DEBUG:",DEBUG)
-		# do the WorkFlow
-		try:
-			if DEBUG is True:
-				corpus.workflow()
-			else:
-				corpus.workflow.apply_async((), countdown=3)
-
-			return JsonHttpResponse(["workflow","finished"])
-		except Exception as error:
-			print(error)
+		# urlreqs = []
+		# pagesize = 50
+		# tasks = MedlineFetcher()
+		# chunks = list(tasks.chunks(range(N), pagesize))
+		# for k in chunks:
+		# 	if (k[0]+pagesize)>N: pagesize = N-k[0]
+		# 	urlreqs.append("http://api.istex.fr/document/?q="+query_string+"&output=*&"+"from="+str(k[0])+"&size="+str(pagesize))
+		# print(urlreqs)
+
+		# urlreqs = ["http://localhost/374255" , "http://localhost/374278" ]
+		# print(urlreqs)
+
+		# resource_type = ResourceType.objects.get(name="istext" )
+
+		# parent      = Node.objects.get(id=project_id)
+		# node_type   = NodeType.objects.get(name='Corpus')
+		# type_id = NodeType.objects.get(name='Document').id
+		# user_id = User.objects.get( username=request.user ).id
+
+		# corpus = Node(
+		# 	user=request.user,
+		# 	parent=parent,
+		# 	type=node_type,
+		# 	name=query,
+		# )
+
+		# corpus.save()
+
+		# # configuring your queue with the event
+		# for i in range(8):
+		# 	t = threading.Thread(target=tasks.worker2) #thing to do
+		# 	t.daemon = True  # thread dies when main thread (only non-daemon thread) exits.
+		# 	t.start()
+		# for url in urlreqs:
+		# 	filename = MEDIA_ROOT + '/corpora/%s/%s' % (request.user, str(datetime.now().microsecond))
+		# 	tasks.q.put( [url , filename]) #put a task in th queue
+		# tasks.q.join() # wait until everything is finished
+		# for filename in tasks.firstResults:
+		# 	corpus.add_resource( user=request.user, type=resource_type, file=filename )
+
+
+		# corpus.save()
+		# print("DEBUG:",DEBUG)
+		# # do the WorkFlow
+		# try:
+		# 	if DEBUG is True:
+		# 		corpus.workflow()
+		# 	else:
+		# 		corpus.workflow.apply_async((), countdown=3)
+
+		# 	return JsonHttpResponse(["workflow","finished"])
+		# except Exception as error:
+		# 	print(error)

 	data = [query_string,query,N]
 	return JsonHttpResponse(data)

--- a/static/docs/gargantua_book/Source.txt
+++ b/static/docs/gargantua_book/Source.txt
+Project Gutenberg's Gargantua and Pantagruel, Complete., by Francois Rabelais
+
+This eBook is for the use of anyone anywhere at no cost and with
+almost no restrictions whatsoever.  You may copy it, give it away or
+re-use it under the terms of the Project Gutenberg License included
+with this eBook or online at www.gutenberg.net
+
+
+Title: Gargantua and Pantagruel, Complete.
+       Five Books Of The Lives, Heroic Deeds And Sayings Of Gargantua And
+       His Son Pantagruel
+       
+
+Author: Francois Rabelais
+
+Release Date: August 8, 2004 [EBook #1200]
+
+Language: English
+
+
+*** START OF THIS PROJECT GUTENBERG EBOOK GARGANTUA AND PANTAGRUEL, ***
+
+
+
+
+Produced by Sue Asscher and David Widger
+
+
+
+
+
+MASTER FRANCIS RABELAIS
+
+
+FIVE BOOKS OF THE LIVES, HEROIC DEEDS AND SAYINGS OF
+
+GARGANTUA AND HIS SON PANTAGRUEL
+
+
+
+Translated into English by
+
+Sir Thomas Urquhart of Cromarty
+
+and
+
+Peter Antony Motteux
+
+
+
+
+The text of the first Two Books of Rabelais has been reprinted from the
+first edition (1653) of Urquhart's translation.  Footnotes initialled 'M.'
+are drawn from the Maitland Club edition (1838); other footnotes are by the
+translator.  Urquhart's translation of Book III. appeared posthumously in
+1693, with a new edition of Books I. and II., under Motteux's editorship.
+Motteux's rendering of Books IV. and V. followed in 1708.  Occasionally (as
+the footnotes indicate) passages omitted by Motteux have been restored from
+the 1738 copy edited by Ozell.
+
+
+
--- a/static/docs/gargantua_book/gargantua_chapter_1.txt
+++ b/static/docs/gargantua_book/gargantua_chapter_1.txt
+Chapter 1.I. Of the Genealogy and Antiquity of Gargantua.
+Chapter 1.II. The Antidoted Fanfreluches:  or, a Galimatia of extravagant Conceits found in an ancient Monument.
+Chapter 1.III. How Gargantua was carried eleven months in his mother's belly.
+Chapter 1.IV. How Gargamelle, being great with Gargantua, did eat a huge deal of tripes.
+Chapter 1.IX. The colours and liveries of Gargantua.
+Chapter 1.L. Gargantua's speech to the vanquished.
+Chapter 1.LI. How the victorious Gargantuists were recompensed after the battle.
+Chapter 1.LII. How Gargantua caused to be built for the Monk the Abbey of Theleme.
+Chapter 1.LIII. How the abbey of the Thelemites was built and endowed.
+Chapter 1.LIV. The inscription set upon the great gate of Theleme.
+Chapter 1.LV. What manner of dwelling the Thelemites had.
+Chapter 1.LVI. How the men and women of the religious order of Theleme were apparelled.
+Chapter 1.LVII. How the Thelemites were governed, and of their manner of living.
+Chapter 1.LVIII. A prophetical Riddle.
+Chapter 1.V. The Discourse of the Drinkers.
+Chapter 1.VI. How Gargantua was born in a strange manner.
+Chapter 1.VII. After what manner Gargantua had his name given him, and how he tippled, bibbed, and curried the can.
+Chapter 1.VIII. How they apparelled Gargantua.
+Chapter 1.X. Of that which is signified by the colours white and blue.
+Chapter 1.XI. Of the youthful age of Gargantua.
+Chapter 1.XII. Of Gargantua's wooden horses.
+Chapter 1.XIII. How Gargantua's wonderful understanding became known to his father Grangousier, by the invention of a torchecul or wipebreech.
+Chapter 1.XIV. How Gargantua was taught Latin by a Sophister.
+Chapter 1.XIX. The oration of Master Janotus de Bragmardo for recovery of the bells.
+Chapter 1.XL. Why monks are the outcasts of the world; and wherefore some have bigger noses than others.
+Chapter 1.XLI. How the Monk made Gargantua sleep, and of his hours and breviaries.
+Chapter 1.XLII. How the Monk encouraged his fellow-champions, and how he hanged upon a tree.
+Chapter 1.XLIII. How the scouts and fore-party of Picrochole were met with by Gargantua, and how the Monk slew Captain Drawforth, and then was taken prisoner by his enemies.
+Chapter 1.XLIV. How the Monk rid himself of his keepers, and how Picrochole's forlorn hope was defeated.
+Chapter 1.XLIX. How Picrochole in his flight fell into great misfortunes, and what Gargantua did after the battle.
+Chapter 1.XLV. How the Monk carried along with him the Pilgrims, and of the good words that Grangousier gave them.
+Chapter 1.XLVI. How Grangousier did very kindly entertain Touchfaucet his prisoner.
+Chapter 1.XLVII. How Grangousier sent for his legions, and how Touchfaucet slew Rashcalf, and was afterwards executed by the command of Picrochole.
+Chapter 1.XLVIII. How Gargantua set upon Picrochole within the rock Clermond, and utterly defeated the army of the said Picrochole.
+Chapter 1.XV. How Gargantua was put under other schoolmasters.
+Chapter 1.XVI. How Gargantua was sent to Paris, and of the huge great mare that he rode on; how she destroyed the oxflies of the Beauce.
+Chapter 1.XVII. How Gargantua paid his welcome to the Parisians, and how he took away the great bells of Our Lady's Church.
+Chapter 1.XVIII. How Janotus de Bragmardo was sent to Gargantua to recover the great bells.
+Chapter 1.XX. How the Sophister carried away his cloth, and how he had a suit in law against the other masters.
+Chapter 1.XXI. The study of Gargantua, according to the discipline of his schoolmasters the Sophisters.
+Chapter 1.XXII. The games of Gargantua.
+Chapter 1.XXIII. How Gargantua was instructed by Ponocrates, and in such sort disciplinated, that he lost not one hour of the day.
+Chapter 1.XXIV. How Gargantua spent his time in rainy weather.
+Chapter 1.XXIX. The tenour of the letter which Grangousier wrote to his son Gargantua.
+Chapter 1.XXV. How there was great strife and debate raised betwixt the cake-bakers of Lerne, and those of Gargantua's country, whereupon were waged great wars.
+Chapter 1.XXVI. How the inhabitants of Lerne, by the commandment of Picrochole their king, assaulted the shepherds of Gargantua unexpectedly and on a sudden.
+Chapter 1.XXVII. How a monk of Seville saved the close of the abbey from being ransacked by the enemy.
+Chapter 1.XXVIII. How Picrochole stormed and took by assault the rock Clermond, and of Grangousier's unwillingness and aversion from the undertaking of war.
+Chapter 1.XXX. How Ulric Gallet was sent unto Picrochole.
+Chapter 1.XXXI. The speech made by Gallet to Picrochole.
+Chapter 1.XXXII. How Grangousier, to buy peace, caused the cakes to be restored.
+Chapter 1.XXXIII. How some statesmen of Picrochole, by hairbrained counsel, put him in extreme danger.
+Chapter 1.XXXIV. How Gargantua left the city of Paris to succour his country, and how Gymnast encountered with the enemy.
+Chapter 1.XXXIX. How the Monk was feasted by Gargantua, and of the jovial discourse they had at supper.
+Chapter 1.XXXV. How Gymnast very souply and cunningly killed Captain Tripet and others of Picrochole's men.
+Chapter 1.XXXVI. How Gargantua demolished the castle at the ford of Vede, and how they passed the ford.
+Chapter 1.XXXVII. How Gargantua, in combing his head, made the great cannon-balls fall out of his hair.
+Chapter 1.XXXVIII. How Gargantua did eat up six pilgrims in a salad.
--- a/static/docs/gargantua_book/gargantua_chapter_2.txt
+++ b/static/docs/gargantua_book/gargantua_chapter_2.txt
+Chapter 2.I. Of the original and antiquity of the great Pantagruel.
+Chapter 2.II. Of the nativity of the most dread and redoubted Pantagruel.
+Chapter 2.III. Of the grief wherewith Gargantua was moved at the decease of his wife Badebec.
+Chapter 2.IV. Of the infancy of Pantagruel.
+Chapter 2.IX. How Pantagruel found Panurge, whom he loved all his lifetime.
+Chapter 2.V. Of the acts of the noble Pantagruel in his youthful age.
+Chapter 2.VI. How Pantagruel met with a Limousin, who too affectedly did counterfeit the French language.
+Chapter 2.VII. How Pantagruel came to Paris, and of the choice books of the Library of St. Victor.
+Chapter 2.VIII. How Pantagruel, being at Paris, received letters from his father Gargantua, and the copy of them.
+Chapter 2.X. How Pantagruel judged so equitably of a controversy, which was wonderfully obscure and difficult, that, by reason of his just decree therein, he was reputed to have a most admirable judgment.
+Chapter 2.XI. How the Lords of Kissbreech and Suckfist did plead before Pantagruel without an attorney.
+Chapter 2.XII. How the Lord of Suckfist pleaded before Pantagruel.
+Chapter 2.XIII. How Pantagruel gave judgment upon the difference of the two lords.
+Chapter 2.XIV. How Panurge related the manner how he escaped out of the hands of the Turks.
+Chapter 2.XIX. How Panurge put to a nonplus the Englishman that argued by signs.
+Chapter 2.XV. How Panurge showed a very new way to build the walls of Paris.
+Chapter 2.XVI. Of the qualities and conditions of Panurge.
+Chapter 2.XVII. How Panurge gained the pardons, and married the old women, and of the suit in law which he had at Paris.
+Chapter 2.XVIII. How a great scholar of England would have argued against Pantagruel, and was overcome by Panurge.
+Chapter 2.XX. How Thaumast relateth the virtues and knowledge of Panurge.
+Chapter 2.XXI. How Panurge was in love with a lady of Paris.
+Chapter 2.XXII. How Panurge served a Parisian lady a trick that pleased her not very well.
+Chapter 2.XXIII. How Pantagruel departed from Paris, hearing news that the Dipsodes had invaded the land of the Amaurots; and the cause wherefore the leagues are so short in France.
+Chapter 2.XXIV. A letter which a messenger brought to Pantagruel from a lady of Paris, together with the exposition of a posy written in a gold ring.
+Chapter 2.XXIX. How Pantagruel discomfited the three hundred giants armed.
+Chapter 2.XXV. How Panurge, Carpalin, Eusthenes, and Epistemon, the gentlemen attendants of Pantagruel, vanquished and discomfited six hundred and threescore horsemen very cunningly.
+Chapter 2.XXVI. How Pantagruel and his company were weary in eating still salt meats; and how Carpalin went a-hunting to have some venison.
+Chapter 2.XXVII. How Pantagruel set up one trophy in memorial of their valour, and Panurge another in remembrance of the hares.  How Pantagruel likewise with his farts begat little men, and with his fisgs little women; and how Panurge broke a great staff over two glasses.
+Chapter 2.XXVIII. How Pantagruel got the victory very strangely over the Dipsodes and the Giants.
+Chapter 2.XXX. How Epistemon, who had his head cut off, was finely healed by Panurge, and of the news which he brought from the devils, and of the damned people in hell.
+Chapter 2.XXXI. How Pantagruel entered into the city of the Amaurots, and how Panurge married King Anarchus to an old lantern-carrying hag, and made him a crier of green sauce.
+Chapter 2.XXXII. How Pantagruel with his tongue covered a whole army, and what the author saw in his mouth.
+Chapter 2.XXXIII. How Pantagruel became sick, and the manner how he was recovered.
+Chapter 2.XXXIV. The conclusion of this present book, and the excuse of the author.
--- a/static/docs/gargantua_book/gargantua_chapter_3.txt
+++ b/static/docs/gargantua_book/gargantua_chapter_3.txt
+Chapter 3.I. How Pantagruel transported a colony of Utopians into Dipsody.
+Chapter 3.II. How Panurge was made Laird of Salmigondin in Dipsody, and did waste his revenue before it came in.
+Chapter 3.III. How Panurge praiseth the debtors and borrowers.
+Chapter 3.IV. Panurge continueth his discourse in the praise of borrowers and lenders.
+Chapter 3.IX. How Panurge asketh counsel of Pantagruel whether he should marry, yea, or no.
+Chapter 3.L. How the famous Pantagruelion ought to be prepared and wrought.
+Chapter 3.LI. Why it is called Pantagruelion, and of the admirable virtues.
+Chapter 3.LII. How a certain kind of Pantagruelion is of that nature that the fire is not able to consume it.
+Chapter 3.V. How Pantagruel altogether abhorreth the debtors and borrowers.
+Chapter 3.VI. Why new married men were privileged from going to the wars.
+Chapter 3.VII. How Panurge had a flea in his ear, and forbore to wear any longer his magnificent codpiece.
+Chapter 3.VIII. Why the codpiece is held to be the chief piece of armour amongst warriors.
+Chapter 3.X. How Pantagruel representeth unto Panurge the difficulty of giving advice in the matter of marriage; and to that purpose mentioneth somewhat of the Homeric and Virgilian lotteries.
+Chapter 3.XI. How Pantagruel showeth the trial of one's fortune by the throwing of dice to be unlawful.
+Chapter 3.XII. How Pantagruel doth explore by the Virgilian lottery what fortune Panurge shall have in his marriage.
+Chapter 3.XIII. How Pantagruel adviseth Panurge to try the future good or bad luck of his marriage by dreams.
+Chapter 3.XIV. Panurge's dream, with the interpretation thereof.
+Chapter 3.XIX. How Pantagruel praiseth the counsel of dumb men.
+Chapter 3.XL. How Bridlegoose giveth reasons why he looked upon those law-actions which he decided by the chance of the dice.
+Chapter 3.XLI. How Bridlegoose relateth the history of the reconcilers of parties at variance in matters of law.
+Chapter 3.XLII. How suits at law are bred at first, and how they come afterwards to their perfect growth.
+Chapter 3.XLIII. How Pantagruel excuseth Bridlegoose in the matter of sentencing actions at law by the chance of the dice.
+Chapter 3.XLIV. How Pantagruel relateth a strange history of the perplexity of human judgment.
+Chapter 3.XLIX. How Pantagruel did put himself in a readiness to go to sea; and of the herb named Pantagruelion.
+Chapter 3.XLV. How Panurge taketh advice of Triboulet.
+Chapter 3.XLVI. How Pantagruel and Panurge diversely interpret the words of Triboulet.
+Chapter 3.XLVII. How Pantagruel and Panurge resolved to make a visit to the Oracle of the Holy Bottle.
+Chapter 3.XLVIII. How Gargantua showeth that the children ought not to marry without the special knowledge and advice of their fathers and mothers.
+Chapter 3.XV. Panurge's excuse and exposition of the monastic mystery concerning powdered beef.
+Chapter 3.XVI. How Pantagruel adviseth Panurge to consult with the Sibyl of Panzoust.
+Chapter 3.XVII. How Panurge spoke to the Sibyl of Panzoust.
+Chapter 3.XVIII. How Pantagruel and Panurge did diversely expound the verses of the Sibyl of Panzoust.
+Chapter 3.XX. How Goatsnose by signs maketh answer to Panurge.
+Chapter 3.XXI. How Panurge consulteth with an old French poet, named Raminagrobis.
+Chapter 3.XXII. How Panurge patrocinates and defendeth the Order of the Begging Friars.
+Chapter 3.XXIII. How Panurge maketh the motion of a return to Raminagrobis.
+Chapter 3.XXIV. How Panurge consulteth with Epistemon.
+Chapter 3.XXIX. How Pantagruel convocated together a theologian, physician, lawyer, and philosopher, for extricating Panurge out of the perplexity wherein he was.
+Chapter 3.XXV. How Panurge consulteth with Herr Trippa.
+Chapter 3.XXVI. How Panurge consulteth with Friar John of the Funnels.
+Chapter 3.XXVII. How Friar John merrily and sportingly counselleth Panurge.
+Chapter 3.XXVIII. How Friar John comforteth Panurge in the doubtful matter of cuckoldry.
+Chapter 3.XXX. How the theologue, Hippothadee, giveth counsel to Panurge in the matter and business of his nuptial enterprise.
+Chapter 3.XXXI. How the physician Rondibilis counselleth Panurge.
+Chapter 3.XXXII. How Rondibilis declareth cuckoldry to be naturally one of the appendances of marriage.
+Chapter 3.XXXIII. Rondibilis the physician's cure of cuckoldry.
+Chapter 3.XXXIV. How women ordinarily have the greatest longing after things prohibited.
+Chapter 3.XXXIX. How Pantagruel was present at the trial of Judge Bridlegoose, who decided causes and controversies in law by the chance and fortune of the dice.
+Chapter 3.XXXV. How the philosopher Trouillogan handleth the difficulty of marriage.
+Chapter 3.XXXVI. A continuation of the answer of the Ephectic and Pyrrhonian philosopher Trouillogan.
+Chapter 3.XXXVII. How Pantagruel persuaded Panurge to take counsel of a fool.
+Chapter 3.XXXVIII. How Triboulet is set forth and blazed by Pantagruel and Panurge.
--- a/static/docs/gargantua_book/gargantua_chapter_4.txt
+++ b/static/docs/gargantua_book/gargantua_chapter_4.txt
+Chapter 4.I. How Pantagruel went to sea to visit the oracle of Bacbuc, alias the Holy Bottle.
+Chapter 4.II. How Pantagruel bought many rarities in the island of Medamothy.
+Chapter 4.III. How Pantagruel received a letter from his father Gargantua, and of the strange way to have speedy news from far distant places.
+Chapter 4.IV. How Pantagruel writ to his father Gargantua, and sent him several curiosities.
+Chapter 4.IX. How Pantagruel arrived at the island of Ennasin, and of the strange ways of being akin in that country.
+Chapter 4.L. How Homenas showed us the archetype, or representation of a pope.
+Chapter 4.LI. Table-talk in praise of the decretals.
+Chapter 4.LII. A continuation of the miracles caused by the decretals.
+Chapter 4.LIII. How, by the virtue of the decretals, gold is subtilely drawn out of France to Rome.
+Chapter 4.LIV. How Homenas gave Pantagruel some bon-Christian pears.
+Chapter 4.LIX. Of the ridiculous statue Manduce; and how and what the Gastrolaters sacrifice to their ventripotent god.
+Chapter 4.LV. How Pantagruel, being at sea, heard various unfrozen words.
+Chapter 4.LVI. How among the frozen words Pantagruel found some odd ones.
+Chapter 4.LVII. How Pantagruel went ashore at the dwelling of Gaster, the first master of arts in the world.
+Chapter 4.LVIII. How, at the court of the master of ingenuity, Pantagruel detested the Engastrimythes and the Gastrolaters.
+Chapter 4.LX. What the Gastrolaters sacrificed to their god on interlarded fish-days.
+Chapter 4.LXI. How Gaster invented means to get and preserve corn.
+Chapter 4.LXII. How Gaster invented an art to avoid being hurt or touched by cannon-balls.
+Chapter 4.LXIII. How Pantagruel fell asleep near the island of Chaneph, and of the problems proposed to be solved when he waked.
+Chapter 4.LXIV. How Pantagruel gave no answer to the problems.
+Chapter 4.LXV. How Pantagruel passed the time with his servants.
+Chapter 4.LXVI. How, by Pantagruel's order, the Muses were saluted near the isle of Ganabim.
+Chapter 4.LXVII. How Panurge berayed himself for fear; and of the huge cat Rodilardus, which he took for a puny devil.
+Chapter 4.V. How Pantagruel met a ship with passengers returning from Lantern-land.
+Chapter 4.VI. How, the fray being over, Panurge cheapened one of Dingdong's sheep.
+Chapter 4.VII. Which if you read you'll find how Panurge bargained with Dingdong.
+Chapter 4.VIII. How Panurge caused Dingdong and his sheep to be drowned in the sea.
+Chapter 4.X. How Pantagruel went ashore at the island of Chely, where he saw King St. Panigon.
+Chapter 4.XI. Why monks love to be in kitchens.
+Chapter 4.XII. How Pantagruel passed by the land of Pettifogging, and of the strange way of living among the Catchpoles.
+Chapter 4.XIII. How, like Master Francis Villon, the Lord of Basche commended his servants.
+Chapter 4.XIV. A further account of catchpoles who were drubbed at Basche's house.
+Chapter 4.XIX. What countenances Panurge and Friar John kept during the storm.
+Chapter 4.XL. How Friar John fitted up the sow; and of the valiant cooks that went into it.
+Chapter 4.XLI. How Pantagruel broke the Chitterlings at the knees.
+Chapter 4.XLII. How Pantagruel held a treaty with Niphleseth, Queen of the Chitterlings.
+Chapter 4.XLIII. How Pantagruel went into the island of Ruach.
+Chapter 4.XLIV. How small rain lays a high wind.
+Chapter 4.XLIX. How Homenas, Bishop of Papimany, showed us the Uranopet decretals.
+Chapter 4.XLV. How Pantagruel went ashore in the island of Pope-Figland.
+Chapter 4.XLVI. How a junior devil was fooled by a husbandman of Pope-Figland.
+Chapter 4.XLVII. How the devil was deceived by an old woman of Pope-Figland.
+Chapter 4.XLVIII. How Pantagruel went ashore at the island of Papimany.
+Chapter 4.XV. How the ancient custom at nuptials is renewed by the catchpole.
+Chapter 4.XVI. How Friar John made trial of the nature of the catchpoles.
+Chapter 4.XVII. How Pantagruel came to the islands of Tohu and Bohu; and of the strange death of Wide-nostrils, the swallower of windmills.
+Chapter 4.XVIII. How Pantagruel met with a great storm at sea.
+Chapter 4.XX. How the pilots were forsaking their ships in the greatest stress of weather.
+Chapter 4.XXI. A continuation of the storm, with a short discourse on the subject of making testaments at sea.
+Chapter 4.XXII. An end of the storm.
+Chapter 4.XXIII. How Panurge played the good fellow when the storm was over.
+Chapter 4.XXIV. How Panurge was said to have been afraid without reason during the storm.
+Chapter 4.XXIX. How Pantagruel sailed by the Sneaking Island, where Shrovetide reigned.
+Chapter 4.XXV. How, after the storm, Pantagruel went on shore in the islands of the Macreons.
+Chapter 4.XXVI. How the good Macrobius gave us an account of the mansion and decease of the heroes.
+Chapter 4.XXVII. Pantagruel's discourse of the decease of heroic souls; and of the dreadful prodigies that happened before the death of the late Lord de Langey.
+Chapter 4.XXVIII. How Pantagruel related a very sad story of the death of the heroes.
+Chapter 4.XXX. How Shrovetide is anatomized and described by Xenomanes.
+Chapter 4.XXXI. Shrovetide's outward parts anatomized.
+Chapter 4.XXXII. A continuation of Shrovetide's countenance.
+Chapter 4.XXXIII. How Pantagruel discovered a monstrous physeter, or whirlpool, near the Wild Island.
+Chapter 4.XXXIV. How the monstrous physeter was slain by Pantagruel.
+Chapter 4.XXXIX. How Friar John joined with the cooks to fight the Chitterlings.
+Chapter 4.XXXV. How Pantagruel went on shore in the Wild Island, the ancient abode of the Chitterlings.
+Chapter 4.XXXVI. How the wild Chitterlings laid an ambuscado for Pantagruel.
+Chapter 4.XXXVII. How Pantagruel sent for Colonel Maul-chitterling and Colonel Cut-pudding; with a discourse well worth your hearing about the names of places and persons.
+Chapter 4.XXXVIII. How Chitterlings are not to be slighted by men.
--- a/static/docs/gargantua_book/gargantua_chapter_5.txt
+++ b/static/docs/gargantua_book/gargantua_chapter_5.txt
+Chapter 5.I. How Pantagruel arrived at the Ringing Island, and of the noise that we heard.
+Chapter 5.II. How the Ringing Island had been inhabited by the Siticines, who were become birds.
+Chapter 5.III. How there is but one pope-hawk in the Ringing Island.
+Chapter 5.IV. How the birds of the Ringing Island were all passengers.
+Chapter 5.IX. How we arrived at the island of Tools.
+Chapter 5.V. Of the dumb Knight-hawks of the Ringing Island.
+Chapter 5.VI. How the birds are crammed in the Ringing Island.
+Chapter 5.VII. How Panurge related to Master Aedituus the fable of the horse and the ass.
+Chapter 5.VIII. How with much ado we got a sight of the pope-hawk.
+Chapter 5.X. How Pantagruel arrived at the island of Sharping.
+Chapter 5.XI. How we passed through the wicket inhabited by Gripe-men-all, Archduke of the Furred Law-cats.
+Chapter 5.XII. How Gripe-men-all propounded a riddle to us.
+Chapter 5.XIII. How Panurge solved Gripe-men-all's riddle.
+Chapter 5.XIV. How the Furred Law-cats live on corruption.
+Chapter 5.XIX. How we arrived at the queendom of Whims or Entelechy.
+Chapter 5.XL. How the battle in which the good Bacchus overthrew the Indians was represented in mosaic work.
+Chapter 5.XLI. How the temple was illuminated with a wonderful lamp.
+Chapter 5.XLII. How the Priestess Bacbuc showed us a fantastic fountain in the temple, and how the fountain-water had the taste of wine, according to the imagination of those who drank of it.
+Chapter 5.XLIII. How the Priestess Bacbuc equipped Panurge in order to have the word of the Bottle.
+Chapter 5.XLIV. How Bacbuc, the high-priestess, brought Panurge before the Holy Bottle.
+Chapter 5.XLV. How Bacbuc explained the word of the Goddess-Bottle.
+Chapter 5.XLVI. How Panurge and the rest rhymed with poetic fury.
+Chapter 5.XLVII. How we took our leave of Bacbuc, and left the Oracle of the Holy Bottle.
+Chapter 5.XV. How Friar John talks of rooting out the Furred Law-cats.
+Chapter 5.XVI. How Pantagruel came to the island of the Apedefers, or Ignoramuses, with long claws and crooked paws, and of terrible adventures and monsters there.
+Chapter 5.XVII. How we went forwards, and how Panurge had like to have been killed.
+Chapter 5.XVIII. How our ships were stranded, and we were relieved by some people that were subject to Queen Whims (qui tenoient de la Quinte).
+Chapter 5.XX. How the Quintessence cured the sick with a song.
+Chapter 5.XXI. How the Queen passed her time after dinner.
+Chapter 5.XXII. How Queen Whims' officers were employed; and how the said lady retained us among her abstractors.
+Chapter 5.XXIII. How the Queen was served at dinner, and of her way of eating.
+Chapter 5.XXIV. How there was a ball in the manner of a tournament, at which Queen Whims was present.
+Chapter 5.XXIX. How Epistemon disliked the institution of Lent.
+Chapter 5.XXV. How the thirty-two persons at the ball fought.
+Chapter 5.XXVI. How we came to the island of Odes, where the ways go up and down.
+Chapter 5.XXVII. How we came to the island of Sandals; and of the order of Semiquaver Friars.
+Chapter 5.XXVIII. How Panurge asked a Semiquaver Friar many questions, and was only answered in monosyllables.
+Chapter 5.XXX. How we came to the land of Satin.
+Chapter 5.XXXI. How in the land of Satin we saw Hearsay, who kept a school of vouching.
+Chapter 5.XXXII. How we came in sight of Lantern-land.
+Chapter 5.XXXIII. How we landed at the port of the Lychnobii, and came to Lantern-land.
+Chapter 5.XXXIV. How we arrived at the Oracle of the Bottle.
+Chapter 5.XXXIX. How we saw Bacchus's army drawn up in battalia in mosaic work.
+Chapter 5.XXXV. How we went underground to come to the Temple of the Holy Bottle, and how Chinon is the oldest city in the world.
+Chapter 5.XXXVI. How we went down the tetradic steps, and of Panurge's fear.
+Chapter 5.XXXVII. How the temple gates in a wonderful manner opened of themselves.
+Chapter 5.XXXVIII. Of the temple's admirable pavement.
--- a/static/img/Gargantextuel-212x300.jpg
+++ b/static/img/Gargantextuel-212x300.jpg
--- a/templates/corpus.html
+++ b/templates/corpus.html
@@ -42,12 +42,12 @@
 							</p>
 							{% endif %}

-							<!--						<a class="btn btn-primary btn-lg" role="button" href="/admin/documents/corpus/{{ corpus.pk }}/">Add file</a> -->
-						<a class="btn btn-primary btn-lg" role="button" href="/project/{{project.pk}}/corpus/{{ corpus.pk }}/corpus.csv">Save as</a>
-						<a class="btn btn-primary btn-lg" role="button" href="/project/{{project.pk}}/corpus/{{ corpus.pk }}/delete">Delete</a></p>
+							<!--						<a class="btn btn-primary btn-lg" role="button" href="/admin/documents/corpus/{{ corpus.id }}/">Add file</a> -->
+						<a class="btn btn-primary btn-lg" role="button" href="/project/{{project.id}}/corpus/{{ corpus.id }}/corpus.csv">Save as</a>
+						<a class="btn btn-primary btn-lg" role="button" href="/delete/{{ corpus.id }}">Delete</a></p>

 						{% if number == 0 %}
-						<a class="btn btn-primary btn-lg" role="button" href="/admin/documents/corpus/{{ corpus.pk }}/">Add documents</a></p>
+						<a class="btn btn-primary btn-lg" role="button" href="/admin/documents/corpus/{{ corpus.id }}/">Add documents</a></p>
 						{% endif %}
 	
 		</div>

--- a/templates/explorer.html
+++ b/templates/explorer.html
@@ -260,8 +260,29 @@

        </div>
        
-        
        <div id="topPapers"></div>
+        <!--
+        <div id="tab-container-top" class='tab-container'>
+
+           <ul class='etabs'>
+             <li id="tabmed" class='tab active'><a href="#tabs3">Medline Pubs</a></li>
+             <li id="tabgps" class='tab'><a href="#tabs4">+</a></li>
+           </ul>
+
+           <div class='panel-container'>
+              <div id="tabs3">
+                <div id="topPapers"></div>
+              </div>
+              <div id="tabs4">
+                <div id="topProposals"></div>
+              </div>
+           </div>
+        </div>
+        -->
+
+
+
+
        <div id="information"></div>
      </div>


--- a/templates/home.html
+++ b/templates/home.html
@@ -19,18 +19,16 @@
 						<div class="col-md-4 content">
 						<h1>Gargantext</h1>
 						<p>A web platform to explore text-mining</p>
-						<a class="btn btn-primary btn-lg" href="/projects">Test Gargantext</a>
+						<a class="btn btn-primary btn-lg" href="/projects" title="Click and test by yourself">Test Gargantext</a>
 						</div>
-						<div class="col-md-3 content">
-						</div>
-						<div class="col-md-5 content">
-								<!--
-								<h3>Project Manager:</h3> 
-								<h4><a href="http://alexandre.delanoe.org" target="blank">Alexandre Delanoë</a></h4>
-								<h3>Scientific board:</h3> 
-								<h4><a href="http://chavalarias.com" target="blank">David Chavalarias</a> and <a href="http://alexandre.delanoe.org" target="blank">Alexandre Delanoë</a></h4>
-						<h3><a href="/about/#collapseTeam" target="blank">Thanks to all the team</a></h3>
-						--!>
+						<div class="col-md-2 content"></div>
+						<div class="col-md-2 content"></div>
+						<div class="col-md-2 content">
+								<p class="right">
+										<div style="border:15px">
+												<img src="{% static "img/logo.png"%}" title="Logo designed by anoe" style="height:150px; border:3px solid white">
+										</div>
+								</p>
 						</div>
 				</div>
        </div>
@@ -39,7 +37,7 @@
 				<div class="row">
 						<div class="content">
 						<center>
-						<img src="{% static "img/logo.png"%}" alt="Logo Gargantext" style="100px; height:150px">
+						<img src="{% static "img/Gargantextuel-212x300.jpg"%}" title="Gargantextuel drawn by Cecile Meadel" style="border:2px solid black">

 						<!--
 						<h2>Introduction Video</h2>
@@ -63,57 +61,23 @@
 <div class="row">

 <div class="col-md-4 content">
-				<h3><a href="#">Historic</a></h3>
-												<p>
-
-    Chapter 1.VI. -- How Gargantua was born in a strange manner.
-    Chapter 2.XXIII. -- How Pantagruel departed from Paris, hearing
-    news that the Dipsodes had invaded the land of the Amaurots; and
-    the cause wherefore the leagues are so short in France. Chapter
-    3.XLVI. -- How Pantagruel and Panurge diversely interpret the
-    words of Triboulet. Chapter 4.LV. -- How Pantagruel, being at sea,
-    heard various unfrozen words. Chapter 5.IX. -- How we arrived at
-    the island of Tools.
-
-												</p>
+				<h3><a href="#" title="Random sentences in Gargantua's Books chapters, historically true">Historic</a></h3>
+												<p> {{ paragraph_gargantua }}</p>
 										</div>

 <div class="col-md-4 content">
-<h3><a href="#">Presentation</a></h3>
-												<p>
-
-										Lorem ipsum dolor sit amet, consectetur adipiscing elit,
-				sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
-				Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris
-				nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in
-				reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla
-				pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
-				culpa qui officia deserunt mollit anim id est laborum.
-
+<h3><a href="#" title="Randomized words, semantically and syntactically false." >Presentation</a></h3>
+												<p> {{ paragraph_lorem }}
 												</p>
 										</div>

  
 <div class="col-md-4 content">
-<h3><a href="#">Tutoreil</a></h3>
+<h3><a href="#" title="Randomized letters, true or false ?">Tutoreil</a></h3>
 												<p>
+                        {{ paragraph_tutoreil }}
 												<!-- Why not French ? -->
 												<!-- find Cambridge source which inspired this --!>
-
-        Il praaît que l'odrre des ltetres dnas un mot n'a pas
-        d'iprnorotncae. La pmeirère et la drenèire letrte diovent
-        êrte à la bnnoe pclae. Le rsete peut êrte dnas un dsérorde
-        ttoal et on puet tujoruos lrie snas poribême. On ne lit
-        donc pas chuaqe ltetre en elle-mmêe, mias le mot cmome un
-        tuot. Un chnagmnet de réfretniel et nuos tarnsposns ce
-        rselutat au txete lui-mmêe: l'odrre des mtos est faiblement
-        imoprtnat copmraé au cnotxete du txete qui, lui, est copmté:
-        comptexter avec Gargantext.
-
-												
-												
-												
-												
 												</p>
 										</div>
        

--- a/templates/menu.html
+++ b/templates/menu.html
@@ -17,16 +17,16 @@
            <span class="icon-bar"></span>
            <span class="icon-bar"></span>
          </button>
-					<a class="navbar-brand" style="line-height:15px; height:10px; padding: 10px 10px;" href="/"><img src="/img/logo.svg"></a>
+					<a class="navbar-brand" style="line-height:15px; height:10px; padding: 10px 10px;" href="/"><img src="/img/logo.svg" title="Back to home."></a>
        </div>
        
        <div class="navbar-collapse collapse">
          <ul class="nav navbar-nav">
 							<!-- <li><a href="/admin/">Admin/</a></li> --!>
-            <li><a href="/about/">About</a>
+            <li><a href="/about/" title="More information about the project, its sponsors and its authors.">About</a>
 						</li>
 						{% if user.is_authenticated %}
-            <li><a href="/projects/">Projects</a></li>
+            <li><a href="/projects/" title="All your projects are here.">Projects</a></li>
 						{% endif %}
 						{% if project %}
 						<li><a href="/project/{{project.id}}">{{project.name}}</a></li>
@@ -40,14 +40,14 @@

 				<ul class="nav pull-right">
 						<li class="dropdown">
-								<a href="#" role="button" class="dropdown-toggle" data-toggle="dropdown"><i class="icon-user"></i> {{ user }}<i class="caret"></i>
+								<a href="#" role="button" class="dropdown-toggle" data-toggle="dropdown" title="That is your login"><i class="icon-user"></i> {{ user }}<i class="caret"></i>

 								</a>
 								<ul class="dropdown-menu">
-										<li><a tabindex="-1" href="http://www.iscpif.fr/tiki-index.php?page=gargantext_feedback" target="blank" >Report Feedback</a></li>
+										<li><a tabindex="-1" href="http://www.iscpif.fr/tiki-index.php?page=gargantext_feedback" title="Send us a message (bug, thanks, congrats...)">Report Feedback</a></li>
 										<li class="divider"></li>
 								{% if user.is_authenticated %}
-										<li><a tabindex="-1" href="/auth/logout">Logout</a></li>
+										<li><a tabindex="-1" href="/auth/logout" title="Click here to logout especially on public devices">Logout</a></li>
 										{% else %}
 										<li><a tabindex="-1" href="/auth/">Login</a></li>
 								{% endif %}
@@ -66,8 +66,8 @@
 <hr>

 <footer>
-		<p>Gargantext, version 1.0.6, <a href="http://www.cnrs.fr" target="blank">Copyrights CNRS {{ date.year }}</a>, 
-		<a href="http://www.gnu.org/licenses/agpl-3.0.html" target="blank">Licence aGPLV3</a>.</p>
+		<p>Gargantext, version 1.0.6, <a href="http://www.cnrs.fr" target="blank" title="Institution that enables this project.">Copyrights CNRS {{ date.year }}</a>, 
+		<a href="http://www.gnu.org/licenses/agpl-3.0.html" target="blank" title="Legal instructions of the project.">Licence aGPLV3</a>.</p>
 </footer>



--- a/templates/project.html
+++ b/templates/project.html
@@ -84,19 +84,16 @@
 												<ul>
 														{% for corpus in corpora %}
 														<li> {% ifnotequal corpus.count 0 %}
-																		<a href="/project/{{project.id}}/corpus/{{corpus.id}}"> 
-																			{{corpus.name}}
-																		</a>
-																		, {{ corpus.count }} Documents 
-																 {% else %}
-																 {{corpus.name}} : <img width="20px" src="{% static "js/libs/img2/loading-bar.gif" %}"></img> Processing, drink a cup of tea, and refresh the page :)
-																 {% endifnotequal %}
+																		<a href="/project/{{project.id}}/corpus/{{corpus.id}}">  {{corpus.name}} </a> , {{ corpus.count }} Documents 
+															 {% else %}
+																 	{{corpus.name}} : <img width="20px" src="{% static "js/libs/img2/loading-bar.gif" %}"></img> Processing, drink a cup of tea, and refresh the page :)
+															 {% endifnotequal %}
 																		<button type="button" class="btn btn-xs btn-default" data-container="body" data-toggle="popover" data-placement="bottom" 
 																		data-content='
 																		<ul>
 																		<li> Rename </li>
 																		<li> Add new documents </li>
-																		<li><a href="/project/{{ project.id }}/corpus/{{ corpus.id}}/delete">Delete</a></li>
+																		<li><a href="/delete/{{corpus.id}}">Delete</a></li>
 																		</ul>
 																		'>Manage</button>
 																</li>
@@ -330,7 +327,7 @@
 					console.log("enabling "+"#"+value.id)
 					$("#"+value.id).attr('onclick','getGlobalResults(this);');
 					// $("#submit_thing").prop('disabled' , false)
-					$("#submit_thing").html("Process a 100 sample!")
+					$("#submit_thing").html("Process a 1000 sample!")

 		            thequeries = data
 		            var N=0,k=0;
@@ -427,8 +424,8 @@
 		//CSS events for changing the Select element
 		function CustomForSelect( selected ) {
 			// show Radio-Inputs and trigger FileOrNotFile>@upload-file events
-			//if(selected=="pubmed" || selected=="istext") {
-			if(selected=="pubmed") {
+			if(selected=="pubmed" || selected=="istext") {
+			// if(selected=="pubmed") {
 				console.log("show the button for: "+selected)
 				$("#pubmedcrawl").css("visibility", "visible"); 
 				$("#pubmedcrawl").show();

--- a/templates/projects.html
+++ b/templates/projects.html
@@ -44,7 +44,7 @@
 																		<ul>
 																		<li> Rename </li>
 																		<li> Add new corpus </li>
-																		<li><a href="/project/{{ project.id }}/delete">Delete</a></li>
+																		<li><a href="/delete/{{ project.id }}">Delete</a></li>
 																		</ul>
 																		'>Manage</button>


--- a/templates/subcorpus.html
+++ b/templates/subcorpus.html
@@ -19,18 +19,21 @@

 {% if documents %}

-
-          <div id="delAll" style="visibility: hidden;">
-              <button onclick="deleteDuplicates(theurl);">Delete Duplicates</button>
-          </div>
-
-
 <ul>
 {% for doc in documents %}
    {% if doc.date %}
    <li><div id="doc_{{doc.id}}"> <b>{{ doc.date }}</b>: <a target="_blank" href="/nodeinfo/{{doc.id}}">{{ doc.name}}</a> , @ {{ doc.metadata.source}}</div></li>
    {% endif %}
 {% endfor %}
+
+
+          <div id="delAll" style="visibility: hidden;">
+							<center>
+              <button onclick="deleteDuplicates(theurl);">Delete all Duplicates in one click</button>
+							</center>
+          </div>
+
+
 </ul>

 <script>

--- a/test_db.py
+++ b/test_db.py
+# Without this, we couldn't use the Django environment
+import os
+os.environ.setdefault("DJANGO_SETTINGS_MODULE", "gargantext_web.settings")
+os.environ.setdefault("DJANGO_HSTORE_GLOBAL_REGISTER", "False")
+
+# database tools
+from node import models
+from gargantext_web.db import *
+from parsing.corpustools import *
+
+
+
+
+user = session.query(User).first()
+
+project = session.query(Node).filter(Node.name == 'A').first()
+
+corpus = Node(
+    parent_id = project.id,
+    name = 'Test 456',
+    type_id = cache.NodeType['Corpus'].id,
+    user_id = user.id,
+)
+
+session.add(corpus)
+session.commit()
+
+add_resource(corpus,
+    # file = './data_samples/pubmed_result.xml',
+    file = '/srv/gargantext_lib/data_samples/pubmed_2013-04-01_HoneyBeesBeeBees.xml',
+    type_id = cache.ResourceType['pubmed'].id,
+)
+parse_resources(corpus)
+extract_ngrams(corpus, ('title', ))
+
+
+
+# print(corpus)
+# corpus = session.query(Node).filter(Node.id == 72771).first()
+# corpus = session.query(Node).filter(Node.id == 73017).first()
+compute_tfidf(corpus)