Commit 042591de authored by Administrator

Merge branch 'stable-jsonb' into tina-jsonb

parents 2b67af45 63d15bd8
import networkx as nx
from itertools import combinations

class Utils:
    def __init__(self):
        self.G = nx.Graph()

    def unique(self, a):
        """ return the list with duplicate elements removed """
        return list(set(a))

    def intersect(self, a, b):
        """ return the intersection of two lists """
        return list(set(a) & set(b))

    def union(self, a, b):
        """ return the union of two lists """
        return list(set(a) | set(b))

    def addCompleteSubGraph(self, terms):
        G = self.G
        # <addnode> #
        for i in terms:
            G.add_node(i)
        # </addnode> #
        # <addedge> #
        for n1, n2 in combinations(terms, 2):
            if G.has_edge(n1, n2):
                G[n1][n2]['weight'] += 1.0
            else:
                G.add_edge(n1, n2, weight=1.0)
        # </addedge> #
        self.G = G
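For reference, a quick illustration of how addCompleteSubGraph accumulates co-occurrence weights (the terms below are made up, not from the diff):

# Illustrative usage: each call adds a clique over the given terms;
# repeated co-occurrences increment the edge weight.
u = Utils()
u.addCompleteSubGraph(['cell', 'gene', 'protein'])
u.addCompleteSubGraph(['cell', 'gene'])
print(u.G['cell']['gene'])     # {'weight': 2.0}
print(u.G['cell']['protein'])  # {'weight': 1.0}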
from gargantext_web.db import *

from collections import defaultdict

from django.db import connection, transaction
@@ -13,29 +11,26 @@ def create_blacklist(user, corpus):
def create_synonymes(user, corpus):
    pass
def create_whitelist(user, corpus_id, size=100):
    cursor = connection.cursor()

    whitelist_type_id = cache.NodeType['WhiteList'].id
    blacklist_type_id = cache.NodeType['BlackList'].id
    type_document_id = cache.NodeType['Document'].id

    white_list = Node(name='WhiteList Corpus ' + str(corpus_id), user_id=user.id, parent_id=corpus_id, type_id=whitelist_type_id)
    black_list = Node(name='BlackList Corpus ' + str(corpus_id), user_id=user.id, parent_id=corpus_id, type_id=blacklist_type_id)
    session.add(white_list)
    session.add(black_list)
    session.commit()
    # delete beforehand to avoid duplicates
    # try:
    #     Node_Ngram.objects.filter(node=white_list).all().delete()
    # except:
    #     print('First time we compute cooc')
    #
    query_whitelist = """
    INSERT INTO node_node_ngram (node_id, ngram_id, weight)
    SELECT
@@ -67,28 +62,29 @@ def create_whitelist(user, corpus, size=100):
    LIMIT
        %d
    ;
    """ % (white_list.id, int(corpus_id), int(type_document_id), size)
    # print("PRINTING QUERY OF WHITELIST:")
    # print(query_whitelist)
    cursor.execute(query_whitelist)
    return white_list
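A hedged usage sketch (the corpus id is made up): the returned node is the whitelist that create_cooc below expects.

# Illustrative call, not part of the diff:
# whitelist = create_whitelist(request.user, corpus_id=42, size=100)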
#def create_cooc(user, corpus, whitelist, blacklist, synonymes):
def create_cooc(user=None, corpus_id=None, whitelist=None, size=150, year_start=None, year_end=None):
    cursor = connection.cursor()

    cooc_type_id = cache.NodeType['Cooccurrence'].id

    # for the tests, delete the old coocs
    #session.Node.objects.filter(type=cooc_type, parent=corpus).delete()

    cooc = Node(user_id=user.id,
                parent_id=corpus_id,
                type_id=cooc_type_id,
                name="Cooccurrences corpus " + str(corpus_id))

    session.add(cooc)
    session.commit()
    query_cooc = """
    INSERT INTO node_nodengramngram (node_id, "ngramx_id", "ngramy_id", score)
@@ -133,10 +129,11 @@ def create_cooc(user=None, corpus=None, whitelist=None, size=150, year_start=Non
        score DESC
    LIMIT
        %d
    """ % (cooc.id, corpus_id, whitelist.id, whitelist.id, size)
    # print(query_cooc)
    cursor.execute(query_cooc)
    return cooc.id
def get_cooc(request=None, corpus_id=None, cooc_id=None, type='node_link', n=150):
    import pandas as pd
@@ -153,36 +150,37 @@ def get_cooc(request=None, corpus_id=None, cooc_id=None, type='node_link', n=150
    labels = dict()
    weight = dict()

    type_cooc_id = cache.NodeType['Cooccurrence'].id

    if session.query(Node).filter(Node.type_id==type_cooc_id, Node.parent_id==corpus_id).first() is None:
        print("Cooccurrences do not exist yet; creating them.")
        whitelist = create_whitelist(request.user, corpus_id=corpus_id, size=n)
        cooccurrence_node_id = create_cooc(user=request.user, corpus_id=corpus_id, whitelist=whitelist, size=n)
    else:
        cooccurrence_node_id = session.query(Node.id).filter(Node.type_id==type_cooc_id, Node.parent_id==corpus_id).first()

    for cooccurrence in session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==cooccurrence_node_id).all():
        labels[cooccurrence.ngramx_id] = session.query(Ngram.terms).filter(Ngram.id == cooccurrence.ngramx_id).first()[0]
        labels[cooccurrence.ngramy_id] = session.query(Ngram.terms).filter(Ngram.id == cooccurrence.ngramy_id).first()[0]

        ids[labels[cooccurrence.ngramx_id]] = cooccurrence.ngramx_id
        ids[labels[cooccurrence.ngramy_id]] = cooccurrence.ngramy_id

        matrix[cooccurrence.ngramx_id][cooccurrence.ngramy_id] = cooccurrence.score
        matrix[cooccurrence.ngramy_id][cooccurrence.ngramx_id] = cooccurrence.score

        weight[cooccurrence.ngramx_id] = weight.get(cooccurrence.ngramx_id, 0) + cooccurrence.score
        weight[cooccurrence.ngramy_id] = weight.get(cooccurrence.ngramy_id, 0) + cooccurrence.score
    df = pd.DataFrame(matrix).fillna(0)
    x = copy(df.values)
    x = x / x.sum(axis=1)
    # import pprint
    # pprint.pprint(ids)

    # Removing unconnected nodes
    threshold = min(x.max(axis=1))
    matrix_filtered = np.where(x >= threshold, 1, 0)
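    # Explanatory note (not in the original diff): x.max(axis=1) is each
    # node's strongest normalized link, so taking the minimum of those as
    # the threshold keeps at least one edge per node before binarizing.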
@@ -191,32 +189,41 @@ def get_cooc(request=None, corpus_id=None, cooc_id=None, type='node_link', n=150
    G = nx.from_numpy_matrix(matrix_filtered)
    G = nx.relabel_nodes(G, dict(enumerate([labels[label] for label in list(df.columns)])))
    #G = nx.relabel_nodes(G, dict(enumerate(df.columns)))

    # Removing too connected nodes (find automatic way to do it)
    # outdeg = G.degree()
    # to_remove = [n for n in outdeg if outdeg[n] >= 10]
    # G.remove_nodes_from(to_remove)

    partition = best_partition(G)
    if type == "node_link":
        for node in G.nodes():
            try:
                #node, type(labels[node])
                G.node[node]['pk'] = ids[node]
                G.node[node]['label'] = node
                # G.node[node]['pk'] = ids[str(node)]
                G.node[node]['size'] = weight[ids[node]]
                G.node[node]['group'] = partition[node]
                # G.add_edge(node, "cluster " + str(partition[node]), weight=3)
                # G.node[node]['color'] = '19,180,300'
            except Exception as error:
                print("error01: ", error)
        data = json_graph.node_link_data(G)
        links = []
        i = 1
        for e in G.edges_iter():
            s = e[0]
            t = e[1]
            info = {"id": i, "source": ids[s], "target": ids[t]}
            # print(info)
            links.append(info)
            i += 1
        # print(data)
        data["links"] = links
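        # Shape of the payload (note added for clarity, based on networkx's
        # json_graph.node_link_data plus the attributes set above):
        #   {"nodes": [{"label": ..., "pk": ..., "size": ..., "group": ...}, ...],
        #    "links": [{"id": ..., "source": <ngram id>, "target": <ngram id>}, ...]}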
    elif type == "adjacency":
        for node in G.nodes():
@@ -227,48 +234,26 @@ def get_cooc(request=None, corpus_id=None, cooc_id=None, type='node_link', n=150
                #G.node[node]['size'] = weight[node]
                G.node[node]['group'] = partition[node]
                #G.add_edge(node, partition[node], weight=3)
                # G.node[node]['color'] = '19,180,300'
            except Exception as error:
                print("error02: ", error)
        data = json_graph.node_link_data(G)
    # data = json_graph.node_link_data(G, attrs={\
    #     'source': 'source',\
    #     'target': 'target',\
    #     'weight': 'weight',\
    #     #'label': 'label',\
    #     #'color': 'color',\
    #     'id': 'id',})
    #print(data)
    return data
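A minimal usage sketch (request and corpus id are illustrative, not from the diff):

# Illustrative call: build the node-link payload for corpus 42,
# limited to the 150 heaviest ngrams.
# data = get_cooc(request=request, corpus_id=42, type='node_link', n=150)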
from analysis.tfidf import tfidf

def do_tfidf(corpus, reset=True):
    # print("=========== doing tfidf ===========")
    with transaction.atomic():
        if reset==True:
            NodeNodeNgram.objects.filter(nodex=corpus).delete()
@@ -278,8 +263,7 @@ def do_tfidf(corpus, reset=True):
        # # for i in Node.objects.filter(parent=corpus, type=NodeType.objects.get(name="Document")):
        for document in Node.objects.filter(parent=corpus, type=NodeType.objects.get(name="Document")):
            # print("the doc:", document)
            for node_ngram in Node_Ngram.objects.filter(node=document):
                # print("\tngram:", node_ngram.ngram)
                try:
                    nnn = NodeNodeNgram.objects.get(nodex=corpus, nodey=document, ngram=node_ngram.ngram)
@@ -288,7 +272,7 @@ def do_tfidf(corpus, reset=True):
                    score = tfidf(corpus, document, node_ngram.ngram)
                    nnn = NodeNodeNgram(nodex=corpus, nodey=node_ngram.node, ngram=node_ngram.ngram, score=score)
                    nnn.save()
                    # print("\t\t", node_ngram.ngram, " : ", score)
            # print("- - - - - - - - - - \n")
    else:
        print("Only corpus is implemented yet; you passed:", type(corpus))
@@ -60,6 +60,7 @@ def tfidf(corpus, document, ngram):
        .filter(NodeNgram.ngram_id == ngram.id)\
        .count()

    # print("\t\t\t", "occs:", occurrences_of_ngram, " || ngramsbydoc:", ngrams_by_document, " || TF = occ/ngramsbydoc:", term_frequency, " |||||| x:", xx, " || y:", yy, " || IDF = log(x/y):", log(xx/yy))
    inverse_document_frequency = log(xx/yy)

    # result = tf * idf
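For intuition, a small worked example of the formula above (numbers are illustrative, not from the diff):

    # A term occurring 3 times in a 100-ngram document, found in 10 of
    # 1000 corpus documents (natural log):
    #   term_frequency             = 3 / 100        = 0.03
    #   inverse_document_frequency = log(1000 / 10) = 4.605...
    #   score = term_frequency * inverse_document_frequency = 0.138...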
from django.http import HttpResponseNotFound, HttpResponse, Http404
from django.core.exceptions import PermissionDenied, SuspiciousOperation
from django.core.exceptions import ValidationError
from django.core.urlresolvers import reverse

from django.db.models import Avg, Max, Min, Count, Sum

# from node.models import Language, ResourceType, Resource
@@ -10,8 +11,9 @@ from sqlalchemy import text, distinct
from sqlalchemy.sql import func
from sqlalchemy.orm import aliased

from gargantext_web.views import move_to_trash
from .db import *
from node import models
def DebugHttpResponse(data):
    return HttpResponse('<html><body style="background:#000;color:#FFF"><pre>%s</pre></body></html>' % (str(data), ))
@@ -45,10 +47,14 @@ _ngrams_order_columns = {
}
from rest_framework.authentication import SessionAuthentication, BasicAuthentication
from rest_framework.permissions import IsAuthenticated

from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework.exceptions import APIException as _APIException
class APIException(_APIException):
    def __init__(self, message, code=500):
        self.status_code = code
@@ -82,7 +88,7 @@ class NodesChildrenNgrams(APIView):
    def get(self, request, node_id):
        # query ngrams
        ParentNode = aliased(Node)
        ngrams_query = (session
            .query(Ngram.terms, func.count().label('count'))
            # .query(Ngram.id, Ngram.terms, func.count().label('count'))
            .join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
@@ -128,7 +134,7 @@ class NodesChildrenDuplicates(APIView):
            raise APIException('Missing GET parameter: "keys"', 400)
        keys = request.GET['keys'].split(',')
        # metadata retrieval
        metadata_query = (session
            .query(Metadata)
            .filter(Metadata.name.in_(keys))
        )
@@ -187,6 +193,7 @@ class NodesChildrenDuplicates(APIView):
        # get the minimum ID for each of the nodes sharing the same metadata
        kept_node_ids_query = self._fetch_duplicates(request, node_id, [func.min(Node.id).label('id')], 0)
        kept_node_ids = [kept_node.id for kept_node in kept_node_ids_query]
        # TODO with new orm
        duplicate_nodes = models.Node.objects.filter(parent_id=node_id).exclude(id__in=kept_node_ids)
        # # delete the stuff
        # delete_query = (session
@@ -197,7 +204,7 @@ class NodesChildrenDuplicates(APIView):
        count = len(duplicate_nodes)
        for node in duplicate_nodes:
            print("deleting node ", node.id)
            move_to_trash(node.id)
        # print(delete_query)
        # # delete_query.delete(synchronize_session=True)
        # session.flush()
@@ -213,7 +220,7 @@ class NodesChildrenMetatadata(APIView):
        # query metadata keys
        ParentNode = aliased(Node)
        metadata_query = (session
            .query(Metadata)
            .join(Node_Metadata, Node_Metadata.metadata_id == Metadata.id)
            .join(Node, Node.id == Node_Metadata.node_id)
@@ -233,7 +240,7 @@ class NodesChildrenMetatadata(APIView):
            values_to = None
            if metadata.type != 'text':
                value_column = getattr(Node_Metadata, 'value_' + metadata.type)
                node_metadata_query = (session
                    .query(value_column)
                    .join(Node, Node.id == Node_Metadata.node_id)
                    .filter(Node.parent_id == node_id)
@@ -381,9 +388,9 @@ class NodesChildrenQueries(APIView):
        for field_name in fields_names:
            split_field_name = field_name.split('.')
            if split_field_name[0] == 'metadata':
                metadata = session.query(Metadata).filter(Metadata.name == split_field_name[1]).first()
                if metadata is None:
                    metadata_query = session.query(Metadata.name).order_by(Metadata.name)
                    metadata_names = [metadata.name for metadata in metadata_query.all()]
                    raise APIException('Invalid key for "%s" in parameter "field", should be one of the following values: "%s". "%s" was found instead' % (field[0], '", "'.join(metadata_names), field[1]), 400)
                # check or create Node_Metadata alias; join if necessary
@@ -422,7 +429,7 @@ class NodesChildrenQueries(APIView):
        )
        # starting the query!
        document_type_id = cache.NodeType['Document'].id ##session.query(NodeType.id).filter(NodeType.name == 'Document').scalar()
        query = (session
            .query(*fields_list)
            .select_from(Node)
@@ -451,9 +458,9 @@ class NodesChildrenQueries(APIView):
            #
            if field[0] == 'metadata':
                # which metadata?
                metadata = session.query(Metadata).filter(Metadata.name == field[1]).first()
                if metadata is None:
                    metadata_query = session.query(Metadata.name).order_by(Metadata.name)
                    metadata_names = [metadata.name for metadata in metadata_query.all()]
                    raise APIException('Invalid key for "%s" in parameter "field", should be one of the following values: "%s". "%s" was found instead' % (field[0], '", "'.join(metadata_names), field[1]), 400)
                # check or create Node_Metadata alias; join if necessary
@@ -475,7 +482,7 @@ class NodesChildrenQueries(APIView):
                ))
            elif field[0] == 'ngrams':
                query = query.filter(
                    Node.id.in_(session
                        .query(Node_Ngram.node_id)
                        .filter(Node_Ngram.ngram_id == Ngram.id)
                        .filter(operator(
@@ -549,11 +556,13 @@ class NodesChildrenQueries(APIView):
class NodesList(APIView):
    authentication_classes = (SessionAuthentication, BasicAuthentication)

    def get(self, request):
        print("user id : " + str(request.user))
        query = (session
            .query(Node.id, Node.name, NodeType.name.label('type'))
            .filter(Node.user_id == int(request.user.id))
            .join(NodeType)
        )
        if 'type' in request.GET:
@@ -576,8 +585,11 @@ class Nodes(APIView):
        return JsonHttpResponse({
            'id': node.id,
            'name': node.name,
            'parent_id': node.parent_id,
            'type': cache.NodeType[node.type_id].name,
            # 'type': node.type__name,
            #'metadata': dict(node.metadata),
            'metadata': node.metadata,
        })
    # deleting node by id
@@ -585,13 +597,19 @@ class Nodes(APIView):
    # it should take the subnodes into account as well,
    # for better consistency...
    def delete(self, request, node_id):
        user = request.user
        node = session.query(Node).filter(Node.id == node_id).first()

        msgres = str()
        try:
            move_to_trash(node_id)
            msgres = node_id + " moved to Trash"
        except Exception as error:
            msgres = "error deleting " + node_id + ": " + str(error)
class CorpusController:
@@ -602,9 +620,9 @@ class CorpusController:
            corpus_id = int(corpus_id)
        except:
            raise ValidationError('Corpora are identified by an integer.', 400)
        corpusQuery = session.query(Node).filter(Node.id == corpus_id).first()
        # print(str(corpusQuery))
        # raise Http404("404 error.")
        if not corpusQuery:
            raise Http404("No such corpus: %d" % (corpus_id, ))
        # corpusQuery is already a Node (or None), not a query: no .first() here
        corpus = corpusQuery
@@ -623,7 +641,7 @@ class CorpusController:
        # build query
        ParentNode = aliased(Node)
        query = (session
            .query(Ngram.terms, func.count('*'))
            .join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
            .join(Node, Node.id == Node_Ngram.node_id)
from gargantext_web import settings
from node import models
__all__ = ['literalquery', 'session', 'cache', 'Session', 'bulk_insert', 'engine', 'get_cursor']

# initialize sqlalchemy
from sqlalchemy.orm import Session, mapper
from sqlalchemy.ext.automap import automap_base
from sqlalchemy import create_engine, MetaData, Table, Column, ForeignKey
from sqlalchemy.types import Integer, String, DateTime
from sqlalchemy.dialects.postgresql import JSON

engine = create_engine('postgresql+psycopg2://{USER}:{PASSWORD}@{HOST}/{NAME}'.format(
    **settings.DATABASES['default']
))

Base = automap_base()
Base.prepare(engine, reflect=True)
# model representation
def model_repr(modelname):
    def _repr(obj):
        result = '<' + modelname
        isfirst = True
        for key, value in obj.__dict__.items():
            if key[0] != '_':
                value = repr(value)
                if len(value) > 64:
                    value = value[:30] + '....' + value[-30:]
                if isfirst:
                    isfirst = False
                else:
                    result += ','
                result += ' ' + key + '=' + value
        result += '>'
        return result
    return _repr
# map the Django models found in node.models to SQLAlchemy models
for model_name, model in models.__dict__.items():
    if hasattr(model, '_meta'):
        table_name = model._meta.db_table
        if hasattr(Base.classes, table_name):
            sqla_model = getattr(Base.classes, table_name)
            setattr(sqla_model, '__repr__', model_repr(model_name))
            globals()[model_name] = sqla_model
            __all__.append(model_name)

NodeNgram = Node_Ngram
NodeResource = Node_Resource
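A quick note on what the loop above yields (the query below is illustrative): each exported name, e.g. Node, is the automapped SQLAlchemy class for the matching Django table, so it is used through a SQLAlchemy session rather than the Django ORM.

# Illustrative usage, not part of the diff:
# session.query(Node).filter(Node.parent_id == 42).count()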
# debugging tool, to translate SQLAlchemy queries to string
@@ -61,16 +103,17 @@ def literalquery(statement, dialect=None):
# SQLAlchemy session management

def get_engine():
    from sqlalchemy import create_engine
    url = 'postgresql+psycopg2://{USER}:{PASSWORD}@{HOST}/{NAME}'.format(
        **settings.DATABASES['default']
    )
    return create_engine(url, use_native_hstore=True)

engine = get_engine()

def get_sessionmaker():
    from sqlalchemy.orm import sessionmaker
    return sessionmaker(bind=engine)

Session = get_sessionmaker()
@@ -84,7 +127,7 @@ from sqlalchemy import or_
class ModelCache(dict):

    def __init__(self, model, preload=False):
        self._model = globals()[model.__name__]
        self._columns_names = [column.name for column in model._meta.fields if column.unique]
        self._columns = [getattr(self._model, column_name) for column_name in self._columns_names]
        self._columns_validators = []
@@ -92,20 +135,16 @@ class ModelCache(dict):
            self.preload()

    def __missing__(self, key):
        #print(key)
        conditions = [
            (column == str(key))
            for column in self._columns
            if column.type.python_type == str or key.__class__ == column.type.python_type
        ]
        element = session.query(self._model).filter(or_(*conditions)).first()
        if element is None:
            raise KeyError
        self[key] = element
        return element
    def preload(self):
@@ -115,7 +154,7 @@ class ModelCache(dict):
                key = getattr(element, column_name)
                self[key] = element
class Cache():

    def __getattr__(self, key):
        try:
@@ -127,3 +166,50 @@ class Cache:
        return modelcache

cache = Cache()
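For reference, a sketch of how the cache is used elsewhere in this diff: each attribute is a ModelCache keyed on the model's unique columns, queried and memoized on first access.

# Illustrative usage (mirrors calls in api.py and views.py above):
# document_type_id = cache.NodeType['Document'].id
# user_id = cache.User[request.user.username].id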
# Insert many elements at once
import psycopg2

def get_cursor():
    db_settings = settings.DATABASES['default']
    db = psycopg2.connect(**{
        'database': db_settings['NAME'],
        'user': db_settings['USER'],
        'password': db_settings['PASSWORD'],
        'host': db_settings['HOST'],
    })
    return db, db.cursor()
class bulk_insert:
    def __init__(self, table, keys, data, cursor=None):
        # prepare the iterator
        self.iter = iter(data)
        # template
        self.template = '%s' + (len(keys) - 1) * '\t%s' + '\n'
        # prepare the cursor
        if cursor is None:
            db, cursor = get_cursor()
            mustcommit = True
        else:
            mustcommit = False
        # insert data
        if not isinstance(table, str):
            table = table.__table__.name
        cursor.copy_from(self, table, columns=keys)
        # commit if necessary
        if mustcommit:
            db.commit()

    def read(self, size=None):
        try:
            return self.template % tuple(
                str(x).replace('\r', '').replace('\n', '\\n').replace('\t', '\\t') for x in next(self.iter)
            )
        except StopIteration:
            return ''

    readline = read
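A hedged usage sketch: bulk_insert acts as a file-like object (read/readline) so that psycopg2's copy_from can stream the rows through PostgreSQL's COPY. The table and column names below come from the node_node_ngram inserts earlier in this diff; the values are made up.

# Illustrative usage (values are not from the diff):
# bulk_insert('node_node_ngram', ['node_id', 'ngram_id', 'weight'],
#             ((1, 10, 0.5), (1, 11, 1.0), (1, 12, 2.0)))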
import random
import random_words
from math import pi
def paragraph_lorem(size_target=450):
    '''
    Returns a paragraph of fake Latin ("lorem ipsum") text.
    size_target is the minimum length of the paragraph, in characters.
    '''
    lorem = random_words.LoremIpsum()
    sentences_list = lorem.get_sentences_list(sentences=5)
    paragraph_size = 0

    while paragraph_size < size_target:
        sentences_list.append(lorem.get_sentence())
        paragraph = ' '.join(sentences_list)
        paragraph_size = len(paragraph)

    return paragraph
def paragraph_gargantua(size_target=500):
    '''
    Returns a paragraph built from random chapter lines of Gargantua.
    size_target is the minimum length of the paragraph, in characters.
    '''
    paragraph = list()
    paragraph_size = 0
    chapter_number = 1

    while paragraph_size < size_target and chapter_number < 6:
        chapitre = open('/srv/gargantext/static/docs/gargantua_book/gargantua_chapter_' + str(chapter_number) + '.txt', 'r')
        paragraph.append(random.choice(chapitre.readlines()).strip())
        chapitre.close()
        paragraph_size = len(' '.join(paragraph))
        chapter_number += 1

    return ' '.join(paragraph)
def random_letter(mot, size_min=5):
    '''
    Shuffles the middle letters of a word longer than size_min,
    keeping the first and last letters in place.
    '''
    if len(mot) > size_min:
        size = round(len(mot) / pi)
        first_letters = mot[:size]
        last_letters = mot[-size:]

        others_letters = list(mot[size:-size])
        random.shuffle(others_letters)

        mot_list = list()
        mot_list.append(first_letters)
        for letter in others_letters:
            mot_list.append(letter)
        mot_list.append(last_letters)

        return ''.join(mot_list)
    else:
        return mot
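# English gist of the French string below (translation comment added for
# clarity; the literal itself is kept in French since it is displayed as-is):
# "Apparently the order of letters in a word does not matter, as long as the
# first and last letters are in the right place; the rest can be totally
# scrambled and we can still read without trouble. We therefore read the word
# as a whole, not letter by letter. Shifting frames, we transpose this to the
# text itself: word order matters little compared to the context of the text,
# which is what counts."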
tutoriel = """Il paraît que l'ordre des lettres dans un mot n'a pas d'importance. La première et la dernière lettre doivent être à la bonne place. Le reste peut être dans un désordre total et on peut toujours lire sans problème. On ne lit donc pas chaque lettre en elle-même, mais le mot comme un tout. Un changement de référentiel et nous transposons ce résultat au texte lui-même: l'ordre des mots est faiblement important comparé au contexte du texte qui, lui, est compté"""
def paragraph_tutoreil(tutoriel=tutoriel):
    '''
    Returns the tutorial paragraph with the middle letters
    of each word randomized.
    '''
    paragraph = ' '.join([random_letter(mot) for mot in tutoriel.split(" ")]) \
        + ": comptexter avec Gargantext."
    return paragraph
@@ -64,12 +64,11 @@ INSTALLED_APPS = (
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'django_extensions',
    'django_pg',
    'cte_tree',
    'node',
    'ngram',
    'scrap_pubmed',
    'djcelery',
    'aldjemy',
    'rest_framework',
@@ -84,6 +83,16 @@ MIDDLEWARE_CLASSES = (
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
)
REST_SESSION_LOGIN = False
REST_FRAMEWORK = {
    'DEFAULT_AUTHENTICATION_CLASSES': (
        'rest_framework.authentication.TokenAuthentication',
        'rest_framework.authentication.SessionAuthentication',
    ),
    'DEFAULT_PERMISSION_CLASSES': (
        'rest_framework.permissions.AllowAny',
    ),
}
WSGI_APPLICATION = 'wsgi.application'
@@ -3,7 +3,7 @@ from django.conf.urls import patterns, include, url
from django.contrib import admin
from django.contrib.auth.views import login

from gargantext_web import views, views_optimized
import gargantext_web.api
import scrap_pubmed.views as pubmedscrapper
@@ -20,22 +20,23 @@ urlpatterns = patterns('',
    url(r'^auth/$', views.login_user),
    url(r'^auth/logout/$', views.logout_user),

    # Dynamic CSS
    url(r'^img/logo.svg$', views.logo),
    url(r'^css/bootstrap.css$', views.css),

    # User Home view
    url(r'^$', views.home_view),
    url(r'^about/', views.get_about),
    url(r'^maintenance/', views.get_maintenance),

    # Project Management
    url(r'^projects/$', views.projects),
    url(r'^project/(\d+)/$', views_optimized.project),
    url(r'^delete/(\d+)$', views.delete_node), # => api.node('id' = id, children = 'True', copies = False)

    # Corpus management
    url(r'^project/(\d+)/corpus/(\d+)/$', views.corpus),
    url(r'^project/(\d+)/corpus/(\d+)/corpus.csv$', views.corpus_csv),
    url(r'^project/(\d+)/corpus/(tests_mvc_listdocuments+)/corpus.tests_mvc_listdocuments$', views.corpus_csv),
@@ -47,16 +48,19 @@ urlpatterns = patterns('',
    url(r'^project/(\d+)/corpus/(\d+)/matrix$', views.matrix),

    # Data management
    url(r'^chart/corpus/(\d+)/data.csv$', views.send_csv), # => api.node.children('type' : 'data', 'format' : 'csv')
    url(r'^corpus/(\d+)/node_link.json$', views.node_link), # => api.analysis('type': 'node_link', 'format' : 'json')
    url(r'^corpus/(\d+)/adjacency.json$', views.adjacency), # => api.analysis('type': 'adjacency', 'format' : 'json')

    url(r'^api/tfidf/(\d+)/(\w+)$', views_optimized.tfidf),
    # url(r'^api/tfidf/(\d+)/(\w+)$', views.tfidf),
    url(r'^api/tfidf2/(\d+)/(\w+)$', views.tfidf2),

    # Data management
    #url(r'^api$', gargantext_web.api.Root), # = ?
    url(r'^api/nodes$', gargantext_web.api.NodesList.as_view()),
    url(r'^api/nodes/(\d+)$', gargantext_web.api.Nodes.as_view()),
    url(r'^api/nodes/(\d+)/children/ngrams$', gargantext_web.api.NodesChildrenNgrams.as_view()), # => repeated children ?
    url(r'^api/nodes/(\d+)/children/metadata$', gargantext_web.api.NodesChildrenMetatadata.as_view()),
    url(r'^api/nodes/(\d+)/children/queries$', gargantext_web.api.NodesChildrenQueries.as_view()),
    url(r'^api/nodes/(\d+)/children/duplicates$', gargantext_web.api.NodesChildrenDuplicates.as_view()),
@@ -66,12 +70,13 @@ urlpatterns = patterns('',
    url(r'^api/nodes/(\d+)/ngrams$', gargantext_web.api.CorpusController.ngrams),

    # Provisory tests
    url(r'^ngrams$', views.ngrams), # to be removed
    url(r'^nodeinfo/(\d+)$', views.nodeinfo), # to be removed ?
    url(r'^tests/mvc$', views.tests_mvc),
    url(r'^tests/mvc-listdocuments$', views.tests_mvc_listdocuments),

    url(r'^tests/istextquery$', pubmedscrapper.getGlobalStatsISTEXT), # api/query?type=istext ?
    url(r'^tests/pubmedquery$', pubmedscrapper.getGlobalStats),
    url(r'^tests/project/(\d+)/pubmedquery/go$', pubmedscrapper.doTheQuery),
    url(r'^tests/project/(\d+)/ISTEXquery/go$', pubmedscrapper.testISTEX)
@@ -90,3 +95,15 @@ if settings.DEBUG:
        }),
    )
if settings.MAINTENANCE:
    urlpatterns = patterns('',
        url(r'^img/logo.svg$', views.logo),
        url(r'^css/bootstrap.css$', views.css),
        url(r'^$', views.home_view),
        url(r'^about/', views.get_about),
        url(r'^.*', views.get_maintenance),
    )
from django.shortcuts import redirect
from django.shortcuts import render
from django.db import transaction

from django.http import Http404, HttpResponse, HttpResponseRedirect, HttpResponseForbidden
from django.template.loader import get_template
from django.template import Context

from node import models
#from node.models import Language, ResourceType, Resource, \
#    Node, NodeType, Node_Resource, Project, Corpus, \
#    Ngram, Node_Ngram, NodeNgramNgram, NodeNodeNgram

from node.admin import CorpusForm, ProjectForm, ResourceForm, CustomForm
@@ -25,10 +26,12 @@ from django import forms

from collections import defaultdict

from parsing.FileParsers import *
import os

# SOME FUNCTIONS

from gargantext_web import settings

from django.http import *
from django.shortcuts import render_to_response, redirect
from django.template import RequestContext
@@ -37,6 +40,12 @@ from django.contrib.auth import authenticate, login, logout

from scrap_pubmed.admin import Logger

from gargantext_web.db import *
from sqlalchemy import or_, func

from gargantext_web import about
def login_user(request):
    logout(request)
    username = password = ''
@@ -50,9 +59,7 @@ def login_user(request):
            if user.is_active:
                login(request, user)
                return HttpResponseRedirect('/projects/')

    return render_to_response('authentication.html', context_instance=RequestContext(request))
@@ -72,7 +79,7 @@ def logo(request):
    svg_data = template.render(Context({\
        'color': color,\
    }))
    return HttpResponse(svg_data, content_type="image/svg+xml")
def css(request):
    template = get_template('bootstrap.css')
@@ -147,7 +154,6 @@ def date_range(start_dt, end_dt = None, format=None):

# SOME VIEWS

def get_about(request):
    '''
    About Gargantext, the team and sponsors
    '''
@@ -183,8 +189,8 @@ def get_maintenance(request):
    return HttpResponse(html)
from gargantext_web import home

def home_view(request):
    '''
    Home describes the platform.
    A video draws the narratives.
@@ -197,6 +203,9 @@ def home(request):
    html = t.render(Context({\
        'user': user,\
        'date': date,\
        'paragraph_gargantua': home.paragraph_gargantua(),\
        'paragraph_lorem': home.paragraph_lorem(),\
        'paragraph_tutoreil': home.paragraph_tutoreil(),\
    }))
    return HttpResponse(html)
@@ -209,16 +218,17 @@ def projects(request):
    '''
    if not request.user.is_authenticated():
        return redirect('/auth/')

    t = get_template('projects.html')

    user_id = cache.User[request.user.username].id
    project_type_id = cache.NodeType['Project'].id

    date = datetime.datetime.now()
    print(Logger.write("STATIC_ROOT"))

    projects = session.query(Node).filter(Node.user_id == user_id, Node.type_id == project_type_id).order_by(Node.date).all()
    number = len(projects)

    form = ProjectForm()
@@ -227,7 +237,9 @@ def projects(request):
        # TODO : protect from sql injection here
        name = str(request.POST['name'])
        if name != "":
            new_project = Project(name=name, type_id=project_type_id, user_id=user_id)
            session.add(new_project)
            session.commit()
            return HttpResponseRedirect('/projects/')
    else:
        form = ProjectForm()
@@ -240,190 +252,6 @@ def projects(request):
        })
def corpus(request, project_id, corpus_id):
    if not request.user.is_authenticated():
        return redirect('/login/?next=%s' % request.path)
@@ -433,77 +261,25 @@ def corpus(request, project_id, corpus_id):
        offset = str(corpus_id)
    except ValueError:
        raise Http404()

    t = get_template('corpus.html')
    user = request.user
    date = datetime.datetime.now()

    project = cache.Node[int(project_id)]
    corpus = cache.Node[int(corpus_id)]

    type_doc_id = cache.NodeType['Document'].id
    number = session.query(func.count(Node.id)).filter(Node.parent_id==corpus_id, Node.type_id==type_doc_id).all()[0][0]
    try:
        chart = dict()
        chart['first'] = parse(corpus.children.first().metadata['publication_date']).strftime("%Y, %m, %d")
        # TODO write with sqlalchemy
        #chart['first'] = parse(session.query(Node.metadata['publication_date']).filter(Node.parent_id==corpus.id, Node.type_id==type_doc_id).first()).strftime("%Y, %m, %d")
        chart['last'] = parse(corpus.children.last().metadata['publication_date']).strftime("%Y, %m, %d")

        print(chart)
    except Exception as error:
@@ -550,12 +326,12 @@ def subcorpus(request, project_id, corpus_id, start, end):
    user = request.user
    date = datetime.datetime.now()

    project = session.query(Node).filter(Node.id==project_id).first()
    corpus = session.query(Node).filter(Node.id==corpus_id).first()
    type_document_id = cache.NodeType['Document'].id

    # retrieving all the documents
    # documents = corpus.children.all()
    documents = session.query(Node).filter(Node.parent_id==corpus_id, Node.type_id == type_document_id).all()
    number = len(documents)

    filtered_docs = []
...@@ -667,25 +443,82 @@ def subcorpusJSON(request, project_id, corpus_id, start , end ): ...@@ -667,25 +443,82 @@ def subcorpusJSON(request, project_id, corpus_id, start , end ):
return HttpResponse( serializer.data , content_type='application/json') return HttpResponse( serializer.data , content_type='application/json')
def empty_trash():
nodes = models.Node.objects.filter(type_id=cache.NodeType['Trash'].id).all()
with transaction.atomic():
for node in nodes:
try:
                node.children.all().delete()
except Exception as error:
print(error)
            node.delete()

def delete_project(request, node_id):
Node.objects.filter(id=node_id).all().delete()
return HttpResponseRedirect('/projects/')
def move_to_trash(node_id):
try:
node = session.query(Node).filter(Node.id == node_id).first()
previous_type_id = node.type_id
node.type_id = cache.NodeType['Trash'].id
session.add(node)
session.commit()
return(previous_type_id)
    except Exception as error:
        print("Cannot move Node %s to trash: %s" % (node_id, error))
def delete_node(request, node_id):
# do we have a valid user?
user = request.user
node = session.query(Node).filter(Node.id == node_id).first()
if not user.is_authenticated():
return redirect('/login/?next=%s' % request.path)
if node.user_id != user.id:
return HttpResponseForbidden()
previous_type_id = move_to_trash(node_id)
if previous_type_id == cache.NodeType['Corpus'].id:
return HttpResponseRedirect('/project/' + str(node.parent_id))
else:
return HttpResponseRedirect('/projects/')
    if settings.DEBUG:
        empty_trash()
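# Editor's note: a quick usage sketch of the soft-delete flow above (the node id
# is hypothetical). move_to_trash() only retypes the node; empty_trash() does the
# actual deletion of every node typed 'Trash':
#
#     previous_type_id = move_to_trash(42)   # node 42 becomes a 'Trash' node
#     empty_trash()                          # hard-deletes node 42 (and its children)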
def delete_corpus(request, project_id, corpus_id):
Node.objects.filter(id=corpus_id).all().delete()
return HttpResponseRedirect('/project/' + project_id)
def delete_corpus(request, project_id, node_id):
# ORM Django
with transaction.atomic():
node = models.Node.objects.get(id=node_id)
try:
            node.children.all().delete()
except Exception as error:
print(error)
node.delete()
# SQLA Django
# node = session.query(Node).filter(Node.id == node_id).first()
# session.delete(node)
# session.commit()
# session.flush()
return HttpResponseRedirect('/project/' + project_id)
def chart(request, project_id, corpus_id):
    ''' Charts to compare, filter, count'''
    if MAINTENANCE: return HttpResponseRedirect('/maintenance/')
    t = get_template('chart.html')
    user = request.user
    date = datetime.datetime.now()
    project = session.query(Node).filter(Node.id==project_id).first()
    corpus = session.query(Node).filter(Node.id==corpus_id).first()

    html = t.render(Context({
        'user' : user,
...@@ -696,13 +529,12 @@ def chart(request, project_id, corpus_id):
    return HttpResponse(html)
def matrix(request, project_id, corpus_id):
    if MAINTENANCE: return HttpResponseRedirect('/maintenance/')
    t = get_template('matrix.html')
    user = request.user
    date = datetime.datetime.now()
    project = session.query(Node).filter(Node.id==project_id).first()
    corpus = session.query(Node).filter(Node.id==corpus_id).first()

    html = t.render(Context({\
        'user' : user,\
...@@ -714,29 +546,24 @@ def matrix(request, project_id, corpus_id):
    return HttpResponse(html)
def graph(request, project_id, corpus_id):
    if MAINTENANCE: return HttpResponseRedirect('/maintenance/')
    t = get_template('explorer.html')
    user = request.user
    date = datetime.datetime.now()
    project = session.query(Node).filter(Node.id==project_id).first()
    corpus = session.query(Node).filter(Node.id==corpus_id).first()

    html = t.render(Context({\
        'user' : user,\
        'date' : date,\
        'corpus' : corpus,\
        'project' : project,\
        'graphfile' : "hola_mundo",\
    }))

    return HttpResponse(html)
def exploration(request):
    if MAINTENANCE: return HttpResponseRedirect('/maintenance/')
    t = get_template('exploration.html')
    user = request.user
    date = datetime.datetime.now()
...@@ -749,7 +576,6 @@ def exploration(request):
    return HttpResponse(html)

def explorer_chart(request):
    if MAINTENANCE: return HttpResponseRedirect('/maintenance/')
    t = get_template('chart.html')
    user = request.user
    date = datetime.datetime.now()
...@@ -773,9 +599,9 @@ def corpus_csv(request, project_id, corpus_id):
    writer = csv.writer(response)

    corpus_id = session.query(Node.id).filter(Node.id==corpus_id).scalar()
    type_document_id = cache.NodeType['Document'].id
    documents = session.query(Node).filter(Node.parent_id==corpus_id, Node.type_id==type_document_id).all()

    keys = list(documents[0].metadata.keys())
    writer.writerow(keys)
...@@ -792,8 +618,6 @@ def corpus_csv(request, project_id, corpus_id):
    return response
def send_csv(request, corpus_id):
    '''
    Create the HttpResponse object with the appropriate CSV header.
...@@ -806,9 +630,9 @@ def send_csv(request, corpus_id):
    cursor.execute("""
        SELECT
            metadata ->> 'publication_year' as year,
            metadata ->> 'publication_month' as month,
            metadata ->> 'publication_day' as day,
            COUNT(*)
        FROM
            node_node AS n
...@@ -834,23 +658,26 @@ def send_csv(request, corpus_id):
    return response
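# Editor's note on the `->` vs `->>` change above: with jsonb, `->` yields a jsonb
# value and `->>` yields text, so `->>` is the right operator when the result is
# written out as CSV. Illustrative SQL (same table and keys as the query above):
#
#     SELECT metadata ->  'publication_year' FROM node_node;  -- jsonb: "2014"
#     SELECT metadata ->> 'publication_year' FROM node_node;  -- text:  2014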
# To get the data
from gargantext_web.api import JsonHttpResponse
from analysis.functions import get_cooc
import json

def node_link(request, corpus_id):
    '''
    Create the HttpResponse object with the node_link dataset.
    '''
    data = []

    corpus = session.query(Node).filter(Node.id==corpus_id).first()
    filename = settings.MEDIA_ROOT + '/corpora/%s/%s_%s.json' % (request.user, corpus.parent_id, corpus_id)
    print("file exists?:", os.path.isfile(filename))
    if os.path.isfile(filename):
        json_data = open(filename, "r")
        data = json.load(json_data)
        json_data.close()
    else:
        data = get_cooc(request=request, corpus_id=corpus_id, type="node_link")
    return JsonHttpResponse(data)
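# Editor's note: node_link() above only *reads* the cached JSON; nothing shown in
# this diff writes it. A minimal sketch of how such a cache file could be produced
# (hypothetical helper -- the filename scheme is copied from node_link above):
def write_node_link_cache(request, corpus_id):
    corpus = session.query(Node).filter(Node.id==corpus_id).first()
    filename = settings.MEDIA_ROOT + '/corpora/%s/%s_%s.json' % (request.user, corpus.parent_id, corpus_id)
    data = get_cooc(request=request, corpus_id=corpus_id, type="node_link")
    with open(filename, 'w') as cache_file:
        json.dump(data, cache_file)
    return data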
def adjacency(request, corpus_id):
...@@ -926,6 +753,35 @@ def nodeinfo(request , node_id):
    return HttpResponse(html)
def tfidf2(request, corpus_id, ngram_id):
"""
    Takes IDs of corpus and ngram and returns a list of relevant documents in JSON format
according to TFIDF score (order is decreasing).
"""
#it will receive something like: api/tfidf/corpus_id/NGRAM1aNGRAM2aNGRAM3aNGRAM4...
docsids = ngram_id.split("a")
tfidf_list = []
for i in docsids:
pub = Node.objects.get(id=i)
finalpub = {}
finalpub["id"] = pub.id
pubmetadata = pub.metadata
if "title" in pubmetadata: finalpub["title"] = pubmetadata['title']
if "publication_date" in pubmetadata: finalpub["publication_date"] = pubmetadata['publication_date']
if "journal" in pubmetadata: finalpub["journal"] = pubmetadata['journal']
if "authors" in pubmetadata: finalpub["authors"] = pubmetadata['authors']
if "fields" in pubmetadata: finalpub["fields"] = pubmetadata['fields']
        tfidf_list.append(finalpub) # build a dict containing only the available attributes
if len(tfidf_list)==6: break # max 6 papers
data = json.dumps(tfidf_list)
# data = ["hola","mundo"]
return JsonHttpResponse(data)
def tfidf(request, corpus_id, ngram_id):
    """
    Takes IDs of corpus and ngram and returns a list of relevant documents in JSON format
......
from django.shortcuts import redirect
from django.shortcuts import render
from django.http import Http404, HttpResponse, HttpResponseRedirect, HttpResponseForbidden
from sqlalchemy import func, and_, or_
from sqlalchemy.orm import aliased
from collections import defaultdict
from datetime import datetime
from threading import Thread
from node.admin import CustomForm
from gargantext_web.db import *
from gargantext_web.settings import DEBUG, MEDIA_ROOT
from gargantext_web.api import JsonHttpResponse
import json
from parsing.corpustools import add_resource, parse_resources, extract_ngrams, compute_tfidf
def project(request, project_id):
# SQLAlchemy session
session = Session()
# do we have a valid project id?
try:
project_id = int(project_id)
except ValueError:
raise Http404()
# do we have a valid project?
project = (session
.query(Node)
.filter(Node.id == project_id)
.filter(Node.type_id == cache.NodeType['Project'].id)
).first()
if project is None:
raise Http404()
# do we have a valid user?
user = request.user
if not user.is_authenticated():
return redirect('/login/?next=%s' % request.path)
if project.user_id != user.id:
return HttpResponseForbidden()
# Let's find out about the children nodes of the project
ChildrenNode = aliased(Node)
    # This query returns the wrong number of docs from the pubmed querier (x5):
    # grouping with sqlalchemy.func by Resource.type_id is the culprit.
    # ISSUE L51
corpus_query = (session
.query(Node.id, Node.name, func.count(ChildrenNode.id))
#.query(Node.id, Node.name, Resource.type_id, func.count(ChildrenNode.id))
#.join(Node_Resource, Node_Resource.node_id == Node.id)
#.join(Resource, Resource.id == Node_Resource.resource_id)
.filter(Node.parent_id == project.id)
.filter(Node.type_id == cache.NodeType['Corpus'].id)
.filter(and_(ChildrenNode.parent_id == Node.id, ChildrenNode.type_id == cache.NodeType['Document'].id))
.group_by(Node.id, Node.name)
.order_by(Node.name)
.all()
)
corpora_by_resourcetype = defaultdict(list)
documents_count_by_resourcetype = defaultdict(int)
corpora_count = 0
corpusID_dict = {}
for corpus_id, corpus_name, document_count in corpus_query:
# Not optimized GOTO ISSUE L51
resource_type_id = (session.query(Resource.type_id)
.join(Node_Resource, Node_Resource.resource_id == Resource.id)
.join(Node, Node.id == Node_Resource.node_id )
.filter(Node.id==corpus_id)
.first())[0]
        if corpus_id not in corpusID_dict:
if resource_type_id is None:
resourcetype_name = '(no resource)'
else:
resourcetype = cache.ResourceType[resource_type_id]
resourcetype_name = resourcetype.name
corpora_by_resourcetype[resourcetype_name].append({
'id': corpus_id,
'name': corpus_name,
'count': document_count,
})
documents_count_by_resourcetype[resourcetype_name] += document_count
corpora_count += 1
corpusID_dict[corpus_id]=True
# do the donut
total_documents_count = sum(documents_count_by_resourcetype.values())
donut = [
{ 'source': key,
'count': value,
'part' : round(value * 100 / total_documents_count) if total_documents_count else 0,
}
for key, value in documents_count_by_resourcetype.items()
]
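# Editor's note: a worked example of the donut computation above, with made-up
# counts -- 60 pubmed docs and 40 europress_french docs give parts of 60% and 40%:
#
#     documents_count_by_resourcetype = {'pubmed': 60, 'europress_french': 40}
#     total = sum(documents_count_by_resourcetype.values())            # 100
#     [round(v * 100 / total) for v in documents_count_by_resourcetype.values()]
#     # -> [60, 40]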
# deal with the form
if request.method == 'POST':
# form validation
form = CustomForm(request.POST, request.FILES)
if form.is_valid():
# extract information from the form
name = form.cleaned_data['name']
thefile = form.cleaned_data['file']
resourcetype = cache.ResourceType[form.cleaned_data['type']]
# which default language shall be used?
if resourcetype.name == "europress_french":
language_id = cache.Language['fr'].id
elif resourcetype.name == "europress_english":
language_id = cache.Language['en'].id
else:
language_id = None
            # corpus node instantiation (SQLAlchemy model)
corpus = Node(
name = name,
user_id = request.user.id,
parent_id = project_id,
type_id = cache.NodeType['Corpus'].id,
language_id = language_id,
)
session.add(corpus)
session.commit()
# save the uploaded file
filepath = '%s/corpora/%s/%s' % (MEDIA_ROOT, request.user.username, thefile._name)
f = open(filepath, 'wb')
f.write(thefile.read())
f.close()
# add the uploaded resource to the corpus
add_resource(corpus,
user_id = request.user.id,
type_id = resourcetype.id,
file = filepath,
)
# let's start the workflow
try:
def apply_workflow(corpus):
parse_resources(corpus)
extract_ngrams(corpus, ['title'])
compute_tfidf(corpus)
if DEBUG:
apply_workflow(corpus)
else:
thread = Thread(target=apply_workflow, args=(corpus, ), daemon=True)
thread.start()
except Exception as error:
print('WORKFLOW ERROR')
print(error)
# redirect to the main project page
return HttpResponseRedirect('/project/' + str(project_id))
else:
print('ERROR: BAD FORM')
else:
form = CustomForm()
# HTML output
return render(request, 'project.html', {
'form' : form,
'user' : user,
'date' : datetime.now(),
'project' : project,
'donut' : donut,
'list_corpora' : dict(corpora_by_resourcetype),
'whitelists' : '',
'blacklists' : '',
'cooclists' : '',
'number' : corpora_count,
})
def tfidf(request, corpus_id, ngram_ids):
"""Takes IDs of corpus and ngram and returns list of relevent documents in json format
according to TFIDF score (order is decreasing).
"""
limit=6
nodes_list = []
# filter input
ngram_ids = ngram_ids.split('a')
ngram_ids = [int(i) for i in ngram_ids]
# request data
nodes_query = (session
.query(Node, func.sum(NodeNodeNgram.score))
.join(NodeNodeNgram, NodeNodeNgram.nodey_id == Node.id)
.filter(NodeNodeNgram.nodex_id == corpus_id)
.filter(NodeNodeNgram.ngram_id.in_(ngram_ids))
.group_by(Node)
.order_by(func.sum(NodeNodeNgram.score).desc())
.limit(limit)
)
# convert query result to a list of dicts
for node, score in nodes_query:
node_dict = {
'id': node.id,
'score': score,
}
for key in ('title', 'publication_date', 'journal', 'authors', 'fields'):
if key in node.metadata:
node_dict[key] = node.metadata[key]
nodes_list.append(node_dict)
data = json.dumps(nodes_list)
return JsonHttpResponse(data)
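# Editor's note: a usage sketch for tfidf() above. The ngram ids arrive as a
# single 'a'-separated path segment, e.g. /api/tfidf/<corpus_id>/105a233 (the
# ids shown are made up), so:
#
#     ngram_ids = '105a233'.split('a')         # -> ['105', '233']
#     ngram_ids = [int(i) for i in ngram_ids]  # -> [105, 233]
#
# The response is a JSON list of at most `limit` (6) documents, ordered by
# decreasing summed TFIDF score.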
ALTER TABLE ONLY node_node ALTER COLUMN date SET DEFAULT CURRENT_DATE ;
ALTER TABLE ONLY node_node ALTER COLUMN metadata DROP NOT NULL ;
ALTER TABLE ONLY node_node ALTER COLUMN metadata DROP DEFAULT ;
ALTER TABLE ONLY node_node ALTER COLUMN metadata TYPE JSONB USING hstore_to_json(metadata)::jsonb ;
ALTER TABLE ONLY node_node ALTER COLUMN metadata SET DEFAULT '{}'::jsonb ;
ALTER TABLE ONLY node_node ALTER COLUMN metadata SET NOT NULL ;
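-- Editor's note: hstore_to_json() requires the hstore extension; after this
-- migration, per-key access switches from hstore's `->` to jsonb's `->`/`->>`.
-- A quick sanity check that the column really is jsonb now (PostgreSQL >= 9.4):
--     SELECT jsonb_typeof(metadata) FROM node_node LIMIT 1 ;  -- expect 'object'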
...@@ -7,7 +7,7 @@
#NodeType.objects.all().delete()
from node.models import *
import pycountry
...@@ -31,14 +31,8 @@ except: ...@@ -31,14 +31,8 @@ except:
    me = User(username='pksm3')
    me.save()
for node_type in ['Trash', 'Root', ]:
    NodeType.objects.get_or_create(name=node_type)
try:
    typeProject = NodeType.objects.get(name='Project')
...@@ -141,13 +135,7 @@ except Exception as error:
#Node.objects.all().delete()
# In[9]:
try:
project = Node.objects.get(name='Bees project')
except:
project = Node(name='Bees project', type=typeProject, user=me)
project.save()
try:
    stem = Node.objects.get(name='Stem')
...@@ -158,3 +146,17 @@ except:
from gargantext_web.db import *
# Instantiante table NgramTag:
f = open("part_of_speech_labels.txt", 'r')
for line in f.readlines():
name, description = line.strip().split('\t')
_tag = Tag(name=name, description=description)
session.add(_tag)
session.commit()
f.close()
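# Editor's note: part_of_speech_labels.txt is expected to be tab-separated, one
# tag per line (the full list appears later in this commit), e.g.:
#
#     CC	Coordinating conjunction
#     CD	Cardinal number
#
# so line.strip().split('\t') yields the (name, description) pair used above.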
...@@ -4,6 +4,11 @@ psql -d gargandb -f init.sql
sleep 2
../manage.py syncdb
psql -d gargandb -f init2.sql
sleep 2
#../manage.py shell < init.py
../manage.py shell < init_gargantext.py
#psql -d gargandb -f hstore2jsonb.sql
# Without this, we couldn't use the Django environment
import os
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "gargantext_web.settings")
os.environ.setdefault("DJANGO_HSTORE_GLOBAL_REGISTER", "False")
# We're gonna use all the models!
# Django models
from node import models
# SQLA models
from gargantext_web.db import *
# Reset: all data
#
#tables_to_empty = [
# Node,
# Node_Metadata,
# Metadata,
# NodeType,
# ResourceType,
# Resource,
#]
#for table in tables_to_empty:
# print('Empty table "%s"...' % (table._meta.db_table, ))
# table.objects.all().delete()
# Integration: metadata types
print('Initialize metadata...')
metadata = {
'publication_date': 'datetime',
'authors': 'string',
'language_fullname': 'string',
'abstract': 'text',
'title': 'string',
'source': 'string',
'volume': 'string',
'text': 'text',
'page': 'string',
'doi': 'string',
'journal': 'string',
}
for name, type in metadata.items():
models.Metadata(name=name, type=type).save()
# Integration: languages
print('Initialize languages...')
import pycountry
Language.objects.all().delete()
for language in pycountry.languages:
if 'alpha2' in language.__dict__:
Language(
iso2 = language.alpha2,
iso3 = language.bibliographic,
fullname = language.name,
implemented = 1 if language.alpha2 in ['en', 'fr'] else 0,
).save()
english = Language.objects.get(iso2='en')
french = Language.objects.get(iso2='fr')
# Integration: users
print('Initialize users...')
# get_or_create() returns an (object, created) tuple, so unpack it
me, _ = models.User.objects.get_or_create(username='alexandre')
gargantua, _ = models.User.objects.get_or_create(username='gargantua')

# commit the root first, so node_root.id is populated before being used as a parent
node_root = Node(user_id=gargantua.id, type_id=cache.NodeType['Root'].id, name='Root')
session.add(node_root)
session.commit()

node_stem = Node(user_id=gargantua.id, type_id=cache.NodeType['Stem'].id, name='Stem', parent_id=node_root.id)
node_lem = Node(user_id=gargantua.id, type_id=cache.NodeType['Lem'].id, name='Lem', parent_id=node_root.id)
session.add(node_stem)
session.add(node_lem)
session.commit()
# Integration: node types
print('Initialize node types...')
node_types = [
'Root', 'Trash',
'Project', 'Corpus', 'Document',
'Stem', 'Lem', 'Tfidf',
'Synonym',
'MiamList', 'StopList',
'Cooccurrence', 'WhiteList', 'BlackList'
]
for node_type in node_types:
models.NodeType.objects.get_or_create(name=node_type)
# Integration: resource types
print('Initialize resource...')
resources = [
'pubmed', 'isi', 'ris', 'europress_french', 'europress_english']
for resource in resources:
models.ResourceType.objects.get_or_create(name=resource)
# TODO
# here some tests
# add a new project and some corpora to test it
# Integration: project
#
#print('Initialize project...')
#try:
# project = Node.objects.get(name='Bees project')
#except:
# project = Node(name='Bees project', type=typeProject, user=me)
# project.save()
#
# Integration: corpus
#print('Initialize corpus...')
#try:
# corpus_pubmed = Node.objects.get(name='PubMed corpus')
#except:
# corpus_pubmed = Node(parent=project, name='PubMed corpus', type=typeCorpus, user=me)
# corpus_pubmed.save()
#
#print('Initialize resource...')
#corpus_pubmed.add_resource(
# # file='./data_samples/pubmed.zip',
# #file='./data_samples/pubmed_2013-04-01_HoneyBeesBeeBees.xml',
# file='/srv/gargantext_lib/data_samples/pubmed.xml',
# type=typePubmed,
# user=me
#)
#
#for resource in corpus_pubmed.get_resources():
# print('Resource #%d - %s - %s' % (resource.id, resource.digest, resource.file))
#
## print('Parse corpus #%d...' % (corpus_pubmed.id, ))
# corpus_pubmed.parse_resources(verbose=True)
# print('Extract corpus #%d...' % (corpus_pubmed.id, ))
# corpus_pubmed.children.all().extract_ngrams(['title',])
# print('Parsed corpus #%d.' % (corpus_pubmed.id, ))
# Instantiante table NgramTag:
f = open("part_of_speech_labels.txt", 'r')
for line in f.readlines():
name, description = line.strip().split('\t')
_tag = Tag(name=name, description=description)
session.add(_tag)
session.commit()
f.close()
exit()
CC Coordinating conjunction
CD Cardinal number
DT Determiner
EX Existential there
FW Foreign word
IN Preposition or subordinating conjunction
JJ Adjective
JJR Adjective, comparative
JJS Adjective, superlative
LS List item marker
MD Modal
NN Noun, singular or mass
NNS Noun, plural
NNP Proper noun, singular
NNPS Proper noun, plural
PDT Predeterminer
POS Possessive ending
PRP Personal pronoun
PRP$ Possessive pronoun
RB Adverb
RBR Adverb, comparative
RBS Adverb, superlative
RP Particle
SYM Symbol
TO to
UH Interjection
VB Verb, base form
VBD Verb, past tense
VBG Verb, gerund or present participle
VBN Verb, past participle
VBP Verb, non-3rd person singular present
VBZ Verb, 3rd person singular present
WDT Wh-determiner
WP Wh-pronoun
WP$ Possessive wh-pronoun
WRB Wh-adverb
NGRA Ngram
Cython==0.20.2
Django==1.6.11
Jinja2==2.7.3
MarkupSafe==0.23
Pillow==2.5.3
Pygments==1.6
RandomWords==0.1.12
SQLAlchemy==0.9.8
South==1.0
aldjemy==0.3.10
amqp==1.4.6
anyjson==0.3.3
bibtexparser==0.6.0
billiard==3.3.0.18
celery==3.1.15
certifi==14.05.14
...@@ -23,15 +25,20 @@ django-cte-trees==0.9.2
django-extensions==1.4.0
django-grappelli==2.5.3
django-hstore==1.3.1
django-maintenance==0.1
django-mptt==0.6.1
django-nested-inlines==0.1
django-pgfields==1.4.4
django-pgjson==0.2.2
django-pgjsonb==0.0.10
django-treebeard==2.0
djangorestframework==3.0.0
gensim==0.10.3
graphviz==0.4
ipython==2.2.0
kombu==3.0.23
lxml==3.4.1
#matplotlib==1.4.0
networkx==1.9
#nltk==3.0a4
nose==1.3.4
...@@ -44,13 +51,16 @@ pycparser==2.10
pydot2==1.0.33
pyparsing==2.0.2
python-dateutil==2.2
python-igraph==0.7
pytz==2014.7
pyzmq==14.3.1
readline==6.2.4.1
redis==2.10.3
scikit-learn==0.15.1
scipy==0.14.0
simplerandom==0.12.1
six==1.7.3
sympy==0.7.5
tornado==4.0.1
uWSGI==2.0.7
ujson==1.33
# Without this, we couldn't use the Django environment
import os
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "gargantext_web.settings")
os.environ.setdefault("DJANGO_HSTORE_GLOBAL_REGISTER", "False")
# We're gonna use all the models!
from node.models import User, NodeType, Node
user = User.objects.get(username = 'contro2015.lait')
# Reset: all data
try:
typeDoc = NodeType.objects.get(name='Cooccurrence')
except Exception as error:
print(error)
Node.objects.filter(user=user, type=typeDoc).all().delete()
exit()
# Without this, we couldn't use the Django environment
import os
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "gargantext_web.settings")
os.environ.setdefault("DJANGO_HSTORE_GLOBAL_REGISTER", "False")
# We're gonna use all the models!
from node.models import *
# Reset: all data
tables_to_empty = [
Node,
Node_Metadata,
Metadata,
NodeType,
ResourceType,
Resource,
]
for table in tables_to_empty:
print('Empty table "%s"...' % (table._meta.db_table, ))
table.objects.all().delete()
# Integration: metadata types
print('Initialize metadata...')
metadata = {
'publication_date': 'datetime',
'authors': 'string',
'language_fullname': 'string',
'abstract': 'text',
'title': 'string',
'source': 'string',
'volume': 'string',
'text': 'text',
'page': 'string',
'doi': 'string',
'journal': 'string',
}
for name, type in metadata.items():
Metadata(name=name, type=type).save()
# Integration: languages
print('Initialize languages...')
import pycountry
Language.objects.all().delete()
for language in pycountry.languages:
if 'alpha2' in language.__dict__:
Language(
iso2 = language.alpha2,
iso3 = language.bibliographic,
fullname = language.name,
implemented = 1 if language.alpha2 in ['en', 'fr'] else 0,
).save()
english = Language.objects.get(iso2='en')
french = Language.objects.get(iso2='fr')
# Integration: users
print('Initialize users...')
try:
me = User.objects.get(username='alexandre')
except:
me = User(username='alexandre')
me.save()
# Integration: node types
print('Initialize node types...')
try:
typeProject = NodeType.objects.get(name='Root')
except Exception as error:
print(error)
typeProject = NodeType(name='Root')
typeProject.save()
try:
typeProject = NodeType.objects.get(name='Project')
except Exception as error:
print(error)
typeProject = NodeType(name='Project')
typeProject.save()
try:
typeCorpus = NodeType.objects.get(name='Corpus')
except Exception as error:
print(error)
typeCorpus = NodeType(name='Corpus')
typeCorpus.save()
try:
typeDoc = NodeType.objects.get(name='Document')
except Exception as error:
print(error)
typeDoc = NodeType(name='Document')
typeDoc.save()
try:
typeStem = NodeType.objects.get(name='Stem')
except Exception as error:
print(error)
typeStem = NodeType(name='Stem')
typeStem.save()
try:
typeTfidf = NodeType.objects.get(name='Tfidf')
except Exception as error:
print(error)
typeTfidf = NodeType(name='Tfidf')
typeTfidf.save()
try:
typeDoc = NodeType.objects.get(name='WhiteList')
except Exception as error:
print(error)
typeDoc = NodeType(name='WhiteList')
typeDoc.save()
try:
typeDoc = NodeType.objects.get(name='BlackList')
except Exception as error:
print(error)
typeDoc = NodeType(name='BlackList')
typeDoc.save()
try:
typeDoc = NodeType.objects.get(name='Synonyme')
except Exception as error:
print(error)
typeDoc = NodeType(name='Synonyme')
typeDoc.save()
try:
typeDoc = NodeType.objects.get(name='Cooccurrence')
except Exception as error:
print(error)
typeDoc = NodeType(name='Cooccurrence')
typeDoc.save()
# Integration: resource types
print('Initialize resource...')
try:
typePubmed = ResourceType.objects.get(name='pubmed')
typeIsi = ResourceType.objects.get(name='isi')
typeRis = ResourceType.objects.get(name='ris')
typePresseFr = ResourceType.objects.get(name='europress_french')
typePresseEn = ResourceType.objects.get(name='europress_english')
except Exception as error:
print(error)
typePubmed = ResourceType(name='pubmed')
typePubmed.save()
typeIsi = ResourceType(name='isi')
typeIsi.save()
typeRis = ResourceType(name='ris')
typeRis.save()
typePresseFr = ResourceType(name='europress_french')
typePresseFr.save()
typePresseEn = ResourceType(name='europress_english')
typePresseEn.save()
# Integration Node Stem
try:
stem = Node.objects.get(name='Stem')
except:
stem = Node(name='Stem', type=typeStem, user=me)
stem.save()
# Integration: project
print('Initialize project...')
try:
project = Node.objects.get(name='Bees project')
except:
project = Node(name='Bees project', type=typeProject, user=me)
project.save()
# Integration: corpus
print('Initialize corpus...')
try:
corpus_pubmed = Node.objects.get(name='PubMed corpus')
except:
corpus_pubmed = Node(parent=project, name='PubMed corpus', type=typeCorpus, user=me)
corpus_pubmed.save()
print('Initialize resource...')
corpus_pubmed.add_resource(
# file='./data_samples/pubmed.zip',
#file='./data_samples/pubmed_2013-04-01_HoneyBeesBeeBees.xml',
file='/srv/gargantext_lib/data_samples/pubmed.xml',
type=typePubmed,
user=me
)
for resource in corpus_pubmed.get_resources():
print('Resource #%d - %s - %s' % (resource.id, resource.digest, resource.file))
# print('Parse corpus #%d...' % (corpus_pubmed.id, ))
# corpus_pubmed.parse_resources(verbose=True)
# print('Extract corpus #%d...' % (corpus_pubmed.id, ))
# corpus_pubmed.children.all().extract_ngrams(['title',])
# print('Parsed corpus #%d.' % (corpus_pubmed.id, ))
exit()
...@@ -117,23 +117,16 @@ class CustomForm(forms.Form):
    """
    def clean_file(self):
        file_ = self.cleaned_data.get('file')
        from datetime import datetime
        file_.name = str(datetime.now().microsecond)
        # #Filename length
        # if len(file_.name)>30:
        #     from datetime import datetime
        #     file_.name = str(datetime.now().microsecond)
        #     # raise forms.ValidationError(_('Come on dude, name too long. Now is:'+file_.name))
        #File size
        if len(file_)>1024 ** 3:
            raise forms.ValidationError(_('File too heavy! (>1GB).'))
        ## File type:
        # if file_.content_type == "application/zip":
        #     raise forms.ValidationError(_('We need a zip pls.'))
        return file_

class CorpusForm(ModelForm):
    #parent = ModelChoiceField(EmptyQuerySet)
    def __init__(self, *args, **kwargs):
...@@ -155,14 +148,14 @@ class CorpusAdmin(NodeAdmin):

######################################################################

#class DocumentForm(ModelForm):
#    parent = ModelChoiceField(Node.objects.filter(user_id=1, type_id=3))

class DocumentAdmin(NodeAdmin):
    _parent_nodetype_name = 'Corpus'
    _nodetype_name = 'Document'
    form = DocumentForm

#class DocumentAdmin(NodeAdmin):
#    _parent_nodetype_name = 'Corpus'
#    _nodetype_name = 'Document'
#    form = DocumentForm
#
class LanguageAdmin(admin.ModelAdmin):
    def get_queryset(self, request):
...@@ -178,7 +171,7 @@ admin.site.register(Language, LanguageAdmin)
admin.site.register(NodeType)
admin.site.register(Project, ProjectAdmin)
admin.site.register(Corpus, CorpusAdmin)
admin.site.register(Document)#, DocumentAdmin)
admin.site.register(Node_Resource)
......
from django_pg import models
from django.utils import timezone
from django.contrib.auth.models import User
from django_pgjson.fields import JsonBField
from cte_tree.models import CTENode, CTENodeManager
# from cte_tree.query import CTEQuerySet
#from cte_tree.fields import DepthField, PathField, OrderingField
...@@ -14,7 +15,9 @@ from parsing.FileParsers import *
from time import time
import datetime
from multiprocessing import Process
from math import log
import collections
from collections import defaultdict
import hashlib
...@@ -23,6 +26,9 @@ from gargantext_web.settings import MEDIA_ROOT
from celery.contrib.methods import task_method
from celery import current_app

import os
import subprocess

# Some usefull functions
# TODO: start the function name with an underscore (private)
...@@ -35,7 +41,7 @@ class Language(models.Model):
    iso2 = models.CharField(max_length=2, unique=True)
    iso3 = models.CharField(max_length=3, unique=True)
    fullname = models.CharField(max_length=255, unique=True)
    implemented = models.BooleanField(blank=True, default=True)

    def __str__(self):
        return self.fullname
...@@ -46,15 +52,35 @@ class ResourceType(models.Model):
    def __str__(self):
        return self.name

class Tag(models.Model):
    name = models.CharField(max_length=4, unique=True)
    description = models.CharField(max_length=255, unique=True)

    def __str__(self):
        return self.name

class Ngram(models.Model):
    language = models.ManyToManyField(blank=True, null=True, through='NgramLanguage', to='Language')
    n = models.IntegerField()
    terms = models.CharField(max_length=255, unique=True)
    nodes = models.ManyToManyField(through='Node_Ngram', to='Node')
    tag = models.ManyToManyField(blank=True, null=True, through='NgramTag', to='Tag')

    def __str__(self):
        return self.terms

class NgramTag(models.Model):
    ngram = models.ForeignKey(Ngram, on_delete=models.CASCADE)
    tag = models.ForeignKey(Tag)

    def __str__(self):
        return "%s: %s" % (self.ngram.terms, self.tag.name)

class NgramLanguage(models.Model):
    ngram = models.ForeignKey(Ngram, on_delete=models.CASCADE)
    language = models.ForeignKey(Language)

    def __str__(self):
        return "%s: %s" % (self.ngram.terms, self.language.fullname)

class Resource(models.Model):
    user = models.ForeignKey(User)
...@@ -66,7 +92,7 @@ class Resource(models.Model):
        return self.file

class NodeType(models.Model):
    name = models.CharField(max_length=255, unique=True)

    def __str__(self):
        return self.name
...@@ -88,7 +114,7 @@ class NodeQuerySet(CTENodeManager.CTEQuerySet):
                if key in metadata_cache:
                    metadata = metadata_cache[key]
                    if metadata.type == 'string':
                        value = value[:200]
                    data.append(Node_Metadata(**{
                        'node_id' : node.id,
                        'metadata_id' : metadata.id,
...@@ -107,7 +133,7 @@ class NodeManager(CTENodeManager):
        return getattr(self.get_queryset(), name, *args)

class Metadata(models.Model):
    name = models.CharField(max_length=32, unique=True)
    type = models.CharField(max_length=16, db_index=True)

class Node(CTENode):
...@@ -116,12 +142,12 @@ class Node(CTENode):
    user = models.ForeignKey(User)
    type = models.ForeignKey(NodeType)
    name = models.CharField(max_length=255)

    language = models.ForeignKey(Language, blank=True, null=True, on_delete=models.SET_NULL)

    date = models.DateField(default=timezone.now, blank=True)
    metadata = JsonBField(null=False, default={})

    ngrams = models.ManyToManyField(through='Node_Ngram', to='Ngram')
...@@ -221,21 +247,16 @@ class Node(CTENode):
        associations = defaultdict(float) # float or int?
        if isinstance(keys, dict):
            for key, weight in keys.items():
                text2process = str(self.metadata[key]).replace('[','').replace(']','')
                for ngram in extractor.extract_ngrams(text2process):
                    terms = ' '.join([token for token, tag in ngram])
                    associations[ngram] += weight
        else:
            for key in keys:
                text2process = str(self.metadata[key]).replace('[','').replace(']','')
                for ngram in extractor.extract_ngrams(text2process):
                    terms = ' '.join([token for token, tag in ngram])
                    associations[terms] += 1
        Node_Ngram.objects.bulk_create([
            Node_Ngram(
                node = self,
...@@ -281,150 +302,7 @@ class Node(CTENode):
        print("In workflow() END")
        self.metadata['Processing'] = 0
        self.save()
def parse_resources__MOV(self, verbose=False):
# parse all resources into a list of metadata
metadata_list = []
print("not parsed resources:")
print(self.node_resource.filter(parsed=False))
print("= = = = = = = = = = =\n")
for node_resource in self.node_resource.filter(parsed=False):
resource = node_resource.resource
parser = defaultdict(lambda:FileParser.FileParser, {
'istext' : ISText,
'pubmed' : PubmedFileParser,
'isi' : IsiFileParser,
'ris' : RisFileParser,
'europress' : EuropressFileParser,
'europress_french' : EuropressFileParser,
'europress_english' : EuropressFileParser,
})[resource.type.name]()
metadata_list += parser.parse(str(resource.file))
self.node_resource.update(parsed=True) #writing to DB
return metadata_list
def writeMetadata__MOV(self, metadata_list=None , verbose=False):
type_id = NodeType.objects.get(name='Document').id
user_id = self.user.id
langages_cache = LanguagesCache()
# # insert the new resources in the database!
for i, metadata_values in enumerate(metadata_list):
name = metadata_values.get('title', '')[:200]
language = langages_cache[metadata_values['language_iso2']] if 'language_iso2' in metadata_values else None,
if isinstance(language, tuple):
language = language[0]
Node(
user_id = user_id,
type_id = type_id,
name = name,
parent = self,
language_id = language.id if language else None,
metadata = metadata_values
).save()
metadata_list[i]["thelang"] = language
# # make metadata filterable
self.children.all().make_metadata_filterable()
# # mark the resources as parsed for this node
self.node_resource.update(parsed=True)
def extract_ngrams__MOV(self, array , keys , ngramsextractorscache=None, ngramscaches=None):
if ngramsextractorscache is None:
ngramsextractorscache = NgramsExtractorsCache()
langages_cache = LanguagesCache()
if ngramscaches is None:
ngramscaches = NgramsCaches()
for metadata in array:
associations = defaultdict(float) # float or int?
language = langages_cache[metadata['language_iso2']] if 'language_iso2' in metadata else None,
if isinstance(language, tuple):
language = language[0]
metadata["thelang"] = language
extractor = ngramsextractorscache[language]
ngrams = ngramscaches[language]
# print("\t\t number of req keys:",len(keys)," AND isdict?:",isinstance(keys, dict))
if isinstance(keys, dict):
for key, weight in keys.items():
if key in metadata:
for ngram in extractor.extract_ngrams(metadata[key]):
terms = ' '.join([token for token, tag in ngram])
associations[ngram] += weight
else:
for key in keys:
if key in metadata:
# print("the_content:[[[[[[__",metadata[key],"__]]]]]]")
for ngram in extractor.extract_ngrams(metadata[key]):
terms = ' '.join([token for token, tag in ngram])
associations[terms] += 1
if len(associations.items())>0:
Node_Ngram.objects.bulk_create([
Node_Ngram(
node = self,
ngram = ngrams[ngram_text],
weight = weight
)
for ngram_text, weight in associations.items()
])
# for ngram_text, weight in associations.items():
# print("ngram_text:",ngram_text," | weight:",weight, " | ngrams[ngram_text]:",ngrams[ngram_text])
def runInParallel(self, *fns):
proc = []
for fn in fns:
p = Process(target=fn)
p.start()
proc.append(p)
for p in proc:
p.join()
def workflow__MOV(self, keys=None, ngramsextractorscache=None, ngramscaches=None, verbose=False):
import time
total = 0
self.metadata['Processing'] = 1
self.save()
print("LOG::TIME: In workflow() parse_resources__MOV()")
start = time.time()
theMetadata = self.parse_resources__MOV()
end = time.time()
total += (end - start)
print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" parse_resources()__MOV [s]",(end - start))
print("LOG::TIME: In workflow() writeMetadata__MOV()")
start = time.time()
self.writeMetadata__MOV( metadata_list=theMetadata )
end = time.time()
total += (end - start)
print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" writeMetadata__MOV() [s]",(end - start))
print("LOG::TIME: In workflow() extract_ngrams__MOV()")
start = time.time()
self.extract_ngrams__MOV(theMetadata , keys=['title','abstract',] )
end = time.time()
total += (end - start)
print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" extract_ngrams__MOV() [s]",(end - start))
# # this is not working
# self.runInParallel( self.writeMetadata__MOV( metadata_list=theMetadata ) , self.extract_ngrams__MOV(theMetadata , keys=['title','abstract',] ) )
start = time.time()
print("LOG::TIME: In workflow() do_tfidf()")
from analysis.functions import do_tfidf
do_tfidf(self)
end = time.time()
total += (end - start)
print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" do_tfidf() [s]",(end - start))
# # print("LOG::TIME: In workflow() / do_tfidf()")
print("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" In workflow() END")
self.metadata['Processing'] = 0
self.save()
class Node_Metadata(models.Model):
    node = models.ForeignKey(Node, on_delete=models.CASCADE)
...@@ -437,7 +315,7 @@ class Node_Metadata(models.Model):

class Node_Resource(models.Model):
    node = models.ForeignKey(Node, related_name='node_resource', on_delete=models.CASCADE)
    resource = models.ForeignKey(Resource, on_delete=models.CASCADE)
    parsed = models.BooleanField(default=False)

class Node_Ngram(models.Model):
...@@ -495,15 +373,11 @@ class NodeNodeNgram(models.Model):
    def __str__(self):
        return "%s: %s / %s = %s" % (self.nodex.name, self.nodey.name, self.ngram.terms, self.score)
class NodeNodeNgram(models.Model):
    nodex = models.ForeignKey(Node, related_name="nodex", on_delete=models.CASCADE)
    nodey = models.ForeignKey(Node, related_name="nodey", on_delete=models.CASCADE)
    ngram = models.ForeignKey(Ngram, on_delete=models.CASCADE)
    score = models.FloatField(default=0)

    def __str__(self):
        return "%s: %s / %s = %s" % (self.nodex.name, self.nodey.name, self.ngram.terms, self.score)

class NgramNgram(models.Model):
    ngram = models.ForeignKey(Ngram, related_name='ngram', on_delete=models.CASCADE)
    token = models.ForeignKey(Ngram, related_name='token', on_delete=models.CASCADE)
    index = models.IntegerField()
...@@ -41,18 +41,47 @@ class EuropressFileParser(FileParser):
            html = etree.fromstring(contents, html_parser)

            try:
                format_europresse = 50
                html_articles = html.xpath('/html/body/table/tbody')
                if len(html_articles) < 1:
                    html_articles = html.xpath('/html/body/table')
                if len(html_articles) < 1:
                    format_europresse = 1
                    html_articles = html.xpath('//div[@id="docContain"]')
            except Exception as error:
                print(error)

            if format_europresse == 50:
                name_xpath = "./tr/td/span[@class = 'DocPublicationName']"
                header_xpath = "//span[@class = 'DocHeader']"
                title_xpath = "string(./tr/td/span[@class = 'TitreArticleVisu'])"
                text_xpath = "./tr/td/descendant-or-self::*[not(self::span[@class='DocHeader'])]/text()"
            elif format_europresse == 1:
                name_xpath = "//span[@class = 'DocPublicationName']"
                header_xpath = "//span[@class = 'DocHeader']"
                title_xpath = "string(//div[@class = 'titreArticleVisu'])"
                text_xpath = "./descendant::*[\
                    not(\
                        self::div[@class='Doc-SourceText'] \
                        or self::span[@class='DocHeader'] \
                        or self::span[@class='DocPublicationName'] \
                        or self::span[@id='docNameVisu'] \
                        or self::span[@class='DocHeader'] \
                        or self::div[@class='titreArticleVisu'] \
                        or self::span[@id='docNameContType'] \
                        or descendant-or-self::span[@id='ucPubliC_lblCertificatIssuedTo'] \
                        or descendant-or-self::span[@id='ucPubliC_lblEndDate'] \
                        or self::td[@class='txtCertificat'] \
                    )]/text()"
                doi_xpath = "//span[@id='ucPubliC_lblNodoc']/text()"

        except Exception as error:
            print(error)

        # initialize the list of metadata
        metadata_list = []

        # parse all the articles, one by one
        try:
            for html_article in html_articles:
...@@ -60,19 +89,20 @@ class EuropressFileParser(FileParser):
                metadata = {}
                if len(html_article):
                    for name in html_article.xpath(name_xpath):
                        if name.text is not None:
                            format_journal = re.compile('(.*), (.*)', re.UNICODE)
                            test_journal = format_journal.match(name.text)
                            if test_journal is not None:
                                metadata['journal'] = test_journal.group(1)
                                metadata['volume'] = test_journal.group(2)
                            else:
                                metadata['journal'] = name.text.encode(codif)

                    for header in html_article.xpath(header_xpath):
                        try:
                            text = header.text
                            #print("header", text)
                        except Exception as error:
                            print(error)
...@@ -138,8 +168,8 @@ class EuropressFileParser(FileParser):
                    if test_page is not None:
                        metadata['page'] = test_page.group(1).encode(codif)

                metadata['title'] = html_article.xpath(title_xpath).encode(codif)
                metadata['abstract'] = html_article.xpath(text_xpath)

                line = 0
                br_tag = 10
...@@ -185,32 +215,36 @@ class EuropressFileParser(FileParser):
                    metadata['publication_year'] = metadata['publication_date'].strftime('%Y')
                    metadata['publication_month'] = metadata['publication_date'].strftime('%m')
                    metadata['publication_day'] = metadata['publication_date'].strftime('%d')
                metadata.pop('publication_date')

                if len(metadata['abstract'])>0 and format_europresse == 50:
                    metadata['doi'] = str(metadata['abstract'][-9])
                    metadata['abstract'].pop()
                    # Here add separator for paragraphs
                    metadata['abstract'] = str(' '.join(metadata['abstract']))
                    metadata['abstract'] = str(re.sub('Tous droits réservés.*$', '', metadata['abstract']))
                elif format_europresse == 1:
                    metadata['doi'] = ' '.join(html_article.xpath(doi_xpath))
                    metadata['abstract'] = metadata['abstract'][:-9]
                    # Here add separator for paragraphs
                    metadata['abstract'] = str(' '.join(metadata['abstract']))
                else:
                    metadata['doi'] = "not found"

                metadata['length_words'] = len(metadata['abstract'].split(' '))
                metadata['length_letters'] = len(metadata['abstract'])

                metadata['bdd'] = u'europresse'
                metadata['url'] = u''

                #metadata_str = {}
                for key, value in metadata.items():
                    metadata[key] = value.decode() if isinstance(value, bytes) else value
                yield metadata
                count += 1

        except Exception as error:
            print(error)
            pass
...@@ -103,15 +103,21 @@ class FileParser:
            zipArchive = zipfile.ZipFile(file)
            for filename in zipArchive.namelist():
                try:
                    f = zipArchive.open(filename, 'r')
                    metadata_list += self.parse(f)
                    f.close()
                except Exception as error:
                    print(error)
        # ...otherwise, let's parse it directly!
        else:
            try:
                for metadata in self._parse(file):
                    metadata_list.append(self.format_metadata(metadata))
                if hasattr(file, 'close'):
                    file.close()
            except Exception as error:
                print(error)
        # return the list of formatted metadata
        return metadata_list
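# Editor's note: a usage sketch for parse() above -- zip handling is transparent,
# since the method recurses into each archive member. The path is hypothetical
# and the parser must be a concrete subclass such as RisFileParser:
#
#     parser = RisFileParser()
#     with open('/srv/gargantext_lib/data_samples/sample.ris', 'rb') as source:
#         metadata_list = parser.parse(source)
#     for metadata in metadata_list:
#         print(metadata.get('title'))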
...@@ -25,6 +25,7 @@ class PubmedFileParser(FileParser):
        metadata_path = {
            "journal" : 'MedlineCitation/Article/Journal/Title',
            "title" : 'MedlineCitation/Article/ArticleTitle',
            "abstract" : 'MedlineCitation/Article/Abstract/AbstractText',
            "title2" : 'MedlineCitation/Article/VernacularTitle',
            "language_iso3" : 'MedlineCitation/Article/Language',
            "doi" : 'PubmedData/ArticleIdList/ArticleId[@type=doi]',
...@@ -101,7 +102,6 @@ class PubmedFileParser(FileParser):
            if "realdate_day_" in metadata: metadata.pop("realdate_day_")
            if "title2" in metadata: metadata.pop("title2")
            metadata_list.append(metadata)
        # return the list of metadata
        return metadata_list
...@@ -17,42 +17,34 @@ class RisFileParser(FileParser): ...@@ -17,42 +17,34 @@ class RisFileParser(FileParser):
} }
def _parse(self, file): def _parse(self, file):
metadata_list = []
metadata = {} metadata = {}
last_key = None last_key = None
last_values = [] last_values = []
# browse every line of the file
for line in file: for line in file:
if len(line) > 2: if len(line) > 2:
# extract the parameter key
parameter_key = line[:2] parameter_key = line[:2]
# print(parameter_key)
if parameter_key != b' ' and parameter_key != last_key: if parameter_key != b' ' and parameter_key != last_key:
if last_key in self._parameters: if last_key in self._parameters:
# translate the parameter key
parameter = self._parameters[last_key] parameter = self._parameters[last_key]
if parameter["type"] == "metadata": if parameter["type"] == "metadata":
separator = parameter["separator"] if "separator" in parameter else "" separator = parameter["separator"] if "separator" in parameter else ""
metadata[parameter["key"]] = separator.join(last_values) metadata[parameter["key"]] = separator.join(last_values)
elif parameter["type"] == "delimiter": elif parameter["type"] == "delimiter":
#language = self._languages_fullname[metadata["language"].lower()] if 'language_fullname' not in metadata.keys():
#print(metadata) if 'language_iso3' not in metadata.keys():
try: if 'language_iso2' not in metadata.keys():
#print("append") metadata['language_iso2'] = 'en'
if 'language_fullname' not in metadata.keys(): yield metadata
if 'language_iso3' not in metadata.keys(): metadata = {}
if 'language_iso2' not in metadata.keys():
metadata['language_iso2'] = 'en'
metadata_list.append(metadata)
metadata = {}
#print("append succeeded")
except:
pass
last_key = parameter_key last_key = parameter_key
last_values = [] last_values = []
try: try:
last_values.append(line[3:-1].decode()) last_values.append(line[3:-1].decode())
except Exception as error: except Exception as error:
print(error) print(error)
pass # if a metadata object is left in memory, yield it as well
#print(len(metadata_list)) if metadata:
#print(metadata_list) yield metadata
return metadata_list
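With this change the RIS parser becomes a generator: each document's metadata dict is yielded as soon as its delimiter line is reached, with a final yield for the record still in memory, instead of accumulating everything in metadata_list. A minimal consumer sketch — the import path and file name are illustrative, and callers normally go through the public FileParser.parse() wrapper rather than _parse():

    from parsing.FileParsers.RisFileParser import RisFileParser  # path assumed

    parser = RisFileParser()
    with open('refs.ris', 'rb') as ris_file:        # RIS tags are read as bytes
        for metadata in parser._parse(ris_file):
            # one dict per bibliographic record: title, language_iso2, ...
            print(metadata.get('title'), metadata.get('language_iso2'))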
from ..Taggers import Tagger from ..Taggers import TurboTagger
import nltk import nltk
...@@ -13,12 +13,13 @@ class NgramsExtractor: ...@@ -13,12 +13,13 @@ class NgramsExtractor:
self.start() self.start()
self._label = "NP" self._label = "NP"
self._rule = self._label + ": " + rule self._rule = self._label + ": " + rule
self._grammar = nltk.RegexpParser(self._rule)
def __del__(self): def __del__(self):
self.stop() self.stop()
def start(self): def start(self):
self.tagger = Tagger() self.tagger = TurboTagger()
def stop(self): def stop(self):
pass pass
...@@ -29,19 +30,8 @@ class NgramsExtractor: ...@@ -29,19 +30,8 @@ class NgramsExtractor:
""" """
def extract_ngrams(self, contents): def extract_ngrams(self, contents):
tagged_ngrams = self.tagger.tag_text(contents) tagged_ngrams = self.tagger.tag_text(contents)
if len(tagged_ngrams)==0: return [] if len(tagged_ngrams):
grammar_parsed = self._grammar.parse(tagged_ngrams)
grammar = nltk.RegexpParser(self._rule) for subtree in grammar_parsed.subtrees():
result = [] if subtree.label() == self._label:
# try: yield subtree.leaves()
grammar_parsed = grammar.parse(tagged_ngrams)
for subtree in grammar_parsed.subtrees():
if subtree.label() == self._label:
result.append(subtree.leaves())
# except Exception as e:
# print("Problem while parsing rule '%s'" % (self._rule, ))
# print(e)
return result
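extract_ngrams gets the same generator treatment, and the RegexpParser grammar is now compiled once in the constructor rather than on every call. A sketch of the consumer side (EnglishNgramsExtractor is the TurboTagger-backed subclass imported below; a running tagger backend is assumed):

    from parsing.NgramsExtractors import EnglishNgramsExtractor

    extractor = EnglishNgramsExtractor()
    for leaves in extractor.extract_ngrams('The quick brown fox jumps over the lazy dog.'):
        # each item is the list of (token, tag) leaves of one NP subtree
        print(' '.join(token for token, tag in leaves).lower())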
from .FrenchNgramsExtractor import FrenchNgramsExtractor from .FrenchNgramsExtractor import FrenchNgramsExtractor
from .TurboNgramsExtractor import TurboNgramsExtractor as EnglishNgramsExtractor from .TurboNgramsExtractor import TurboNgramsExtractor as EnglishNgramsExtractor
from parsing.NgramsExtractors.EnglishNgramsExtractor import EnglishNgramsExtractor # from .EnglishNgramsExtractor import EnglishNgramsExtractor
#from .NgramsExtractor import NgramsExtractor from .NgramsExtractor import NgramsExtractor
...@@ -58,9 +58,11 @@ class Tagger: ...@@ -58,9 +58,11 @@ class Tagger:
if single: if single:
self.tagging_end() self.tagging_end()
return [] return []
"""Send a text to be tagged. """Send a text to be tagged.
""" """
# Not used right now
def tag_text(self, text): def tag_text(self, text):
tokens_tags = [] tokens_tags = []
self.tagging_start() self.tagging_start()
...@@ -69,4 +71,3 @@ class Tagger: ...@@ -69,4 +71,3 @@ class Tagger:
tokens_tags += self.tag_tokens(tokens, False) tokens_tags += self.tag_tokens(tokens, False)
self.tagging_end() self.tagging_end()
return tokens_tags return tokens_tags
...@@ -9,15 +9,24 @@ from .settings import implemented_methods ...@@ -9,15 +9,24 @@ from .settings import implemented_methods
class NLPClient: class NLPClient:
def __init__(self): def __init__(self):
self._socket = socket.socket(*server_type_client) self._socket = None
self._socket.connect((server_host, server_port))
for method_name in dir(self): for method_name in dir(self):
if method_name[0] != '_': if method_name[0] != '_':
if method_name.upper() not in implemented_methods: if method_name.upper() not in implemented_methods:
setattr(self, method_name, self._notimplemented) setattr(self, method_name, self._notimplemented)
def __del__(self): def __del__(self):
self._socket.close() self._disconnect()
def _connect(self):
self._disconnect()
self._socket = socket.socket(*server_type_client)
self._socket.connect((server_host, server_port))
def _disconnect(self):
if self._socket is not None:
self._socket.close()
self._socket = None
def _notimplemented(self, *args, **kwargs): def _notimplemented(self, *args, **kwargs):
raise NotImplementedError( raise NotImplementedError(
...@@ -51,7 +60,7 @@ class NLPClient: ...@@ -51,7 +60,7 @@ class NLPClient:
data += language + '\n' data += language + '\n'
data += re.sub(r'\n+', '\n', text) data += re.sub(r'\n+', '\n', text)
data += '\n\n' data += '\n\n'
self.__init__() self._connect()
self._socket.sendall(data.encode()) self._socket.sendall(data.encode())
sentence = [] sentence = []
if keys is None: if keys is None:
...@@ -73,7 +82,6 @@ class NLPClient: ...@@ -73,7 +82,6 @@ class NLPClient:
continue continue
values = line.split('\t') values = line.split('\t')
sentence.append(dict(zip(keys, line.split('\t')))) sentence.append(dict(zip(keys, line.split('\t'))))
self.__del__()
def tokenize(self, text, language='english', asdict=False): def tokenize(self, text, language='english', asdict=False):
keys = ('token', ) if asdict else None keys = ('token', ) if asdict else None
......
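After this refactoring, NLPClient no longer opens a socket in its constructor: each request goes through _connect() and _disconnect(), so instantiating the client while the server is down is harmless and a failed request cannot leak a half-open socket. A hedged usage sketch (import path assumed; host and port come from .settings):

    from parsing.Taggers.nlpserver.client import NLPClient  # path assumed

    client = NLPClient()            # no connection opened yet
    try:
        for sentence in client.tokenize('Hello world. How are you?'):
            print(sentence)         # connects, streams the reply, disconnects
    except ConnectionRefusedError:
        print('NLP server is not listening on the configured port')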
...@@ -4,7 +4,7 @@ import socketserver ...@@ -4,7 +4,7 @@ import socketserver
# Server parameters # Server parameters
server_host = 'localhost' server_host = 'localhost'
server_port = 1234 server_port = 7777
server_type_server = socketserver.TCPServer server_type_server = socketserver.TCPServer
server_type_client = socket.AF_INET, socket.SOCK_STREAM server_type_client = socket.AF_INET, socket.SOCK_STREAM
server_timeout = 2.0 server_timeout = 2.0
......
from collections import defaultdict
from datetime import datetime
from random import random
from hashlib import md5
from time import time
from math import log
from gargantext_web.db import *
from .FileParsers import *
class DebugTime:
def __init__(self, prefix):
self.prefix = prefix
self.message = None
self.time = None
def __del__(self):
if self.message is not None and self.time is not None:
print('%s - %s: %.4f' % (self.prefix, self.message, time() - self.time))
def show(self, message):
self.__del__()
self.message = message
self.time = time()
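# Usage sketch: each show() first flushes the previous timer (via the
# explicit __del__ call), then arms a new one, so the final step is printed
# when the object is garbage-collected and no explicit stop is needed:
#
#   dbg = DebugTime('Corpus #42')
#   dbg.show('parse documents')   # arms the first timer
#   ...                           # work happens here
#   dbg.show('index metadata')    # prints "Corpus #42 - parse documents: 0.1234"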
# keep all the parsers in a cache
class Parsers(defaultdict):
_parsers = {
'pubmed' : PubmedFileParser,
'isi' : IsiFileParser,
'ris' : RisFileParser,
'europress' : EuropressFileParser,
'europress_french' : EuropressFileParser,
'europress_english' : EuropressFileParser,
}
def __missing__(self, key):
if key not in self._parsers:
raise NotImplementedError('No such parser: "%s"' % (key))
parser = self._parsers[key]()
self[key] = parser
return parser
parsers = Parsers()
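# Usage sketch: parser instances are built on first access and memoized,
# so repeated lookups return the same object; unknown keys fail loudly:
#
#   parser = parsers['pubmed']     # first access instantiates PubmedFileParser
#   parsers['pubmed'] is parser    # True: cached afterwards
#   parsers['nonexistent']         # raises NotImplementedError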
# resources management
def add_resource(corpus, **kwargs):
# only for tests
session = Session()
resource = Resource(guid=str(random()), **kwargs )
# User
if 'user_id' not in kwargs:
resource.user_id = corpus.user_id
# Compute the digest
h = md5()
with open(str(resource.file), 'rb') as f:
    h.update(f.read())
resource.digest = h.hexdigest()
# check if a resource on this node already has this hash
tmp_resource = (session
.query(Resource)
.join(Node_Resource, Node_Resource.resource_id == Resource.id)
.filter(Resource.digest == resource.digest)
.filter(Node_Resource.node_id == corpus.id)
).first()
if tmp_resource is not None:
return tmp_resource
else:
session.add(resource)
session.commit()
# link with the resource
node_resource = Node_Resource(
node_id = corpus.id,
resource_id = resource.id,
parsed = False,
)
session.add(node_resource)
session.commit()
# return result
return resource
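# Usage sketch (file path illustrative): adding the same file twice to one
# corpus is idempotent thanks to the md5 digest check above:
#
#   r1 = add_resource(corpus, type_id=resourcetype.id, file='/tmp/batch1.xml')
#   r2 = add_resource(corpus, type_id=resourcetype.id, file='/tmp/batch1.xml')
#   assert r1.id == r2.id          # same digest on the same node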
def parse_resources(corpus, user=None, user_id=None):
dbg = DebugTime('Corpus #%d - parsing' % corpus.id)
session = Session()
corpus_id = corpus.id
type_id = cache.NodeType['Document'].id
if user_id is None:
    # prefer an explicitly passed user, else fall back to the corpus owner
    # (the previous else branch discarded any user_id passed by the caller)
    user_id = user.id if user is not None else corpus.user_id
# find resource of the corpus
resources_query = (session
.query(Resource, ResourceType)
.join(ResourceType, ResourceType.id == Resource.type_id)
.join(Node_Resource, Node_Resource.resource_id == Resource.id)
.filter(Node_Resource.node_id == corpus.id)
.filter(Node_Resource.parsed == False)
)
# make a new node for every parsed document of the corpus
dbg.show('analyze documents')
nodes = list()
for resource, resourcetype in resources_query:
parser = parsers[resourcetype.name]
for metadata_dict in parser.parse(resource.file):
# retrieve language ID from metadata
if 'language_iso2' in metadata_dict:
try:
language_id = cache.Language[metadata_dict['language_iso2']].id
except KeyError:
language_id = None
else:
language_id = None
# create new node
node = Node(
name = metadata_dict.get('title', '')[:200],
parent_id = corpus_id,
user_id = user_id,
type_id = type_id,
language_id = language_id,
metadata = metadata_dict,
date = datetime.utcnow(),
)
nodes.append(node)
#
# TODO: mark node-resources associations as parsed
#
dbg.show('insert %d documents' % len(nodes))
session.add_all(nodes)
session.commit()
# now, index the metadata
dbg.show('insert metadata')
node_metadata_lists = defaultdict(list)
metadata_types = {
metadata.name: metadata
for metadata in session.query(Metadata)
}
for node in nodes:
node_id = node.id
for metadata_key, metadata_value in node.metadata.items():
try:
metadata = metadata_types[metadata_key]
except KeyError:
# this key has no Metadata row, so it cannot be indexed: skip it silently
continue
if metadata.type == 'string':
metadata_value = metadata_value[:255]
node_metadata_lists[metadata.type].append((
node_id,
metadata.id,
metadata_value,
))
for key, values in node_metadata_lists.items():
bulk_insert(Node_Metadata, ['node_id', 'metadata_id', 'value_'+key], values)
# mark the corpus as parsed
corpus.parsed = True
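# Typical call order for a freshly created corpus (the same chain that
# doTheQuery() in scrap_pubmed.views runs after downloading files):
#
#   parse_resources(corpus)             # documents -> Node rows + metadata
#   extract_ngrams(corpus, ['title'])   # defined below
#   compute_tfidf(corpus)               # defined below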
# ngrams extraction
from .NgramsExtractors import EnglishNgramsExtractor, FrenchNgramsExtractor, NgramsExtractor
class NgramsExtractors(defaultdict):
def __init__(self):
# English
self['en'] = EnglishNgramsExtractor()
for key in ('eng', 'english'):
self[key] = self['en']
# French
self['fr'] = FrenchNgramsExtractor()
for key in ('fre', 'french'):
self[key] = self['fr']
# default
self['default'] = NgramsExtractor()
def __missing__(self, key):
formatted_key = key.strip().lower()
if formatted_key in self:
self[key] = self[formatted_key]
else:
self[key] = self['default']
# raise NotImplementedError
return self[key]
ngramsextractors = NgramsExtractors()
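# Usage sketch: keys are normalized lazily and cached, and unknown
# languages fall back to the default extractor instead of raising:
#
#   ngramsextractors['en']        # TurboTagger-backed English extractor
#   ngramsextractors['French ']   # stripped/lowercased -> same as 'fr'
#   ngramsextractors['xx']        # default NgramsExtractor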
def extract_ngrams(corpus, keys):
dbg = DebugTime('Corpus #%d - ngrams' % corpus.id)
default_language_iso2 = None if corpus.language_id is None else cache.Language[corpus.language_id].iso2
# query the metadata associated with the given keys
session = Session()
columns = [Node.id, Node.language_id] + [Node.metadata[key] for key in keys]
metadata_query = (session
.query(*columns)
.filter(Node.parent_id == corpus.id)
.filter(Node.type_id == cache.NodeType['Document'].id)
)
# prepare data to be inserted
dbg.show('find ngrams')
languages_by_id = {
language.id: language.iso2
for language in session.query(Language)
}
ngrams_data = set()
ngrams_language_data = set()
ngrams_tag_data = set()
node_ngram_list = defaultdict(lambda: defaultdict(int))
for nodeinfo in metadata_query:
node_id = nodeinfo[0]
language_id = nodeinfo[1]
if language_id is None:
language_iso2 = default_language_iso2
else:
language_iso2 = languages_by_id.get(language_id, None)
if language_iso2 is None:
continue
ngramsextractor = ngramsextractors[language_iso2]
for text in nodeinfo[2:]:
if text is not None and len(text):
ngrams = ngramsextractor.extract_ngrams(text.replace("[","").replace("]",""))
for ngram in ngrams:
n = len(ngram)
terms = ' '.join([token for token, tag in ngram]).lower()
# TODO BUG (now guarded): unknown tags used to raise KeyError, and an
# empty ngram left tag_id undefined from the previous iteration
if n == 1:
    try:
        tag_id = cache.Tag[ngram[0][1]].id
    except KeyError:
        tag_id = cache.Tag['NN'].id  # fall back to the generic noun tag
elif n > 1:
    tag_id = cache.Tag['NN'].id
else:
    continue
node_ngram_list[node_id][terms] += 1
ngrams_data.add((n, terms))
ngrams_language_data.add((terms, language_id))
ngrams_tag_data.add((terms, tag_id))
# insert ngrams to temporary table
dbg.show('find ids for the %d ngrams' % len(ngrams_data))
db, cursor = get_cursor()
cursor.execute('''
CREATE TEMPORARY TABLE tmp__ngrams (
id INT,
n INT NOT NULL,
terms VARCHAR(255) NOT NULL
)
''')
bulk_insert('tmp__ngrams', ['n', 'terms'], ngrams_data, cursor=cursor)
# resolve ids for terms already present in the ngram table
cursor.execute('''
UPDATE
tmp__ngrams
SET
id = ngram.id
FROM
%s AS ngram
WHERE
ngram.terms = tmp__ngrams.terms
''' % (Ngram.__table__.name, ))
# insert, then get the ids back
cursor.execute('''
INSERT INTO
%s (n, terms)
SELECT
n, terms
FROM
tmp__ngrams
WHERE
id IS NULL
''' % (Ngram.__table__.name, ))
cursor.execute('''
UPDATE
tmp__ngrams
SET
id = ngram.id
FROM
%s AS ngram
WHERE
ngram.terms = tmp__ngrams.terms
AND
tmp__ngrams.id IS NULL
''' % (Ngram.__table__.name, ))
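# Note: the three statements above emulate an upsert for ngram ids
# (PostgreSQL's ON CONFLICT did not exist yet):
#   1. the first UPDATE resolves ids for terms already in the ngram table;
#   2. the INSERT adds only the terms whose id is still NULL;
#   3. the second UPDATE picks up the ids generated by that INSERT.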
# get all ids
ngram_ids = dict()
cursor.execute('SELECT id, terms FROM tmp__ngrams')
for row in cursor.fetchall():
ngram_ids[row[1]] = row[0]
#
dbg.show('insert associations')
node_ngram_data = list()
for node_id, ngrams in node_ngram_list.items():
for terms, weight in ngrams.items():
try:
ngram_id = ngram_ids[terms]
node_ngram_data.append((node_id, ngram_id, weight, ))
except Exception as e:
print("err01:",e)
bulk_insert(Node_Ngram, ['node_id', 'ngram_id', 'weight'], node_ngram_data, cursor=cursor)
dbg.message = 'insert %d associations' % len(node_ngram_data)
# commit to database
db.commit()
# tfidf calculation
def compute_tfidf(corpus):
dbg = DebugTime('Corpus #%d - tfidf' % corpus.id)
# compute terms frequency sum
dbg.show('calculate terms frequencies sums')
db, cursor = get_cursor()
cursor.execute('''
CREATE TEMPORARY TABLE tmp__st (
node_id INT NOT NULL,
frequency DOUBLE PRECISION NOT NULL
)
''')
cursor.execute('''
INSERT INTO
tmp__st (node_id, frequency)
SELECT
node_ngram.node_id,
SUM(node_ngram.weight) AS frequency
FROM
%s AS node
INNER JOIN
%s AS node_ngram ON node_ngram.node_id = node.id
WHERE
node.parent_id = %d
GROUP BY
node_ngram.node_id
''' % (Node.__table__.name, Node_Ngram.__table__.name, corpus.id, ))
# compute normalized terms frequencies
dbg.show('normalize terms frequencies')
cursor.execute('''
CREATE TEMPORARY TABLE tmp__tf (
node_id INT NOT NULL,
ngram_id INT NOT NULL,
frequency DOUBLE PRECISION NOT NULL
)
''')
cursor.execute('''
INSERT INTO
tmp__tf (node_id, ngram_id, frequency)
SELECT
node_ngram.node_id,
node_ngram.ngram_id,
(node_ngram.weight / node.frequency) AS frequency
FROM
%s AS node_ngram
INNER JOIN
tmp__st AS node ON node.node_id = node_ngram.node_id
''' % (Node_Ngram.__table__.name, ))
# show off
dbg.show('compute idf')
cursor.execute('''
CREATE TEMPORARY TABLE tmp__idf (
ngram_id INT NOT NULL,
idf DOUBLE PRECISION NOT NULL
)
''')
cursor.execute('''
INSERT INTO
tmp__idf(ngram_id, idf)
SELECT
node_ngram.ngram_id,
-ln(COUNT(*))
FROM
%s AS node
INNER JOIN
%s AS node_ngram ON node_ngram.node_id = node.id
WHERE
node.parent_id = %d
GROUP BY
node_ngram.ngram_id
''' % (Node.__table__.name, Node_Ngram.__table__.name, corpus.id, ))
cursor.execute('SELECT COUNT(*) FROM tmp__st')
D = cursor.fetchone()[0]
if D>0:
lnD = log(D)
cursor.execute('UPDATE tmp__idf SET idf = idf + %f' % (lnD, ))
# show off
dbg.show('insert tfidf for %d documents' % D)
cursor.execute('''
INSERT INTO
%s (nodex_id, nodey_id, ngram_id, score)
SELECT
%d AS nodex_id,
tf.node_id AS nodey_id,
tf.ngram_id AS ngram_id,
(tf.frequency * idf.idf) AS score
FROM
tmp__idf AS idf
INNER JOIN
tmp__tf AS tf ON tf.ngram_id = idf.ngram_id
''' % (NodeNodeNgram.__table__.name, corpus.id, ))
# # show off
# cursor.execute('''
# SELECT
# node.name,
# ngram.terms,
# node_node_ngram.score AS tfidf
# FROM
# %s AS node_node_ngram
# INNER JOIN
# %s AS node ON node.id = node_node_ngram.nodey_id
# INNER JOIN
# %s AS ngram ON ngram.id = node_node_ngram.ngram_id
# WHERE
# node_node_ngram.nodex_id = %d
# ORDER BY
# score DESC
# ''' % (NodeNodeNgram.__table__.name, Node.__table__.name, Ngram.__table__.name, corpus.id, ))
# for row in cursor.fetchall():
# print(row)
# the end!
db.commit()
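For reference, the SQL above computes the classic tf-idf score: tmp__tf holds the document-normalized frequency w/Σw, and tmp__idf holds ln(D) − ln(df), built as -ln(df) and then shifted by ln(D). A self-contained sketch of the same arithmetic, with illustrative numbers:

    from math import log

    def tfidf(weight, doc_total, n_docs, doc_freq):
        # tf = w / Σw (tmp__tf);  idf = ln(D) - ln(df) (tmp__idf)
        return (weight / doc_total) * (log(n_docs) - log(doc_freq))

    # a term weighted 3 in a document of total weight 100,
    # appearing in 5 of the corpus's 1000 documents:
    print(tfidf(3, 100, 1000, 5))    # ~0.159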
...@@ -40,7 +40,7 @@ class MedlineFetcher: ...@@ -40,7 +40,7 @@ class MedlineFetcher:
"Get number of results for query 'query' in variable 'count'" "Get number of results for query 'query' in variable 'count'"
"Get also 'queryKey' and 'webEnv', which are used by function 'medlineEfetch'" "Get also 'queryKey' and 'webEnv', which are used by function 'medlineEfetch'"
print(query) # print(query)
origQuery = query origQuery = query
query = query.replace(' ', '%20') query = query.replace(' ', '%20')
...@@ -79,7 +79,7 @@ class MedlineFetcher: ...@@ -79,7 +79,7 @@ class MedlineFetcher:
queryNoSpace = query.replace(' ', '') # No space in directory and file names, avoids stupid errors queryNoSpace = query.replace(' ', '') # No space in directory and file names, avoids stupid errors
print ("LOG::TIME: ",'medlineEfetchRAW :Query "' , query , '"\t:\t' , count , ' results') # print ("LOG::TIME: ",'medlineEfetchRAW :Query "' , query , '"\t:\t' , count , ' results')
retstart = 0 retstart = 0
eFetch = '%s/efetch.fcgi?email=youremail@example.org&rettype=%s&retmode=xml&retstart=%s&retmax=%s&db=%s&query_key=%s&WebEnv=%s' %(self.pubMedEutilsURL, self.reportType, retstart, retmax, self.pubMedDB, queryKey, webEnv) eFetch = '%s/efetch.fcgi?email=youremail@example.org&rettype=%s&retmode=xml&retstart=%s&retmax=%s&db=%s&query_key=%s&WebEnv=%s' %(self.pubMedEutilsURL, self.reportType, retstart, retmax, self.pubMedDB, queryKey, webEnv)
...@@ -94,7 +94,7 @@ class MedlineFetcher: ...@@ -94,7 +94,7 @@ class MedlineFetcher:
def downloadFile(self, item): def downloadFile(self, item):
url = item[0] url = item[0]
filename = item[1] filename = item[1]
print("\tin test_downloadFile:") # print("\tin test_downloadFile:")
# print(url,filename) # print(url,filename)
data = urlopen(url) data = urlopen(url)
f = codecs.open(filename, "w" ,encoding='utf-8') f = codecs.open(filename, "w" ,encoding='utf-8')
...@@ -110,7 +110,7 @@ class MedlineFetcher: ...@@ -110,7 +110,7 @@ class MedlineFetcher:
def test_downloadFile(self, item): def test_downloadFile(self, item):
url = item[0] url = item[0]
filename = item[1] filename = item[1]
print("\tin downloadFile:") # print("\tin downloadFile:")
data = urlopen(url) data = urlopen(url)
return data return data
...@@ -119,7 +119,7 @@ class MedlineFetcher: ...@@ -119,7 +119,7 @@ class MedlineFetcher:
# time.sleep(1) # pretend to do some lengthy work. # time.sleep(1) # pretend to do some lengthy work.
returnvalue = self.medlineEsearch(item) returnvalue = self.medlineEsearch(item)
with self.lock: with self.lock:
print(threading.current_thread().name, item) # print(threading.current_thread().name, item)
return returnvalue return returnvalue
# The worker thread pulls an item from the queue and processes it # The worker thread pulls an item from the queue and processes it
...@@ -160,13 +160,13 @@ class MedlineFetcher: ...@@ -160,13 +160,13 @@ class MedlineFetcher:
N = 0 N = 0
print ("MedlineFetcher::serialFetcher :") # print ("MedlineFetcher::serialFetcher :")
thequeries = [] thequeries = []
globalresults = [] globalresults = []
for i in range(yearsNumber): for i in range(yearsNumber):
year = str(2015 - i) year = str(2015 - i)
print ('YEAR ' + year) # print ('YEAR ' + year)
print ('---------\n') # print ('---------\n')
pubmedquery = str(year) + '[dp] '+query pubmedquery = str(year) + '[dp] '+query
self.q.put( pubmedquery ) #put task in the queue self.q.put( pubmedquery ) #put task in the queue
...@@ -196,5 +196,6 @@ class MedlineFetcher: ...@@ -196,5 +196,6 @@ class MedlineFetcher:
retmax_forthisyear = int(round(globalLimit*proportion)) retmax_forthisyear = int(round(globalLimit*proportion))
query["retmax"] = retmax_forthisyear query["retmax"] = retmax_forthisyear
if query["retmax"]==0: query["retmax"]+=1 if query["retmax"]==0: query["retmax"]+=1
print(query["string"],"\t[",k,">",query["retmax"],"]")
return thequeries return thequeries
from django.shortcuts import redirect
from django.shortcuts import render
from django.http import Http404, HttpResponse, HttpResponseRedirect
from django.template.loader import get_template from django.template.loader import get_template
from django.template import Context from django.template import Context
from django.contrib.auth.models import User, Group from django.contrib.auth.models import User, Group
from scrap_pubmed.MedlineFetcherDavid2015 import MedlineFetcher from scrap_pubmed.MedlineFetcherDavid2015 import MedlineFetcher
from gargantext_web.api import JsonHttpResponse
from urllib.request import urlopen, urlretrieve from urllib.request import urlopen, urlretrieve
import json import json
from gargantext_web.settings import MEDIA_ROOT
# from datetime import datetime # from datetime import datetime
import time import time
import datetime import datetime
...@@ -21,9 +16,23 @@ import threading ...@@ -21,9 +16,23 @@ import threading
from django.core.files import File from django.core.files import File
from gargantext_web.settings import DEBUG from gargantext_web.settings import DEBUG
from node.models import Language, ResourceType, Resource, \
Node, NodeType, Node_Resource, Project, Corpus, \ from django.shortcuts import redirect
Ngram, Node_Ngram, NodeNgramNgram, NodeNodeNgram from django.shortcuts import render
from django.http import Http404, HttpResponse, HttpResponseRedirect, HttpResponseForbidden
from sqlalchemy import func
from sqlalchemy.orm import aliased
from collections import defaultdict
import threading
from node.admin import CustomForm
from gargantext_web.db import *
from gargantext_web.settings import DEBUG, MEDIA_ROOT
from gargantext_web.api import JsonHttpResponse
from parsing.corpustools import add_resource, parse_resources, extract_ngrams, compute_tfidf
def getGlobalStats(request ): def getGlobalStats(request ):
...@@ -31,7 +40,7 @@ def getGlobalStats(request ): ...@@ -31,7 +40,7 @@ def getGlobalStats(request ):
alist = ["bar","foo"] alist = ["bar","foo"]
if request.method == "POST": if request.method == "POST":
N = 100 N = 1000
query = request.POST["query"] query = request.POST["query"]
print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" query =", query ) print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" query =", query )
print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" N =", N ) print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" N =", N )
...@@ -72,9 +81,34 @@ def getGlobalStatsISTEXT(request ): ...@@ -72,9 +81,34 @@ def getGlobalStatsISTEXT(request ):
def doTheQuery(request , project_id): def doTheQuery(request , project_id):
alist = ["hola","mundo"] alist = ["hola","mundo"]
if request.method == "POST": # SQLAlchemy session
session = Session()
# do we have a valid project id?
try:
project_id = int(project_id)
except ValueError:
raise Http404()
# do we have a valid project?
project = (session
.query(Node)
.filter(Node.id == project_id)
.filter(Node.type_id == cache.NodeType['Project'].id)
).first()
if project is None:
raise Http404()
# do we have a valid user?
user = request.user
if not user.is_authenticated():
return redirect('/login/?next=%s' % request.path)
if project.user_id != user.id:
return HttpResponseForbidden()
if request.method == "POST":
query = request.POST["query"] query = request.POST["query"]
name = request.POST["string"] name = request.POST["string"]
...@@ -86,30 +120,26 @@ def doTheQuery(request , project_id): ...@@ -86,30 +120,26 @@ def doTheQuery(request , project_id):
urlreqs.append( instancia.medlineEfetchRAW( yearquery ) ) urlreqs.append( instancia.medlineEfetchRAW( yearquery ) )
alist = ["tudo fixe" , "tudo bem"] alist = ["tudo fixe" , "tudo bem"]
""" resourcetype = cache.ResourceType["pubmed"]
urlreqs: List of urls to query.
- Then, to each url in urlreqs you do:
eFetchResult = urlopen(url)
eFetchResult.read() # this will output the XML... normally you write this to a XML-file.
"""
thefile = "how we do this here?"
resource_type = ResourceType.objects.get(name="pubmed" )
parent = Node.objects.get(id=project_id)
node_type = NodeType.objects.get(name='Corpus')
type_id = NodeType.objects.get(name='Document').id
user_id = User.objects.get( username=request.user ).id
# corpus node instanciation as a Django model
corpus = Node( corpus = Node(
user=request.user, name = name,
parent=parent, user_id = request.user.id,
type=node_type, parent_id = project_id,
name=name, type_id = cache.NodeType['Corpus'].id,
language_id = None,
) )
session.add(corpus)
session.commit()
# """
# urlreqs: List of urls to query.
# - Then, to each url in urlreqs you do:
# eFetchResult = urlopen(url)
# eFetchResult.read() # this will output the XML... normally you write this to a XML-file.
# """
corpus.save()
tasks = MedlineFetcher() tasks = MedlineFetcher()
for i in range(8): for i in range(8):
...@@ -124,24 +154,30 @@ def doTheQuery(request , project_id): ...@@ -124,24 +154,30 @@ def doTheQuery(request , project_id):
dwnldsOK = 0 dwnldsOK = 0
for filename in tasks.firstResults: for filename in tasks.firstResults:
if filename!=False: if filename!=False:
corpus.add_resource( user=request.user, type=resource_type, file=filename ) # add the uploaded resource to the corpus
add_resource(corpus,
user_id = request.user.id,
type_id = resourcetype.id,
file = filename,
)
dwnldsOK+=1 dwnldsOK+=1
if dwnldsOK == 0: return JsonHttpResponse(["fail"]) if dwnldsOK == 0: return JsonHttpResponse(["fail"])
# do the WorkFlow
try: try:
if DEBUG is True: def apply_workflow(corpus):
corpus.workflow() parse_resources(corpus)
# corpus.workflow__MOV() extract_ngrams(corpus, ['title'])
compute_tfidf(corpus)
if DEBUG:
apply_workflow(corpus)
else: else:
corpus.workflow.apply_async((), countdown=3) thread = threading.Thread(target=apply_workflow, args=(corpus, ), daemon=True)
thread.start()
return JsonHttpResponse(["workflow","finished"])
except Exception as error: except Exception as error:
print('WORKFLOW ERROR')
print(error) print(error)
return HttpResponseRedirect('/project/' + str(project_id))
return JsonHttpResponse(["workflow","finished","outside the try-except"])
data = alist data = alist
return JsonHttpResponse(data) return JsonHttpResponse(data)
...@@ -164,59 +200,59 @@ def testISTEX(request , project_id): ...@@ -164,59 +200,59 @@ def testISTEX(request , project_id):
print(query_string , query , N) print(query_string , query , N)
urlreqs = [] # urlreqs = []
pagesize = 50 # pagesize = 50
tasks = MedlineFetcher() # tasks = MedlineFetcher()
chunks = list(tasks.chunks(range(N), pagesize)) # chunks = list(tasks.chunks(range(N), pagesize))
for k in chunks: # for k in chunks:
if (k[0]+pagesize)>N: pagesize = N-k[0] # if (k[0]+pagesize)>N: pagesize = N-k[0]
urlreqs.append("http://api.istex.fr/document/?q="+query_string+"&output=*&"+"from="+str(k[0])+"&size="+str(pagesize)) # urlreqs.append("http://api.istex.fr/document/?q="+query_string+"&output=*&"+"from="+str(k[0])+"&size="+str(pagesize))
print(urlreqs) # print(urlreqs)
urlreqs = ["http://localhost/374255" , "http://localhost/374278" ] # urlreqs = ["http://localhost/374255" , "http://localhost/374278" ]
print(urlreqs) # print(urlreqs)
resource_type = ResourceType.objects.get(name="istext" ) # resource_type = ResourceType.objects.get(name="istext" )
parent = Node.objects.get(id=project_id) # parent = Node.objects.get(id=project_id)
node_type = NodeType.objects.get(name='Corpus') # node_type = NodeType.objects.get(name='Corpus')
type_id = NodeType.objects.get(name='Document').id # type_id = NodeType.objects.get(name='Document').id
user_id = User.objects.get( username=request.user ).id # user_id = User.objects.get( username=request.user ).id
corpus = Node( # corpus = Node(
user=request.user, # user=request.user,
parent=parent, # parent=parent,
type=node_type, # type=node_type,
name=query, # name=query,
) # )
corpus.save() # corpus.save()
# configuring your queue with the event # # configuring your queue with the event
for i in range(8): # for i in range(8):
t = threading.Thread(target=tasks.worker2) #thing to do # t = threading.Thread(target=tasks.worker2) #thing to do
t.daemon = True # thread dies when main thread (only non-daemon thread) exits. # t.daemon = True # thread dies when main thread (only non-daemon thread) exits.
t.start() # t.start()
for url in urlreqs: # for url in urlreqs:
filename = MEDIA_ROOT + '/corpora/%s/%s' % (request.user, str(datetime.now().microsecond)) # filename = MEDIA_ROOT + '/corpora/%s/%s' % (request.user, str(datetime.now().microsecond))
tasks.q.put( [url , filename]) #put a task in th queue # tasks.q.put( [url , filename]) #put a task in th queue
tasks.q.join() # wait until everything is finished # tasks.q.join() # wait until everything is finished
for filename in tasks.firstResults: # for filename in tasks.firstResults:
corpus.add_resource( user=request.user, type=resource_type, file=filename ) # corpus.add_resource( user=request.user, type=resource_type, file=filename )
corpus.save() # corpus.save()
print("DEBUG:",DEBUG) # print("DEBUG:",DEBUG)
# do the WorkFlow # # do the WorkFlow
try: # try:
if DEBUG is True: # if DEBUG is True:
corpus.workflow() # corpus.workflow()
else: # else:
corpus.workflow.apply_async((), countdown=3) # corpus.workflow.apply_async((), countdown=3)
return JsonHttpResponse(["workflow","finished"]) # return JsonHttpResponse(["workflow","finished"])
except Exception as error: # except Exception as error:
print(error) # print(error)
data = [query_string,query,N] data = [query_string,query,N]
return JsonHttpResponse(data) return JsonHttpResponse(data)
......
Project Gutenberg's Gargantua and Pantagruel, Complete., by Francois Rabelais
This eBook is for the use of anyone anywhere at no cost and with
almost no restrictions whatsoever. You may copy it, give it away or
re-use it under the terms of the Project Gutenberg License included
with this eBook or online at www.gutenberg.net
Title: Gargantua and Pantagruel, Complete.
Five Books Of The Lives, Heroic Deeds And Sayings Of Gargantua And
His Son Pantagruel
Author: Francois Rabelais
Release Date: August 8, 2004 [EBook #1200]
Language: English
*** START OF THIS PROJECT GUTENBERG EBOOK GARGANTUA AND PANTAGRUEL, ***
Produced by Sue Asscher and David Widger
MASTER FRANCIS RABELAIS
FIVE BOOKS OF THE LIVES, HEROIC DEEDS AND SAYINGS OF
GARGANTUA AND HIS SON PANTAGRUEL
Translated into English by
Sir Thomas Urquhart of Cromarty
and
Peter Antony Motteux
The text of the first Two Books of Rabelais has been reprinted from the
first edition (1653) of Urquhart's translation. Footnotes initialled 'M.'
are drawn from the Maitland Club edition (1838); other footnotes are by the
translator. Urquhart's translation of Book III. appeared posthumously in
1693, with a new edition of Books I. and II., under Motteux's editorship.
Motteux's rendering of Books IV. and V. followed in 1708. Occasionally (as
the footnotes indicate) passages omitted by Motteux have been restored from
the 1738 copy edited by Ozell.
Chapter 1.I. Of the Genealogy and Antiquity of Gargantua.
Chapter 1.II. The Antidoted Fanfreluches: or, a Galimatia of extravagant Conceits found in an ancient Monument.
Chapter 1.III. How Gargantua was carried eleven months in his mother's belly.
Chapter 1.IV. How Gargamelle, being great with Gargantua, did eat a huge deal of tripes.
Chapter 1.IX. The colours and liveries of Gargantua.
Chapter 1.L. Gargantua's speech to the vanquished.
Chapter 1.LI. How the victorious Gargantuists were recompensed after the battle.
Chapter 1.LII. How Gargantua caused to be built for the Monk the Abbey of Theleme.
Chapter 1.LIII. How the abbey of the Thelemites was built and endowed.
Chapter 1.LIV. The inscription set upon the great gate of Theleme.
Chapter 1.LV. What manner of dwelling the Thelemites had.
Chapter 1.LVI. How the men and women of the religious order of Theleme were apparelled.
Chapter 1.LVII. How the Thelemites were governed, and of their manner of living.
Chapter 1.LVIII. A prophetical Riddle.
Chapter 1.V. The Discourse of the Drinkers.
Chapter 1.VI. How Gargantua was born in a strange manner.
Chapter 1.VII. After what manner Gargantua had his name given him, and how he tippled, bibbed, and curried the can.
Chapter 1.VIII. How they apparelled Gargantua.
Chapter 1.X. Of that which is signified by the colours white and blue.
Chapter 1.XI. Of the youthful age of Gargantua.
Chapter 1.XII. Of Gargantua's wooden horses.
Chapter 1.XIII. How Gargantua's wonderful understanding became known to his father Grangousier, by the invention of a torchecul or wipebreech.
Chapter 1.XIV. How Gargantua was taught Latin by a Sophister.
Chapter 1.XIX. The oration of Master Janotus de Bragmardo for recovery of the bells.
Chapter 1.XL. Why monks are the outcasts of the world; and wherefore some have bigger noses than others.
Chapter 1.XLI. How the Monk made Gargantua sleep, and of his hours and breviaries.
Chapter 1.XLII. How the Monk encouraged his fellow-champions, and how he hanged upon a tree.
Chapter 1.XLIII. How the scouts and fore-party of Picrochole were met with by Gargantua, and how the Monk slew Captain Drawforth, and then was taken prisoner by his enemies.
Chapter 1.XLIV. How the Monk rid himself of his keepers, and how Picrochole's forlorn hope was defeated.
Chapter 1.XLIX. How Picrochole in his flight fell into great misfortunes, and what Gargantua did after the battle.
Chapter 1.XLV. How the Monk carried along with him the Pilgrims, and of the good words that Grangousier gave them.
Chapter 1.XLVI. How Grangousier did very kindly entertain Touchfaucet his prisoner.
Chapter 1.XLVII. How Grangousier sent for his legions, and how Touchfaucet slew Rashcalf, and was afterwards executed by the command of Picrochole.
Chapter 1.XLVIII. How Gargantua set upon Picrochole within the rock Clermond, and utterly defeated the army of the said Picrochole.
Chapter 1.XV. How Gargantua was put under other schoolmasters.
Chapter 1.XVI. How Gargantua was sent to Paris, and of the huge great mare that he rode on; how she destroyed the oxflies of the Beauce.
Chapter 1.XVII. How Gargantua paid his welcome to the Parisians, and how he took away the great bells of Our Lady's Church.
Chapter 1.XVIII. How Janotus de Bragmardo was sent to Gargantua to recover the great bells.
Chapter 1.XX. How the Sophister carried away his cloth, and how he had a suit in law against the other masters.
Chapter 1.XXI. The study of Gargantua, according to the discipline of his schoolmasters the Sophisters.
Chapter 1.XXII. The games of Gargantua.
Chapter 1.XXIII. How Gargantua was instructed by Ponocrates, and in such sort disciplinated, that he lost not one hour of the day.
Chapter 1.XXIV. How Gargantua spent his time in rainy weather.
Chapter 1.XXIX. The tenour of the letter which Grangousier wrote to his son Gargantua.
Chapter 1.XXV. How there was great strife and debate raised betwixt the cake-bakers of Lerne, and those of Gargantua's country, whereupon were waged great wars.
Chapter 1.XXVI. How the inhabitants of Lerne, by the commandment of Picrochole their king, assaulted the shepherds of Gargantua unexpectedly and on a sudden.
Chapter 1.XXVII. How a monk of Seville saved the close of the abbey from being ransacked by the enemy.
Chapter 1.XXVIII. How Picrochole stormed and took by assault the rock Clermond, and of Grangousier's unwillingness and aversion from the undertaking of war.
Chapter 1.XXX. How Ulric Gallet was sent unto Picrochole.
Chapter 1.XXXI. The speech made by Gallet to Picrochole.
Chapter 1.XXXII. How Grangousier, to buy peace, caused the cakes to be restored.
Chapter 1.XXXIII. How some statesmen of Picrochole, by hairbrained counsel, put him in extreme danger.
Chapter 1.XXXIV. How Gargantua left the city of Paris to succour his country, and how Gymnast encountered with the enemy.
Chapter 1.XXXIX. How the Monk was feasted by Gargantua, and of the jovial discourse they had at supper.
Chapter 1.XXXV. How Gymnast very souply and cunningly killed Captain Tripet and others of Picrochole's men.
Chapter 1.XXXVI. How Gargantua demolished the castle at the ford of Vede, and how they passed the ford.
Chapter 1.XXXVII. How Gargantua, in combing his head, made the great cannon-balls fall out of his hair.
Chapter 1.XXXVIII. How Gargantua did eat up six pilgrims in a salad.
Chapter 2.I. Of the original and antiquity of the great Pantagruel.
Chapter 2.II. Of the nativity of the most dread and redoubted Pantagruel.
Chapter 2.III. Of the grief wherewith Gargantua was moved at the decease of his wife Badebec.
Chapter 2.IV. Of the infancy of Pantagruel.
Chapter 2.IX. How Pantagruel found Panurge, whom he loved all his lifetime.
Chapter 2.V. Of the acts of the noble Pantagruel in his youthful age.
Chapter 2.VI. How Pantagruel met with a Limousin, who too affectedly did counterfeit the French language.
Chapter 2.VII. How Pantagruel came to Paris, and of the choice books of the Library of St. Victor.
Chapter 2.VIII. How Pantagruel, being at Paris, received letters from his father Gargantua, and the copy of them.
Chapter 2.X. How Pantagruel judged so equitably of a controversy, which was wonderfully obscure and difficult, that, by reason of his just decree therein, he was reputed to have a most admirable judgment.
Chapter 2.XI. How the Lords of Kissbreech and Suckfist did plead before Pantagruel without an attorney.
Chapter 2.XII. How the Lord of Suckfist pleaded before Pantagruel.
Chapter 2.XIII. How Pantagruel gave judgment upon the difference of the two lords.
Chapter 2.XIV. How Panurge related the manner how he escaped out of the hands of the Turks.
Chapter 2.XIX. How Panurge put to a nonplus the Englishman that argued by signs.
Chapter 2.XV. How Panurge showed a very new way to build the walls of Paris.
Chapter 2.XVI. Of the qualities and conditions of Panurge.
Chapter 2.XVII. How Panurge gained the pardons, and married the old women, and of the suit in law which he had at Paris.
Chapter 2.XVIII. How a great scholar of England would have argued against Pantagruel, and was overcome by Panurge.
Chapter 2.XX. How Thaumast relateth the virtues and knowledge of Panurge.
Chapter 2.XXI. How Panurge was in love with a lady of Paris.
Chapter 2.XXII. How Panurge served a Parisian lady a trick that pleased her not very well.
Chapter 2.XXIII. How Pantagruel departed from Paris, hearing news that the Dipsodes had invaded the land of the Amaurots; and the cause wherefore the leagues are so short in France.
Chapter 2.XXIV. A letter which a messenger brought to Pantagruel from a lady of Paris, together with the exposition of a posy written in a gold ring.
Chapter 2.XXIX. How Pantagruel discomfited the three hundred giants armed.
Chapter 2.XXV. How Panurge, Carpalin, Eusthenes, and Epistemon, the gentlemen attendants of Pantagruel, vanquished and discomfited six hundred and threescore horsemen very cunningly.
Chapter 2.XXVI. How Pantagruel and his company were weary in eating still salt meats; and how Carpalin went a-hunting to have some venison.
Chapter 2.XXVII. How Pantagruel set up one trophy in memorial of their valour, and Panurge another in remembrance of the hares. How Pantagruel likewise with his farts begat little men, and with his fisgs little women; and how Panurge broke a great staff over two glasses.
Chapter 2.XXVIII. How Pantagruel got the victory very strangely over the Dipsodes and the Giants.
Chapter 2.XXX. How Epistemon, who had his head cut off, was finely healed by Panurge, and of the news which he brought from the devils, and of the damned people in hell.
Chapter 2.XXXI. How Pantagruel entered into the city of the Amaurots, and how Panurge married King Anarchus to an old lantern-carrying hag, and made him a crier of green sauce.
Chapter 2.XXXII. How Pantagruel with his tongue covered a whole army, and what the author saw in his mouth.
Chapter 2.XXXIII. How Pantagruel became sick, and the manner how he was recovered.
Chapter 2.XXXIV. The conclusion of this present book, and the excuse of the author.
Chapter 3.I. How Pantagruel transported a colony of Utopians into Dipsody.
Chapter 3.II. How Panurge was made Laird of Salmigondin in Dipsody, and did waste his revenue before it came in.
Chapter 3.III. How Panurge praiseth the debtors and borrowers.
Chapter 3.IV. Panurge continueth his discourse in the praise of borrowers and lenders.
Chapter 3.IX. How Panurge asketh counsel of Pantagruel whether he should marry, yea, or no.
Chapter 3.L. How the famous Pantagruelion ought to be prepared and wrought.
Chapter 3.LI. Why it is called Pantagruelion, and of the admirable virtues.
Chapter 3.LII. How a certain kind of Pantagruelion is of that nature that the fire is not able to consume it.
Chapter 3.V. How Pantagruel altogether abhorreth the debtors and borrowers.
Chapter 3.VI. Why new married men were privileged from going to the wars.
Chapter 3.VII. How Panurge had a flea in his ear, and forbore to wear any longer his magnificent codpiece.
Chapter 3.VIII. Why the codpiece is held to be the chief piece of armour amongst warriors.
Chapter 3.X. How Pantagruel representeth unto Panurge the difficulty of giving advice in the matter of marriage; and to that purpose mentioneth somewhat of the Homeric and Virgilian lotteries.
Chapter 3.XI. How Pantagruel showeth the trial of one's fortune by the throwing of dice to be unlawful.
Chapter 3.XII. How Pantagruel doth explore by the Virgilian lottery what fortune Panurge shall have in his marriage.
Chapter 3.XIII. How Pantagruel adviseth Panurge to try the future good or bad luck of his marriage by dreams.
Chapter 3.XIV. Panurge's dream, with the interpretation thereof.
Chapter 3.XIX. How Pantagruel praiseth the counsel of dumb men.
Chapter 3.XL. How Bridlegoose giveth reasons why he looked upon those law- actions which he decided by the chance of the dice.
Chapter 3.XLI. How Bridlegoose relateth the history of the reconcilers of parties at variance in matters of law.
Chapter 3.XLII. How suits at law are bred at first, and how they come afterwards to their perfect growth.
Chapter 3.XLIII. How Pantagruel excuseth Bridlegoose in the matter of sentencing actions at law by the chance of the dice.
Chapter 3.XLIV. How Pantagruel relateth a strange history of the perplexity of human judgment.
Chapter 3.XLIX. How Pantagruel did put himself in a readiness to go to sea; and of the herb named Pantagruelion.
Chapter 3.XLV. How Panurge taketh advice of Triboulet.
Chapter 3.XLVI. How Pantagruel and Panurge diversely interpret the words of Triboulet.
Chapter 3.XLVII. How Pantagruel and Panurge resolved to make a visit to the Oracle of the Holy Bottle.
Chapter 3.XLVIII. How Gargantua showeth that the children ought not to marry without the special knowledge and advice of their fathers and mothers.
Chapter 3.XV. Panurge's excuse and exposition of the monastic mystery concerning powdered beef.
Chapter 3.XVI. How Pantagruel adviseth Panurge to consult with the Sibyl of Panzoust.
Chapter 3.XVII. How Panurge spoke to the Sibyl of Panzoust.
Chapter 3.XVIII. How Pantagruel and Panurge did diversely expound the verses of the Sibyl of Panzoust.
Chapter 3.XX. How Goatsnose by signs maketh answer to Panurge.
Chapter 3.XXI. How Panurge consulteth with an old French poet, named Raminagrobis.
Chapter 3.XXII. How Panurge patrocinates and defendeth the Order of the Begging Friars.
Chapter 3.XXIII. How Panurge maketh the motion of a return to Raminagrobis.
Chapter 3.XXIV. How Panurge consulteth with Epistemon.
Chapter 3.XXIX. How Pantagruel convocated together a theologian, physician, lawyer, and philosopher, for extricating Panurge out of the perplexity wherein he was.
Chapter 3.XXV. How Panurge consulteth with Herr Trippa.
Chapter 3.XXVI. How Panurge consulteth with Friar John of the Funnels.
Chapter 3.XXVII. How Friar John merrily and sportingly counselleth Panurge.
Chapter 3.XXVIII. How Friar John comforteth Panurge in the doubtful matter of cuckoldry.
Chapter 3.XXX. How the theologue, Hippothadee, giveth counsel to Panurge in the matter and business of his nuptial enterprise.
Chapter 3.XXXI. How the physician Rondibilis counselleth Panurge.
Chapter 3.XXXII. How Rondibilis declareth cuckoldry to be naturally one of the appendances of marriage.
Chapter 3.XXXIII. Rondibilis the physician's cure of cuckoldry.
Chapter 3.XXXIV. How women ordinarily have the greatest longing after things prohibited.
Chapter 3.XXXIX. How Pantagruel was present at the trial of Judge Bridlegoose, who decided causes and controversies in law by the chance and fortune of the dice.
Chapter 3.XXXV. How the philosopher Trouillogan handleth the difficulty of marriage.
Chapter 3.XXXVI. A continuation of the answer of the Ephectic and Pyrrhonian philosopher Trouillogan.
Chapter 3.XXXVII. How Pantagruel persuaded Panurge to take counsel of a fool.
Chapter 3.XXXVIII. How Triboulet is set forth and blazed by Pantagruel and Panurge.
Chapter 4.I. How Pantagruel went to sea to visit the oracle of Bacbuc, alias the Holy Bottle.
Chapter 4.II. How Pantagruel bought many rarities in the island of Medamothy.
Chapter 4.III. How Pantagruel received a letter from his father Gargantua, and of the strange way to have speedy news from far distant places.
Chapter 4.IV. How Pantagruel writ to his father Gargantua, and sent him several curiosities.
Chapter 4.IX. How Pantagruel arrived at the island of Ennasin, and of the strange ways of being akin in that country.
Chapter 4.L. How Homenas showed us the archetype, or representation of a pope.
Chapter 4.LI. Table-talk in praise of the decretals.
Chapter 4.LII. A continuation of the miracles caused by the decretals.
Chapter 4.LIII. How, by the virtue of the decretals, gold is subtilely drawn out of France to Rome.
Chapter 4.LIV. How Homenas gave Pantagruel some bon-Christian pears.
Chapter 4.LIX. Of the ridiculous statue Manduce; and how and what the Gastrolaters sacrifice to their ventripotent god.
Chapter 4.LV. How Pantagruel, being at sea, heard various unfrozen words.
Chapter 4.LVI. How among the frozen words Pantagruel found some odd ones.
Chapter 4.LVII. How Pantagruel went ashore at the dwelling of Gaster, the first master of arts in the world.
Chapter 4.LVIII. How, at the court of the master of ingenuity, Pantagruel detested the Engastrimythes and the Gastrolaters.
Chapter 4.LX. What the Gastrolaters sacrificed to their god on interlarded fish-days.
Chapter 4.LXI. How Gaster invented means to get and preserve corn.
Chapter 4.LXII. How Gaster invented an art to avoid being hurt or touched by cannon-balls.
Chapter 4.LXIII. How Pantagruel fell asleep near the island of Chaneph, and of the problems proposed to be solved when he waked.
Chapter 4.LXIV. How Pantagruel gave no answer to the problems.
Chapter 4.LXV. How Pantagruel passed the time with his servants.
Chapter 4.LXVI. How, by Pantagruel's order, the Muses were saluted near the isle of Ganabim.
Chapter 4.LXVII. How Panurge berayed himself for fear; and of the huge cat Rodilardus, which he took for a puny devil.
Chapter 4.V. How Pantagruel met a ship with passengers returning from Lantern-land.
Chapter 4.VI. How, the fray being over, Panurge cheapened one of Dingdong's sheep.
Chapter 4.VII. Which if you read you'll find how Panurge bargained with Dingdong.
Chapter 4.VIII. How Panurge caused Dingdong and his sheep to be drowned in the sea.
Chapter 4.X. How Pantagruel went ashore at the island of Chely, where he saw King St. Panigon.
Chapter 4.XI. Why monks love to be in kitchens.
Chapter 4.XII. How Pantagruel passed by the land of Pettifogging, and of the strange way of living among the Catchpoles.
Chapter 4.XIII. How, like Master Francis Villon, the Lord of Basche commended his servants.
Chapter 4.XIV. A further account of catchpoles who were drubbed at Basche's house.
Chapter 4.XIX. What countenances Panurge and Friar John kept during the.
Chapter 4.XL. How Friar John fitted up the sow; and of the valiant cooks that went into it.
Chapter 4.XLI. How Pantagruel broke the Chitterlings at the knees.
Chapter 4.XLII. How Pantagruel held a treaty with Niphleseth, Queen of the Chitterlings.
Chapter 4.XLIII. How Pantagruel went into the island of Ruach.
Chapter 4.XLIV. How small rain lays a high wind.
Chapter 4.XLIX. How Homenas, Bishop of Papimany, showed us the Uranopet decretals .
Chapter 4.XLV. How Pantagruel went ashore in the island of Pope-Figland.
Chapter 4.XLVI. How a junior devil was fooled by a husbandman of Pope- Figland.
Chapter 4.XLVII. How the devil was deceived by an old woman of Pope- Figland.
Chapter 4.XLVIII. How Pantagruel went ashore at the island of Papimany.
Chapter 4.XV. How the ancient custom at nuptials is renewed by the catchpole.
Chapter 4.XVI. How Friar John made trial of the nature of the catchpoles.
Chapter 4.XVII. How Pantagruel came to the islands of Tohu and Bohu; and of the strange death of Wide-nostrils, the swallower of windmills.
Chapter 4.XVIII. How Pantagruel met with a great storm at sea.
Chapter 4.XX. How the pilots were forsaking their ships in the greatest stress of weather.
Chapter 4.XXI. A continuation of the storm, with a short discourse on the subject of making testaments at sea.
Chapter 4.XXII. An end of the storm.
Chapter 4.XXIII. How Panurge played the good fellow when the storm was over.
Chapter 4.XXIV. How Panurge was said to have been afraid without reason during the storm.
Chapter 4.XXIX. How Pantagruel sailed by the Sneaking Island, where Shrovetide reigned.
Chapter 4.XXV. How, after the storm, Pantagruel went on shore in the islands of the Macreons.
Chapter 4.XXVI. How the good Macrobius gave us an account of the mansion and decease of the heroes.
Chapter 4.XXVII. Pantagruel's discourse of the decease of heroic souls; and of the dreadful prodigies that happened before the death of the late Lord de Langey.
Chapter 4.XXVIII. How Pantagruel related a very sad story of the death of the heroes.
Chapter 4.XXX. How Shrovetide is anatomized and described by Xenomanes.
Chapter 4.XXXI. Shrovetide's outward parts anatomized.
Chapter 4.XXXII. A continuation of Shrovetide's countenance.
Chapter 4.XXXIII. How Pantagruel discovered a monstrous physeter, or whirlpool, near the Wild Island.
Chapter 4.XXXIV. How the monstrous physeter was slain by Pantagruel.
Chapter 4.XXXIX. How Friar John joined with the cooks to fight the Chitterlings.
Chapter 4.XXXV. How Pantagruel went on shore in the Wild Island, the ancient abode of the Chitterlings.
Chapter 4.XXXVI. How the wild Chitterlings laid an ambuscado for Pantagruel.
Chapter 4.XXXVII. How Pantagruel sent for Colonel Maul-chitterling and Colonel Cut-pudding; with a discourse well worth your hearing about the names of places and persons.
Chapter 4.XXXVIII. How Chitterlings are not to be slighted by men.
Chapter 5.I. How Pantagruel arrived at the Ringing Island, and of the noise that we heard.
Chapter 5.II. How the Ringing Island had been inhabited by the Siticines, who were become birds.
Chapter 5.III. How there is but one pope-hawk in the Ringing Island.
Chapter 5.IV. How the birds of the Ringing Island were all passengers.
Chapter 5.IX. How we arrived at the island of Tools.
Chapter 5.V. Of the dumb Knight-hawks of the Ringing Island.
Chapter 5.VI. How the birds are crammed in the Ringing Island.
Chapter 5.VII. How Panurge related to Master Aedituus the fable of the horse and the ass.
Chapter 5.VIII. How with much ado we got a sight of the pope-hawk.
Chapter 5.X. How Pantagruel arrived at the island of Sharping.
Chapter 5.XI. How we passed through the wicket inhabited by Gripe-men-all, Archduke of the Furred Law-cats.
Chapter 5.XII. How Gripe-men-all propounded a riddle to us.
Chapter 5.XIII. How Panurge solved Gripe-men-all's riddle.
Chapter 5.XIV. How the Furred Law-cats live on corruption.
Chapter 5.XIX. How we arrived at the queendom of Whims or Entelechy.
Chapter 5.XL. How the battle in which the good Bacchus overthrew the Indians was represented in mosaic work.
Chapter 5.XLI. How the temple was illuminated with a wonderful lamp.
Chapter 5.XLII. How the Priestess Bacbuc showed us a fantastic fountain in the temple, and how the fountain-water had the taste of wine, according to the imagination of those who drank of it.
Chapter 5.XLIII. How the Priestess Bacbuc equipped Panurge in order to have the word of the Bottle.
Chapter 5.XLIV. How Bacbuc, the high-priestess, brought Panurge before the Holy Bottle.
Chapter 5.XLV. How Bacbuc explained the word of the Goddess-Bottle.
Chapter 5.XLVI. How Panurge and the rest rhymed with poetic fury.
Chapter 5.XLVII. How we took our leave of Bacbuc, and left the Oracle of the Holy Bottle.
Chapter 5.XV. How Friar John talks of rooting out the Furred Law-cats.
Chapter 5.XVI. How Pantagruel came to the island of the Apedefers, or Ignoramuses, with long claws and crooked paws, and of terrible adventures and monsters there.
Chapter 5.XVII. How we went forwards, and how Panurge had like to have been killed.
Chapter 5.XVIII. How our ships were stranded, and we were relieved by some people that were subject to Queen Whims (qui tenoient de la Quinte).
Chapter 5.XX. How the Quintessence cured the sick with a song.
Chapter 5.XXI. How the Queen passed her time after dinner.
Chapter 5.XXII. How Queen Whims' officers were employed; and how the said lady retained us among her abstractors.
Chapter 5.XXIII. How the Queen was served at dinner, and of her way of eating.
Chapter 5.XXIV. How there was a ball in the manner of a tournament, at which Queen Whims was present.
Chapter 5.XXIX. How Epistemon disliked the institution of Lent.
Chapter 5.XXV. How the thirty-two persons at the ball fought.
Chapter 5.XXVI. How we came to the island of Odes, where the ways go up and down.
Chapter 5.XXVII. How we came to the island of Sandals; and of the order of Semiquaver Friars.
Chapter 5.XXVIII. How Panurge asked a Semiquaver Friar many questions, and was only answered in monosyllables.
Chapter 5.XXX. How we came to the land of Satin.
Chapter 5.XXXI. How in the land of Satin we saw Hearsay, who kept a school of vouching.
Chapter 5.XXXII. How we came in sight of Lantern-land.
Chapter 5.XXXIII. How we landed at the port of the Lychnobii, and came to Lantern-land.
Chapter 5.XXXIV. How we arrived at the Oracle of the Bottle.
Chapter 5.XXXIX. How we saw Bacchus's army drawn up in battalia in mosaic work.
Chapter 5.XXXV. How we went underground to come to the Temple of the Holy Bottle, and how Chinon is the oldest city in the world.
Chapter 5.XXXVI. How we went down the tetradic steps, and of Panurge's fear.
Chapter 5.XXXVII. How the temple gates in a wonderful manner opened of themselves.
Chapter 5.XXXVIII. Of the temple's admirable pavement.
...@@ -42,12 +42,12 @@ ...@@ -42,12 +42,12 @@
</p> </p>
{% endif %} {% endif %}
<!-- <a class="btn btn-primary btn-lg" role="button" href="/admin/documents/corpus/{{ corpus.pk }}/">Add file</a> --> <!-- <a class="btn btn-primary btn-lg" role="button" href="/admin/documents/corpus/{{ corpus.id }}/">Add file</a> -->
<a class="btn btn-primary btn-lg" role="button" href="/project/{{project.pk}}/corpus/{{ corpus.pk }}/corpus.csv">Save as</a> <a class="btn btn-primary btn-lg" role="button" href="/project/{{project.id}}/corpus/{{ corpus.id }}/corpus.csv">Save as</a>
<a class="btn btn-primary btn-lg" role="button" href="/project/{{project.pk}}/corpus/{{ corpus.pk }}/delete">Delete</a></p> <a class="btn btn-primary btn-lg" role="button" href="/delete/{{ corpus.id }}">Delete</a></p>
{% if number == 0 %} {% if number == 0 %}
<a class="btn btn-primary btn-lg" role="button" href="/admin/documents/corpus/{{ corpus.pk }}/">Add documents</a></p> <a class="btn btn-primary btn-lg" role="button" href="/admin/documents/corpus/{{ corpus.id }}/">Add documents</a></p>
{% endif %} {% endif %}
</div> </div>
......
...@@ -260,8 +260,29 @@ ...@@ -260,8 +260,29 @@
</div> </div>
<div id="topPapers"></div> <div id="topPapers"></div>
<!--
<div id="tab-container-top" class='tab-container'>
<ul class='etabs'>
<li id="tabmed" class='tab active'><a href="#tabs3">Medline Pubs</a></li>
<li id="tabgps" class='tab'><a href="#tabs4">+</a></li>
</ul>
<div class='panel-container'>
<div id="tabs3">
<div id="topPapers"></div>
</div>
<div id="tabs4">
<div id="topProposals"></div>
</div>
</div>
</div>
-->
<div id="information"></div> <div id="information"></div>
</div> </div>
......
...@@ -19,18 +19,16 @@ ...@@ -19,18 +19,16 @@
<div class="col-md-4 content"> <div class="col-md-4 content">
<h1>Gargantext</h1> <h1>Gargantext</h1>
<p>A web platform to explore text-mining</p> <p>A web platform to explore text-mining</p>
<a class="btn btn-primary btn-lg" href="/projects">Test Gargantext</a> <a class="btn btn-primary btn-lg" href="/projects" title="Click and test by yourself">Test Gargantext</a>
</div> </div>
<div class="col-md-3 content"> <div class="col-md-2 content"></div>
</div> <div class="col-md-2 content"></div>
<div class="col-md-5 content"> <div class="col-md-2 content">
<!-- <p class="right">
<h3>Project Manager:</h3> <div style="border:15px">
<h4><a href="http://alexandre.delanoe.org" target="blank">Alexandre Delanoë</a></h4> <img src="{% static "img/logo.png"%}" title="Logo designed by anoe" style="100px; height:150px; border:3px solid white">
<h3>Scientific board:</h3> </div>
<h4><a href="http://chavalarias.com" target="blank">David Chavalarias</a> and <a href="http://alexandre.delanoe.org" target="blank">Alexandre Delanoë</a></h4> </p>
<h3><a href="/about/#collapseTeam" target="blank">Thanks to all the team</a></h3>
--!>
</div> </div>
</div> </div>
</div> </div>
@@ -39,7 +37,7 @@
 <div class="row">
 <div class="content">
 <center>
-<img src="{% static "img/logo.png"%}" alt="Logo Gargantext" style="100px; height:150px">
+<img src="{% static "img/Gargantextuel-212x300.jpg"%}" title="Gargantextuel drawn by Cecile Meadel" style="border:2px solid black">
 <!--
 <h2>Introduction Video</h2>
@@ -63,57 +61,23 @@
 <div class="row">
 <div class="col-md-4 content">
-<h3><a href="#">Historic</a></h3>
-<p>
-Chapter 1.VI. -- How Gargantua was born in a strange manner.
-Chapter 2.XXIII. -- How Pantagruel departed from Paris, hearing
-news that the Dipsodes had invaded the land of the Amaurots; and
-the cause wherefore the leagues are so short in France. Chapter
-3.XLVI. -- How Pantagruel and Panurge diversely interpret the
-words of Triboulet. Chapter 4.LV. -- How Pantagruel, being at sea,
-heard various unfrozen words. Chapter 5.IX. -- How we arrived at
-the island of Tools.
-</p>
+<h3><a href="#" title="Random sentences in Gargantua's Books chapters, historically true">Historic</a></h3>
+<p> {{ paragraph_gargantua }}</p>
 </div>
 <div class="col-md-4 content">
-<h3><a href="#">Presentation</a></h3>
-<p>
-Lorem ipsum dolor sit amet, consectetur adipiscing elit,
-sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
-Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris
-nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in
-reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla
-pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
-culpa qui officia deserunt mollit anim id est laborum.
+<h3><a href="#" title="Randomized words, semantically and syntactically false.">Presentation</a></h3>
+<p> {{ paragraph_lorem }}
 </p>
 </div>
 <div class="col-md-4 content">
-<h3><a href="#">Tutoreil</a></h3>
+<h3><a href="#" title="Randomized letters, true or false ?">Tutoreil</a></h3>
 <p>
+{{ paragraph_tutoreil }}
 <!-- Why not French ? -->
 <!-- find Cambridge source which inspired this --!>
-Il praaît que l'odrre des ltetres dnas un mot n'a pas
-d'iprnorotncae. La pmeirère et la drenèire letrte diovent
-êrte à la bnnoe pclae. Le rsete peut êrte dnas un dsérorde
-ttoal et on puet tujoruos lrie snas poribême. On ne lit
-donc pas chuaqe ltetre en elle-mmêe, mias le mot cmome un
-tuot. Un chnagmnet de réfretniel et nuos tarnsposns ce
-rselutat au txete lui-mmêe: l'odrre des mtos est faiblement
-imoprtnat copmraé au cnotxete du txete qui, lui, est copmté:
-comptexter avec Gargantext.
 </p>
 </div>
...
@@ -17,16 +17,16 @@
 <span class="icon-bar"></span>
 <span class="icon-bar"></span>
 </button>
-<a class="navbar-brand" style="line-height:15px; height:10px; padding: 10px 10px;" href="/"><img src="/img/logo.svg"></a>
+<a class="navbar-brand" style="line-height:15px; height:10px; padding: 10px 10px;" href="/"><img src="/img/logo.svg" title="Back to home."></a>
 </div>
 <div class="navbar-collapse collapse">
 <ul class="nav navbar-nav">
 <!-- <li><a href="/admin/">Admin/</a></li> --!>
-<li><a href="/about/">About</a>
+<li><a href="/about/" title="More information about the project, its sponsors and its authors.">About</a>
 </li>
 {% if user.is_authenticated %}
-<li><a href="/projects/">Projects</a></li>
+<li><a href="/projects/" title="All your projects are here.">Projects</a></li>
 {% endif %}
 {% if project %}
 <li><a href="/project/{{project.id}}">{{project.name}}</a></li>
@@ -40,14 +40,14 @@
 <ul class="nav pull-right">
 <li class="dropdown">
-<a href="#" role="button" class="dropdown-toggle" data-toggle="dropdown"><i class="icon-user"></i> {{ user }}<i class="caret"></i>
+<a href="#" role="button" class="dropdown-toggle" data-toggle="dropdown" title="This is your login"><i class="icon-user"></i> {{ user }}<i class="caret"></i>
 </a>
 <ul class="dropdown-menu">
-<li><a tabindex="-1" href="http://www.iscpif.fr/tiki-index.php?page=gargantext_feedback" target="blank" >Report Feedback</a></li>
+<li><a tabindex="-1" href="http://www.iscpif.fr/tiki-index.php?page=gargantext_feedback" title="Send us a message (bug, thanks, congrats...)">Report Feedback</a></li>
 <li class="divider"></li>
 {% if user.is_authenticated %}
-<li><a tabindex="-1" href="/auth/logout">Logout</a></li>
+<li><a tabindex="-1" href="/auth/logout" title="Click here to log out, especially on public devices">Logout</a></li>
 {% else %}
 <li><a tabindex="-1" href="/auth/">Login</a></li>
 {% endif %}
@@ -66,8 +66,8 @@
 <hr>
 <footer>
-<p>Gargantext, version 1.0.6, <a href="http://www.cnrs.fr" target="blank">Copyrights CNRS {{ date.year }}</a>,
-<a href="http://www.gnu.org/licenses/agpl-3.0.html" target="blank">Licence aGPLV3</a>.</p>
+<p>Gargantext, version 1.0.6, <a href="http://www.cnrs.fr" target="blank" title="Institution that enables this project.">Copyright CNRS {{ date.year }}</a>,
+<a href="http://www.gnu.org/licenses/agpl-3.0.html" target="blank" title="Legal terms of the project.">Licence aGPLV3</a>.</p>
 </footer>
...
@@ -84,19 +84,16 @@
 <ul>
 {% for corpus in corpora %}
 <li> {% ifnotequal corpus.count 0 %}
-<a href="/project/{{project.id}}/corpus/{{corpus.id}}">
-{{corpus.name}}
-</a>
-, {{ corpus.count }} Documents
-{% else %}
-{{corpus.name}} : <img width="20px" src="{% static "js/libs/img2/loading-bar.gif" %}"></img> Processing, drink a cup of tea, and refresh the page :)
-{% endifnotequal %}
+<a href="/project/{{project.id}}/corpus/{{corpus.id}}"> {{corpus.name}} </a> , {{ corpus.count }} Documents
+{% else %}
+{{corpus.name}} : <img width="20px" src="{% static "js/libs/img2/loading-bar.gif" %}"></img> Processing, drink a cup of tea, and refresh the page :)
+{% endifnotequal %}
 <button type="button" class="btn btn-xs btn-default" data-container="body" data-toggle="popover" data-placement="bottom"
 data-content='
 <ul>
 <li> Rename </li>
 <li> Add new documents </li>
-<li><a href="/project/{{ project.id }}/corpus/{{ corpus.id}}/delete">Delete</a></li>
+<li><a href="/delete/{{corpus.id}}">Delete</a></li>
 </ul>
 '>Manage</button>
 </li>
@@ -330,7 +327,7 @@
 console.log("enabling "+"#"+value.id)
 $("#"+value.id).attr('onclick','getGlobalResults(this);');
 // $("#submit_thing").prop('disabled' , false)
-$("#submit_thing").html("Process a 100 sample!")
+$("#submit_thing").html("Process a 1000 sample!")
 thequeries = data
 var N=0,k=0;
@@ -427,8 +424,8 @@
 //CSS events for changing the Select element
 function CustomForSelect( selected ) {
 // show Radio-Inputs and trigger FileOrNotFile>@upload-file events
-//if(selected=="pubmed" || selected=="istext") {
-if(selected=="pubmed") {
+if(selected=="pubmed" || selected=="istext") {
+// if(selected=="pubmed") {
 console.log("show the button for: "+selected)
 $("#pubmedcrawl").css("visibility", "visible");
 $("#pubmedcrawl").show();
...
@@ -44,7 +44,7 @@
 <ul>
 <li> Rename </li>
 <li> Add new corpus </li>
-<li><a href="/project/{{ project.id }}/delete">Delete</a></li>
+<li><a href="/delete/{{ project.id }}">Delete</a></li>
 </ul>
 '>Manage</button>
...
@@ -19,18 +19,21 @@
 {% if documents %}
-<div id="delAll" style="visibility: hidden;">
-<button onclick="deleteDuplicates(theurl);">Delete Duplicates</button>
-</div>
 <ul>
 {% for doc in documents %}
 {% if doc.date %}
 <li><div id="doc_{{doc.id}}"> <b>{{ doc.date }}</b>: <a target="_blank" href="/nodeinfo/{{doc.id}}">{{ doc.name}}</a> , @ {{ doc.metadata.source}}</div></li>
 {% endif %}
 {% endfor %}
+<div id="delAll" style="visibility: hidden;">
+<center>
+<button onclick="deleteDuplicates(theurl);">Delete all Duplicates in one click</button>
+</center>
+</div>
 </ul>
 <script>
...
# Without this, we couldn't use the Django environment
import os
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "gargantext_web.settings")
os.environ.setdefault("DJANGO_HSTORE_GLOBAL_REGISTER", "False")

# database tools (importing node.models registers the Django models)
from node import models
from gargantext_web.db import *
from parsing.corpustools import *

# pick a user and a parent project for the test corpus
user = session.query(User).first()
project = session.query(Node).filter(Node.name == 'A').first()

# create the corpus node under that project
corpus = Node(
    parent_id = project.id,
    name = 'Test 456',
    type_id = cache.NodeType['Corpus'].id,
    user_id = user.id,
)
session.add(corpus)
session.commit()

# attach a sample PubMed export to the corpus
add_resource(corpus,
    # file = './data_samples/pubmed_result.xml',
    file = '/srv/gargantext_lib/data_samples/pubmed_2013-04-01_HoneyBeesBeeBees.xml',
    type_id = cache.ResourceType['pubmed'].id,
)

# build the document nodes, then extract ngrams from their titles
parse_resources(corpus)
extract_ngrams(corpus, ('title', ))

# to re-run on an existing corpus, fetch it by id instead:
# print(corpus)
# corpus = session.query(Node).filter(Node.id == 72771).first()
# corpus = session.query(Node).filter(Node.id == 73017).first()

compute_tfidf(corpus)
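
# A quick sanity check could be appended after compute_tfidf. This is only a
# sketch: it reuses the names the script already imports (session, Node, cache)
# and assumes a 'Document' entry is registered in cache.NodeType, as the other
# node types above are; the check itself is not part of the commit.

# count the Document nodes now attached to the new corpus
type_document_id = cache.NodeType['Document'].id
document_count = (session
    .query(Node)
    .filter(Node.parent_id == corpus.id)
    .filter(Node.type_id == type_document_id)
    .count())
print('corpus %d contains %d parsed documents' % (corpus.id, document_count))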