Commit 0ab4f20a authored by delanoe

[MERGE] Testing 2 stable.

parents 48c7e541 499a52e7
......@@ -2,6 +2,12 @@
* Guided Tour
* Sources form: highlighting of crawler-enabled sources
## Version 3.0.6.8
* REPEC Crawler (connection with https://multivac.iscpif.fr)
* HAL Crawler (connection to https://hal.archives-ouvertes.fr/)
* New Graph Feature: color nodes by growth
## Version 3.0.6.4
* COOC SQL improved
......
......@@ -181,8 +181,6 @@ def get_tagger(lang):
return tagger()
RESOURCETYPES = [
{ "type": 1,
'name': 'Europresse',
......@@ -199,7 +197,7 @@ RESOURCETYPES = [
'crawler': None,
},
{ 'type': 3,
'name': 'Pubmed [XML]',
'name': 'Pubmed [CRAWLER/XML]',
'format': 'Pubmed',
'parser': "PubmedParser",
'file_formats':["zip", "xml"],
......@@ -235,26 +233,43 @@ RESOURCETYPES = [
'crawler': None,
},
{ 'type': 8,
'name': 'ISTex',
'name': 'ISTex [CRAWLER]',
'format': 'json',
'parser': "ISTexParser",
'file_formats':["zip", "txt"],
'crawler': None,
},
{ "type": 9,
"name": 'SCOAP [XML]',
"name": 'SCOAP [CRAWLER/XML]',
"parser": "CernParser",
"format": 'MARC21',
'file_formats':["zip","xml"],
"crawler": "CernCrawler",
},
# { "type": 10,
# "name": 'REPEC [RIS]',
# "parser": "RISParser",
# "format": 'RIS',
# 'file_formats':["zip","ris", "txt"],
# "crawler": None,
# },
#
{ "type": 10,
"name": 'REPEC [RIS]',
"parser": "RISParser",
"format": 'RIS',
'file_formats':["zip","ris", "txt"],
"crawler": None,
"name": 'REPEC [CRAWLER]',
"parser": "MultivacParser",
"format": 'JSON',
'file_formats':["zip","json"],
"crawler": "MultivacCrawler",
},
{ "type": 11,
"name": 'HAL [CRAWLER]',
"parser": "HalParser",
"format": 'JSON',
'file_formats':["zip","json"],
"crawler": "HalCrawler",
},
]
#shortcut for resources declaration in template
PARSERS = [(n["type"],n["name"]) for n in RESOURCETYPES if n["parser"] is not None]
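# e.g., given the declarations above, PARSERS contains pairs such as
# (3, 'Pubmed [CRAWLER/XML]'), (10, 'REPEC [CRAWLER]') and (11, 'HAL [CRAWLER]')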
......
......@@ -28,19 +28,20 @@ import graph.urls
import moissonneurs.urls
urlpatterns = [ url(r'^admin/' , admin.site.urls )
, url(r'^api/' , include( gargantext.views.api.urls ) )
, url(r'^' , include( gargantext.views.pages.urls ) )
urlpatterns = [ url(r'^admin/' , admin.site.urls )
, url(r'^api/' , include( gargantext.views.api.urls ) )
, url(r'^' , include( gargantext.views.pages.urls ) )
, url(r'^favicon.ico$', Redirect.as_view( url=static.url('favicon.ico')
, permanent=False), name="favicon")
, permanent=False), name="favicon" )
# Module Graph
, url(r'^' , include( graph.urls ) )
, url(r'^' , include( graph.urls ) )
# Module Annotation
# tempo: unchanged doc-annotations routes --
, url(r'^annotations/', include( annotations_urls ) )
, url(r'^projects/(\d+)/corpora/(\d+)/documents/(\d+)/(focus=[0-9,]+)?$', annotations_main_view)
, url(r'^annotations/', include( annotations_urls ) )
, url(r'^projects/(\d+)/corpora/(\d+)/documents/(\d+)/(focus=[0-9,]+)?$'
, annotations_main_view)
# Module Scrapers (Moissonneurs in French)
, url(r'^moissonneurs/' , include( moissonneurs.urls ) )
......
......@@ -4,7 +4,7 @@
# ***** CERN Scraper *****
# ****************************
# Author:c24b
# Date: 27/05/2015
# Date: 27/05/2016
import hmac, hashlib
import requests
import os
......@@ -96,10 +96,12 @@ class CernCrawler(Crawler):
print(self.results_nb, "res")
#self.generate_urls()
return(self.ids)
def generate_urls(self):
'''generate one raw export URL per record id'''
self.urls = ["http://repo.scoap3.org/record/%i/export/xm?ln=en" %rid for rid in self.ids]
return self.urls
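# e.g. self.ids == [123] gives ["http://repo.scoap3.org/record/123/export/xm?ln=en"]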
def fetch_records(self, ids):
''' for NEXT time'''
raise NotImplementedError
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# ****************************
# **** HAL Scraper ***
# ****************************
# CNRS COPYRIGHTS
# SEE LEGAL LICENCE OF GARGANTEXT.ORG
from ._Crawler import *
import json
from gargantext.constants import UPLOAD_DIRECTORY
from math import trunc
from gargantext.util.files import save
class HalCrawler(Crawler):
''' HAL API CLIENT'''
def __init__(self):
# Main EndPoints
self.BASE_URL = "https://api.archives-ouvertes.fr"
self.API_URL = "search"
# Final EndPoints
# TODO : Change endpoint according to the type of database
self.URL = self.BASE_URL + "/" + self.API_URL
self.status = []
def __format_query__(self, query=None):
'''format the query'''
#search_field="title_t"
search_field="abstract_t"
return (search_field + ":" + "(" + query + ")")
def _get(self, query, fromPage=1, count=10, lang=None):
# Parameters
fl = """ title_s
, abstract_s
, submittedDate_s
, journalDate_s
, authFullName_s
, uri_s
, isbn_s
, issue_s
, journalPublisher_s
"""
#, authUrl_s
#, type_s
wt = "json"
querystring = { "q" : query
, "rows" : count
, "start" : fromPage
, "fl" : fl
, "wt" : wt
}
# Specify Headers
headers = { "cache-control" : "no-cache" }
# Do Request and get response
response = requests.request( "GET"
, self.URL
, headers = headers
, params = querystring
)
#print(querystring)
# Validation : 200 if ok else raise Value
if response.status_code == 200:
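# extract the charset from the Content-Type header,
# e.g. "application/json; charset=UTF-8" -> "UTF-8"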
charset = ( response.headers["Content-Type"]
.split("; ")[1]
.split("=" )[1]
)
return (json.loads(response.content.decode(charset)))
else:
raise ValueError(response.status_code, response.reason)
def scan_results(self, query):
'''
scan_results : Returns the number of results
Query String -> Int
'''
self.results_nb = 0
total = ( self._get(query)
.get("response", {})
.get("numFound" , 0)
)
self.results_nb = total
return self.results_nb
def download(self, query):
downloaded = False
self.status.append("fetching results")
corpus = []
paging = 100
self.query_max = self.scan_results(query)
#print("self.query_max : %s" % self.query_max)
if self.query_max > QUERY_SIZE_N_MAX:
msg = "Invalid sample size N = %i (max = %i)" % ( self.query_max
, QUERY_SIZE_N_MAX
)
print("ERROR (scrap: Multivac d/l ): " , msg)
self.query_max = QUERY_SIZE_N_MAX
#for page in range(1, trunc(self.query_max / 100) + 2):
for page in range(0, self.query_max, paging):
print("Downloading page %s to %s results" % (page, paging))
docs = (self._get(query, fromPage=page, count=paging)
.get("response", {})
.get("docs" , [])
)
for doc in docs:
corpus.append(doc)
self.path = save( json.dumps(corpus).encode("utf-8")
, name='HAL.json'
, basedir=UPLOAD_DIRECTORY
)
downloaded = True
return downloaded
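# A minimal usage sketch of the client above (network access assumed; requests
# and QUERY_SIZE_N_MAX come in via `from ._Crawler import *`):
#
#   crawler = HalCrawler()
#   query   = crawler.__format_query__("gargantua")  # -> 'abstract_t:(gargantua)'
#   total   = crawler.scan_results(query)            # number of matching documents
#   if crawler.download(query):
#       print(crawler.path)                          # HAL.json under UPLOAD_DIRECTORY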
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# ****************************
# **** MULTIVAC Scraper ***
# ****************************
# CNRS COPYRIGHTS
# SEE LEGAL LICENCE OF GARGANTEXT.ORG
from ._Crawler import *
import json
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# ****************************
# **** MULTIVAC Scraper ***
# ****************************
# CNRS COPYRIGHTS
# SEE LEGAL LICENCE OF GARGANTEXT.ORG
from ._Crawler import *
import json
from gargantext.settings import API_TOKENS
from gargantext.constants import UPLOAD_DIRECTORY
from math import trunc
from gargantext.util.files import save
class MultivacCrawler(Crawler):
''' Multivac API CLIENT'''
def __init__(self):
self.apikey = API_TOKENS["MULTIVAC"]
# Main EndPoints
self.BASE_URL = "https://api.iscpif.fr/v2"
self.API_URL = "pvt/economy/repec/search"
# Final EndPoints
# TODO : Change endpoint according to the type of database
self.URL = self.BASE_URL + "/" + self.API_URL
self.status = []
def __format_query__(self, query=None):
'''format the query (not implemented for the Multivac endpoint)'''
return None
def _get(self, query, fromPage=1, count=10, lang=None):
# Parameters
querystring = { "q" : query
, "count" : count
, "from" : fromPage
, "api_key" : API_TOKENS["MULTIVAC"]["APIKEY"]
}
if lang is not None:
querystring["lang"] = lang
# Specify Headers
headers = { "cache-control" : "no-cache" }
# Do Request and get response
response = requests.request( "GET"
, self.URL
, headers = headers
, params = querystring
)
#print(querystring)
# Validation : 200 if ok else raise Value
if response.status_code == 200:
charset = ( response.headers["Content-Type"]
.split("; ")[1]
.split("=" )[1]
)
return (json.loads(response.content.decode(charset)))
else:
raise ValueError(response.status_code, response.reason)
def scan_results(self, query):
'''
scan_results : Returns the number of results
Query String -> Int
'''
self.results_nb = 0
total = ( self._get(query)
.get("results", {})
.get("total" , 0)
)
self.results_nb = total
return self.results_nb
def download(self, query):
downloaded = False
self.status.append("fetching results")
corpus = []
paging = 100
self.query_max = self.scan_results(query)
#print("self.query_max : %s" % self.query_max)
if self.query_max > QUERY_SIZE_N_MAX:
msg = "Invalid sample size N = %i (max = %i)" % ( self.query_max
, QUERY_SIZE_N_MAX
)
print("ERROR (scrap: Multivac d/l ): " , msg)
self.query_max = QUERY_SIZE_N_MAX
for page in range(1, trunc(self.query_max / 100) + 2):
print("Downloading page %s to %s results" % (page, paging))
docs = (self._get(query, fromPage=page, count=paging)
.get("results", {})
.get("hits" , [])
)
for doc in docs:
corpus.append(doc)
self.path = save( json.dumps(corpus).encode("utf-8")
, name='Multivac.json'
, basedir=UPLOAD_DIRECTORY
)
downloaded = True
return downloaded
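# NB: pagination differs between the two crawlers: HalCrawler walks 0-based
# result offsets (range(0, query_max, paging)) while MultivacCrawler walks
# 1-based page numbers (range(1, trunc(query_max / 100) + 2)).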
# Scrapers config
QUERY_SIZE_N_MAX = 1000
from gargantext.constants import get_resource
from gargantext.constants import get_resource, QUERY_SIZE_N_MAX
from gargantext.util.scheduling import scheduled
from gargantext.util.db import session
from requests_futures.sessions import FuturesSession
......@@ -18,31 +18,34 @@ class Crawler:
#the name of corpus
#that will be built in case of internal fileparsing
self.record = record
self.name = record["corpus_name"]
self.project_id = record["project_id"]
self.user_id = record["user_id"]
self.resource = record["source"]
self.type = get_resource(self.resource)
self.query = record["query"]
self.record = record
self.name = record["corpus_name"]
self.project_id = record["project_id"]
self.user_id = record["user_id"]
self.resource = record["source"]
self.type = get_resource(self.resource)
self.query = record["query"]
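# `record` is the form payload coming from the crawler views; judging from
# the keys read above it holds at least corpus_name, project_id, user_id,
# source and query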
#format the sampling
self.n_last_years = 5
self.YEAR = date.today().year
self.YEAR = date.today().year
# not pretty
# but the easy version
self.MONTH = str(date.today().month)
self.MONTH = str(date.today().month)
if len(self.MONTH) == 1:
self.MONTH = "0"+self.MONTH
self.MAX_RESULTS = 1000
self.MAX_RESULTS = QUERY_SIZE_N_MAX
try:
self.results_nb = int(record["count"])
except KeyError:
# does not exist yet
self.results_nb = 0
try:
self.webEnv = record["webEnv"]
self.webEnv = record["webEnv"]
self.queryKey = record["queryKey"]
self.retMax = record["retMax"]
self.retMax = record["retMax"]
except KeyError:
# does not exist yet
self.queryKey = None
......@@ -67,6 +70,7 @@ class Crawler:
if self.download():
self.create_corpus()
return self.corpus_id
def get_sampling_dates():
'''Create a sample list of min and max dates based on Y and M
for N_LAST_YEARS results'''
......
......@@ -171,3 +171,6 @@ def bulk_insert_ifnotexists(model, uniquekey, fields, data, cursor=None, do_stat
cursor.execute('COMMIT WORK;')
cursor.close()
......@@ -8,29 +8,12 @@ import random
_members = [
{ 'first_name' : 'Constance', 'last_name' : 'de Quatrebarbes',
'mail' : '4barbesATgmail.com',
'website' : 'http://c24b.github.io/',
'picture' : 'constance.jpg',
'role' : 'developer'},
{ 'first_name' : 'David', 'last_name' : 'Chavalarias',
'mail' : 'david.chavalariasATiscpif.fr',
'website' : 'http://chavalarias.com',
'picture' : 'david.jpg',
'role':'principal investigator'},
# { 'first_name' : 'Elias', 'last_name' : 'Showk',
# 'mail' : '',
# 'website' : 'https://github.com/elishowk',
# 'picture' : '', 'role' : 'developer'},
{ 'first_name' : 'Mathieu', 'last_name' : 'Rodic',
'mail' : '',
'website' : 'http://rodic.fr',
'picture' : 'mathieu.jpg',
'role' : 'developer'},
{ 'first_name' : 'Samuel', 'last_name' : 'Castillo J.',
'mail' : 'kaisleanATgmail.com',
'website' : 'http://www.pksm3.droppages.com',
......@@ -43,12 +26,6 @@ _members = [
'picture' : 'maziyar.jpg',
'role' : 'developer'},
{ 'first_name' : 'Romain', 'last_name' : 'Loth',
'mail' : '',
'website' : 'http://iscpif.fr',
'picture' : 'romain.jpg',
'role' : 'developer'},
{ 'first_name' : 'Alexandre', 'last_name' : 'Delanoë',
'mail' : 'alexandre+gargantextATdelanoe.org',
'website' : 'http://alexandre.delanoe.org',
......@@ -59,8 +36,33 @@ _members = [
# copy-paste the line above and write your informations please
]
_membersPast = [
{ 'first_name' : 'Constance', 'last_name' : 'de Quatrebarbes',
'mail' : '4barbesATgmail.com',
'website' : 'http://c24b.github.io/',
'picture' : 'constance.jpg',
'role' : 'developer'},
{ 'first_name' : 'Mathieu', 'last_name' : 'Rodic',
'mail' : '',
'website' : 'http://rodic.fr',
'picture' : 'mathieu.jpg',
'role' : 'developer'},
{ 'first_name' : 'Romain', 'last_name' : 'Loth',
'mail' : '',
'website' : 'http://iscpif.fr',
'picture' : 'romain.jpg',
'role' : 'developer'},
{ 'first_name' : 'Elias', 'last_name' : 'Showk',
'mail' : '',
'website' : 'https://github.com/elishowk',
'picture' : '', 'role' : 'developer'},
]
_institutions = [
#{ 'name' : 'Mines ParisTech', 'website' : 'http://mines-paristech.fr', 'picture' : 'mines.png', 'funds':''},
{ 'name' : 'Mines ParisTech', 'website' : 'http://mines-paristech.fr', 'picture' : 'mines.png', 'funds':''},
#{ 'name' : 'Institut Pasteur', 'website' : 'http://www.pasteur.fr', 'picture' : 'pasteur.png', 'funds':''},
{ 'name' : 'EHESS', 'website' : 'http://www.ehess.fr', 'picture' : 'ehess.png', 'funds':''},
#{ 'name' : '', 'website' : '', 'picture' : '', 'funds':''},
......@@ -87,6 +89,10 @@ def members():
random.shuffle(_members)
return _members
def membersPast():
random.shuffle(_membersPast)
return _membersPast
def institutions():
random.shuffle(_institutions)
return _institutions
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# ****************************
# **** HAL Parser ***
# ****************************
# CNRS COPYRIGHTS
# SEE LEGAL LICENCE OF GARGANTEXT.ORG
from ._Parser import Parser
from datetime import datetime
import json
class HalParser(Parser):
def parse(self, filebuf):
'''
parse :: FileBuff -> [Hyperdata]
'''
contents = filebuf.read().decode("UTF-8")
data = json.loads(contents)
filebuf.close()
json_docs = data
hyperdata_list = []
hyperdata_path = { "id" : "isbn_s"
, "title" : "title_s"
, "abstract" : "abstract_s"
, "source" : "journalPublisher_s"
, "url" : "uri_s"
, "authors" : "authFullName_s"
}
uris = set()
for doc in json_docs:
hyperdata = {}
for key, path in hyperdata_path.items():
field = doc.get(path, "NOT FOUND")
if isinstance(field, list):
hyperdata[key] = ", ".join(field)
else:
hyperdata[key] = field
if hyperdata["url"] in uris:
print("Document already parsed")
else:
uris.add(hyperdata["url"])
# hyperdata["authors"] = ", ".join(
# [ p.get("person", {})
# .get("name" , "")
#
# for p in doc.get("hasauthor", [])
# ]
# )
#
maybeDate = doc.get("submittedDate_s", None)
if maybeDate is not None:
date = datetime.strptime(maybeDate, "%Y-%m-%d %H:%M:%S")
else:
date = datetime.now()
hyperdata["publication_date"] = date
hyperdata["publication_year"] = str(date.year)
hyperdata["publication_month"] = str(date.month)
hyperdata["publication_day"] = str(date.day)
hyperdata_list.append(hyperdata)
return hyperdata_list
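# A sketch of one resulting hyperdata entry (values are illustrative):
# { "id": "...", "title": "...", "abstract": "...", "source": "...",
#   "url": "https://hal.archives-ouvertes.fr/hal-...",
#   "authors": "A. Author, B. Author",
#   "publication_date": datetime(...), "publication_year": "2016",
#   "publication_month": "5", "publication_day": "27" }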
......@@ -13,20 +13,21 @@ class ISTexParser(Parser):
hyperdata_list = []
hyperdata_path = {
"id" : "id",
"source" : 'corpusName',
"title" : 'title',
"source" : "corpusName",
"title" : "title",
"genre" : "genre",
"language_iso3" : 'language',
"doi" : 'doi',
"host" : 'host',
"publication_date" : 'publicationDate',
"abstract" : 'abstract',
"language_iso3" : "language",
"doi" : "doi",
"host" : "host",
"publication_date" : "publicationDate",
"abstract" : "abstract",
# "authors" : 'author',
"authorsRAW" : 'author',
"authorsRAW" : "author",
#"keywords" : "keywords"
}
suma = 0
for json_doc in json_docs:
hyperdata = {}
......@@ -103,7 +104,7 @@ class ISTexParser(Parser):
RealDate = RealDate[0]
# print( RealDate ," | length:",len(RealDate))
Decision=""
Decision = True
if len(RealDate)>4:
if len(RealDate)>8:
try: Decision = datetime.strptime(RealDate, '%Y-%b-%d').date()
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# ****************************
# **** MULTIVAC Parser ***
# ****************************
# CNRS COPYRIGHTS
# SEE LEGAL LICENCE OF GARGANTEXT.ORG
from ._Parser import Parser
from datetime import datetime
import json
class MultivacParser(Parser):
def parse(self, filebuf):
'''
parse :: FileBuff -> [Hyperdata]
'''
contents = filebuf.read().decode("UTF-8")
data = json.loads(contents)
filebuf.close()
json_docs = data
hyperdata_list = []
hyperdata_path = { "id" : "id"
, "title" : "title"
, "abstract" : "abstract"
, "type" : "type"
}
for json_doc in json_docs:
hyperdata = {}
doc = json_doc["_source"]
for key, path in hyperdata_path.items():
hyperdata[key] = doc.get(path, "")
hyperdata["source"] = doc.get("serial" , {})\
.get("journaltitle", "REPEC Database")
try:
hyperdata["url"] = doc.get("file", {})\
.get("url" , "")
except:
pass
hyperdata["authors"] = ", ".join(
[ p.get("person", {})
.get("name" , "")
for p in doc.get("hasauthor", [])
]
)
year = doc.get("serial" , {})\
.get("issuedate", None)
if year == "Invalide date":
year = doc.get("issuedate" , None)
if year is None:
date = datetime.now()
else:
try:
date = datetime.strptime(year, '%Y')
except (TypeError, ValueError):
print("FIX DATE MULTIVAC REPEC %s" % year)
date = datetime.now()
hyperdata["publication_date"] = date
hyperdata["publication_year"] = str(date.year)
hyperdata["publication_month"] = str(date.month)
hyperdata["publication_day"] = str(date.day)
hyperdata_list.append(hyperdata)
return hyperdata_list
......@@ -78,7 +78,7 @@ class PubmedParser(Parser):
if "publication_month" in hyperdata: PubmedDate+=" "+hyperdata["publication_month"]
if "publication_day" in hyperdata: PubmedDate+=" "+hyperdata["publication_day"]
Decision=""
Decision=True
if len(RealDate)>4:
if len(RealDate)>8:
try: Decision = datetime.strptime(RealDate, '%Y %b %d').date()
......
......@@ -109,7 +109,7 @@ def compute_occs(corpus, overwrite_id = None, groupings_id = None,):
.group_by("counted_form")
)
#print(str(occs_q))
#print(str(occs_q.all()))
occ_sums = occs_q.all()
# example result = [(1970, 1.0), (2024, 2.0), (259, 2.0), (302, 1.0), ... ]
# ^^^^ ^^^
......@@ -177,6 +177,7 @@ def compute_ti_ranking(corpus,
- overwrite_id: optional id of a pre-existing XXXX node for this corpus
(the Node and its previous Node NodeNgram rows will be replaced)
"""
print("compute_ti_ranking")
# validate string params
if count_scope not in ["local","global"]:
raise ValueError("compute_ti_ranking: count_scope param allowed values: 'local', 'global'")
......@@ -189,7 +190,7 @@ def compute_ti_ranking(corpus,
if type(corpus) == int:
corpus_id = corpus
corpus = cache.Node[corpus_id]
elif type(corpus) == str and match(r'\d+$', corpus):
elif type(corpus) == str and match(r'^\d+$', corpus):
corpus_id = int(corpus)
corpus = cache.Node[corpus_id]
else:
......@@ -329,7 +330,7 @@ def compute_ti_ranking(corpus,
# result
print("%s : Starting Query tf_nd_query" % t())
print(str(tf_nd_query))
#print(str(tf_nd_query.all()))
tf_nd = tf_nd_query.all()
print("%s : End Query tf_nd_quer" % t())
......@@ -371,7 +372,7 @@ def compute_ti_ranking(corpus,
# TODO 2 release these 2 typenames TFIDF-CORPUS and TFIDF-GLOBAL
# TODO 3 recreate them elsewhere in their sims (WeightedIndex) version
# TODO 4 requalify this here as a NodeNgram
# then TODO 5 use WeightedList.save() !
# TODO 5 use WeightedList.save()
# reflect that in NodeNodeNgrams
bulk_insert(
......@@ -398,7 +399,8 @@ def compute_tfidf_local(corpus,
- overwrite_id: optional id of a pre-existing TFIDF-XXXX node for this corpus
(the Node and its previous NodeNodeNgram rows will be replaced)
"""
print("Compute TFIDF local")
# All docs of this corpus
docids_subquery = (session
.query(Node.id)
......
......@@ -3,9 +3,9 @@ COOCS
(this is the full SQL version, should be more reliable on outerjoin)
"""
from gargantext import settings
from sqlalchemy import create_engine
from sqlalchemy import exc
from gargantext.util.lists import WeightedMatrix
# from gargantext.util.db import session, aliased, func
from gargantext.util.db import get_engine
from gargantext.util.db_cache import cache
from gargantext.constants import DEFAULT_COOC_THRESHOLD, NODETYPES
from gargantext.constants import INDEXED_HYPERDATA
......@@ -64,12 +64,7 @@ def compute_coocs( corpus,
"""
# 1) prepare direct connection to the DB
url = 'postgresql+psycopg2://{USER}:{PASSWORD}@{HOST}:{PORT}/{NAME}'.format(
**settings.DATABASES['default']
)
engine = create_engine( url )
connection = engine.connect()
connection = get_engine().connect()
# string vars for our SQL query
# setting work memory high to improve cache perf.
......@@ -223,10 +218,19 @@ def compute_coocs( corpus,
# 6) EXECUTE QUERY
# ----------------
# debug
print(final_sql)
#print(final_sql)
# executing the SQL statement
results = connection.execute(final_sql)
try:
# the database may have been restarted since the connection was pooled
results = connection.execute(final_sql)
connection.close()
except exc.DBAPIError as e:
# the Connection is invalidated; without results there is nothing
# to store, so report and propagate
if e.connection_invalidated:
print("Connection was invalidated for ngram_coocs")
else:
print(e)
raise
# => storage in our matrix structure
matrix = WeightedMatrix(results)
......
......@@ -47,7 +47,8 @@ def about(request):
context = {
'user': request.user,
'date': datetime.datetime.now(),
'team': credits.members(),
'team' : credits.members(),
'teamPast': credits.membersPast(),
'institutions': credits.institutions(),
'labos': credits.labs(),
'grants': credits.grants(),
......
......@@ -8,6 +8,7 @@ from graph.cooccurrences import countCooccurrences
from graph.distances import clusterByDistances
from graph.bridgeness import filterByBridgeness
from graph.mail_notification import notify_owner
from graph.growth import compute_growth
from gargantext.util.scheduling import scheduled
from gargantext.constants import graph_constraints
......@@ -64,7 +65,15 @@ def compute_graph( corpus_id=None , cooc_id=None
print("GRAPH #%d ... Filtering by bridgeness %d." % (cooc_id, bridgeness))
data = filterByBridgeness(G,partition,ids,weight,bridgeness,"node_link",field1,field2)
if start is not None and end is not None:
growth= dict()
for (ng_id, score) in compute_growth(corpus_id, groupList_id, mapList_id, start, end):
growth[ng_id] = float(score) + 100 # for the normalization, should not be negative
for node in data['nodes']:
node['attributes']['growth'] = growth[node['id']]
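# OCC_HIST scores are symmetric relative growths in [-100, 100], so the
# +100 shift maps them into [0, 200]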
print("GRAPH #%d ... Saving Graph in hyperdata as json." % cooc_id)
node = session.query(Node).filter(Node.id == cooc_id).first()
......@@ -187,7 +196,7 @@ def get_graph( request=None , corpus=None
)
.filter( Start.key == 'publication_date')
.filter( Start.value_utc >= date_start_utc)
)
)
# Filter corpus by date if any end date
......@@ -203,8 +212,7 @@ def get_graph( request=None , corpus=None
)
.filter( End.key == 'publication_date')
.filter( End.value_utc <= date_end_utc )
)
)
# Finally test if the size of the corpora is big enough
# --------------------------------
......@@ -221,10 +229,11 @@ def get_graph( request=None , corpus=None
#, limit=size
)
return {"state" : "saveOnly",
"target_id" : cooc_id,
"target_name": cooc_name,
"target_date": cooc_date}
return { "state" : "saveOnly"
, "target_id" : cooc_id
, "target_name": cooc_name
, "target_date": cooc_date
}
elif corpus_size > graph_constraints['corpusMax']:
# Then compute cooc asynchronously with celery
......@@ -262,5 +271,5 @@ def get_graph( request=None , corpus=None
if len(data) == 0:
print("GRAPH # ... GET_GRAPH: 0 coocs in matrix")
data = {'nodes':[], 'links':[]} # empty data
return data
"""
Computes ngram growth on periods
"""
from gargantext.models import Node, NodeNgram, NodeNodeNgram, NodeNgramNgram
from gargantext.util.db_cache import cache
from gargantext.util.db import session, bulk_insert, aliased, \
func, get_engine # = sqlalchemy.func like sum() or count()
from datetime import datetime
def timeframes(start, end):
"""
timeframes :: String -> String -> (UTCTime, UTCTime, UTCTime)
"""
start = datetime.strptime (str(start), "%Y-%m-%d")
end = datetime.strptime (str(end), "%Y-%m-%d")
date_0 = start - (end - start)
date_1 = start
date_2 = end
return (date_0, date_1, date_2)
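# e.g. timeframes("2000-01-01", "2000-01-11")
#  ==> (datetime(1999, 12, 22, 0, 0), datetime(2000, 1, 1, 0, 0), datetime(2000, 1, 11, 0, 0))
# i.e. the first period mirrors the requested [start, end] window backwards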
def compute_growth(corpus_id, groupList_id, mapList_id, start, end):
"""
compute_growth :: Int -> Int -> Int -> UTCTime -> UTCTime
-> [(Int, Numeric)]
this function uses SQL function in
/srv/gargantext/install/gargamelle/sqlFunctions.sql
First compute occurrences of ngrams in mapList (with groups) on the first
period, then on the second and finally returns growth.
Directly computed with Postgres Database (C) for optimization.
"""
connection = get_engine()
(date_0, date_1, date_2) = timeframes(start, end)
query = """SELECT * FROM OCC_HIST( {corpus_id}
, {groupList_id}
, {mapList_id}
, '{date_0}'
, '{date_1}'
, '{date_2}'
)
""".format( corpus_id = corpus_id
, groupList_id = groupList_id
, mapList_id = mapList_id
, date_0 = date_0
, date_1 = date_1
, date_2 = date_2
)
return(connection.execute(query))
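# The OCC_HIST SQL function (defined in install/gargamelle/sqlFunctions.sql,
# shown further below) returns one symmetric relative growth per map-list
# ngram; a Python sketch of that score, not the executed path:
#
#   def growth_score(score1, score2):
#       # a NULL on either side collapses the SQL expression to 0
#       if score1 is None or score2 is None:
#           return 0.0
#       # the `or 1` guards the zero-sum edge case, as COALESCE does in SQL
#       return round(100 * (score2 - score1) / ((score2 + score1) or 1), 2)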
......@@ -19,6 +19,8 @@ def compress_graph(graphdata):
for node in graphdata['nodes']:
node['lb'] = node['label']
del node['label']
#node['attributes']['growth'] = 0.8
node['at'] = node['attributes']
del node['attributes']
......
......@@ -5,13 +5,10 @@ apt-get install -y \
apt-utils ca-certificates locales \
sudo aptitude gcc g++ wget git vim \
build-essential make \
postgresql-9.5 postgresql-client-9.5 postgresql-contrib-9.5 \
postgresql-server-dev-9.5 libpq-dev libxml2 \
postgresql-9.5 postgresql-client-9.5 postgresql-contrib-9.5 \
nginx rabbitmq-server
# WARNING: uwsgi is not on stretch any more (get it from unstable)
# uwsgi uwsgi-core uwsgi-plugin-python3
postgresql-9.6 postgresql-client-9.6 postgresql-contrib-9.6 \
postgresql-server-dev-9.6 libpq-dev libxml2 \
postgresql-9.6 postgresql-client-9.6 postgresql-contrib-9.6 \
nginx rabbitmq-server uwsgi uwsgi-core uwsgi-plugin-python3
### Configure timezone and locale
......@@ -32,15 +29,15 @@ update-locale LC_ALL=fr_FR.UTF-8
### Install main dependencies and python packages based on Debian distrib
echo "############# PYTHON DEPENDENCIES ###############"
apt-get update && apt-get install -y \
libxml2-dev xml-core libgfortran-5-dev \
libxml2-dev xml-core libgfortran-6-dev \
libpq-dev \
python3.5 \
python3-dev \
python3-six python3-numpy python3-setuptools \
python3-numexpr \
python3-pip \
libxml2-dev libxslt-dev
#libxslt1-dev zlib1g-dev
libxml2-dev libxslt-dev zlib1g-dev
#libxslt1-dev
# UPDATE AND CLEAN
apt-get update && apt-get autoclean
......@@ -70,7 +67,7 @@ update-locale LC_ALL=fr_FR.UTF-8
## POSTGRESQL DATA (as ROOT)
#######################################################################
sed -iP "s%^data_directory.*%data_directory = \'\/srv\/gargandata\'%" /etc/postgresql/9.5/main/postgresql.conf
echo "host all all 0.0.0.0/0 md5" >> /etc/postgresql/9.5/main/pg_hba.conf
echo "listen_addresses='*'" >> /etc/postgresql/9.5/main/postgresql.conf
sed -iP "s%^data_directory.*%data_directory = \'\/srv\/gargandata\'%" /etc/postgresql/9.6/main/postgresql.conf
echo "host all all 0.0.0.0/0 md5" >> /etc/postgresql/9.6/main/pg_hba.conf
echo "listen_addresses='*'" >> /etc/postgresql/9.6/main/postgresql.conf
-- CNRS Copyrights 2017
-- See Gargantext Licence for details
-- Maintainers: team@gargantext.org
-- USAGE
-- psql gargandb < occ_growth.sql
-- OCC_HIST :: Corpus.id -> GroupList.id -> MapList.id -> Start -> EndFirst -> EndLast
-- EXAMPLE USAGE
-- SELECT * FROM OCC_HIST(182856, 183859, 183866, '1800-03-15 17:00:00+01', '2000-03-15 17:00:00+01', '2017-03-15 17:00:00+01')
-- OCC_HIST_PART :: Corpus.id -> GroupList.id -> Start -> End
DROP FUNCTION OCC_HIST_PART(integer, integer, timestamp without time zone, timestamp without time zone);
-- DROP for tests
CREATE OR REPLACE FUNCTION OCC_HIST_PART(int, int, timestamp, timestamp) RETURNS TABLE (ng_id int, score float8)
AS $$
-- EXPLAIN ANALYZE
SELECT
COALESCE(gr.ngram1_id, ng1.ngram_id) as ng_id,
SUM(ng1.weight) as score
from nodes n
-- BEFORE
INNER JOIN nodes as n1 ON n1.id = n.id
INNER JOIN nodes_ngrams ng1 ON ng1.node_id = n1.id
-- Limit with timestamps: ]start, end]
INNER JOIN nodes_hyperdata nh1 ON nh1.node_id = n1.id
AND nh1.value_utc > $3
AND nh1.value_utc <= $4
-- Group List
LEFT JOIN nodes_ngrams_ngrams gr ON ng1.ngram_id = gr.ngram2_id
AND gr.node_id = $2
WHERE
n.typename = 4
AND n.parent_id = $1
GROUP BY 1
$$
LANGUAGE SQL;
DROP FUNCTION OCC_HIST(integer, integer, integer, timestamp without time zone, timestamp without time zone, timestamp without time zone);
-- OCC_HIST :: Corpus.id -> GroupList.id -> MapList.id -> Start -> EndFirst -> EndLast
CREATE OR REPLACE FUNCTION OCC_HIST(int, int, int, timestamp, timestamp, timestamp) RETURNS TABLE (ng_id int, score numeric)
AS $$
WITH OCC1 as (SELECT * from OCC_HIST_PART($1, $2, $4, $5))
, OCC2 as (SELECT * from OCC_HIST_PART($1, $2, $5, $6))
, GROWTH as (SELECT ml.ngram_id as ngram_id
, COALESCE(OCC1.score, null) as score1
, COALESCE(OCC2.score, null) as score2
FROM nodes_ngrams ml
LEFT JOIN OCC1 ON OCC1.ng_id = ml.ngram_id
LEFT JOIN OCC2 ON OCC2.ng_id = ml.ngram_id
WHERE ml.node_id = $3
ORDER by score2 DESC)
SELECT ngram_id, COALESCE(ROUND(CAST((100 * (score2 - score1) / COALESCE((score2 + score1), 1)) as numeric), 2), 0) from GROWTH
$$
LANGUAGE SQL;
-- BEHAVIORAL TEST (should be equal to occ in terms table)
-- WITH OCC as (SELECT * from OCC_HIST(182856, 183859, '1800-03-15 17:00:00+01', '2300-03-15 17:00:00+01'))
-- SELECT ng_id, score from OCC
-- INNER JOIN nodes_ngrams ml on ml.ngram_id = ng_id
-- AND ml.node_id = 183866
-- ORDER BY score DESC;
......@@ -12,12 +12,12 @@ echo "::::: POSTGRESQL :::::"
su postgres -c 'pg_dropcluster 9.4 main --stop'
#done in docker but redoing it
rm -rf /srv/gargandata && mkdir /srv/gargandata && chown postgres:postgres /srv/gargandata
su postgres -c '/usr/lib/postgresql/9.5/bin/initdb -D /srv/gargandata/'
su postgres -c '/usr/lib/postgresql/9.5/bin/pg_ctl -D /srv/gargandata/ -l /srv/gargandata/journal_applicatif start'
su postgres -c '/usr/lib/postgresql/9.6/bin/initdb -D /srv/gargandata/'
su postgres -c '/usr/lib/postgresql/9.6/bin/pg_ctl -D /srv/gargandata/ -l /srv/gargandata/journal_applicatif start'
su postgres -c 'pg_createcluster -D /srv/gargandata 9.5 main '
su postgres -c 'pg_ctlcluster -D /srv/gargandata 9.5 main start '
su postgres -c 'pg_ctlcluster 9.5 main start'
su postgres -c 'pg_createcluster -D /srv/gargandata 9.6 main '
su postgres -c 'pg_ctlcluster -D /srv/gargandata 9.6 main start '
su postgres -c 'pg_ctlcluster 9.6 main start'
service postgresql start
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# ****************************
# ***** HAL Crawler *****
# ****************************
# LICENCE: GARGANTEXT.org Licence
RESOURCE_TYPE_HAL = 11
from django.shortcuts import redirect, render
from django.http import Http404, HttpResponseRedirect \
, HttpResponseForbidden
from gargantext.constants import get_resource, load_crawler, QUERY_SIZE_N_MAX
from gargantext.models.nodes import Node
from gargantext.util.db import session
from gargantext.util.db_cache import cache
from gargantext.util.http import JsonHttpResponse
from gargantext.util.scheduling import scheduled
from gargantext.util.toolchain import parse_extract_indexhyperdata
from traceback import print_tb
def query( request):
'''get GlobalResults()'''
if request.method == "POST":
query = request.POST["query"]
source = get_resource(RESOURCE_TYPE_HAL)
if source["crawler"] is not None:
crawlerbot = load_crawler(source)()
#old raw way to get results_nb
results = crawlerbot.scan_results(query)
#ids = crawlerbot.get_ids(query)
print(results)
return JsonHttpResponse({"results_nb":crawlerbot.results_nb})
def save(request, project_id):
'''save'''
if request.method == "POST":
query = request.POST.get("query")
try:
N = int(request.POST.get("N"))
except (TypeError, ValueError):
N = 0
print(query, N)
#for next time
#ids = request.POST["ids"]
source = get_resource(RESOURCE_TYPE_HAL)
if N == 0:
raise Http404()
if N > QUERY_SIZE_N_MAX:
N = QUERY_SIZE_N_MAX
try:
project_id = int(project_id)
except ValueError:
raise Http404()
# do we have a valid project?
project = session.query( Node ).filter(Node.id == project_id).first()
if project is None:
raise Http404()
user = cache.User[request.user.id]
if not user.owns(project):
return HttpResponseForbidden()
# corpus node instantiation as a Django model
corpus = Node(
name = query,
user_id = request.user.id,
parent_id = project_id,
typename = 'CORPUS',
hyperdata = { "action" : "Scrapping data"
}
)
#download_file
crawler_bot = load_crawler(source)()
#for now no way to force downloading X records
#the long running command
filename = crawler_bot.download(query)
corpus.add_resource(
type = source["type"]
#, name = source["name"]
, path = crawler_bot.path
)
session.add(corpus)
session.commit()
#corpus_id = corpus.id
try:
scheduled(parse_extract_indexhyperdata)(corpus.id)
except Exception as error:
print('WORKFLOW ERROR')
print(error)
try:
print_tb(error.__traceback__)
except:
pass
# IMPORTANT ---------------------------------
# sanitize session after an interrupted transaction
session.rollback()
# --------------------------------------------
return render(
template_name = 'pages/projects/wait.html',
request = request,
context = {
'user' : request.user,
'project': project,
},
)
# non-POST requests are not supported
raise Http404()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# ****************************
# ***** MULTIVAC Crawler *****
# ****************************
# LICENCE: GARGANTEXT.org Licence
RESOURCE_TYPE_MULTIVAC = 10
from django.shortcuts import redirect, render
from django.http import Http404, HttpResponseRedirect, HttpResponseForbidden
from gargantext.constants import get_resource, load_crawler, QUERY_SIZE_N_MAX
from gargantext.models.nodes import Node
from gargantext.util.db import session
from gargantext.util.db_cache import cache
from gargantext.util.http import JsonHttpResponse
from gargantext.util.scheduling import scheduled
from gargantext.util.toolchain import parse_extract_indexhyperdata
from traceback import print_tb
def query( request):
'''get GlobalResults()'''
if request.method == "POST":
query = request.POST["query"]
source = get_resource(RESOURCE_TYPE_MULTIVAC)
if source["crawler"] is not None:
crawlerbot = load_crawler(source)()
#old raw way to get results_nb
results = crawlerbot.scan_results(query)
#ids = crawlerbot.get_ids(query)
print(results)
return JsonHttpResponse({"results_nb":crawlerbot.results_nb})
def save(request, project_id):
'''save'''
if request.method == "POST":
query = request.POST.get("query")
try:
N = int(request.POST.get("N"))
except (TypeError, ValueError):
N = 0
print(query, N)
#for next time
#ids = request.POST["ids"]
source = get_resource(RESOURCE_TYPE_MULTIVAC)
if N == 0:
raise Http404()
if N > QUERY_SIZE_N_MAX:
N = QUERY_SIZE_N_MAX
try:
project_id = int(project_id)
except ValueError:
raise Http404()
# do we have a valid project?
project = session.query( Node ).filter(Node.id == project_id).first()
if project is None:
raise Http404()
user = cache.User[request.user.id]
if not user.owns(project):
return HttpResponseForbidden()
# corpus node instantiation as a Django model
corpus = Node(
name = query,
user_id = request.user.id,
parent_id = project_id,
typename = 'CORPUS',
hyperdata = { "action" : "Scrapping data"
, "language_id" : "en"
}
)
#download_file
crawler_bot = load_crawler(source)()
#for now no way to force downloading X records
#the long running command
filename = crawler_bot.download(query)
corpus.add_resource(
type = source["type"]
#, name = source["name"]
, path = crawler_bot.path
)
session.add(corpus)
session.commit()
#corpus_id = corpus.id
try:
scheduled(parse_extract_indexhyperdata)(corpus.id)
except Exception as error:
print('WORKFLOW ERROR')
print(error)
try:
print_tb(error.__traceback__)
except:
pass
# IMPORTANT ---------------------------------
# sanitize session after an interrupted transaction
session.rollback()
# --------------------------------------------
return render(
template_name = 'pages/projects/wait.html',
request = request,
context = {
'user' : request.user,
'project': project,
},
)
# non-POST requests are not supported
raise Http404()
......@@ -18,24 +18,31 @@
from django.conf.urls import url
import moissonneurs.pubmed as pubmed
import moissonneurs.istex as istex
import moissonneurs.cern as cern
import moissonneurs.pubmed as pubmed
import moissonneurs.istex as istex
import moissonneurs.cern as cern
import moissonneurs.multivac as multivac
import moissonneurs.hal as hal
# TODO
#import moissonneurs.hal as hal
#import moissonneurs.revuesOrg as revuesOrg
# TODO ?
# REST API for the moissonneurs
# TODO : ISIDORE
# /!\ urls patterns here are *without* the trailing slash
urlpatterns = [ url(r'^pubmed/query$' , pubmed.query )
, url(r'^pubmed/save/(\d+)' , pubmed.save )
, url(r'^istex/query$' , istex.query )
, url(r'^istex/save/(\d+)' , istex.save )
, url(r'^cern/query$' , cern.query )
, url(r'^cern/save/(\d+)' , cern.save )
urlpatterns = [ url(r'^pubmed/query$' , pubmed.query )
, url(r'^pubmed/save/(\d+)' , pubmed.save )
, url(r'^istex/query$' , istex.query )
, url(r'^istex/save/(\d+)' , istex.save )
, url(r'^cern/query$' , cern.query )
, url(r'^cern/save/(\d+)' , cern.save )
, url(r'^multivac/query$' , multivac.query )
, url(r'^multivac/save/(\d+)' , multivac.save )
, url(r'^hal/query$' , hal.query )
, url(r'^hal/save/(\d+)' , hal.save )
#, url(r'^isidore/query$' , isidore.query )
#, url(r'^isidore/save/(\d+)' , isidore.save )
]
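# A sketch of the resulting routes (host and project id are hypothetical):
#   POST /moissonneurs/hal/query    -> JSON {"results_nb": <int>}
#   POST /moissonneurs/hal/save/42  -> renders pages/projects/wait.html
# and likewise for pubmed, istex, cern and multivac.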
......@@ -183,9 +183,55 @@
</div>
</div>
</div>
{% endif %}
{% if teamPast %}
<div class="panel panel-default">
<div class="panel-heading">
<h2 class="panel-title">
<a data-toggle="collapse" data-parent="#accordion" href="#collapseTeamPast">
<center>
<h2>
<span class="glyphicon glyphicon-question-sign" aria-hidden="true"></span>
Former Developers
<span class="glyphicon glyphicon-question-sign" aria-hidden="true"></span>
</h2>
</center>
</a>
</h2>
</div>
<div id="collapseTeamPast" class="panel-collapse collapse" role="tabpanel">
<div class="panel-body">
<div class="container">
<div class="row">
<div class="thumbnails">
{% for member in teamPast %}
<div class="col-md-5 ">
<div class="thumbnail">
<div class="caption">
<center>
<h3>{{ member.first_name }} {{member.last_name }}</h3>
{% if member.role %}
<p class="description">{{ member.role }}</p>
{% endif %}
</center>
</div>
</div>
</div>
{% endfor %}
</div>
</div>
</div>
</div>
</div>
</div>
{% endif %}
</div>
</div>
<div class="panel panel-default">
<div class="panel-heading">
......
......@@ -367,7 +367,7 @@
<p>
Gargantext
<span class="glyphicon glyphicon-registration-mark" aria-hidden="true"></span>
, version 3.0.6.6,
, version 3.0.6.8,
<a href="http://www.cnrs.fr" target="blank" title="Institution that enables this project.">
Copyrights
<span class="glyphicon glyphicon-copyright-mark" aria-hidden="true"></span>
......
......@@ -86,12 +86,12 @@
<button type="button" class="close" data-dismiss="modal" aria-label="Close">
<span aria-hidden="true">&times;</span>
</button>
<h2 class="modal-title"><h2><span class="glyphicon glyphicon-info-sign" aria-hidden="true"></span> Uploading corpus...</h2>
<h2 class="modal-title"><h2><span class="glyphicon glyphicon-info-sign" aria-hidden="true"></span>Building corpus...</h2>
</div>
<div class="modal-body">
<h5>
Your file has been uploaded !
Gargantext need some time to eat it.
Gargantext is gathering your texts
and needs some time to eat them.
Duration depends on the size of the dish.
</h5>
</div>
......
......@@ -209,9 +209,11 @@
function CustomForSelect( selected ) {
// show Radio-Inputs and trigger FileOrNotFile>@upload-file events
selected = selected.toLowerCase()
var is_pubmed = (selected.indexOf('pubmed') != -1);
var is_istex = (selected.indexOf('istex') != -1);
if (is_pubmed || is_istex) {
var is_pubmed = (selected.indexOf('pubmed') != -1);
var is_istex = (selected.indexOf('istex' ) != -1);
var is_repec = (selected.indexOf('repec' ) != -1);
if (is_pubmed || is_istex || is_repec) {
// if(selected=="pubmed") {
console.log("show the button for: " + selected)
$("#pubmedcrawl").css("visibility", "visible");
......
......@@ -199,12 +199,12 @@
<button type="button" class="close" data-dismiss="modal" aria-label="Close">
<span aria-hidden="true">&times;</span>
</button>
<h2 class="modal-title"><h2><span class="glyphicon glyphicon-info-sign" aria-hidden="true"></span> Uploading corpus...</h2>
<h2 class="modal-title"><h2><span class="glyphicon glyphicon-info-sign" aria-hidden="true"></span>Building the corpus...</h2>
</div>
<div class="modal-body">
<p>
Your file has been uploaded !
Gargantext need some time to eat it.
Gargantext is gathering your texts
and needs some time to eat them.
Duration depends on the size of the dish.
</p>
</div>
......