From fc15b8bc286141fd7fbd6ec068412c4ed87c95ef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alexandre=20Delano=C3=AB?= <devel+git@delanoe.org>
Date: Mon, 7 Aug 2017 07:13:24 +0200
Subject: [PATCH] [FEAT] need to fix the crawler.

---
 AdvancedTutorial.ipynb                  | 764 ------------------------
 gargantext/util/crawlers/HAL.py         |   2 +-
 gargantext/util/parsers/HAL.py          |  21 +-
 install/notebook.run                    |   2 +-
 install/notebook/gargantext_notebook.py |  54 +-
 5 files changed, 64 insertions(+), 779 deletions(-)
 delete mode 100644 AdvancedTutorial.ipynb

diff --git a/AdvancedTutorial.ipynb b/AdvancedTutorial.ipynb
deleted file mode 100644
index c453e873..00000000
--- a/AdvancedTutorial.ipynb
+++ /dev/null
@@ -1,764 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Advanced Gargantext Tutorial (Python)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": [
-    "# First import the library Gargantext Notebook\n",
-    "from gargantext_notebook import *\n",
-    "\n",
-    "# This enables to draw graphics later\n",
-    "%matplotlib inline  "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "L'identifiant du corpus est : 254749\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Copier/coller l'url du corpus (avec http://): sur lequel travailler\n",
-    "corpus_url = \"http://gargantext.org/projects/251737/corpora/254749\"\n",
-    "\n",
-    "corpus_id = corpus_url.split(\"/\")[6]\n",
-    "\n",
-    "print(\"L\\'identifiant du corpus est : %s\" % corpus_id)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": [
-    "# To get all the documents:\n",
-    "docs = documents(corpus_id)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'Towards big data science in the decade ahead from ten years of InCoB and the 1st ISCB-Asia Joint Conference.'"
-      ]
-     },
-     "execution_count": 10,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# To get the title of the first document \n",
-    "# [0] indicates the index of the first document\n",
-    "docs[0].hyperdata['title']"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "\"The 2011 International Conference on Bioinformatics (InCoB) conference, which is the annual scientific conference of the Asia-Pacific Bioinformatics Network (APBioNet), is hosted by Kuala Lumpur, Malaysia, is co-organized with the first ISCB-Asia conference of the International Society for Computational Biology (ISCB). InCoB and the sequencing of the human genome are both celebrating their tenth anniversaries and InCoB's goalposts for the next decade, implementing standards in bioinformatics and globally distributed computational networks, will be discussed and adopted at this conference. Of the 49 manuscripts (selected from 104 submissions) accepted to BMC Genomics and BMC Bioinformatics conference supplements, 24 are featured in this issue, covering software tools, genome/proteome analysis, systems biology (networks, pathways, bioimaging) and drug discovery and design.\""
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# To get the abstract of the first document (0)\n",
-    "docs[0].hyperdata['abstract']"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'Shoba Ranganathan, Christian Schönbach, Janet Kelso, Burkhard Rost, Sheila Nathan, Tin Wee Tan'"
-      ]
-     },
-     "execution_count": 10,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# To get the authors of the first document (0)\n",
-    "docs[0].hyperdata['authors']"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'BMC bioinformatics'"
-      ]
-     },
-     "execution_count": 11,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# To get the source of the first document (0)\n",
-    "docs[0].hyperdata['source']"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": [
-    "# If I want to count:\n",
-    "myChart = chart(docs, \"publication_year\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<matplotlib.axes._subplots.AxesSubplot at 0x7fc48a3da128>"
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    },
-    {
-     "data": {
-      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEZCAYAAACZwO5kAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGjxJREFUeJzt3X2QVfWd5/H3hwfFqAOKTRdFo40JGs2KpNMxWI55Ij5b\ngUrUgWwJWsyw2Ug0OpuxZ3drU1a5U6Q2JaNx1g0JiWhNRMNEYRNNIEYnGV1UJO0jUTuKoSmFFpEo\nPvHw3T/uD3JlgHsvfS+n74/Pq6rrnvM7v3Pu9946/enTv3vOPYoIzMwsX4OKLsDMzBrLQW9mljkH\nvZlZ5hz0ZmaZc9CbmWXOQW9mlrmqgl7S1ZKekfS0pDskDZM0TtIjknok3SnpkNT30DTfk5a3N/IF\nmJnZvlUMekljgCuBzoj4D8BgYBrwbWBeRHwE2ATMSqvMAjal9nmpn5mZFaTaoZshwGGShgAfAl4B\nPg8sTssXAlPT9JQ0T1o+WZLqU66ZmdVqSKUOEbFO0neAPwLvAMuAx4E3ImJb6tYLjEnTY4C1ad1t\nkjYDI4HX9vYcxxxzTLS3t+/vazAzOyg9/vjjr0VES6V+FYNe0lGUjtLHAW8APwHO7W+BkmYDswGO\nPfZYVq5c2d9NmpkdVCS9XE2/aoZuvgC8FBF9EbEV+ClwBjAiDeUAtAHr0vQ6YGwqYggwHNi4+0Yj\nYn5EdEZEZ0tLxT9IZma2n6oJ+j8CkyR9KI21TwaeBR4ALkp9ZgJL0vTSNE9a/uvwN6eZmRWmYtBH\nxCOUPlRdBTyV1pkPXAtcI6mH0hj8grTKAmBkar8G6GpA3WZmViUNhIPtzs7O2H2MfuvWrfT29vLu\nu+8WVFW+hg0bRltbG0OHDi26FDPrB0mPR0RnpX4VP4wtSm9vL0ceeSTt7e347Mz6iQg2btxIb28v\n48aNK7ocMzsABuxXILz77ruMHDnSIV9nkhg5cqT/UzI7iAzYoAcc8g3i99Xs4DKgg97MzPpvwI7R\n76696+d13d6auRdU7DN48GBOOeUUtm7dypAhQ5gxYwZXX301gwbt/e/jmjVrePjhh/nKV76yz20f\nf/zx3HfffZx44om72r7xjW8wevRorr322r1u+8ILL+Tpp5+uWLvZQFTv3+PdVfN7fTDyEf0+HHbY\nYXR3d/PMM8+wfPly7rvvPq677rp9rrNmzRp+/OMfV9z2tGnTWLRo0a75HTt2sHjxYqZNm9bvus3M\nyjnoqzRq1Cjmz5/PzTffTESwZs0azjzzTDo6Oujo6ODhhx8GoKuri9/+9rdMnDiRefPmsX37dr75\nzW/yyU9+kgkTJvC9730PgOnTp3PnnXfu2v5vfvMbjjvuOI477ri9brvcrbfeypw5c3bNX3jhhTz4\n4IMALFu2jNNPP52Ojg4uvvhi3nrrrQa+M2Y20Dnoa3D88cezfft2NmzYwKhRo1i+fDmrVq3izjvv\n5MorrwRg7ty5nHnmmXR3d3P11VezYMEChg8fzmOPPcZjjz3G97//fV566SVOOeUUBg0axBNPPAHA\nokWLmD59OsBet12N1157jeuvv55f/epXrFq1is7OTm644Yb6vxlm1jSaZox+oNm6dStz5syhu7ub\nwYMH8/zzz++x37Jly3jyySdZvLj0jc6bN2/mhRdeYNy4cUyfPp1FixbxsY99jHvuuWfXsFC1296T\nFStW8Oyzz3LGGWcA8P7773P66af389WaWTNz0NfgxRdfZPDgwYwaNYrrrruO1tZWnnjiCXbs2MGw\nYcP2uE5E8N3vfpdzzjnn3y2bNm0aZ599Np/5zGeYMGECra2tAMybN6/itocMGcKOHTt2ze88Lz4i\nOOuss7jjjjvq8ZLNLAMeuqlSX18fX/3qV5kzZw6S2Lx5M6NHj2bQoEHcfvvtbN++HYAjjzySN998\nc9d655xzDrfccgtbt24F4Pnnn2fLl
i0AfPjDH+aYY46hq6tr17ANsNdtl2tvb6e7u5sdO3awdu1a\nHn30UQAmTZrEQw89RE9PDwBbtmyp6T8CM8tP0xzRF3Ha1DvvvMPEiRN3nV556aWXcs011wDwta99\njS9/+cvcdtttnHvuuRx++OEATJgwgcGDB3Pqqady2WWXcdVVV7FmzRo6OjqICFpaWrjnnnt2Pcf0\n6dPp6uriS1/60q62vW273BlnnMG4ceM4+eSTOemkk+jo6ACgpaWFW2+9lenTp/Pee+8BcP3113PC\nCSc07H0ys4FtwH6p2erVqznppJMKqih/fn+tCD6Pvr6q/VIzD92YmWXOQW9mlrkBHfQDYVgpR35f\nzQ4uAzbohw0bxsaNGx1Kdbbz++j3djqomeVnwJ5109bWRm9vL319fUWXkp2dd5gys4NDxaCXdCJw\nZ1nT8cD/AG5L7e3AGuCSiNiUbiB+I3A+8DZwWUSsqrWwoUOH+g5IZmZ1UM3NwZ+LiIkRMRH4BKXw\nvpvSTb/vj4jxwP38+Sbg5wHj089s4JZGFG5mZtWpdYx+MvCHiHgZmAIsTO0LgalpegpwW5SsAEZI\nGl2Xas3MrGa1Bv00YOeXqLRGxCtp+lWgNU2PAdaWrdOb2szMrABVB72kQ4AvAj/ZfVmUTo2p6fQY\nSbMlrZS00h+4mpk1Ti1H9OcBqyJifZpfv3NIJj1uSO3rgLFl67Wltg+IiPkR0RkRnS0tLbVXbmZm\nVakl6Kfz52EbgKXAzDQ9E1hS1j5DJZOAzWVDPGZmdoBVdR69pMOBs4D/VNY8F7hL0izgZeCS1H4v\npVMreyidoXN53ao1M7OaVRX0EbEFGLlb20ZKZ+Hs3jeAK+pSnZmZ9duA/QoEMzOrDwe9mVnmHPRm\nZplz0JuZZc5Bb2aWOQe9mVnmHPRmZplz0JuZZc5Bb2aWOQe9mVnmHPRmZplz0JuZZc5Bb2aWOQe9\nmVnmHPRmZplz0JuZZc5Bb2aWOQe9mVnmHPRmZpmrKugljZC0WNLvJa2WdLqkoyUtl/RCejwq9ZWk\nmyT1SHpSUkdjX4KZme1LtUf0NwK/iIiPAqcCq4Eu4P6IGA/cn+YBzgPGp5/ZwC11rdjMzGpSMegl\nDQc+DSwAiIj3I+INYAqwMHVbCExN01OA26JkBTBC0ui6V25mZlWp5oh+HNAH/EjS7yT9QNLhQGtE\nvJL6vAq0pukxwNqy9XtT2wdImi1ppaSVfX19+/8KzMxsn6oJ+iFAB3BLRHwc2MKfh2kAiIgAopYn\njoj5EdEZEZ0tLS21rGpmZjWoJuh7gd6IeCTNL6YU/Ot3Dsmkxw1p+TpgbNn6banNzMwKUDHoI+JV\nYK2kE1PTZOBZYCkwM7XNBJak6aXAjHT2zSRgc9kQj5mZHWBDquz3deCfJR0CvAhcTumPxF2SZgEv\nA5ekvvcC5wM9wNupr5mZFaSqoI+IbqBzD4sm76FvAFf0sy4zM6sTXxlrZpY5B72ZWeYc9GZmmXPQ\nm5llzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFvZpY5B72ZWeYc\n9GZmmXPQm5llzkFvZpY5B72ZWeaqCnpJayQ9Jalb0srUdrSk5ZJeSI9HpXZJuklSj6QnJXU08gWY\nmdm+1XJE/7mImBgRO+8d2wXcHxHjgfvTPMB5wPj0Mxu4pV7FmplZ7fozdDMFWJimFwJTy9pvi5IV\nwAhJo/vxPGZm1g/VBn0AyyQ9Lml2amuNiFfS9KtAa5oeA6wtW7c3tX2ApNmSVkpa2dfXtx+lm5lZ\nNYZU2e8vI2KdpFHAckm/L18YESEpanniiJgPzAfo7OysaV0zM6teVUf0EbEuPW4A7gZOA9bvHJJJ\njxtS93XA2LLV21KbmZkVoGLQSzpc0pE7p4GzgaeBpcDM1G0msCRNLwVmpLNvJgGby4Z4zMzsAKtm\n6
KYVuFvSzv4/johfSHoMuEvSLOBl4JLU/17gfKAHeBu4vO5Vm5lZ1SoGfUS8CJy6h/aNwOQ9tAdw\nRV2qMzOzfvOVsWZmmXPQm5llzkFvZpa5as+jN8tGe9fPG7btNXMvaNi2zfaXj+jNzDLnoDczy5yD\n3swscw56M7PMOejNzDLnoDczy5yD3swscw56M7PMOejNzDLnoDczy5yD3swscw56M7PMOejNzDLn\noDczy1zVQS9psKTfSfpZmh8n6RFJPZLulHRIaj80zfek5e2NKd3MzKpRyxH9VcDqsvlvA/Mi4iPA\nJmBWap8FbErt81I/MzMrSFVBL6kNuAD4QZoX8HlgceqyEJiapqekedLyyam/mZkVoNoj+n8E/g7Y\nkeZHAm9ExLY03wuMSdNjgLUAafnm1P8DJM2WtFLSyr6+vv0s38zMKqkY9JIuBDZExOP1fOKImB8R\nnRHR2dLSUs9Nm5lZmWruGXsG8EVJ5wPDgL8AbgRGSBqSjtrbgHWp/zpgLNAraQgwHNhY98rNzKwq\nFY/oI+LvI6ItItqBacCvI+I/Ag8AF6VuM4ElaXppmict/3VERF2rNjOzqvXnPPprgWsk9VAag1+Q\n2hcAI1P7NUBX/0o0M7P+qGboZpeIeBB4ME2/CJy2hz7vAhfXoTYzM6sDXxlrZpY5B72ZWeYc9GZm\nmXPQm5llzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFvZpY5B72Z\nWeYc9GZmmXPQm5llzkFvZpY5B72ZWeYqBr2kYZIelfSEpGckXZfax0l6RFKPpDslHZLaD03zPWl5\ne2NfgpmZ7Us1R/TvAZ+PiFOBicC5kiYB3wbmRcRHgE3ArNR/FrAptc9L/czMrCAVgz5K3kqzQ9NP\nAJ8HFqf2hcDUND0lzZOWT5akulVsZmY1qWqMXtJgSd3ABmA58AfgjYjYlrr0AmPS9BhgLUBavhkY\nuYdtzpa0UtLKvr6+/r0KMzPbq6qCPiK2R8REoA04Dfhof584IuZHRGdEdLa0tPR3c2Zmthc1nXUT\nEW8ADwCnAyMkDUmL2oB1aXodMBYgLR8ObKxLtWZmVrNqzrppkTQiTR8GnAWsphT4F6VuM4ElaXpp\nmict/3VERD2LNjOz6g2p3IXRwEJJgyn9YbgrIn4m6VlgkaTrgd8BC1L/BcDtknqA14FpDajbzMyq\nVDHoI+JJ4ON7aH+R0nj97u3vAhfXpTozM+s3XxlrZpY5B72ZWeYc9GZmmXPQm5llzkFvZpY5B72Z\nWeYc9GZmmXPQm5llzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFv\nZpY5B72ZWeaquTn4WEkPSHpW0jOSrkrtR0taLumF9HhUapekmyT1SHpSUkejX4SZme1dNUf024C/\njYiTgUnAFZJOBrqA+yNiPHB/mgc4DxiffmYDt9S9ajMzq1rFoI+IVyJiVZp+E1gNjAGmAAtTt4XA\n1DQ9BbgtSlYAIySNrnvlZmZWlSG1dJbUDnwceARojYhX0qJXgdY0PQZYW7Zab2p7pawNSbMpHfFz\n7LHH1lR0e9fPa+pfqzVzL2jo9s3MDqSqP4yVdATwL8A3IuJP5csiIoCo5YkjYn5EdEZEZ0tLSy2r\nmplZDaoKeklDKYX8P0fET1Pz+p1DMulxQ2pfB4wtW70ttZmZWQGqOetGwAJgdUTcULZoKTAzTc8E\nlpS1z0hn30wCNpcN8ZiZ2QFWzRj9GcClwFOSulPbfwXmAndJmgW8DFySlt0LnA/0AG8Dl9e1YjMz\nq0nFoI+IfwO0l8WT99A/gCv6WZeZmdWJr4w1M8ucg97MLHMOejOzzDnozcwy56A3M8ucg97MLHMO\nejOzzDnozcwy56A3M8ucg97MLHMOejOzzDnozcwy56A3M8ucg97
MLHMOejOzzNV0c3Az8M3ZzZqN\nj+jNzDLnoDczy1w1Nwf/oaQNkp4uazta0nJJL6THo1K7JN0kqUfSk5I6Glm8mZlVVs0R/a3Aubu1\ndQH3R8R44P40D3AeMD79zAZuqU+ZZma2vyoGfUT8Bnh9t+YpwMI0vRCYWtZ+W5SsAEZIGl2vYs3M\nrHb7O0bfGhGvpOlXgdY0PQZYW9avN7X9O5JmS1opaWVfX99+lmFmZpX0+8PYiAgg9mO9+RHRGRGd\nLS0t/S3DzMz2Yn+Dfv3OIZn0uCG1rwPGlvVrS21mZlaQ/Q36pcDMND0TWFLWPiOdfTMJ2Fw2xGNm\nZgWoeGWspDuAzwLHSOoFvgXMBe6SNAt4Gbgkdb8XOB/oAd4GLm9AzWZmVoOKQR8R0/eyaPIe+gZw\nRX+LMjOz+vGVsWZmmXPQm5llzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFvZpY5B72ZWeYc9GZmmXPQ\nm5llzkFvZpa5it91Y/XX3vXzhm5/zdwLGrp9M2suPqI3M8ucg97MLHMOejOzzDnozcwy56A3M8uc\ng97MLHMNCXpJ50p6TlKPpK5GPIeZmVWn7kEvaTDwT8B5wMnAdEkn1/t5zMysOo24YOo0oCciXgSQ\ntAiYAjzbgOcyO6j4YrtiNev7r4io7wali4BzI+Kv0/ylwKciYs5u/WYDs9PsicBzdS3kg44BXmvg\n9hvN9RenmWsH11+0Rtd/XES0VOpU2FcgRMR8YP6BeC5JKyOi80A8VyO4/uI0c+3g+os2UOpvxIex\n64CxZfNtqc3MzArQiKB/DBgvaZykQ4BpwNIGPI+ZmVWh7kM3EbFN0hzgl8Bg4IcR8Uy9n6dGB2SI\nqIFcf3GauXZw/UUbEPXX/cNYMzMbWHxlrJlZ5hz0ZmaZc9CbmWXOQW9mlrksg17SaZI+maZPlnSN\npPOLrsuaj6Tbiq7BrL+yuzm4pG9R+kK1IZKWA58CHgC6JH08Iv5noQVWQdJHgTHAIxHxVln7uRHx\ni+Iq2zdJnwJWR8SfJB0GdAEdlL7n6B8iYnOhBVYgaffrPQR8TtIIgIj44oGvqj4kXR4RPyq6jlpI\n+ktK3531dEQsK7qeSiRdCdwdEWuLrmV32Z1eKekpYCJwKPAq0FYWPI9ExIRCC6wg7SxXAKspvY6r\nImJJWrYqIjqKrG9fJD0DnJqupZgPvA0sBian9i8VWmAFklZR+qP0AyAoBf0dlC76IyL+tbjq+kfS\nHyPi2KLr2BdJj0bEaWn6byj9HtwNnA3834iYW2R9lUjaDGwB/kBpv/lJRPQVW1VJdkf0wLaI2A68\nLekPEfEngIh4R9KOgmurxt8An4iItyS1A4sltUfEjZSCZyAbFBHb0nRn2R+lf5PUXVRRNegErgL+\nG/DNiOiW9E6zBLykJ/e2CGg9kLXsp6Fl07OBsyKiT9J3gBXAgA564EXgE8AXgL8CrpP0OKXQ/2lE\nvFlUYTkG/fuSPhQRb1N60wGQNBxohqAftHO4JiLWSPospbA/joEf9E+XDRE8IakzIlZKOgHYWnRx\nlUTEDmCepJ+kx/U01+9IK3AOsGm3dgEPH/hyajZI0lGUPjvUzqPhiNgiadu+Vx0QIu1Dy4BlkoZS\nGkaeDnwHqPgtk43STDtxtT4dEe/Brl/cnYYCM4spqSbrJU2MiG6AdGR/IfBD4JRiS6vor4EbJf13\nSl/N+v8krQXWpmVNISJ6gYslXQD8qeh6avAz4Iid+045SQ8e+HJqNhx4nNIfppA0OiJekXQEA/8g\nB3arMSK2Uvqer6WSPlRMSSXZjdHvi6Qjyj/cHIgktVEafnp1D8vOiIiHCiirJpL+AhhH6UCiNyLW\nF1xSvzXDvpOrFJKtEfFS0bXsi6QTIuL5ouvYk4Mt6Af8B1L70sxh08y1g/edorn+/slu6EbSNXtb\nBBxxIGtpgGeBZg2bAV+7950
BzfX3Q3ZBD/wD8L+APX14M+AvEGvmsGnm2hPvOwVy/Y2TY9CvAu6J\niMd3XyCpGT4QbOawaebawftO0Vx/g2Q3Ri/pROD1PV2oIKl1oH8wKOlh4Ot7CZu1ETF2D6sNCM1c\nO3jfKZrrb5zsgr7ZNXPYNHPtOWj299/1N052QZ8ujPp7YCowitKl7BuAJcDciHijwPJsAPO+Y7lq\nhnGvWt1F6crAz0bE0RExEvhcarur0MqqIGm4pLmSfi/pdUkbJa1ObSOKrm9fmrn2xPtOgVx/4+QY\n9O0R8e3yC44i4tWI+DZwXIF1VauZw6aZawfvO0Vz/Q2S49DNMuBXwMKdY2KSWoHLKH1J0hcKLK8i\nSc9FxIm1LhsImrl28L5TNNffODke0f8VMBL4V0mbJL0OPAgcDVxSZGFVelnS36WAAUphI+laSt8Z\nM5A1c+3gfadorr9Bsgv6iNgE/AiYA4xN/0KdFBHXUrqJwUDXzGHTzLV73yme62+QHIdumvbGHTup\ndIepNmBFNNEdpqDpa/e+UzDX3yARkdUP8BSlr2oFaAdWUvqFBfhd0fVVUf+VwHPAPcAaYErZslVF\n15dr7d53iv9x/Y37yfErEJr5xh3Q3HeYaubawftO0Vx/g+QY9M184w5o7rBp5trB+07RXH+jCivy\nyRtkBqWbgu8SEdsiYgbw6WJKqsl6SRN3zqQd50LgGAZ+2DRz7eB9p2iuv0Gy+zC22amJ7zDVzLXn\noNnff9ffOA56M7PM5Th0Y2ZmZRz0ZmaZc9DbQUnSdkndkp6R9ISkv5W0z98HSe2SvnKgajSrFwe9\nHazeiYiJEfEx4CzgPOBbFdZpBxz01nT8YawdlCS9FRFHlM0fDzxG6VS444DbgcPT4jkR8bCkFcBJ\nwEvAQuAmYC7wWeBQ4J8i4nsH7EWYVclBbwel3YM+tb0BnAi8CeyIiHcljQfuiIjOdAHMf4mIC1P/\n2cCoiLhe0qHAQ8DFEfHSAX0xZhXkeGWsWX8NBW5OF79sB07YS7+zgQmSLkrzw4HxlI74zQYMB70Z\nu4ZutlO6R+y3gPXAqZQ+x3p3b6sBX4+IXx6QIs32kz+MtYOepBbg/wA3R2ksczjwSkTsAC4FBqeu\nbwJHlq36S+A/SxqatnOCpMMxG2B8RG8Hq8MkdVMaptlG6cPXG9Ky/w38i6QZwC+ALan9SWC7pCeA\nW4EbKZ2Js0qSgD5g6oF6AWbV8oexZmaZ89CNmVnmHPRmZplz0JuZZc5Bb2aWOQe9mVnmHPRmZplz\n0JuZZe7/A6+cHY7zduzoAAAAAElFTkSuQmCC\n",
-      "text/plain": [
-       "<matplotlib.figure.Figure at 0x7fc48a441a58>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "myChart.plot.bar()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "deletable": true,
-    "editable": true
-   },
-   "source": [
-    "## Title\n",
-    "\n",
-    "Here I can add some comments on the cart.\n",
-    "1. First point\n",
-    "2. Second point"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "deletable": true,
-    "editable": true
-   },
-   "source": [
-    "# Lang Cleaning tools"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'fr'"
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "detect_lang(\"Ceci est une phrase en français.\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'en'"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "detect_lang(\"This is an english sentence.\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<matplotlib.axes._subplots.AxesSubplot at 0x7fc487e01e80>"
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    },
-    {
-     "data": {
-      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAENCAYAAAAG6bK5AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGuhJREFUeJzt3X+8VXWd7/HXm4OKmqMoB64Beg4NOiGBnTkqXMfJMkHL\nG95u9uDYD6YsKiX7dSuamcf12tTjYc0oZRYTJoFeA31YKbecUTQbTa8KElAq6rmIcbgax1+EFsmP\nz/1jfQ9ujucXe5+9t/B9Px+P82Cv7/qu9fluOOz3Xt+19l6KCMzMLD9D6j0AMzOrDweAmVmmHABm\nZplyAJiZZcoBYGaWKQeAmVmmHABmZplyAJiZZcoBYGaWqX4DQNJCSZsl/bZb+6clrZP0sKRvlrR/\nRVK7pMckTS9pPyu1tUuaO7hPw8zM9pb6+yoISX8LvARcGxETU9vbgX8A3h0Rf5Y0MiI2S5oALAFO\nBt4I3AEcl3b1OHAm0AGsANoi4pG+ao8YMSKamprKfW5mZll66KGHno2Ixv76De2vQ0TcLampW/On\ngMsi4s+pz+bUPgNYmtqflNROEQYA7RGxHkDS0tS3zwBoampi5cqV/Q3RzMxKSHpqIP3KPQdwHHCa\npAck/Yekk1L7aGBjSb+O1NZbu5mZ1Um/RwB9bHckMAU4CbhR0rjBGJCk2cBsgGOOOWYwdmlmZj0o\n9wigA/hJFB4EdgEjgE3A2JJ+Y1Jbb+2vERELIqI1IlobG/udwjIzszKVewRwM/B24C5JxwEHAs8C\ny4AfSbqC4iTweOBBQMB4Sc0UL/wzgfMrHLuZ7QO2b99OR0cH27Ztq/dQ9jvDhg1jzJgxHHDAAWVt\n328ASFoCnA6MkNQBXAIsBBamS0NfAWZFcTnRw5JupDi5uwO4KCJ2pv3MAW4DGoCFEfFwWSM2s31K\nR0cHhx12GE1NTUiq93D2GxHBc889R0dHB83NzWXtYyBXAbX1suqDvfT/OvD1HtpvBW7dq9GZ2T5v\n27ZtfvGvAkkcddRRdHZ2lr0PfxLYzKrOL/7VUenfqwPAzPZ7DQ0NnHjiiZxwwglMnjyZyy+/nF27\ndvW5zYYNG/jRj37U777HjRvHY489tkfbZz/7Wb7xjW/0ue+JEycObPBVVO5JYKuTprk/L3vbDZe9\nexBHYlaeSn6HezKQ3+uDDz6Y1atXA7B582bOP/98/vCHP3DppZf2vt8UAOef3/f1KjNnzmTp0qVc\ncsklAOzatYubbrqJe++9dy+eRX34CMDMsjJy5EgWLFjAVVddRUSwYcMGTjvtNFpaWmhpaeG+++4D\nYO7cudxzzz2ceOKJzJs3j507d/LFL36Rk046iUmTJvH9738fgLa2Nm644Ybd+7/77rs59thjOfbY\nY3vdd6lFixYxZ86c3cvnnHMOv/zlLwG4/fbbmTp1Ki0tLZx33nm89NJLg/p34QAws+yMGzeOnTt3\nsnnzZkaOHMny5ctZtWoVN9xwAxdffDEAl112GaeddhqrV6/mc5/7HNdccw2HH344K1asYMWKFVx9\n9dU8+eSTvOUtb2HIkCGsWbMGgKVLl9LWVlw709u+B+LZZ5/la1/7GnfccQerVq2itbWVK664YlD/\nHjwFZGZZ2759O3PmzGH16tU0NDTw+OOP99jv9ttvZ+3atdx0000AbNmyhSeeeILm5mba2tpYunQp\nJ5xwAjfffPPuqaWB7rsn999/P4888ginnnoqAK+88gpTp06t8NnuyQFgZtlZv349DQ0NjBw5kksv\nvZRRo0axZs0adu3axbBhw3rcJiL4zne+w/Tp01+zbubMmUybNo23ve1tTJo0iVGjRgEwb968fvc9\ndOjQPU5Id31gLiI488wzWbJkyWA85R55CsjMstLZ2cknP/lJ5syZgyS2bNnC0UcfzZAhQ7juuuvY\nuXMnAIcddhhbt27dvd306dOZP38+2
7dvB+Dxxx/n5ZdfBuBNb3oTI0aMYO7cubunf4Be912qqamJ\n1atXs2vXLjZu3MiDDz4IwJQpU7j33ntpb28H4OWXX96rI4iBcACY2X7vT3/60+7LQN/5zncybdq0\n3VftXHjhhSxevJjJkyezbt06Dj30UAAmTZpEQ0MDkydPZt68eXzsYx9jwoQJtLS0MHHiRD7xiU+w\nY8eO3TXa2tpYt24d733ve3e39bbvUqeeeirNzc1MmDCBiy++mJaWFgAaGxtZtGgRbW1tTJo0ialT\np7Ju3bpB/Xvp94Yw9dTa2hq+H8CefBmo7WseffRR3vzmN9d7GPutnv5+JT0UEa39besjADOzTDkA\nzMwy5QAwM8uUA8DMqu71fK5xX1bp36sDwMyqatiwYTz33HMOgUHWdT+A3j63MBD+IJiZVdWYMWPo\n6Oio6HvrrWdddwQrlwPAzKrqgAMOKPuOVVZdngIyM8tUvwEgaaGkzen+v93XfUFSSBqRliXpSknt\nktZKainpO0vSE+ln1uA+DTMz21sDOQJYBJzVvVHSWGAa8LuS5rOB8elnNjA/9T2S4mbypwAnA5dI\nGl7JwM3MrDL9BkBE3A0838OqecCXgNJT+zOAa6NwP3CEpKOB6cDyiHg+Il4AltNDqJiZWe2UdQ5A\n0gxgU0Ss6bZqNLCxZLkjtfXWbmZmdbLXVwFJOgT4e4rpn0EnaTbF9BHHHHNMNUqYmRnlHQG8CWgG\n1kjaAIwBVkn6T8AmYGxJ3zGprbf214iIBRHRGhGtjY2NZQzPzMwGYq8DICJ+ExEjI6IpIpoopnNa\nIuIZYBnw4XQ10BRgS0Q8DdwGTJM0PJ38nZbazMysTgZyGegS4P8Ax0vqkHRBH91vBdYD7cDVwIUA\nEfE88E/AivTz1dRmZmZ10u85gIho62d9U8njAC7qpd9CYOFejs/MzKrEnwQ2M8uUA8DMLFMOADOz\nTDkAzMwy5QAwM8uUA8DMLFMOADOzTDkAzMwy5QAwM8uUA8DMLFMOADOzTDkAzMwy5QAwM8uUA8DM\nLFMOADOzTDkAzMwy5QAwM8uUA8DMLFMDuSfwQkmbJf22pO2fJa2TtFbSTyUdUbLuK5LaJT0maXpJ\n+1mprV3S3MF/KmZmtjcGcgSwCDirW9tyYGJETAIeB74CIGkCMBM4IW3zPUkNkhqA7wJnAxOAttTX\nzMzqpN8AiIi7gee7td0eETvS4v3AmPR4BrA0Iv4cEU8C7cDJ6ac9ItZHxCvA0tTXzMzqZDDOAXwU\n+Lf0eDSwsWRdR2rrrf01JM2WtFLSys7OzkEYnpmZ9aSiAJD0D8AO4PrBGQ5ExIKIaI2I1sbGxsHa\nrZmZdTO03A0l/R1wDnBGRERq3gSMLek2JrXRR7uZmdVBWUcAks4CvgS8JyL+WLJqGTBT0kGSmoHx\nwIPACmC8pGZJB1KcKF5W2dDNzKwS/R4BSFoCnA6MkNQBXEJx1c9BwHJJAPdHxCcj4mFJNwKPUEwN\nXRQRO9N+5gC3AQ3Awoh4uArPx8zMBqjfAIiIth6ar+mj/9eBr/fQfitw616NzszMqsafBDYzy5QD\nwMwsUw4AM7NMOQDMzDLlADAzy5QDwMwsUw4AM7NMOQDMzDLlADAzy5QDwMwsUw4AM7NMOQDMzDLl\nADAzy5QDwMwsUw4AM7NMOQDMzDLlADAzy1S/ASBpoaTNkn5b0nakpOWSnkh/Dk/tknSlpHZJayW1\nlGwzK/V/QtKs6jwdMzMbqIEcASwCzurWNhe4MyLGA3emZYCzKW4EPx6YDcyHIjAo7iV8CnAycElX\naJiZWX30GwARcTfwfLfmGcDi9HgxcG5J+7VRuB84QtLRwHRgeUQ8HxEvAMt5baiYmVkNlXsOYFRE\nPJ0ePwOMSo9HAxtL+nWktt7azcysTio+CRwRAcQgjAUASbMlrZS0srOzc7B2a2Zm3ZQbAL9PUzuk\nP
zen9k3A2JJ+Y1Jbb+2vERELIqI1IlobGxvLHJ6ZmfWn3ABYBnRdyTMLuKWk/cPpaqApwJY0VXQb\nME3S8HTyd1pqMzOzOhnaXwdJS4DTgRGSOiiu5rkMuFHSBcBTwPtT91uBdwHtwB+BjwBExPOS/glY\nkfp9NSK6n1g2M7Ma6jcAIqKtl1Vn9NA3gIt62c9CYOFejc7MzKrGnwQ2M8uUA8DMLFMOADOzTDkA\nzMwy5QAwM8uUA8DMLFMOADOzTDkAzMwy5QAwM8uUA8DMLFMOADOzTDkAzMwy5QAwM8uUA8DMLFMO\nADOzTDkAzMwy5QAwM8uUA8DMLFMVBYCkz0l6WNJvJS2RNExSs6QHJLVLukHSganvQWm5Pa1vGown\nYGZm5Sk7ACSNBi4GWiNiItAAzAS+AcyLiL8EXgAuSJtcALyQ2uelfmZmVieVTgENBQ6WNBQ4BHga\neAdwU1q/GDg3PZ6Rlknrz5CkCuubmVmZyg6AiNgE/AvwO4oX/i3AQ8CLEbEjdesARqfHo4GNadsd\nqf9R5dY3M7PKVDIFNJziXX0z8EbgUOCsSgckabaklZJWdnZ2Vro7MzPrRSVTQO8EnoyIzojYDvwE\nOBU4Ik0JAYwBNqXHm4CxAGn94cBz3XcaEQsiojUiWhsbGysYnpmZ9aWSAPgdMEXSIWku/wzgEeAu\n4H2pzyzglvR4WVomrf9FREQF9c3MrAKVnAN4gOJk7irgN2lfC4AvA5+X1E4xx39N2uQa4KjU/nlg\nbgXjNjOzCg3tv0vvIuIS4JJuzeuBk3vouw04r5J6ZmY2ePxJYDOzTDkAzMwy5QAwM8uUA8DMLFMO\nADOzTDkAzMwy5QAwM8uUA8DMLFMOADOzTDkAzMwy5QAwM8uUA8DMLFMOADOzTDkAzMwy5QAwM8uU\nA8DMLFMOADOzTDkAzMwyVVEASDpC0k2S1kl6VNJUSUdKWi7pifTn8NRXkq6U1C5praSWwXkKZmZW\njkqPAL4N/HtE/BUwGXiU4mbvd0bEeOBOXr35+9nA+PQzG5hfYW0zM6tA2QEg6XDgb4FrACLilYh4\nEZgBLE7dFgPnpsczgGujcD9whKSjyx65mZlVpJIjgGagE/ihpF9L+oGkQ4FREfF06vMMMCo9Hg1s\nLNm+I7WZmVkdVBIAQ4EWYH5EvBV4mVenewCIiABib3YqabaklZJWdnZ2VjA8MzPrSyUB0AF0RMQD\nafkmikD4fdfUTvpzc1q/CRhbsv2Y1LaHiFgQEa0R0drY2FjB8MzMrC9lB0BEPANslHR8ajoDeARY\nBsxKbbOAW9LjZcCH09VAU4AtJVNFZmZWY0Mr3P7TwPWSDgTWAx+hCJUbJV0APAW8P/W9FXgX0A78\nMfU1M7M6qSgAImI10NrDqjN66BvARZXUMzOzweNPApuZZcoBYGaWKQeAmVmmHABmZplyAJiZZcoB\nYGaWKQeAmVmmHABmZplyAJiZZcoBYGaWKQeAmVmmHABmZplyAJiZZcoBYGaWKQeAmVmmHABmZply\nAJiZZcoBYGaWqYoDQFKDpF9L+llabpb0gKR2STek+wUj6aC03J7WN1Va28zMyjcYRwCfAR4tWf4G\nMC8i/hJ4AbggtV8AvJDa56V+ZmZWJxUFgKQxwLuBH6RlAe8AbkpdFgPnpscz0jJp/Rmpv5mZ1UGl\nRwDfAr4E7ErLRwEvRsSOtNwBjE6PRwMbAdL6Lam/mZnVQdkBIOkcYHNEPDSI40HSbEkrJa3s7Owc\nzF2bmVmJSo4ATgXeI2kDsJRi6ufbwBGShqY+Y4BN6fEmYCxAWn848Fz3nUbEgohojYjWxsbGCoZn\nZmZ9KTsAIuIrETEmIpqAmcAvIuIDwF3A+1K3WcAt6fGytExa/4uIiHLrm5lZZarxOYAvA5+X1E4x\nx39Nar8GOCq1fx6YW4XaZmY2QEP779K/iPgl8Mv0eD1wcg99tgH
nDUY9MzOrnD8JbGaWKQeAmVmm\nHABmZplyAJiZZcoBYGaWKQeAmVmmHABmZplyAJiZZcoBYGaWKQeAmVmmHABmZplyAJiZZcoBYGaW\nKQeAmVmmHABmZplyAJiZZcoBYGaWqbIDQNJYSXdJekTSw5I+k9qPlLRc0hPpz+GpXZKulNQuaa2k\nlsF6EmZmtvcqOQLYAXwhIiYAU4CLJE2guNfvnRExHriTV+/9ezYwPv3MBuZXUNvMzCpUdgBExNMR\nsSo93go8CowGZgCLU7fFwLnp8Qzg2ijcDxwh6eiyR25mZhUZlHMAkpqAtwIPAKMi4um06hlgVHo8\nGthYsllHajMzszqoOAAkvQH4MfDZiPhD6bqICCD2cn+zJa2UtLKzs7PS4ZmZWS8qCgBJB1C8+F8f\nET9Jzb/vmtpJf25O7ZuAsSWbj0lte4iIBRHRGhGtjY2NlQzPzMz6UMlVQAKuAR6NiCtKVi0DZqXH\ns4BbSto/nK4GmgJsKZkqMjOzGhtawbanAh8CfiNpdWr7e+Ay4EZJFwBPAe9P624F3gW0A38EPlJB\nbTMzq1DZARARvwLUy+ozeugfwEXl1jMzs8HlTwKbmWXKAWBmlikHgJlZphwAZmaZcgCYmWXKAWBm\nlikHgJlZphwAZmaZcgCYmWXKAWBmlikHgJlZphwAZmaZcgCYmWXKAWBmlikHgJlZphwAZmaZcgCY\nmWXKAWBmlqmaB4CksyQ9Jqld0txa1zczs0IlN4Xfa5IagO8CZwIdwApJyyLikVqOw/Ze09yfV7T9\nhsvePUgjMbPBUtMAAE4G2iNiPYCkpcAMwAFgZnVRyZubff2NTa0DYDSwsWS5AzilxmOomN8N11bO\n/0HNqkkRUbti0vuAsyLiY2n5Q8ApETGnpM9sYHZaPB54rIKSI4BnK9h+X6tbz9q51a1nbT/nPGpX\nUvfYiGjsr1OtjwA2AWNLlsektt0iYgGwYDCKSVoZEa2Dsa99oW49a+dWt561/ZzzqF2LurW+CmgF\nMF5Ss6QDgZnAshqPwczMqPERQETskDQHuA1oABZGxMO1HIOZmRVqPQVERNwK3FqjcoMylbQP1a1n\n7dzq1rO2n3Metatet6Yngc3M7PXDXwVhZpYpB4CZWaZqfg5gfyZpJDCsazkiflfH4ZiZ9clHAINA\n0nskPQE8CfwHsAH4t7oOyvY7kg6UNDH9HFDDup8ZSNsg1muQdH219m+v2i9OAkv6UkR8U9J3gNc8\noYi4uMr11wDvAO6IiLdKejvwwYi4oJp1U+1G4ONAEyVHdBHx0f2xbqr9GeCHwFbgB8BbgbkRcXsN\naj9Jz79j46pc93RgMcWbC1F8oHJWRNxdzbqp9qqIaOnW9uuIeGsVa/4KeEdEvFKtGn3UPg6YD4yK\niImSJgHviYiv1aD2D+n596sq/6/2lymgLwPfBP4v8EId6m+PiOckDZE0JCLukvStGtW+BbgHuAPY\nWaOa9awL8NGI+Lak6cBw4EPAdUDVAwAo/WTmMOA84Mga1L0cmBYRj8HuF6klwF9Xq6CkNuB8oFlS\n6Qc2DwOer1bdZD1wb6r7cldjRFxR5boAVwNfBL6faq6V9COg6gEA/Kzk8TDgvwL/r1rF9pcA+L2k\nNwIfAU6neIdUSy9KegPFC+L1kjZT8ktbZYdExJdrVOv1UBde/fd9N3BdRDwsqSb/5hHxXLemb0l6\nCPgfVS59QNeLfxrH4zWYBroPeJriO2kuL2nfCqytRkFJ10XEh4D3APMopqkPq0atPhwSEQ92+5Xa\nUYvCEfHj0mVJS4BfVave/hIA84E7gXHAQyXtojicqurhOcUv6zbgM8AHgb8ALq1yzS4/k/Su9AG7\nWqpXXYCHJN1G8e86V9JhwK5aFJZUOhUyhOKIoBb/j1ZK+gHwv9LyBym+WqVqIuIp4ClgajXrdPPX\n6c3c74Dv1LBuqWclvYk0FZO
+xPLpOo1lPDCyWjvfL84BdJE0PyI+VcN6v4qIv5G0lVfn7breNuyi\nOEz+54j4XhXHsBU4BHgF2J7qR0T8RRXrdT3XN6S6r1S7brcxDAH+ERgeEZ+TdAzFtx/eU4Pad/Hq\n899BMSf/LxHxeJXr/g3F/TT+JjXdAzwRET/rfauKa/b0+w1V/LeWdDHwKaCZPac+umpW+80cksZR\nfAr3P1NMKT8JfCAFYjXrimI69aWS5meAr3Q/Mhi0mvtTALzeSDoKuC8ijq9ijSHAB4DmiPhqejE8\nOiIeqFbNVPcXwOUR8fOStqsj4uPVrJvqzKcI2HdExJslDQduj4iTalB7GPDf2PPkd0TEV6tcdxXF\nSd/fpOU24LMRsc/dT2Mgav1mLtX8fLemgymO8l6G2px/kPTbiJhY7TpdfBloFaX54tOrXOa7wBSg\nLS1vBa6qck0oXgC/JKl07rtqJyS7OSUiLqKYdiMiXgAOrFHtm4H/QnG09VL6qcX5nvcBiyQdL+nj\nwIXAtBrUrYtav/gnh6WfVoqjkOHAEcAngZY+thtMD0mq+huZLvvLOYDXrYio9tzhKRHRIunXqd4L\n6au2q+1F4AzgSkn/m2JOula2p/tLd83RNlKjcwDAmIg4q0a1douI9eld/80U8+PTIuJPtR7H/iwi\nLgWQdDfQEhFb0/L/BCq7DeDAnQJ8QNJTFG8suqa+JlWjmANg31evF0NFxA7gQkl/R3GlwvAa1AW4\nEvgpMFLS1yneHf9jjWrfJ+ktXVMx1SbpN+w5/34kxVepPyCJar0wZG4UxXmtLq+ktlqYXqM6gANg\nf1CvF8N/7XoQEYvSC9VFNahLRFyfLr08g+Id0rkR8Wg1a5a8EA8FPiJpPfBnqvwODTinSvu13l0L\nPCjpp2n5XGBRLQpX+0Rzdz4JvB+Q9Fe8+mJ4Z7VfDHMk6di+1tf6P65VV7rc97S0eHdE/Lqe46kW\nB4CZWaZ8FZCZWaYcAGZmmXIAmJWQtFPSakkPS1oj6Qvpw3Z9bdMk6fxajdFssDgAzPb0p4g4MSJO\nAM4EzgYu6WebJopvzTTbp/gksFkJSS9FxBtKlsdRfOnaCOBYiq+dPjStnhMR90m6H3gzxXfGLKa4\nNPcyik+BHwR8NyK+X7MnYTZADgCzEt0DILW9CBxP8TUbuyJim6TxwJKIaE03a/nvEXFO6j8bGBkR\nX5N0EHAvcF5EPFnTJ2PWD38QzGzgDgCuknQixbc2HtdLv2nApPQ1wgCHU3ytrwPAXlccAGZ9SFNA\nO4HNFOcCfg9Mpjh/tq23zYBPR8RtNRmkWZl8EtisF+l7lf4VuCqKudLDgacjYhfFbSgbUtet7HnX\nqtuAT3XdsUvScZIOxex1xkcAZns6WNJqiumeHRQnfbu+B/57wI8lfRj4d179Gui1wE5Jayi+M+bb\nFFcGrUo3+eik+D4Zs9cVnwQ2M8uUp4DMzDLlADAzy5QDwMwsUw4AM7NMOQDMzDLlADAzy5QDwMws\nUw4AM7NM/X+WgGyX8gJ3yQAAAABJRU5ErkJggg==\n",
-      "text/plain": [
-       "<matplotlib.figure.Figure at 0x7fc4cc6ea6d8>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "chart(docs, \"language_iso2\").plot.bar()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Counter({'de': 13,\n",
-       "         'en': 1547,\n",
-       "         'es': 5,\n",
-       "         'fi': 1,\n",
-       "         'fr': 4,\n",
-       "         'hu': 1,\n",
-       "         'it': 1,\n",
-       "         'ja': 5,\n",
-       "         'ko': 1,\n",
-       "         'ru': 3,\n",
-       "         'zh': 23})"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "Counter([doc.hyperdata[\"language_iso2\"] for doc in docs])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": [
-    "# Deleting language that is not in majority\n",
-    "def cleanCorpusWithLang(corpus_id, lang):\n",
-    "    return (session.query(Node.id).filter(Node.parent_id == corpus_id)\n",
-    "                        .filter(Node.hyperdata[\"language_iso2\"].astext != lang)\n",
-    "                        .count()\n",
-    "                       #.delete()\n",
-    "           )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "57"
-      ]
-     },
-     "execution_count": 10,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "cleanCorpusWithLang(corpus_id, 'en')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[(True, 'This is an english paragraph.\\n '),\n",
-       " (False, '\"This is an english paragraph.\\n\\nThis is an english paragraph.\\n ')]"
-      ]
-     },
-     "execution_count": 11,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "abstract0 = \"\"\"\"Ceci est un paragraphe en français.\n",
-    "\n",
-    "This is an english paragraph.\n",
-    " \"\"\"\n",
-    "\n",
-    "abstract1 = \"\"\"\"This is an english paragraph.\n",
-    "\n",
-    "This is an english paragraph.\n",
-    " \"\"\"\n",
-    "\n",
-    "def clean_lang_inText(lang, text):\n",
-    "    \n",
-    "    texts_before = nltk.tokenize.blankline_tokenize(text)\n",
-    "    texts_after  = '\\n\\n'.join([sentence \n",
-    "                                    for sentence in texts_before\n",
-    "                                    if detect_lang(sentence) == lang\n",
-    "                              ])\n",
-    "    \n",
-    "    return (len(texts_before) != len(nltk.tokenize.blankline_tokenize(texts_after)), texts_after)\n",
-    "\n",
-    "[clean_lang_inText('en', abstract) for abstract in [abstract0, abstract1]]\n",
-    "\n",
-    "# TODO update each document accordingly"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": [
-    "# TODO update all the abstract with That function"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "deletable": true,
-    "editable": true
-   },
-   "source": [
-    "# Measures IMT Tools"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "154"
-      ]
-     },
-     "execution_count": 2,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "scan_hal(\"machine learning AND deep\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "90"
-      ]
-     },
-     "execution_count": 23,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Request syntax\n",
-    "# \"network analysis\"     = network <-> analysis\n",
-    "# \"network OR analysis\"  = network | analysis\n",
-    "# \"network AND analysis\" = network & analysis\n",
-    "\n",
-    "scan_gargantext(corpus_id, 'english', \"machine | learning & deep\")\n",
-    "\n",
-    "# \"network NOT analysis\" = @@ to_tsquery('network') !! to_tsquery('analysis')\n",
-    "# (need to change the function if not has to be used)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[('network analysis', 'network <-> analysis'),\n",
-       " ('big data AND something', '(big <-> data) & something')]"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Forces / Faiblesses de l'IMT\n",
-    "#             Hal Query                     Gargantext Query\n",
-    "queries = [ (\"network analysis\"         , \"network <-> analysis\" )\n",
-    "          , (\"big data AND something\"   , \"(big <-> data) & something\")\n",
-    "           ]\n",
-    "[(query[0], query[1]) for query in queries]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": [
-    "def imt_vs_hal(corpus_id, queryHal, queryGarg):\n",
-    "    return((scan_gargantext(corpus_id, 'english', queryGarg),        scan_hal(queryHal)))\n",
-    "    #return((scan_gargantext(corpus_id, 'english', queryGarg) *100 / scan_hal(queryHal)))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[(5, 10649), (0, 5)]"
-      ]
-     },
-     "execution_count": 20,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Then chart it to see your strenght and weakness!\n",
-    "[imt_vs_hal(corpus_id, query[0], query[1]) for query in queries]\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "deletable": true,
-    "editable": true
-   },
-   "source": [
-    "# Graph generation"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": [
-    "# TODO Cooccurrences optimization"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": [
-    "# TODO optimize the distributional distance"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "deletable": true,
-    "editable": true
-   },
-   "source": [
-    "# List Management"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": [
-    "# Front End add a check box to merge or to overwrite previous list"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": [
-    "# optimize the list merge"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.5.3rc1"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/gargantext/util/crawlers/HAL.py b/gargantext/util/crawlers/HAL.py
index b920b1e6..df929459 100644
--- a/gargantext/util/crawlers/HAL.py
+++ b/gargantext/util/crawlers/HAL.py
@@ -113,7 +113,7 @@ class HalCrawler(Crawler):
             msg = "Invalid sample size N = %i (max = %i)" % ( self.query_max
                                                             , QUERY_SIZE_N_MAX
                                                             )
-            print("ERROR (scrap: Multivac d/l ): " , msg)
+            print("ERROR (scrap: HAL d/l ): " , msg)
             self.query_max = QUERY_SIZE_N_MAX
         
         #for page in range(1, trunc(self.query_max / 100) + 2):
diff --git a/gargantext/util/parsers/HAL.py b/gargantext/util/parsers/HAL.py
index e92228cf..d869bbeb 100644
--- a/gargantext/util/parsers/HAL.py
+++ b/gargantext/util/parsers/HAL.py
@@ -11,17 +11,8 @@ from datetime import datetime
 import json
 
 class HalParser(Parser):
-
-    def parse(self, filebuf):
-        '''
-        parse :: FileBuff -> [Hyperdata]
-        '''
-        contents = filebuf.read().decode("UTF-8")
-        data = json.loads(contents)
+    def _parse(self, json_docs):
         
-        filebuf.close()
-        
-        json_docs = data
         hyperdata_list = []
         
         hyperdata_path = { "id"       : "isbn_s"
@@ -73,3 +64,13 @@ class HalParser(Parser):
                 hyperdata_list.append(hyperdata)
         
         return hyperdata_list
+
+    def parse(self, filebuf):
+        '''
+        parse :: FileBuff -> [Hyperdata]
+        Decode the UTF-8 JSON read from filebuf, close filebuf, then _parse it.
+        '''
+        contents = filebuf.read().decode("UTF-8")
+        filebuf.close()  # the pre-refactoring parse() closed the buffer too
+        return self._parse(json.loads(contents))
+
diff --git a/install/notebook.run b/install/notebook.run
index ac999fb5..1f6c6bca 100755
--- a/install/notebook.run
+++ b/install/notebook.run
@@ -16,7 +16,7 @@ sudo docker run \
         --env POSTGRES_HOST=localhost \
         -v /srv/gargantext:/srv/gargantext \
         -it garg-notebook:latest \
-        /bin/bash -c "/bin/su notebooks -c 'source /env_3-5/bin/activate && cd /srv/gargantext/ && jupyter notebook --port=8899 --ip=0.0.0.0 --no-browser'"
+        /bin/bash -c "/bin/su notebooks -c 'source /env_3-5/bin/activate && cd /home/notebooks && jupyter notebook --port=8899 --ip=0.0.0.0 --no-browser'"
 #        #&& jupyter nbextension enable --py widgetsnbextension --sys-prefix 
         #/bin/bash -c "/bin/su notebooks -c 'source /env_3-5/bin/activate && cd /srv/gargantext/ && jupyter notebook --port=8899 --ip=0.0.0.0 --no-browser --notebook-dir=/home/notebooks/'"
 
diff --git a/install/notebook/gargantext_notebook.py b/install/notebook/gargantext_notebook.py
index f24fef71..b3b564a8 100644
--- a/install/notebook/gargantext_notebook.py
+++ b/install/notebook/gargantext_notebook.py
@@ -22,7 +22,7 @@ application = get_wsgi_application()
 from gargantext.util.toolchain.main import parse_extract_indexhyperdata
 from gargantext.util.db import *
 from gargantext.models import Node
-
+from gargantext.util.toolchain.main import parse_extract_indexhyperdata
 from nltk.tokenize import wordpunct_tokenize
 
 from gargantext.models import *
@@ -56,9 +56,7 @@ def chart(docs, field):
     frame1 = pd.DataFrame(year_publis, columns=['Date', 'DateValue'], index=frame0.Date)
     return frame1
 
-
 from gargantext.util.crawlers.HAL import HalCrawler
-
 def scan_hal(request):
     hal = HalCrawler()
     return hal.scan_results(request)
@@ -73,3 +71,53 @@ def scan_gargantext(corpus_id, lang, request):
     return [i for i in connection.execute(query)][0][0]
     connection.close()
 
+
+def myProject_fromUrl(url):
+    """myProject_fromUrl :: String -> Project
+    Look up the Node whose id is the 5th "/"-separated field of url (index 4).
+    """
+    node_id = url.split("/")[4]
+    matches = session.query(Node).filter(Node.id == node_id)
+    return matches.first()
+
+
+def newCorpus(project, resourceName=11, name="Machine learning", query="LSTM"):
+    """
+    newCorpus :: Project -> Int -> String -> String -> Corpus
+
+    Create corpus `name` under `project`, store the HAL documents matching
+    `query` as its children, run parse_extract_indexhyperdata, return it.
+    """
+    from gargantext.util.parsers.HAL import HalParser  # hoisted out of the loop
+    # Honour the caller's name / resource type (both were hard-coded before).
+    corpus = project.add_child(name=name, typename='CORPUS')
+    corpus.hyperdata["resources"] = [{"extracted" : "true", "type" : resourceName}]
+    corpus.hyperdata["statuses"]  = [{"action" : "notebook", "complete" : "true"}]
+    # [TODO] Add information needed to get buttons on the Project view.
+    session.add(corpus)
+    session.commit()
+    print("Corpus \"%s\" in project \"%s\" created" % (name, project.name))
+
+    hal = HalCrawler()
+    max_result = hal.scan_results(query)
+    paging = 100
+    # `page` steps by `paging` (0, 100, 200, ...): it counts documents, not pages.
+    for page in range(0, max_result, paging):
+        docs = (hal._get(query, fromPage=page, count=paging)
+                     .get("response", {})
+                      .get("docs", [])
+               )
+        # [TODO] fix boilerplate for docs here
+        for doc in HalParser(docs)._parse(docs):
+            new_doc = corpus.add_child(name=doc["title"][:255], typename='DOCUMENT')
+            new_doc["hyperdata"] = doc
+            session.add(new_doc)
+        session.commit()  # one commit per batch rather than one per document
+        print("%s documents downloaded / %s." % (min(page + paging, max_result), max_result))
+
+    print("Extracting the ngrams")
+    parse_extract_indexhyperdata(corpus)
+    print("Corpus is ready to explore:")
+    print("http://imt.gargantext.org/projects/%s/corpora/%s/" % (project.id, corpus.id))
+    return corpus
+
-- 
2.21.0