Commit de3fe8db authored by Mathieu Rodic's avatar Mathieu Rodic

[GIT] Resolved conflicts in url.py

[CODE] Import paths are now relative instead of absolute in parsing/
parent f5443d84
__pycache__/
parsing/Taggers/treetagger/
.ipynb_checkpoints/
*.pyc
{
"metadata": {
"name": "",
"signature": "sha256:0e63832a6b33d476c8b284b72b0740bd9ade357e5ebb1f73bdc399bbd2824a16"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from node.models import Node, NodeType,\\\n",
" Project, Corpus, Document,\\\n",
" Ngram, Node_Ngram,\\\n",
" User, Language, ResourceType"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Node.objects.all().delete()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pycountry\n",
"\n",
"for language in pycountry.languages:\n",
" try:\n",
" implemented = 1 if language.alpha2 in ['en', 'fr'] else 0\n",
" Language(iso2=language.alpha2, iso3=language.terminology, fullname=language.name, implemented=implemented).save()\n",
" except:\n",
" pass"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 24
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"english = Language.objects.get(iso2='en')\n",
"french = Language.objects.get(iso2='fr')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 25
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"try:\n",
" me = User.objects.get(username='alexandre')\n",
"except:\n",
" me = User(username='alexandre')\n",
" me.save()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"try:\n",
" typeProject = NodeType.objects.get(name='Project')\n",
"except Exception as error:\n",
" print(error)\n",
" typeProject = NodeType(name='Project')\n",
" typeProject.save() \n",
"\n",
"try:\n",
" typeCorpus = NodeType.objects.get(name='Corpus')\n",
"except Exception as error:\n",
" print(error)\n",
" typeCorpus = NodeType(name='Corpus')\n",
" typeCorpus.save()\n",
" \n",
"try:\n",
" typeDoc = NodeType.objects.get(name='Document')\n",
"except Exception as error:\n",
" print(error)\n",
" typeDoc = NodeType(name='Document')\n",
" typeDoc.save()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"try:\n",
" typePubmed = ResourceType.objects.get(name='pubmed')\n",
" typeIsi = ResourceType.objects.get(name='isi')\n",
" typeRis = ResourceType.objects.get(name='ris')\n",
" typePresseFrench = ResourceType.objects.get(name='europress_french')\n",
" typePresseEnglish = ResourceType.objects.get(name='europress_english')\n",
"\n",
"except Exception as error:\n",
" print(error)\n",
" \n",
" typePubmed = ResourceType(name='pubmed')\n",
" typePubmed.save() \n",
" \n",
" typeIsi = ResourceType(name='isi')\n",
" typeIsi.save()\n",
" \n",
" typeRis = ResourceType(name='ris')\n",
" typeRis.save()\n",
" \n",
" typePresseFrench = ResourceType(name='europress_french')\n",
" typePresseFrench.save()\n",
" \n",
" typePresseEnglish = ResourceType(name='europress_english')\n",
" typePresseEnglish.save()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"get() returned more than one ResourceType -- it returned 2!\n"
]
}
],
"prompt_number": 33
},
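{
"cell_type": "markdown",
"metadata": {},
"source": [
"The try/except blocks above race between `get()` and `save()`, and the error output shows what happens once duplicate rows exist. A minimal sketch of the same setup with Django's `get_or_create` (assuming the duplicate rows have been cleaned up first):"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# sketch: same effect as the try/except blocks above, in one call per type\n",
"typePubmed, _ = ResourceType.objects.get_or_create(name='pubmed')\n",
"typeIsi, _ = ResourceType.objects.get_or_create(name='isi')\n",
"typeRis, _ = ResourceType.objects.get_or_create(name='ris')\n",
"typePresseFrench, _ = ResourceType.objects.get_or_create(name='europress_french')\n",
"typePresseEnglish, _ = ResourceType.objects.get_or_create(name='europress_english')"
],
"language": "python",
"metadata": {},
"outputs": []
},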
{
"cell_type": "code",
"collapsed": false,
"input": [
"Node.objects.all().delete()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 34
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"try:\n",
" project = Node.objects.get(name='Bees project')\n",
"except:\n",
" project = Node(name='Bees project', type=typeProject, user=me)\n",
" project.save()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 9
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Pubmed"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"try:\n",
" corpus_pubmed = Node.objects.get(name='PubMed corpus')\n",
"except:\n",
" corpus_pubmed = Node(parent=project, name='PubMed corpus', type=typeCorpus, user=me)\n",
" corpus_pubmed.save()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"corpus_pubmed.add_resource(file='/srv/gargantext_lib/data_samples/pubmedBig.zip', type=typePubmed)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 11,
"text": [
"<Resource: Resource object>"
]
}
],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#corpus_abeille.add_resource(file='/srv/gargantext_lib/data_samples/pubmed.zip', type=typePubmed)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 12
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"corpus_pubmed.parse_resources()\n",
"corpus_pubmed.children.count()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 13,
"text": [
"600"
]
}
],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"corpus_pubmed.id"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 14,
"text": [
"3131"
]
}
],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"corpus_pubmed.children.all().extract_ngrams(['title', 'abstract'])\n",
"#Node_Ngram.objects.filter(node=corpus_pubmed.children.all()[0]).count()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Warning: parsing empty text\n",
"...\n"
]
}
],
"prompt_number": 15
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### RIS"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"try:\n",
" corpus_ris = Node.objects.get(name='RIS corpus')\n",
"except:\n",
" corpus_ris = Node(parent=project, name='RIS corpus', type=typeCorpus, user=me)\n",
" corpus_ris.save()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"corpus_ris.add_resource(file='/srv/gargantext_lib/data_samples/risUnix.zip', type=typeRis)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"corpus_ris.parse_resources()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"corpus_ris.children.count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"corpus_ris.children.all()[15].metadata"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"corpus_ris.name = \"ZOTERO CORPUS (CIRDEM)\"\n",
"corpus_ris.save()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Science"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"try:\n",
" science = Node.objects.get(name='WOS corpus')\n",
"except:\n",
" science = Node(parent=project, name='WOS corpus', type=typeCorpus, user=me)\n",
" science.save()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"science.add_resource(file='/srv/gargantext_lib/data_samples/isi.zip', type=typeIsi)\n",
"science.parse_resources()\n",
"science.children.count()"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#[n.metadata for n in science.children.all()]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 10
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"science.children.all().extract_ngrams(['title',])\n",
"Node_Ngram.objects.filter(node=science.children.all()[0]).count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Press"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"try:\n",
" presse = Node.objects.get(name='Presse corpus')\n",
"except:\n",
" presse = Node(parent=project, name='Presse corpus', type=typeCorpus, user=me)\n",
" presse.save()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 12
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"presse.language = Language.objects.get(iso2='fr')\n",
"presse.save()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"presse.add_resource(file='/srv/gargantext_lib/data_samples/html/html_french.zip', type=typePresse)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 14,
"text": [
"<Resource: Resource object>"
]
}
],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"presse.parse_resources()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 15
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"presse.children.count()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 16,
"text": [
"88"
]
}
],
"prompt_number": 16
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for child in presse.children.all():\n",
" print(child.metadata['title'])\n",
" child.extract_ngrams(['title',])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Conf\u00e9d\u00e9ration paysanne : \" retrait imm\u00e9diat \" du R\u00e9gent\n",
"defaultdict(<class 'float'>, {'retrait imm\u00e9diat': 1.0, 'R\u00e9gent': 1.0, 'Conf\u00e9d\u00e9ration paysanne': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Gaucho, R\u00e9gent : la mobilisation continue\n",
"defaultdict(<class 'float'>, {'R\u00e9gent': 1.0, 'mobilisation continue': 1.0, 'Gaucho': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"GB/rapport: \"oui mais\" au ma\u00efs OGM, \"non mais\" pour colza et betterave\n",
"defaultdict(<class 'float'>, {'betterave': 1.0, 'ma\u00efs': 1.0, 'GB rapport': 1.0, 'colza': 1.0})"
]
},
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Rapport: \"oui mais\" au ma\u00efs OGM, \"non mais\" pour colza et betterave \u00e0 sucre\n",
"defaultdict(<class 'float'>, {'ma\u00efs': 1.0, 'betterave': 1.0, 'Rapport': 1.0, 'sucre': 1.0, 'colza': 1.0})"
]
},
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Rapport: \"oui mais\" au ma\u00efs OGM, \"non mais\" pour colza et betterave \u00e0 sucre\n",
"defaultdict(<class 'float'>, {'ma\u00efs': 1.0, 'betterave': 1.0, 'Rapport': 1.0, 'sucre': 1.0, 'colza': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La Conf\u00e9d\u00e9ration exige le retrait du R\u00e9gent\n",
"defaultdict(<class 'float'>, {'R\u00e9gent': 1.0, 'Conf\u00e9d\u00e9ration exige': 1.0, 'retrait': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La Conf\u00e9d\u00e9ration exige le retrait du R\u00e9gent\n",
"defaultdict(<class 'float'>, {'R\u00e9gent': 1.0, 'Conf\u00e9d\u00e9ration exige': 1.0, 'retrait': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La Conf\u00e9d\u00e9ration paysanne demande le retrait du R\u00e9gent\n",
"defaultdict(<class 'float'>, {'R\u00e9gent': 1.0, 'retrait': 1.0, 'Conf\u00e9d\u00e9ration paysanne demande': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Enqu\u00eate R\u00e9gent: BASF demande le statut de \"t\u00e9moin assist\u00e9\"\n",
"defaultdict(<class 'float'>, {'t\u00e9moin assist\u00e9': 1.0, 'statut': 1.0, 'Enqu\u00eate R\u00e9gent': 1.0, 'BASF demande': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Enqu\u00eate R\u00e9gent: BASF demande le statut de \"t\u00e9moin assist\u00e9\"\n",
"defaultdict(<class 'float'>, {'t\u00e9moin assist\u00e9': 1.0, 'statut': 1.0, 'Enqu\u00eate R\u00e9gent': 1.0, 'BASF demande': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Un film-enqu\u00eate\n",
"defaultdict(<class 'float'>, {'film-enqu\u00eate': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La Conf\u00e9d\u00e9ration Paysanne demande le \"retrait imm\u00e9diat\" du R\u00e9gent TS\n",
"defaultdict(<class 'float'>, {'retrait imm\u00e9diat': 1.0, 'R\u00e9gent TS': 1.0, 'Conf\u00e9d\u00e9ration Paysanne demande': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La Conf\u00e9d\u00e9ration Paysanne demande le \"retrait imm\u00e9diat\" du R\u00e9gent TS\n",
"defaultdict(<class 'float'>, {'retrait imm\u00e9diat': 1.0, 'R\u00e9gent TS': 1.0, 'Conf\u00e9d\u00e9ration Paysanne demande': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Insecticide R\u00e9gent TS: un juge souhaite enqu\u00eater sur la mise en danger d'autrui\n",
"defaultdict(<class 'float'>, {'juge souhaite enqu\u00eater': 1.0, 'mise': 1.0, 'Insecticide R\u00e9gent TS': 1.0, 'danger d': 1.0, 'autrui': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Sous-estimation des risques li\u00e9s \u00e0 l'utilisation du R\u00e9gent TS\n",
"defaultdict(<class 'float'>, {'l': 1.0, 'utilisation': 1.0, 'Sous-estimation': 1.0, 'risques li\u00e9s': 1.0, 'R\u00e9gent TS': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"L'affaire de l'insecticide rebondit\n",
"defaultdict(<class 'float'>, {'l': 1.0, 'affaire': 1.0, 'insecticide rebondit': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Coup d'oeil sur 2003 : les faits marquants\n",
"defaultdict(<class 'float'>, {'faits marquants': 1.0, 'Coup d': 1.0, 'oeil': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La Conf\u00e9d\u00e9ration exige le retrait du R\u00e9gent\n",
"defaultdict(<class 'float'>, {'R\u00e9gent': 1.0, 'Conf\u00e9d\u00e9ration exige': 1.0, 'retrait': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Le juge veut enqu\u00eater sur la mise en danger d'autrui\n",
"defaultdict(<class 'float'>, {'mise': 1.0, 'juge veut enqu\u00eater': 1.0, 'danger d': 1.0, 'autrui': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La Conf\u00e9d\u00e9ration paysanne demande le retrait du R\u00e9gent TS"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"defaultdict(<class 'float'>, {'R\u00e9gent TS': 1.0, 'retrait': 1.0, 'Conf\u00e9d\u00e9ration paysanne demande': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Maires anti-Gaucho devant le tribunal\n",
"defaultdict(<class 'float'>, {'Maires anti-Gaucho': 1.0, 'tribunal': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"D\u00e9chets m\u00e9nagers, abeilles, OGM... Nature Avenir fait le point\n",
"defaultdict(<class 'float'>, {'D\u00e9chets m\u00e9nagers': 1.0, 'point': 1.0, 'abeilles': 1.0, 'Nature Avenir fait': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"D\u00e9chets m\u00e9nagers, abeilles, OGM... Nature Avenir fait le point\n",
"defaultdict(<class 'float'>, {'D\u00e9chets m\u00e9nagers': 1.0, 'point': 1.0, 'abeilles': 1.0, 'Nature Avenir fait': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La banlieue par la bande\n",
"defaultdict(<class 'float'>, {'banlieue': 1.0, 'bande': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Insecticide R\u00e9gent TS .\n",
"defaultdict(<class 'float'>, {'Insecticide R\u00e9gent TS': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Chimie : une nouvelle expertise affirme la toxicit\u00e9 de l'insecticide R\u00e9gent TS\n",
"defaultdict(<class 'float'>, {'nouvelle expertise affirme': 1.0, 'insecticide R\u00e9gent TS': 1.0, 'Chimie': 1.0, 'l': 1.0, 'toxicit\u00e9': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"[Une expertise judiciaire affirme que les risques pour l'homme et pour l'environnement li\u00e9s \u00e0 l'utilisation de l'insecticide R\u00e9gent TS ont \u00e9t\u00e9 sous-estim\u00e9s.]\n",
"defaultdict(<class 'float'>, {'insecticide R\u00e9gent TS ont \u00e9t\u00e9 sous-estim\u00e9s': 1.0, 'expertise judiciaire affirme': 1.0, 'utilisation': 1.0, 'l': 4.0, 'environnement li\u00e9s': 1.0, 'risques': 1.0, 'homme': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Un insecticide \u00e0 risque\n",
"defaultdict(<class 'float'>, {'risque': 1.0, 'insecticide': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La cuv\u00e9e des miels 2003 est plus que rare, s\u00e9cheresse oblige\n",
"defaultdict(<class 'float'>, {'miels': 1.0, 's\u00e9cheresse oblige': 1.0, 'cuv\u00e9e': 1.0, 'rare': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Les semences Gaucho, des d\u00e9chets banals\u00a0?\n",
"defaultdict(<class 'float'>, {'semences Gaucho': 1.0, 'd\u00e9chets banals': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Inqui\u00e9tudes des apiculteurs finist\u00e9riens (Lire en page 8)\n",
"defaultdict(<class 'float'>, {'Inqui\u00e9tudes': 1.0, 'Lire': 1.0, 'apiculteurs finist\u00e9riens': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Inqui\u00e9tudes des apiculteurs finist\u00e9riens\n",
"defaultdict(<class 'float'>, {'Inqui\u00e9tudes': 1.0, 'apiculteurs finist\u00e9riens': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"2003 dans le r\u00e9tro\n",
"defaultdict(<class 'float'>, {'r\u00e9tro': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"A. David, apiculteur : \u00ab Rien ne change \u00bb\n",
"defaultdict(<class 'float'>, {'David': 1.0, 'Rien': 1.0, 'apiculteur': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"R\u00e9gent TS: selon une nouvelle expertise, les risques ont \u00e9t\u00e9 sous-estim\u00e9s\n",
"defaultdict(<class 'float'>, {'nouvelle expertise': 1.0, 'R\u00e9gent TS': 1.0, 'risques ont \u00e9t\u00e9 sous-estim\u00e9s': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"R\u00e9gent TS: selon une nouvelle expertise, les risques ont \u00e9t\u00e9 sous-estim\u00e9s"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"defaultdict(<class 'float'>, {'nouvelle expertise': 1.0, 'R\u00e9gent TS': 1.0, 'risques ont \u00e9t\u00e9 sous-estim\u00e9s': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"R\u00e9gent TS: les risques pour l'homme auraient \u00e9t\u00e9 sous-estim\u00e9s (expertise)\n",
"defaultdict(<class 'float'>, {'l': 1.0, 'expertise': 1.0, 'risques': 1.0, 'R\u00e9gent TS': 1.0, 'homme auraient \u00e9t\u00e9 sous-estim\u00e9s': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"R\u00e9gent TS: les risques pour l'homme auraient \u00e9t\u00e9 sous-estim\u00e9s (expertise)\n",
"defaultdict(<class 'float'>, {'l': 1.0, 'expertise': 1.0, 'risques': 1.0, 'R\u00e9gent TS': 1.0, 'homme auraient \u00e9t\u00e9 sous-estim\u00e9s': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"R\u00e9gent TS: un troisi\u00e8me expert \u00e9voque des risques pour la sant\u00e9 humaine\n",
"defaultdict(<class 'float'>, {'expert \u00e9voque': 1.0, 'risques': 1.0, 'R\u00e9gent TS': 1.0, 'sant\u00e9 humaine': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"R\u00e9gent TS: un troisi\u00e8me expert \u00e9voque des risques pour la sant\u00e9 humaine\n",
"defaultdict(<class 'float'>, {'expert \u00e9voque': 1.0, 'risques': 1.0, 'R\u00e9gent TS': 1.0, 'sant\u00e9 humaine': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Les vrais ennemis des abeilles\n",
"defaultdict(<class 'float'>, {'abeilles': 1.0, 'vrais ennemis': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Un expert d\u00e9nonce les dangers d'un pesticide\n",
"defaultdict(<class 'float'>, {'expert d\u00e9nonce': 1.0, 'pesticide': 1.0, 'dangers d': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Huissier ind\u00e9sirable : le maire de Murs-\u00c9rign\u00e9 \u00e9crit au ministre de l'Int\u00e9rieur\n",
"defaultdict(<class 'float'>, {'Murs-\u00c9rign\u00e9 \u00e9crit': 1.0, 'l': 1.0, 'Huissier ind\u00e9sirable': 1.0, 'Int\u00e9rieur': 1.0, 'maire': 1.0, 'ministre': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"R\u00e9gent TS : nouvelles accusations\n",
"defaultdict(<class 'float'>, {'nouvelles accusations': 1.0, 'R\u00e9gent TS': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Un cocotier dans votre salon ?\n",
"defaultdict(<class 'float'>, {'cocotier': 1.0, 'salon': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Pratiques commerciales: L'autre malbouffe\n",
"defaultdict(<class 'float'>, {'Pratiques commerciales': 1.0, 'autre malbouffe': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Quel avenir pour le XXIe si\u00e8cle ?"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"defaultdict(<class 'float'>, {'avenir': 1.0, 'si\u00e8cle': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Des abeilles, du miel et du pain d'\u00e9pice\n",
"defaultdict(<class 'float'>, {'pain d': 1.0, 'abeilles': 1.0, 'miel': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Abeilles et fipronil : un dossier \" empoisonnant \"\n",
"defaultdict(<class 'float'>, {'fipronil': 1.0, 'dossier': 1.0, 'Abeilles': 1.0, 'empoisonnant': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Des apiculteurs manifestent \u00e0 Angers contre \"une tentative d'intimidation\"\n",
"defaultdict(<class 'float'>, {'tentative d': 1.0, 'apiculteurs manifestent': 1.0, 'Angers': 1.0, 'intimidation': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Insecticides : manifestation apr\u00e8s \" l'intimidation \"\n",
"defaultdict(<class 'float'>, {'l': 1.0, 'manifestation': 1.0, 'Insecticides': 1.0, 'intimidation': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Apiculteurs : non \u00e0 l'atteinte aux libert\u00e9s"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"defaultdict(<class 'float'>, {'libert\u00e9s': 1.0, 'l': 1.0, 'Apiculteurs': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Une ruche politique... consensuelle\n",
"defaultdict(<class 'float'>, {'ruche politique': 1.0, 'consensuelle': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Les apiculteurs manifestent\n",
"defaultdict(<class 'float'>, {'apiculteurs manifestent': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"[Les apiculteurs locaux invitent la population vendredi 13 f\u00e9vrier \u00e0 20 heures \u00e0 la mairie pour assister \u00e0 une projection de cassettes vid\u00e9os sur la vie des abeilles et les cons\u00e9quences de l'utilisation de certains insecticides.]\n",
"defaultdict(<class 'float'>, {'vie': 1.0, 'abeilles': 1.0, 'heures': 1.0, 'l': 1.0, 'f\u00e9vrier': 1.0, 'cassettes vid\u00e9os': 1.0, 'insecticides': 1.0, 'population vendredi': 1.0, 'projection': 1.0, 'mairie': 1.0, 'utilisation': 1.0, 'cons\u00e9quences': 1.0, 'apiculteurs locaux invitent': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"\" Les abeilles du coche \""
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"defaultdict(<class 'float'>, {'abeilles': 1.0, 'coche': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Gaucho : une r\u00e9action de Philippe Bodard\n",
"defaultdict(<class 'float'>, {'Philippe Bodard': 1.0, 'r\u00e9action': 1.0, 'Gaucho': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Baisse de production des apiculteurs fran\u00e7ais\n",
"defaultdict(<class 'float'>, {'apiculteurs fran\u00e7ais': 1.0, 'production': 1.0, 'Baisse': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Une esp\u00e8ce en danger\n",
"defaultdict(<class 'float'>, {'danger': 1.0, 'esp\u00e8ce': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Progression du taux de mortalit\u00e9 chez les abeilles\n",
"defaultdict(<class 'float'>, {'mortalit\u00e9': 1.0, 'abeilles': 1.0, 'Progression': 1.0, 'taux': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Un huissier chez des apiculteurs Visite impromptue, vendredi, dans le Maine-et-Loire lors d'une r\u00e9union.\n",
"defaultdict(<class 'float'>, {'apiculteurs Visite impromptue': 1.0, 'huissier': 1.0, 'Maine-et-Loire': 1.0, 'vendredi': 1.0, 'd': 1.0, 'r\u00e9union': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Un huissier chez des apiculteurs Visite surprise \u00e0 Blaison-Gohier (49) lors d'une r\u00e9union sur les insecticides.\n",
"defaultdict(<class 'float'>, {'Blaison-Gohier': 1.0, 'huissier': 1.0, 'insecticides': 1.0, 'r\u00e9union': 1.0, 'apiculteurs Visite surprise': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Un huissier chez des apiculteurs Visite impromptue \u00e0 Blaison-Gohier, lors d'une r\u00e9union sur les insecticides.\n",
"defaultdict(<class 'float'>, {'Blaison-Gohier': 1.0, 'huissier': 1.0, 'insecticides': 1.0, 'apiculteurs Visite impromptue': 1.0, 'r\u00e9union': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Un huissier chez des apiculteurs Visite impromptue, pr\u00e8s d'Angers, lors d'une r\u00e9union sur les insecticides.\n",
"defaultdict(<class 'float'>, {'r\u00e9union': 1.0, 'huissier': 1.0, 'insecticides': 1.0, 'apiculteurs Visite impromptue': 1.0, 'Angers': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Intimidation d'un huissier apr\u00e8s une r\u00e9union d'information\n",
"defaultdict(<class 'float'>, {'Intimidation d': 1.0, 'huissier': 1.0, 'r\u00e9union d': 1.0, 'information': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Les apiculteurs interpellent les citoyens\n",
"defaultdict(<class 'float'>, {'citoyens': 1.0, 'apiculteurs interpellent': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Insecticides : \" T\u00e9moins g\u00eanants \", huissier g\u00ean\u00e9\n",
"defaultdict(<class 'float'>, {'T\u00e9moins g\u00eanants': 1.0, 'huissier g\u00ean\u00e9': 1.0, 'Insecticides': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Professeur Narbonne : \" Certaines personnes n'ont pas bien fait leur boulot \"\n",
"defaultdict(<class 'float'>, {'boulot': 1.0, 'personnes n': 1.0, 'Professeur Narbonne': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La Conf\u00e9d\u00e9ration interpelle l'agriculture raisonn\u00e9e"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"defaultdict(<class 'float'>, {'agriculture raisonn\u00e9e': 1.0, 'Conf\u00e9d\u00e9ration interpelle l': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La Conf\u00e9d\u00e9ration interpelle l'agriculture raisonn\u00e9e\n",
"defaultdict(<class 'float'>, {'agriculture raisonn\u00e9e': 1.0, 'Conf\u00e9d\u00e9ration interpelle l': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La Conf\u00e9d\u00e9ration interpelle l'agriculture raisonn\u00e9e\n",
"defaultdict(<class 'float'>, {'agriculture raisonn\u00e9e': 1.0, 'Conf\u00e9d\u00e9ration interpelle l': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La Conf\u00e9d\u00e9ration interpelle l'agriculture raisonn\u00e9e\n",
"defaultdict(<class 'float'>, {'agriculture raisonn\u00e9e': 1.0, 'Conf\u00e9d\u00e9ration interpelle l': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La Conf\u00e9d\u00e9ration interpelle l'agriculture raisonn\u00e9e\n",
"defaultdict(<class 'float'>, {'agriculture raisonn\u00e9e': 1.0, 'Conf\u00e9d\u00e9ration interpelle l': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La Conf\u00e9d\u00e9ration interpelle l'agriculture raisonn\u00e9e\n",
"defaultdict(<class 'float'>, {'agriculture raisonn\u00e9e': 1.0, 'Conf\u00e9d\u00e9ration interpelle l': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La Conf\u00e9d\u00e9ration interpelle l'agriculture raisonn\u00e9e\n",
"defaultdict(<class 'float'>, {'agriculture raisonn\u00e9e': 1.0, 'Conf\u00e9d\u00e9ration interpelle l': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Homologation bloqu\u00e9e pour le Regent .\n",
"defaultdict(<class 'float'>, {'Homologation bloqu\u00e9e': 1.0, 'Regent': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Insecticides : un film saisi pr\u00e8s d'Angers\n",
"defaultdict(<class 'float'>, {'film saisi': 1.0, 'Angers': 1.0, 'Insecticides': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Un huissier saisit un film sur des insecticides lors d'une r\u00e9union publique\n",
"defaultdict(<class 'float'>, {'huissier saisit': 1.0, 'film': 1.0, 'insecticides': 1.0, 'r\u00e9union publique': 1.0, 'd': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Les apiculteurs ont le bourdon\n",
"defaultdict(<class 'float'>, {'apiculteurs ont': 1.0, 'bourdon': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"R\u00e9gent : BASF demande \u00e0 \u00eatre t\u00e9moin assist\u00e9\n",
"defaultdict(<class 'float'>, {'R\u00e9gent': 1.0, '\u00eatre t\u00e9moin assist\u00e9': 1.0, 'BASF demande': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"R\u00e9gent : BASF demande \u00e0 \u00eatre t\u00e9moin assist\u00e9\n",
"defaultdict(<class 'float'>, {'R\u00e9gent': 1.0, '\u00eatre t\u00e9moin assist\u00e9': 1.0, 'BASF demande': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"R\u00e9gent : BASF demande \u00e0 \u00eatre t\u00e9moin assist\u00e9\n",
"defaultdict(<class 'float'>, {'R\u00e9gent': 1.0, '\u00eatre t\u00e9moin assist\u00e9': 1.0, 'BASF demande': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"R\u00e9gent : BASF demande \u00e0 \u00eatre t\u00e9moin assist\u00e9\n",
"defaultdict(<class 'float'>, {'R\u00e9gent': 1.0, '\u00eatre t\u00e9moin assist\u00e9': 1.0, 'BASF demande': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"BASF demande le statut de \" t\u00e9moin assist\u00e9 \"\n",
"defaultdict(<class 'float'>, {'t\u00e9moin assist\u00e9': 1.0, 'statut': 1.0, 'BASF demande': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Le Gaucho et le R\u00e9gent TS toujours en accusation\n",
"defaultdict(<class 'float'>, {'R\u00e9gent TS': 1.0, 'Gaucho': 1.0, 'accusation': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Attention, abeilles en danger"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"defaultdict(<class 'float'>, {'danger': 1.0, 'Attention': 1.0, 'abeilles': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"R\u00e9gent : BASF demande \u00e0 \u00eatre t\u00e9moin assist\u00e9\n",
"defaultdict(<class 'float'>, {'R\u00e9gent': 1.0, '\u00eatre t\u00e9moin assist\u00e9': 1.0, 'BASF demande': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La Conf\u00e9d\u00e9ration interpelle l'agriculture raisonn\u00e9e\n",
"defaultdict(<class 'float'>, {'agriculture raisonn\u00e9e': 1.0, 'Conf\u00e9d\u00e9ration interpelle l': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
}
],
"prompt_number": 17
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"firstchild = presse.children.first()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for n in Node_Ngram.objects.filter(node=firstchild):\n",
" print(n.ngram.terms)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"liste_ordered = collections.OrderedDict(sorted(liste.items()), key=lambda t: t[1])"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#liste_ordered"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cr\u00e9ation des Listes"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import collections"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"liste = collections.defaultdict(int)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"try:\n",
" whitelist_type = NodeType.objects.get(name='WhiteList')\n",
" blacklist_type = NodeType.objects.get(name='BlackList')\n",
"except:\n",
" whitelist_type = NodeType(name='WhiteList')\n",
" whitelist_type.save()\n",
" \n",
" blacklist_type = NodeType(name='BlackList')\n",
" blacklist_type.save()\n",
"\n",
"white_node = Node.objects.create(name='WhiteList Pubmed', user=me, parent=corpus_pubmed, type=whitelist_type)\n",
"black_node = Node.objects.create(name='BlackList Pubmed', user=me, parent=corpus_pubmed, type=blacklist_type)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Node_Ngram.objects.filter(node=white_node).count()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cr\u00e9ation de la white list"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"with transaction.atomic():\n",
" for node in presse.children.all():\n",
" for node_ngram in Node_Ngram.objects.filter(node=node):\n",
" if node_ngram.ngram.n > 1:\n",
" #liste[node_ngram.ngram.terms] += node_ngram.weight\n",
" Node_Ngram.objects.create(node=white_node, ngram=node_ngram.ngram, weight=1)"
],
"language": "python",
"metadata": {},
"outputs": []
},
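{
"cell_type": "markdown",
"metadata": {},
"source": [
"The loop above issues one INSERT per n-gram. A sketch of the same fill using `bulk_create`, which sends the links in a single batched query:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# sketch: batch the whitelist links instead of one INSERT per row\n",
"links = [Node_Ngram(node=white_node, ngram=node_ngram.ngram, weight=1)\n",
"         for node in presse.children.all()\n",
"         for node_ngram in Node_Ngram.objects.filter(node=node)\n",
"         if node_ngram.ngram.n > 1]\n",
"Node_Ngram.objects.bulk_create(links)"
],
"language": "python",
"metadata": {},
"outputs": []
},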
{
"cell_type": "code",
"collapsed": false,
"input": [
"white_node.pk"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Node_Ngram.objects.filter(node=white_node).count()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cr\u00e9ation de la black list"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"with transaction.atomic():\n",
" for node_ngram_object in Node_Ngram.objects.all()[101:150]:\n",
" Node_Ngram.objects.create(node=black_node, ngram=node_ngram_object.ngram, occurences=1)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Node_Ngram.objects.filter(node=black_node)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cr\u00e9ation des synonymes"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"syno_type = NodeType.objects.get(name='Synonyme')\n",
"syno_node = Node.objects.create(name='Syno Pubmed',\n",
" user=user, \n",
" parent=corpus, \n",
" type=syno_type)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"synonyme1, synonyme2 = Node_Ngram.objects.filter(node=white_node)[3:5]"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"NodeNgramNgram.objects.create(node=syno_node, ngramX=synonyme1.ngram, ngramY=synonyme2.ngram)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cooccurrence"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"white_node.children.count()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"black_node.pk"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"try:\n",
" cooc_type = NodeType.objects.get(name='Cooccurrence')\n",
"except:\n",
" cooc_type = NodeType(name='Cooccurrence')\n",
" cooc_type.save()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cooc = Node.objects.create(user=me,\\\n",
" parent=corpus_pubmed,\\\n",
" type=cooc_type,\\\n",
" name=\"Cooccurrences calcul Alpha\")"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cooc.pk"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"white_node.children.all().delete()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from django.db import connection\n",
"cursor = connection.cursor()\n",
"# LOCK TABLE documents_ngramtemporary IN EXCLUSIVE MODE;\n",
"query_string = \"\"\"\n",
"INSERT INTO node_nodengramngram (node_id, \"ngramX_id\", \"ngramY_id\", score)\n",
"\n",
"SELECT \n",
"%d as node_id, x.ngram_id, y.ngram_id, COUNT(*) AS score\n",
"\n",
"FROM\n",
"node_node_ngram AS x\n",
"\n",
"INNER JOIN \n",
"node_node_ngram AS y \n",
"ON x.node_id = y.node_id\n",
"\n",
"\n",
"WHERE\n",
"x.id in (select id from node_node_ngram WHERE node_id = %d )\n",
"AND\n",
"y.id in (select id from node_node_ngram WHERE node_id = %d )\n",
"AND\n",
"x.ngram_id <> y.ngram_id\n",
"\n",
"\n",
"GROUP BY\n",
"x.ngram_id, y.ngram_id\n",
"\n",
"HAVING count(*) > 1\n",
"\n",
"ORDER BY score\n",
"\n",
"LIMIT 300\n",
"\n",
" \"\"\" % (cooc.pk, white_node.pk, white_node.pk)\n",
"\n",
"cursor.execute(query_string)\n",
"\n",
"try:\n",
" while True:\n",
" row = cursor.fetchone()\n",
" if row is None:\n",
" break\n",
" print(row)\n",
"except:\n",
" pass"
],
"language": "python",
"metadata": {},
"outputs": []
},
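{
"cell_type": "markdown",
"metadata": {},
"source": [
"Interpolating the ids with `%` works here because they are integers, but letting the cursor bind the parameters is the safer habit. A sketch of the same cooccurrence query with placeholders (the `x.id IN (SELECT ...)` subqueries reduce to plain `node_id` filters):"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# sketch: same self-join cooccurrence count, with bound parameters\n",
"params_query = \"\"\"\n",
"INSERT INTO node_nodengramngram (node_id, \"ngramX_id\", \"ngramY_id\", score)\n",
"SELECT %s, x.ngram_id, y.ngram_id, COUNT(*) AS score\n",
"FROM node_node_ngram AS x\n",
"INNER JOIN node_node_ngram AS y ON x.node_id = y.node_id\n",
"WHERE x.node_id = %s AND y.node_id = %s AND x.ngram_id <> y.ngram_id\n",
"GROUP BY x.ngram_id, y.ngram_id\n",
"HAVING COUNT(*) > 1\n",
"ORDER BY score\n",
"LIMIT 300\n",
"\"\"\"\n",
"cursor.execute(params_query, [cooc.pk, white_node.pk, white_node.pk])"
],
"language": "python",
"metadata": {},
"outputs": []
},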
{
"cell_type": "code",
"collapsed": false,
"input": [
"from copy import copy\n",
"import numpy as np\n",
"import pandas as pd\n",
"import networkx as nx\n",
"from collections import defaultdict\n",
"from analysis.louvain import *\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"matrix = \"\""
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"matrix = defaultdict(lambda : defaultdict(float))\n",
"for cooccurrence in NodeNgramNgram.objects.filter(node=cooc):\n",
" if cooccurrence.score > 1 :\n",
" #print(x.ngramX.terms, x.ngramY.terms)\n",
" matrix[cooccurrence.ngramX.terms][cooccurrence.ngramY.terms] = cooccurrence.score\n",
" matrix[cooccurrence.ngramY.terms][cooccurrence.ngramX.terms] = cooccurrence.score"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df = pd.DataFrame(matrix).T.fillna(0)\n",
"x = copy(df.values)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"x = np.where((x.sum(axis=1) > x.shape[0] / 2), 0, x )\n",
"x = np.where((x.sum(axis=1) > x.shape[0] / 10), 0, x )"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"x = x / x.sum(axis=1)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"matrix_filtered = np.where(x > .4, 1, 0)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"matrix_filtered"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"G = nx.from_numpy_matrix(matrix_filtered)\n",
"G = nx.relabel_nodes(G, dict(enumerate(df.columns)))"
],
"language": "python",
"metadata": {},
"outputs": []
},
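{
"cell_type": "markdown",
"metadata": {},
"source": [
"A sketch for exporting the term graph as JSON, e.g. for a javascript client. Note that `json_graph.node_link_data` produces the dict (its counterpart `node_link_graph` reads one back); the target path is hypothetical:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import json\n",
"\n",
"from networkx.readwrite import json_graph\n",
"\n",
"# sketch: node_link_data returns a JSON-serializable dict of nodes and links\n",
"with open('/tmp/graph.json', 'w') as f:\n",
"    json.dump(json_graph.node_link_data(G), f)"
],
"language": "python",
"metadata": {},
"outputs": []
},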
{
"cell_type": "code",
"collapsed": false,
"input": [
"nx.draw(G, with_labels=True)\n",
"plt.show()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"partition = best_partition(G)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#partition"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pos = nx.spring_layout(G)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"count = 0.0\n",
"node_min = 3\n",
"for com in set(partition.values()) :\n",
" count = count + 1\n",
" list_nodes = [nodes for nodes in partition.keys() if partition[nodes] == com]\n",
" \n",
" if len(list_nodes) > node_min:\n",
" nx.draw_networkx_nodes(G, pos, list_nodes, node_size = 20, with_labels=True)#, node_color = str(count / size))\n",
" nx.draw_networkx_edges(G, pos, alpha=0.5)\n",
" plt.title(\"Clique \" + str(count))\n",
" \n",
" for node in list_nodes: \n",
" print(node)\n",
" plt.show()\n",
" print(\"-\" * 30)\n"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Asynchrone"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from celerytest.tasks import add"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"add."
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 3,
"text": [
"<AsyncResult: c7df5232-b80a-4dd4-b615-432a6fb206e4>"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from celerytest.tasks import Test"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"t = Test()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"res = t.addition.delay((2,2))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"res = t.addition.apply_async((2,2), countdown=2)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"res.get()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "addition() takes 2 positional arguments but 3 were given",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-5-8bb969b0b8af>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mres\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/celery/result.py\u001b[0m in \u001b[0;36mget\u001b[1;34m(self, timeout, propagate, interval, no_ack, follow_parents, EXCEPTION_STATES, PROPAGATE_STATES)\u001b[0m\n\u001b[0;32m 173\u001b[0m \u001b[0mstatus\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmeta\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'status'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 174\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mstatus\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mPROPAGATE_STATES\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mpropagate\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 175\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbackend\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexception_to_python\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmeta\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'result'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 176\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mstatus\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mEXCEPTION_STATES\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 177\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbackend\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexception_to_python\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmeta\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'result'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mTypeError\u001b[0m: addition() takes 2 positional arguments but 3 were given"
]
}
],
"prompt_number": 5
},
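{
"cell_type": "markdown",
"metadata": {},
"source": [
"The `TypeError` above is the args-unpacking pitfall: `apply_async((2, 2), countdown=2)` spreads the tuple into two positional arguments, whereas `delay((2, 2))` passed it as one. A sketch of both spellings, assuming `addition` expects a single tuple:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# sketch: apply_async takes an args *tuple*, so wrap the payload explicitly\n",
"res = t.addition.delay((2, 2))                             # addition(self, (2, 2))\n",
"res = t.addition.apply_async(args=((2, 2),), countdown=2)  # same call, delayed 2s\n",
"res.get()"
],
"language": "python",
"metadata": {},
"outputs": []
},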
{
"cell_type": "code",
"collapsed": false,
"input": [
"from celery.contrib.methods import current_app"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"app."
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 6,
"text": [
"False"
]
}
],
"prompt_number": 6
}
],
"metadata": {}
}
]
}
\ No newline at end of file
{
"metadata": {
"name": "",
"signature": "sha256:01c37f613503c408d979ba9eb9172cbd9b6b3be2ff0d7d35089d705cebc989c2"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from node.models import Node, NodeType,\\\n",
" Project, Corpus, Document,\\\n",
" Ngram, Node_Ngram,\\\n",
" User, Language, ResourceType\n",
" \n",
"from parsing.Caches import NgramsCache\n",
" \n",
"from django.db import connection\n",
"cursor = connection.cursor()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"me = User.objects.get(username='alexandre')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import networkx as nx\n",
"from networkx.readwrite import json_graph"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import csv"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"help(csv.writer)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Help on built-in function writer in module _csv:\n",
"\n",
"writer(...)\n",
" csv_writer = csv.writer(fileobj [, dialect='excel']\n",
" [optional keyword args])\n",
" for row in sequence:\n",
" csv_writer.writerow(row)\n",
" \n",
" [or]\n",
" \n",
" csv_writer = csv.writer(fileobj [, dialect='excel']\n",
" [optional keyword args])\n",
" csv_writer.writerows(rows)\n",
" \n",
" The \"fileobj\" argument can be any object that supports the file API.\n",
"\n"
]
}
],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"type(x)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 52,
"text": [
"_io.TextIOWrapper"
]
}
],
"prompt_number": 52
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"file = open('/tmp/test.graph', 'w')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stderr",
"text": [
"-c:1: ResourceWarning: unclosed file <_io.TextIOWrapper name='/tmp/test.graph' mode='w' encoding='UTF-8'>\n",
"\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
"WARNING:py.warnings:-c:1: ResourceWarning: unclosed file <_io.TextIOWrapper name='/tmp/test.graph' mode='w' encoding='UTF-8'>\n",
"\n"
]
}
],
"prompt_number": 42
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"file.write('ici il fait beau')\n",
"file.close()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 46
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"G = nx.complete_graph(30)\n",
"f = open(\"graph.json\", \"w\")\n",
"f.write(json_graph.node_link_graph(G))\n",
"f.close()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stderr",
"text": [
"-c:2: ResourceWarning: unclosed file <_io.TextIOWrapper name='graph.json' mode='w' encoding='UTF-8'>\n",
"\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
"WARNING:py.warnings:-c:2: ResourceWarning: unclosed file <_io.TextIOWrapper name='graph.json' mode='w' encoding='UTF-8'>\n",
"\n"
]
},
{
"ename": "AttributeError",
"evalue": "'Graph' object has no attribute 'get'",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-17-7d4aa550fd32>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[0mG\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcomplete_graph\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m30\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[0mf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mopen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"graph.json\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"w\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0mf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mjson_graph\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnode_link_graph\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mG\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/networkx/readwrite/json_graph/node_link.py\u001b[0m in \u001b[0;36mnode_link_graph\u001b[1;34m(data, directed, multigraph, attrs)\u001b[0m\n\u001b[0;32m 134\u001b[0m \u001b[0mnode_link_data\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0madjacency_data\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtree_data\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 135\u001b[0m \"\"\"\n\u001b[1;32m--> 136\u001b[1;33m \u001b[0mmultigraph\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'multigraph'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmultigraph\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 137\u001b[0m \u001b[0mdirected\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'directed'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdirected\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 138\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mmultigraph\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mAttributeError\u001b[0m: 'Graph' object has no attribute 'get'"
]
}
],
"prompt_number": 17
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"typeCorpus = NodeType.objects.get(name='Corpus')\n",
"for corpus in Node.objects.filter(type=typeCorpus):\n",
" print(\"#%d - %s\" % (corpus.id, corpus))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"#7 - Presse corpus\n"
]
}
],
"prompt_number": 33
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"try:\n",
" me = User.objects.get(username='alexandre')\n",
"except:\n",
" me = User(username='alexandre')\n",
" me.save()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 34
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#\u00a0corpus = Node.objects.filter(type=typeCorpus).first()\n",
"corpus = Node.objects.get(id=13064)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"Occurences"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"query_date = \"\"\"\n",
" SELECT\n",
" id,\n",
" metadata -> 'publication_year' as year,\n",
" metadata -> 'publication_month' as month, \n",
" metadata -> 'publication_day' as day,\n",
" metadata -> 'title',\n",
" FROM\n",
" node_node AS n\n",
" WHERE\n",
" n.parent_id = %d\n",
" ORDER BY\n",
" year, month, day ASC\n",
" LIMIT\n",
" 20\n",
" OFFSET\n",
" %d\n",
"\"\"\" % (corpus.id, 0)\n",
"\n",
"cursor.execute(query_date)\n",
"\n",
"while True:\n",
" row = cursor.fetchone()\n",
" if row is None:\n",
" break\n",
" print(row)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"(13066, '1954', '11', '18', 'TOXICITY OF PESTICIDE DUSTS TO HONEYBEES', None)\n",
"(13065, '1954', '11', '18', 'EQUIPMENT AND TECHNIQUE USED IN LABORATORY EVALUATION OF PESTICIDE DUSTS IN TOXICOLOGICAL STUDIES WITH HONEYBEES', None)\n",
"(13067, '1958', '11', '18', 'TOXICITY OF PESTICIDES TO HONEY BEES IN LABORATORY AND FIELD TESTS IN SOUTHERN CALIFORNIA, 1955-1956', None)\n",
"(13068, '1959', '11', '18', 'THE EFFECTS OF FIELD APPLICATIONS OF SOME OF THE NEWER PESTICIDES ON HONEY BEES', None)\n",
"(13069, '1968', '11', '18', 'PROTECTING HONEYBEES FROM PESTICIDES', None)\n",
"(13071, '1969', '11', '18', 'PESTICIDE TOXICITY AND HONEY BEES', None)\n",
"(13070, '1969', '11', '18', 'POLLEN GATHERING OF HONEY BEES REDUCED BY PESTICIDE SPRAYS', None)\n",
"(13072, '1971', '11', '18', 'NEWER PESTICIDES DONT HARM ENVIRONMENT, BUT WHERE HAVE ALL BEES GONE', None)\n",
"(13075, '1971', '11', '18', 'HONEYBEES, PESTICIDES AND LAW .3.', None)\n",
"(13074, '1971', '11', '18', 'HONEYBEES, PESTICIDES AND LAW .2.', None)\n",
"(13073, '1971', '11', '18', 'HONEYBEES, PESTICIDES AND LAW .1.', None)\n",
"(13076, '1972', '11', '18', 'RICE FIELD MOSQUITO-CONTROL STUDIES WITH LOW VOLUME DURSBAN SPRAYS IN COLUSA COUNTY, CALIFORNIA .5. EFFECTS UPON HONEY BEES', None)\n",
"(13078, '1974', '11', '18', 'MICROSOMAL OXIDASES IN HONEY BEE, APIS-MELLIFERA (L)', None)\n",
"(13077, '1974', '11', '18', 'ISOLATED HONEY BEE ABDOMENS FOR MONITORING EFFECTS OF STRESS IN AMERICAN COCKROACH', None)\n",
"(13079, '1975', '11', '18', 'INHIBITOR OF MICROSOMAL OXIDATION FROM GUT TISSUES OF HONEY BEE (APIS-MELLIFERA)', None)\n",
"(13080, '1975', '11', '18', 'REPELLENT ADDITIVES TO REDUCE PESTICIDE HAZARDS TO HONEY BEES HYMENOPTERA-APIDAE, APIS-MELLIFERA-L - FIELD-TESTS', None)\n",
"(13081, '1975', '11', '18', 'HONEYBEE ABDOMEN ASSAYS OF HEMOLYMPH FROM STRESSED AND EXTERNALLY POISONED AMERICAN COCKROACHES', None)\n",
"(13082, '1976', '11', '18', 'PROBLEM OF PESTICIDES NOT DANGEROUS TO BEES', None)\n",
"(13084, '1977', '11', '18', 'EFFECT OF SOME PESTICIDES ON A SOLITARY BEE (MEGACHILE-PACIFICA-PANZ) - (HYMENOPTERA, MEGACHILIDAE)', None)\n",
"(13085, '1977', '11', '18', 'METHOD FOR TESTING PESTICIDE TOXICITY WHICH IS SUITABLE FOR SOLITARY BEES AND ESPECIALLY FOR MEGACHILE-PACIFICA-PANZ - (HYMENOPTERA, MEGACHILIDAE)', None)\n"
]
}
],
"prompt_number": 35
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"query_date = \"\"\"\n",
" SELECT\n",
" metadata -> 'publication_year' as year,\n",
" metadata -> 'publication_month' as month, \n",
" metadata -> 'publication_day' as day,\n",
" COUNT(*)\n",
" FROM\n",
" node_node AS n\n",
" WHERE\n",
" n.parent_id = %d\n",
" GROUP BY\n",
" day, month, year\n",
" ORDER BY\n",
" year, month, day ASC\n",
" LIMIT\n",
" 20\n",
" OFFSET\n",
" %d\n",
"\"\"\" % (corpus.id, 0)\n",
"\n",
"cursor.execute(query_date)\n",
"\n",
"while True:\n",
" row = cursor.fetchone()\n",
" if row is None:\n",
" break\n",
" print('\\'' + row[0] + '/' + row[1] + '/' + row[2] + '\\'' \n",
" + ',' + '\\'' + str(row[3]) + '\\'' )"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"'1954/11/18','2'\n",
"'1958/11/18','1'\n",
"'1959/11/18','1'\n",
"'1968/11/18','1'\n",
"'1969/11/18','2'\n",
"'1971/11/18','4'\n",
"'1972/11/18','1'\n",
"'1974/11/18','2'\n",
"'1975/11/18','3'\n",
"'1976/11/18','1'\n",
"'1977/11/18','6'\n",
"'1978/11/18','11'\n",
"'1979/11/18','9'\n",
"'1980/11/18','6'\n",
"'1981/11/18','4'\n",
"'1982/11/18','7'\n",
"'1983/11/18','14'\n",
"'1984/11/18','17'\n",
"'1985/11/18','18'\n",
"'1986/02/21','1'\n"
]
}
],
"prompt_number": 28
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cursor.execute(\"\"\"\n",
" SELECT\n",
" COUNT(*) AS occurrences,\n",
" ngX.terms\n",
" FROM\n",
" node_node AS n\n",
" INNER JOIN\n",
" node_node_ngram AS nngX ON nngX.node_id = n.id\n",
" INNER JOIN\n",
" node_ngram AS ngX ON ngX.id = nngX.ngram_id\n",
" WHERE\n",
" n.parent_id = %s\n",
" AND\n",
" ngX.n >= 2\n",
" GROUP BY\n",
" ngX.terms\n",
" Having\n",
" COUNT(*) > 7\n",
" ORDER BY\n",
" occurrences DESC\n",
" LIMIT\n",
" 100\n",
" \n",
"\"\"\", [corpus.id])\n",
"\n",
"while True:\n",
" row = cursor.fetchone()\n",
" if row is None:\n",
" break\n",
" print(row)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"(138, 'honey bees')\n",
"(132, 'apis mellifera')\n",
"(69, 'honey bee')\n",
"(66, 'apis mellifera l')\n",
"(45, 'pesticide residues')\n",
"(39, 'gas chromatography')\n",
"(36, 'varroa destructor')\n",
"(36, 'honey bee colonies')\n",
"(30, 'sublethal effects')\n",
"(27, 'apidae )')\n",
"(21, 'neonicotinoid insecticides')\n",
"(21, 'honey bee ( hymenoptera')\n",
"(18, 'bee products')\n",
"(18, 'megachile rotundata')\n",
"(18, 'solid-phase extraction')\n",
"(18, 'simultaneous determination')\n",
"(18, 'mass spectrometric')\n",
"(15, 'case study')\n",
"(15, 'honey samples')\n",
"(15, 'liquid chromatography')\n",
"(15, 'high performance liquid chromatography')\n",
"(15, 'varroa mites')\n",
"(12, 'organochlorine pesticides')\n",
"(12, 'gas chromatography-mass spectrometry')\n",
"(12, 'liquid chromatography-mass spectrometry')\n",
"(12, 'colony health')\n",
"(12, 'gas chromatographic')\n",
"(12, 'colony collapse disorder')\n",
"(12, 'bumble bees')\n",
"(12, 'varroa jacobsoni')\n",
"(9, 'chemiluminescent elisa')\n",
"(9, 'diversionary plantings for reduction of pesticide related bee mortality')\n",
"(9, 'pesticides and law')\n",
"(9, 'plant protection products')\n",
"(9, 'nomia melanderi')\n",
"(9, 'electron-capture detection')\n",
"(9, 'managed pollinator cap coordinated agricultural project a national research')\n",
"(9, 'apis florea f')\n",
"(9, 'solid-phase microextraction')\n",
"(9, 'extension initiative')\n",
"(9, 'crop pollination')\n",
"(9, 'non-apis bees')\n",
"(9, 'honey bees ( apis mellifera')\n",
"(9, 'liquid chromatography-tandem mass spectrometry')\n",
"(9, 'bee pollen')\n",
"(9, 'foraging behavior')\n",
"(9, 'biological control')\n",
"(9, 'nosema ceranae')\n",
"(9, 'organophosphorus pesticides')\n",
"(9, 'field conditions')\n",
"(9, 'honey bee apis mellifera l')\n",
"(9, 'laboratory tests')\n",
"(9, 'beauveria bassiana')\n",
"(9, 'comparative toxicity')\n",
"(9, 'high levels')\n",
"(9, 'pesticide exposure')\n",
"(9, 'fluvalinate residues')\n",
"(9, 'insecticide residues')\n",
"(9, 'osmia lignaria')\n",
"(9, 'bombus impatiens')\n",
"(9, 'honey bee health')\n",
"(9, 'agricultural landscape')\n",
"(9, 'dispersive liquid-liquid microextraction')\n",
"(9, 'matrix solid-phase dispersion')\n"
]
}
],
"prompt_number": 28
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"Cooccurrences par ann\u00e9e"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cursor.execute(\"\"\"\n",
" SELECT\n",
" COUNT(*) AS occurrences,\n",
" n.metadata->'publication_year' AS year,\n",
" ngX.terms\n",
" FROM\n",
" node_node AS n\n",
" INNER JOIN\n",
" node_node_ngram AS nngX ON nngX.node_id = n.id\n",
" INNER JOIN\n",
" node_ngram AS ngX ON ngX.id = nngX.ngram_id\n",
" WHERE\n",
" n.parent_id = %s\n",
" GROUP BY\n",
" terms,\n",
" year\n",
" ORDER BY\n",
" occurrences DESC\n",
" LIMIT\n",
" 20\n",
"\"\"\", [corpus.id])\n",
"\n",
"while True:\n",
" row = cursor.fetchone()\n",
" if row is None:\n",
" break\n",
" print(row)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"(24, '2014', 'patients')\n",
"(22, '2005', 'patients')\n",
"(18, '2005', 'study')\n",
"(15, '2014', 'voice')\n",
"(14, '2002', 'disease')\n",
"(14, '2013', 'patients')\n",
"(14, '2006', 'study')\n",
"(13, '2014', 'treatment')\n",
"(12, '2011', 'patients')\n",
"(12, '2004', 'voice')\n",
"(12, '2012', 'patients')\n",
"(12, '2003', 'patients')\n",
"(12, '2005', 'voice')\n",
"(11, '2002', 'patients')\n",
"(11, '2014', 'study')\n",
"(10, '2007', 'patients')\n",
"(10, '2006', 'patients')\n",
"(10, '2004', 'study')\n",
"(10, '2001', 'patients')\n",
"(10, '2014', 'phase')\n"
]
}
],
"prompt_number": 105
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"Cr\u00e9ation d'une liste de synonymes"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"ngramsCache = NgramsCache(Language.objects.get(iso2='fr'))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 17
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"synonymePairs = [\n",
" ['danger', 'risques'],\n",
" ['risque', 'risques'],\n",
" ['R\u00e9gent', 'R\u00e9gent TS']\n",
"]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 18
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"try:\n",
" typeSynonyme = NodeType.objects.get(name='Synonyme')\n",
"except:\n",
" typeSynonyme = NodeType(name='Synonyme')\n",
" typeSynonyme.save()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 19
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"listSynonymes = Node(name='Syno abeilles', type=typeSynonyme, user=me)\n",
"listSynonymes.save()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 22
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for synonymePair in synonymePairs:\n",
" NodeNgramNgram(\n",
" ngramx = ngramsCache[synonymePair[0]],\n",
" ngramy = ngramsCache[synonymePair[1]],\n",
" node = listSynonymes,\n",
" score = 1.\n",
" ).save()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 23
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"listSynonymes.id"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 24,
"text": [
"61297"
]
}
],
"prompt_number": 24
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"Occurrences avec synonymes"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"'''cursor.execute(\"\"\"\n",
" SELECT\n",
" COUNT(*) AS occurrences,\n",
" ngx.terms\n",
" FROM\n",
" node_node AS n\n",
" INNER JOIN\n",
" node_node_ngram AS nngX ON nngX.node_id = n.id\n",
" INNER JOIN\n",
" node_ngram AS ngX ON ngX.id = nngX.ngram_id\n",
" WHERE\n",
" n.parent_id = %s\n",
" GROUP BY\n",
" ngX.terms\n",
" ORDER BY\n",
" occurrences DESC\n",
" LIMIT\n",
" 20\n",
"\"\"\")'''\n",
"cursor.execute(\"\"\"\n",
" SELECT\n",
" n.id\n",
" FROM\n",
" node_node AS n\n",
" INNER JOIN\n",
" node_node_ngram AS nngx ON nngx.node_id = n.id\n",
" INNER JOIN\n",
" node_nodengramngram AS nngng ON nngng.ngramx_id = nngx.ngram_id\n",
" INNER JOIN\n",
" node_node_ngram AS nngy ON nngy.id = nngng.ngramy_id\n",
" WHERE\n",
" n.parent_id = %s\n",
"\"\"\", [corpus.id])\n",
"#\u00a0\"\"\" % [listSynonymes.id])\n",
"\n",
"while True:\n",
" row = cursor.fetchone()\n",
" if row is None:\n",
" break\n",
" print(row)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 26
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"Cooccurrences"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cursor.execute(\"\"\"\n",
" SELECT\n",
" COUNT(*) AS cooccurrences,\n",
" ngX.terms,\n",
" ngY.terms\n",
" FROM\n",
" node_node AS n\n",
" \n",
" INNER JOIN\n",
" node_node_ngram AS nngX ON nngX.node_id = n.id\n",
" INNER JOIN\n",
" node_ngram AS ngX ON ngX.id = nngX.ngram_id\n",
" \n",
" INNER JOIN\n",
" node_node_ngram AS nngY ON nngY.node_id = n.id\n",
" INNER JOIN\n",
" node_ngram AS ngY ON ngY.id = nngY.ngram_id\n",
" \n",
" WHERE\n",
" n.parent_id = %s\n",
" AND\n",
" nngX.ngram_id in (select id from node_node_ngram WHERE node_id = 61298 )\n",
" AND\n",
" nngY.ngram_id in (select id from node_node_ngram WHERE node_id = 61298 )\n",
" AND\n",
" nngX.ngram_id <> nngY.ngram_id\n",
" \n",
" GROUP BY\n",
" ngX.id,\n",
" ngX.terms,\n",
" ngY.id,\n",
" ngY.terms\n",
" ORDER BY\n",
" cooccurrences DESC\n",
" LIMIT\n",
" 200\n",
"\"\"\", [corpus.id])\n",
"\n",
"while True:\n",
" row = cursor.fetchone()\n",
" if row is None:\n",
" break\n",
" print(row)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 15
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cursor.execute(\"\"\"\n",
" SELECT\n",
" COUNT(*) AS cooccurrences,\n",
" ngX.terms,\n",
" ngY.terms\n",
" FROM\n",
" node_node AS n\n",
" \n",
" INNER JOIN\n",
" node_node_ngram AS nngX ON nngX.node_id = n.id\n",
" INNER JOIN\n",
" node_ngram AS ngX ON ngX.id = nngX.ngram_id\n",
" \n",
" INNER JOIN\n",
" node_node_ngram AS nngY ON nngY.node_id = n.id\n",
" INNER JOIN\n",
" node_ngram AS ngY ON ngY.id = nngY.ngram_id\n",
"\n",
" WHERE\n",
" n.parent_id = %s\n",
" AND\n",
" nngX.ngram_id <> nngY.ngram_id\n",
" \n",
" GROUP BY\n",
" ngX.id,\n",
" ngX.terms,\n",
" ngY.id,\n",
" ngY.terms\n",
" ORDER BY\n",
" cooccurrences DESC\n",
" LIMIT\n",
" 20\n",
"\"\"\", [corpus.id])\n",
"\n",
"while True:\n",
" row = cursor.fetchone()\n",
" if row is None:\n",
" break\n",
" print(row)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stderr",
"text": [
"ERROR: An unexpected error occurred while tokenizing input\n",
"The following traceback may be corrupted or invalid\n",
"The error message is: ('EOF in multi-line string', (1, 0))\n",
"\n"
]
},
{
"ename": "OperationalError",
"evalue": "arr\u00eat des connexions suite \u00e0 la demande de l'administrateur\nSSL connection has been closed unexpectedly\n",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mOperationalError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-11-752593da5735>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 31\u001b[0m \u001b[0mLIMIT\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 32\u001b[0m \u001b[1;36m20\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 33\u001b[1;33m \"\"\", [corpus.id])\n\u001b[0m\u001b[0;32m 34\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 35\u001b[0m \u001b[1;32mwhile\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/backends/util.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, sql, params)\u001b[0m\n\u001b[0;32m 67\u001b[0m \u001b[0mstart\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 68\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 69\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0msuper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mCursorDebugWrapper\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 70\u001b[0m \u001b[1;32mfinally\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 71\u001b[0m \u001b[0mstop\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/backends/util.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, sql, params)\u001b[0m\n\u001b[0;32m 51\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 52\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 53\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 54\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 55\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mexecutemany\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparam_list\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/utils.py\u001b[0m in \u001b[0;36m__exit__\u001b[1;34m(self, exc_type, exc_value, traceback)\u001b[0m\n\u001b[0;32m 97\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mdj_exc_type\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32min\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mDataError\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mIntegrityError\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 98\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwrapper\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0merrors_occurred\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 99\u001b[1;33m \u001b[0msix\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreraise\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdj_exc_type\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdj_exc_value\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtraceback\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 100\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 101\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/utils/six.py\u001b[0m in \u001b[0;36mreraise\u001b[1;34m(tp, value, tb)\u001b[0m\n\u001b[0;32m 547\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mreraise\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtp\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtb\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 548\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__traceback__\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mtb\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 549\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwith_traceback\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtb\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 550\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 551\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/backends/util.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, sql, params)\u001b[0m\n\u001b[0;32m 51\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 52\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 53\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 54\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 55\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mexecutemany\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparam_list\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mOperationalError\u001b[0m: arr\u00eat des connexions suite \u00e0 la demande de l'administrateur\nSSL connection has been closed unexpectedly\n"
]
}
],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
\ No newline at end of file
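On the second notebook's AttributeError ("'Graph' object has no attribute 'get'"): json_graph.node_link_graph goes in the opposite direction, parsing node-link data into a graph; serializing a graph is json_graph.node_link_data, and the result is a dict that still needs json.dump rather than file.write. A corrected sketch, assuming networkx 1.9 as pinned in requirements.txt:

```python
import json

import networkx as nx
from networkx.readwrite import json_graph

G = nx.complete_graph(30)
data = json_graph.node_link_data(G)  # graph -> dict; node_link_graph is dict -> graph
with open('graph.json', 'w') as f:   # context manager also avoids the
    json.dump(data, f)               # ResourceWarning seen in the notebook
```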
......@@ -11,7 +11,7 @@ def create_blacklist(user, corpus):
def create_synonymes(user, corpus):
pass
def create_whitelist(user, corpus):
def create_whitelist(user, corpus, size=100):
cursor = connection.cursor()
try:
......@@ -51,7 +51,7 @@ def create_whitelist(user, corpus):
AND
n.type_id = %d
AND
ngX.n >= 1
ngX.n >= 2
GROUP BY
ngX.id
......@@ -60,16 +60,16 @@ def create_whitelist(user, corpus):
ORDER BY
occurrences DESC
LIMIT
100
%d
;
""" % (white_list.id, corpus.id, type_document.id)
""" % (white_list.id, corpus.id, type_document.id, size)
cursor.execute(query_whitelist)
return white_list
#def create_cooc(user, corpus, whitelist, blacklist, synonymes):
def create_cooc(user=None, corpus=None, whitelist=None):
def create_cooc(user=None, corpus=None, whitelist=None, size=150):
cursor = connection.cursor()
try:
......@@ -127,11 +127,117 @@ def create_cooc(user=None, corpus=None, whitelist=None):
ORDER BY
score DESC
LIMIT
150
""" % (cooc.pk, corpus.id, whitelist.id, whitelist.id)
%d
""" % (cooc.pk, corpus.id, whitelist.id, whitelist.id, size)
cursor.execute(query_cooc)
return cooc
def get_cooc(request=None, corpus_id=None, cooc_id=None, type="node_link"):
import pandas as pd
from copy import copy
import numpy as np
import networkx as nx
from networkx.readwrite import json_graph
from gargantext_web.api import JsonHttpResponse
from analysis.louvain import best_partition
matrix = defaultdict(lambda : defaultdict(float))
labels = dict()
weight = dict()
corpus = Node.objects.get(id=corpus_id)
type_cooc = NodeType.objects.get(name="Cooccurrence")
if Node.objects.filter(type=type_cooc, parent=corpus).first() is None:
print("Coocurrences do not exist yet, create it.")
if type == "node_link":
n = 150
elif type == "adjacency":
n = 50
whitelist = create_whitelist(request.user, corpus, size=n)
cooccurrence_node = create_cooc(user=request.user, corpus=corpus, whitelist=whitelist, size=n)
print(cooccurrence_node.id, "Cooc created")
else:
cooccurrence_node = Node.objects.filter(type=type_cooc, parent=corpus).first()
for cooccurrence in NodeNgramNgram.objects.filter(node=cooccurrence_node):
labels[cooccurrence.ngramx.id] = cooccurrence.ngramx.terms
labels[cooccurrence.ngramy.id] = cooccurrence.ngramy.terms
matrix[cooccurrence.ngramx.id][cooccurrence.ngramy.id] = cooccurrence.score
matrix[cooccurrence.ngramy.id][cooccurrence.ngramx.id] = cooccurrence.score
weight[cooccurrence.ngramy.terms] = weight.get(cooccurrence.ngramy.terms, 0) + cooccurrence.score
weight[cooccurrence.ngramx.terms] = weight.get(cooccurrence.ngramx.terms, 0) + cooccurrence.score
df = pd.DataFrame(matrix).T.fillna(0)
x = copy(df.values)
x = x / x.sum(axis=1)
# Removing unconnected nodes
threshold = min(x.max(axis=1))
matrix_filtered = np.where(x >= threshold, 1, 0)
#matrix_filtered = np.where(x > threshold, x, 0)
#matrix_filtered = matrix_filtered.resize((90,90))
G = nx.from_numpy_matrix(matrix_filtered)
G = nx.relabel_nodes(G, dict(enumerate([ labels[label] for label in list(df.columns)])))
#G = nx.relabel_nodes(G, dict(enumerate(df.columns)))
# Removing too connected nodes (find automatic way to do it)
# outdeg = G.degree()
# to_remove = [n for n in outdeg if outdeg[n] >= 10]
# G.remove_nodes_from(to_remove)
partition = best_partition(G)
if type == "node_link":
for node in G.nodes():
try:
#node,type(labels[node])
G.node[node]['label'] = node
G.node[node]['name'] = node
G.node[node]['size'] = weight[node]
G.node[node]['group'] = partition[node]
#G.add_edge(node, partition[node], weight=3)
# G.node[node]['color'] = '19,180,300'
except Exception as error:
print(error)
data = json_graph.node_link_data(G)
elif type == "adjacency":
for node in G.nodes():
try:
#node,type(labels[node])
#G.node[node]['label'] = node
G.node[node]['name'] = node
#G.node[node]['size'] = weight[node]
G.node[node]['group'] = partition[node]
#G.add_edge(node, partition[node], weight=3)
# G.node[node]['color'] = '19,180,300'
except Exception as error:
print(error)
data = json_graph.node_link_data(G)
# data = json_graph.node_link_data(G, attrs={\
# 'source':'source',\
# 'target':'target',\
# 'weight':'weight',\
# #'label':'label',\
# #'color':'color',\
# 'id':'id',})
#print(data)
return data
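The matrix filtering inside get_cooc above is terse: cooccurrence scores are normalized by row totals, then only cells at or above the smallest row maximum are kept, so every term retains at least one edge before Louvain partitioning. A standalone sketch of just that step, assuming `matrix` is the {ngram_id: {ngram_id: score}} dict built in the loop above (the keepdims form is used here to make the row-wise division explicit; the diff divides by x.sum(axis=1) directly):

```python
import numpy as np
import pandas as pd
import networkx as nx  # networkx 1.9, as pinned in requirements.txt

df = pd.DataFrame(matrix).T.fillna(0)                 # square ngram x ngram frame
x = df.values / df.values.sum(axis=1, keepdims=True)  # row-normalize scores
threshold = x.max(axis=1).min()                       # smallest row maximum
adjacency = np.where(x >= threshold, 1, 0)            # keep at least one edge per node
G = nx.from_numpy_matrix(adjacency)                   # then relabel and partition as above
```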
......@@ -11,12 +11,13 @@ import gargantext_web.api
admin.autodiscover()
urlpatterns = patterns('',
# url(r'^$', 'gargantext_web.views.home', name='home'),
# url(r'^blog/', include('blog.urls')),
# Admin views
url(r'^admin/', include(admin.site.urls)),
url(r'^login/', include(admin.site.urls)),
url(r'^grappelli/', include('grappelli.urls')),
# User views
url(r'^$', views.home),
url(r'^projects/$', views.projects),
......@@ -27,26 +28,35 @@ urlpatterns = patterns('',
url(r'^project/(\d+)/corpus/(\d+)/$', views.corpus),
url(r'^project/(\d+)/corpus/(\d+)/delete/$', views.delete_corpus),
# Visualizations
url(r'^corpus/(\d+)/explorer$', views.explorer_graph),
url(r'^chart$', views.explorer_chart),
url(r'^matrix$', views.explorer_matrix),
#url(r'^exploration$', views.exploration),
url(r'^corpus/(\d+)/matrix$', views.explorer_matrix),
# Getting data [which?]
url(r'^chart/corpus/(\d+)/data.csv$', views.send_csv),
url(r'^corpus/(\d+)/node_link.json$', views.node_link),
url(r'^corpus/(\d+)/adjancy_matrix$', views.node_link),
url(r'^corpus/(\d+)/adjacency.json$', views.adjacency),
"""RESTful API
These URLs allow operations on the database in a RESTful way.
"""
url(r'^api$', gargantext_web.api.Root),
# retrieve all the metadata from a given node's children
url(r'^api/nodes/(\d+)/children/metadata$', gargantext_web.api.NodesChildrenMetatadata.as_view()),
# retrieve the ngrams from a given node's children
url(r'^api/nodes/(\d+)/ngrams$', gargantext_web.api.CorpusController.ngrams),
# perform a query on a given node's children
url(r'^api/nodes/(\d+)/children/queries$', gargantext_web.api.NodesChildrenQueries.as_view()),
# get all the nodes
url(r'^api/nodes$', gargantext_web.api.NodesController.get),
url(r'^api/nodes/(\d+)/ngrams$', gargantext_web.api.CorpusController.ngrams),
url(r'^api/nodes/(\d+)/data$', gargantext_web.api.CorpusController.data),
url(r'^graph-it$', views.graph_it),
url(r'^ngrams$', views.ngrams),
# other (DEPRECATED, TO BE REMOVED)
url(r'^api/nodes$', gargantext_web.api.NodesController.get),
url(r'^api/corpus/(\d+)/ngrams$', gargantext_web.api.CorpusController.ngrams),
url(r'^api/corpus/(\d+)/metadata$', gargantext_web.api.CorpusController.metadata),
url(r'^api/corpus/(\d+)/data$', gargantext_web.api.CorpusController.data),
)
......
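With the routes above in place, the two graph endpoints can be smoke-tested over HTTP. A hypothetical client-side check, assuming a dev server on localhost:8000 and the corpus id used in the notebooks:

```python
import requests  # hypothetical client; not part of this repo's requirements

base = 'http://localhost:8000'
corpus_id = 13064  # corpus id borrowed from the notebook above

node_link = requests.get('%s/corpus/%d/node_link.json' % (base, corpus_id)).json()
adjacency = requests.get('%s/corpus/%d/adjacency.json' % (base, corpus_id)).json()
print(len(node_link['nodes']), len(node_link['links']), len(adjacency['links']))
```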
......@@ -26,7 +26,6 @@ from collections import defaultdict
from parsing.FileParsers import *
# SOME FUNCTIONS
def query_to_dicts(query_string, *query_args):
......@@ -243,6 +242,7 @@ def project(request, project_id):
# async
corpus.children.filter(type_id=type_document.pk).extract_ngrams(keys=['title',])
#corpus.children.filter(type_id=type_document.pk).extract_ngrams(keys=['title',])
except Exception as error:
print(error)
......@@ -367,7 +367,6 @@ def corpus(request, project_id, corpus_id):
return HttpResponse(html)
def delete_project(request, node_id):
Node.objects.filter(id=node_id).all().delete()
return HttpResponseRedirect('/projects/')
......@@ -390,14 +389,16 @@ def explorer_graph(request, corpus_id):
return HttpResponse(html)
def explorer_matrix(request):
def explorer_matrix(request, corpus_id):
t = get_template('matrix.html')
user = request.user
date = datetime.datetime.now()
corpus = Node.objects.get(id=corpus_id)
html = t.render(Context({\
'user': user,\
'date': date,\
'corpus': corpus,\
}))
return HttpResponse(html)
......@@ -469,78 +470,27 @@ def send_csv(request, corpus_id):
return response
def node_link(request, corpus_id):
'''
Create the HttpResponse object with the graph dataset.
'''
import pandas as pd
from copy import copy
import numpy as np
import networkx as nx
from networkx.readwrite import json_graph
from gargantext_web.api import JsonHttpResponse
from analysis.louvain import best_partition
from analysis.functions import create_whitelist, create_cooc
matrix = defaultdict(lambda : defaultdict(float))
labels = dict()
corpus = Node.objects.get(id=corpus_id)
type_cooc = NodeType.objects.get(name="Cooccurrence")
if Node.objects.filter(type=type_cooc, parent=corpus).first() is None:
print("Coocurrences do not exist yet, create it.")
whitelist = create_whitelist(request.user, corpus)
cooc = create_cooc(user=request.user, corpus=corpus, whitelist=whitelist)
print(cooc.id, "Cooc created")
else:
cooc = Node.objects.filter(type=type_cooc, parent=corpus).first()
for cooccurrence in NodeNgramNgram.objects.filter(node=cooc):
labels[cooccurrence.ngramx.id] = cooccurrence.ngramx.terms
labels[cooccurrence.ngramy.id] = cooccurrence.ngramy.terms
matrix[cooccurrence.ngramx.id][cooccurrence.ngramy.id] = cooccurrence.score
matrix[cooccurrence.ngramy.id][cooccurrence.ngramx.id] = cooccurrence.score
# To get the data
from gargantext_web.api import JsonHttpResponse
from analysis.functions import get_cooc
df = pd.DataFrame(matrix).T.fillna(0)
x = copy(df.values)
x = x / x.sum(axis=1)
# Removing unconnected nodes
threshold = min(x.max(axis=1))
matrix_filtered = np.where(x > threshold, 1, 0)
#matrix_filtered = np.where(x > threshold, x, 0)
G = nx.from_numpy_matrix(matrix_filtered)
G = nx.relabel_nodes(G, dict(enumerate([ labels[x] for x in list(df.columns)])))
#G = nx.relabel_nodes(G, dict(enumerate(df.columns)))
# Removing too connected nodes (find automatic way to do it)
# outdeg = G.degree()
# to_remove = [n for n in outdeg if outdeg[n] >= 10]
# G.remove_nodes_from(to_remove)
def node_link(request, corpus_id):
'''
Create the HttpResponse object with the node_link dataset.
'''
partition = best_partition(G)
for node in G.nodes():
try:
#node,type(labels[node])
G.node[node]['label'] = node
# G.node[node]['color'] = '19,180,300'
except Exception as error:
print(error)
data = get_cooc(request=request, corpus_id=corpus_id, type="node_link")
return JsonHttpResponse(data)
data = json_graph.node_link_data(G)
# data = json_graph.node_link_data(G, attrs={\
# 'source':'source',\
# 'target':'target',\
# 'weight':'weight',\
# #'label':'label',\
# #'color':'color',\
# 'id':'id',})
#print(data)
def adjacency(request, corpus_id):
'''
Create the HttpResponse object with the adjacency dataset.
'''
data = get_cooc(request=request, corpus_id=corpus_id, type="adjacency")
return JsonHttpResponse(data)
......@@ -555,7 +505,6 @@ def graph_it(request):
}))
return HttpResponse(html)
def ngrams(request):
'''The ngrams list.'''
t = get_template('ngrams.html')
......
......@@ -4,23 +4,30 @@ Install the requirements
1) Install all the Debian packages listed in dependances.deb
(also: sudo apt-get install postgresql-contrib)
2) Create a virtual environment with pyvenv: apt-get install python-virtualenv
2) Create a Python virtual environment
3) Type: source [your virtual environment directory]/bin/activate
4) Do your work!
On Debian:
---------
sudo apt-get install python3.4-venv
pyvenv3 /srv/gargantext_env
5) Type: deactivate
On Ubuntu:
---------
sudo apt-get install python-pip
sudo pip install -U pip
sudo pip install -U virtualenv
then you can create your virtualenv in the working directory or at a
location of your choice:
Configure stuff
---------------
sudo virtualenv -p python3 /srv/gargantext_env
1) ln -s [the project folder] /srv/gargantext
3) Type: source [your virtual environment directory]/bin/activate
2) ln -s [your folder for tree tagger] [the project folder]/parsing/Tagger/treetagger
4) sudo chown -R user:user /srv/gargantext_env
pip install -r /srv/gargantext/init/requirements.txt
Warning: for ln, paths have to be absolute!
5) Type: deactivate
In PostreSQL
......@@ -52,6 +59,36 @@ Populate the database
python manage.py syncdb
Last steps of configuration:
----------------------------
1) If your project is not in /srv/gargantext:
ln -s [the project folder] /srv/gargantext
2) build gargantext_lib
wget http://docs.delanoe.org/gargantext_lib.tar.bz2
cd /srv/
sudo tar xvjf gargantext_lib.tar.bz2
sudo chown user:user /srv/gargantext_lib
3) Explorer:
cd /srv/gargantext_lib/js
git clone git@github.com:PkSM3/garg.git
4) Adapt all symlinks:
ln -s [your folder for tree tagger] [the project folder]/parsing/Tagger/treetagger
Warning: for ln, paths have to be absolute!
5) patch CTE
patch /srv/gargantext_env/lib/python3.4/site-packages/cte_tree/models.py /srv/gargantext/init/cte_tree.models.diff
6) init nodetypes and main variables
/srv/gargantext/manage.py shell < /srv/gargantext/init/init.py
Extras:
======
Start the Python Notebook server
--------------------------------
......
sudo apt-get install postgresql
sudo apt-get install postgresql-contrib
sudo apt-get install python-virtualenv
sudo apt-cache search libpng
sudo apt-get install libpng12-dev
sudo apt-get install libpng-dev
apt-cache search freetype
apt-cache search freetype | grep dev
sudo apt-cache search freetype
sudo apt-get install libfreetype6-dev
sudo apt-cache search python-dev
sudo apt-get install python-dev
sudo apt-get install libpq-dev
sudo apt-get install postgresql-contrib
sudo apt-get install libpq-dev
postgresql-contrib
libpq-dev
# add david
#
#
# To get all the dependencies of matplotlib (it's dirty, find
sudo apt-get build-dep python-matplotlib
# Debian packages to install
# easy_install -U distribute (matplotlib)
#lxml
libffi-dev
libxml2-dev
libxslt1-dev
sudo apt-get install libffi-dev
sudo apt-get install libxml2-dev
sudo apt-get install libxslt1-dev
# ipython readline
libncurses5-dev
pandoc
sudo apt-get install libncurses5-dev
sudo apt-get install pandoc
# scipy:
gfortran
libopenblas-dev
liblapack-dev
sudo apt-get install gfortran
sudo apt-get install libopenblas-dev
sudo apt-get install liblapack-dev
......@@ -82,6 +82,13 @@ except Exception as error:
typeDoc = NodeType(name='Synonyme')
typeDoc.save()
try:
typeDoc = NodeType.objects.get(name='Cooccurrence')
except Exception as error:
print(error)
typeDoc = NodeType(name='Cooccurrence')
typeDoc.save()
# In[33]:
......
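The try/except bootstrap used here for the Cooccurrence NodeType (and for the other NodeTypes in the notebooks) can be written more compactly with Django's get_or_create; a minimal equivalent sketch:

```python
# Fetches the NodeType, creating it on first run; replaces the
# try/except-around-get() pattern above.
typeCooc, created = NodeType.objects.get_or_create(name='Cooccurrence')
if created:
    print('Cooccurrence NodeType created')
```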
......@@ -4,13 +4,16 @@ Jinja2==2.7.3
MarkupSafe==0.23
Pillow==2.5.3
Pygments==1.6
SQLAlchemy==0.9.8
South==1.0
aldjemy==0.3.51
amqp==1.4.6
anyjson==0.3.3
billiard==3.3.0.18
celery==3.1.15
certifi==14.05.14
cffi==0.8.6
chardet==2.3.0
cryptography==0.6
decorator==3.4.0
django-autoslug==1.7.2
......@@ -27,9 +30,9 @@ graphviz==0.4
ipython==2.2.0
kombu==3.0.23
lxml==3.3.6
matplotlib==1.4.0
#matplotlib==1.4.0
networkx==1.9
nltk==3.0a4
#nltk==3.0a4
nose==1.3.4
numpy==1.8.2
pandas==0.14.1
......
......@@ -200,6 +200,7 @@ class Node(CTENode):
self.node_resource.update(parsed=True)
@current_app.task(filter=task_method)
def extract_ngrams(self, keys, ngramsextractorscache=None, ngramscaches=None):
# if there is no cache...
if ngramsextractorscache is None:
......
import re
import locale
from lxml import etree
from datetime import datetime, date
from parsing.FileParsers.FileParser import FileParser
from parsing.NgramsExtractors import *
from .FileParser import FileParser
from ..NgramsExtractors import *
......
......@@ -3,7 +3,7 @@ import dateutil.parser
import zipfile
import chardet
from parsing.Caches import LanguagesCache
from ..Caches import LanguagesCache
class FileParser:
......
from parsing.FileParsers.RisFileParser import RisFileParser
from .RisFileParser import RisFileParser
class IsiFileParser(RisFileParser):
......
from django.db import transaction
from lxml import etree
from parsing.FileParsers.FileParser import FileParser
from parsing.NgramsExtractors import *
from .FileParser import FileParser
from ..NgramsExtractors import *
class PubmedFileParser(FileParser):
......
from django.db import transaction
from parsing.FileParsers.FileParser import FileParser
from .FileParser import FileParser
class RisFileParser(FileParser):
......
from parsing.NgramsExtractors.NgramsExtractor import NgramsExtractor
from parsing.Taggers import NltkTagger
from .NgramsExtractor import NgramsExtractor
from ..Taggers import NltkTagger
class EnglishNgramsExtractor(NgramsExtractor):
......
from parsing.Taggers.Tagger import Tagger
from .Tagger import Tagger
import nltk
......
from parsing.Taggers.Tagger import Tagger
from .Tagger import Tagger
import subprocess
import threading
......
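All the parsing/ hunks above apply one mechanical change, matching this commit's message: absolute imports rooted at parsing/ become package-relative. The payoff, sketched with the modules already shown above:

```python
# parsing/NgramsExtractors/EnglishNgramsExtractor.py after this commit:
from .NgramsExtractor import NgramsExtractor  # sibling module, same package
from ..Taggers import NltkTagger              # sibling subpackage of parsing/

# Relative imports resolve against the containing package, so parsing/
# keeps working if it is moved, renamed, or vendored elsewhere.
```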
# from NltkTagger import NltkTagger
# tagger = NltkTagger()
# text0 = "Forman Brown (1901–1996) was one of the world's leaders in puppet theatre in his day, as well as an important early gay novelist. He was a member of the Yale Puppeteers and the driving force behind Turnabout Theatre. He was born in Otsego, Michigan, in 1901 and died in 1996, two days after his 95th birthday. Brown briefly taught at North Carolina State College, followed by an extensive tour of Europe."
# text1 = "James Patrick (born c. 1940) is the pseudonym of a Scottish sociologist, which he used to publish a book A Glasgow Gang Observed. It attracted some attention in Scotland when it was published in 1973. It was based on research he had done in 1966, when he was aged 26. At that time he was working as a teacher in an Approved School, a Scottish reformatory. One gang member in the school, \"Tim Malloy\" (born 1950, also a pseudonym and a generic term for a Glasgow Catholic), agreed to infiltrate him into his gang in Maryhill in Glasgow. Patrick spent four months as a gang member, observing their behaviour."
from TreeTagger import TreeTagger
tagger = TreeTagger()
text0 = "La saison 1921-1922 du Foot-Ball Club Juventus est la vingtième de l'histoire du club, créé vingt-cinq ans plus tôt en 1897. La société turinoise qui fête cette année son 25e anniversaire prend part à l'édition du championnat dissident d'Italie de la CCI (appelé alors la Première division), la dernière édition d'une compétition annuelle de football avant l'ère fasciste de Mussolini."
text1 = "Le terme oblong désigne une forme qui est plus longue que large et dont les angles sont arrondis. En langage bibliographique, oblong signifie un format dont la largeur excède la hauteur. Ce qui correspond au format paysage en termes informatiques et \"à l'italienne\", pour l'imprimerie."
text2 = "Les sanglots longs des violons de l'automne bercent mon coeur d'une langueur monotone."
print()
print(tagger.tag_text(text0))
print()
print(tagger.tag_text(text1))
print()
print(tagger.tag_text(text2))
print()
\ No newline at end of file
/* Copyright 2013 Michael Bostock. All rights reserved. Do not copy. */
/*@import url(http://fonts.googleapis.com/css?family=PT+Serif|PT+Serif:b|PT+Serif:i|PT+Sans|PT+Sans:b);*/
html {
min-width: 1040px;
}
.ocks-org body {
background: #fcfcfa;
color: #333;
font-family: "PT Serif", serif;
margin: 1em auto 4em auto;
position: relative;
width: 960px;
}
.ocks-org header,
.ocks-org footer,
.ocks-org aside,
.ocks-org h1,
.ocks-org h2,
.ocks-org h3,
.ocks-org h4 {
font-family: "PT Sans", sans-serif;
}
.ocks-org h1,
.ocks-org h2,
.ocks-org h3,
.ocks-org h4 {
color: #000;
}
.ocks-org header,
.ocks-org footer {
color: #636363;
}
h1 {
font-size: 64px;
font-weight: 300;
letter-spacing: -2px;
margin: .3em 0 .1em 0;
}
h2 {
margin-top: 2em;
}
h1, h2 {
text-rendering: optimizeLegibility;
}
h2 a[name],
h2 a[id] {
color: #ccc;
right: 100%;
padding: 0 .3em;
position: absolute;
}
header,
footer {
font-size: small;
}
.ocks-org header aside,
.ocks-org footer aside {
float: left;
margin-right: .5em;
}
.ocks-org header aside:after,
.ocks-org footer aside:after {
padding-left: .5em;
content: "/";
}
footer {
margin-top: 8em;
}
h1 ~ aside {
font-size: small;
right: 0;
position: absolute;
width: 180px;
}
.attribution {
font-size: small;
margin-bottom: 2em;
}
body > p, li > p {
line-height: 1.5em;
}
body > p {
width: 720px;
}
body > blockquote {
width: 640px;
}
blockquote q {
display: block;
font-style: oblique;
}
ul {
padding: 0;
}
li {
width: 690px;
margin-left: 30px;
}
a {
color: steelblue;
}
a:not(:hover) {
text-decoration: none;
}
pre, code, textarea {
font-family: "Menlo", monospace;
}
code {
line-height: 1em;
}
textarea {
font-size: 100%;
}
body > pre {
border-left: solid 2px #ccc;
padding-left: 18px;
margin: 2em 0 2em -20px;
}
.html .value,
.javascript .string,
.javascript .regexp {
color: #756bb1;
}
.html .tag,
.css .tag,
.javascript .keyword {
color: #3182bd;
}
.comment {
color: #636363;
}
.html .doctype,
.javascript .number {
color: #31a354;
}
.html .attribute,
.css .attribute,
.javascript .class,
.javascript .special {
color: #e6550d;
}
svg {
font: 10px sans-serif;
}
.axis path, .axis line {
fill: none;
stroke: #000;
shape-rendering: crispEdges;
}
sup, sub {
line-height: 0;
}
q:before {
content: "“";
}
q:after {
content: "”";
}
blockquote q {
line-height: 1.5em;
display: inline;
}
blockquote q:before,
blockquote q:after {
content: "";
}
......@@ -127,8 +127,8 @@
<div class="jumbotron">
<h3><a href="/corpus/{{ corpus.id }}/explorer">Visualizations</a></h3>
<ol>
<li>Matrix</li>
<li>Static maps</li>
<li><a href="/corpus/{{ corpus.id }}/matrix">Adjacency matrix</a></li>
<li><a href="/corpus/{{ corpus.id }}/explorer">Static maps</a></li>
<li>Dynamics maps</li>
</ol>
</div>
......
......@@ -120,22 +120,19 @@
</ul>
<!--
<ul id="colorGraph" class="nav navbar-nav navbar-right">
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown">
<img title="Set Colors" src="{% static "js/libs/img2/colors.png" %}" width="20px"><b class="caret"></b></img>
</a>
<ul class="dropdown-menu">
<li><a href="#" onclick='clustersBy("country")'>By Country</a></li>
<li><a href="#" onclick='clustersBy("acronym")'>By Acronym</a></li>
<li><a href="#" onclick='clustersBy("group")'>By Group</a></li>
<li><a href="#" onclick='clustersBy("default")'>By Default</a></li>
<li class="divider"></li>
<li><a href="#"> <span class="glyphicon glyphicon-repeat"></span> <strike>Properties</strike></a></li>
</ul>
</li>
</ul>
-->
<!---->
......@@ -361,7 +358,7 @@
<script type="text/javascript" src="{% static "js/libs/jquery/jquery.easytabs.min.js" %}"></script>
<script src="{% static "js/libs/bootstrap/js/bootstrap.min.js" %}"></script>
<!--<script src="{% static "js/libs/bootstrap/js/bootstrap.min.js" %}"></script>-->
<script src="{% static "js/libs/bootstrap/js/bootstrap-modal.js" %}" type="text/javascript"></script>
<script src="{% static "js/libs/bootstrap/js/bootstrap-hover-dropdown.min.js" %}" type="text/javascript"></script>
......
......@@ -5,6 +5,22 @@
{% load staticfiles %}
<link rel="stylesheet" href="{% static "css/bootstrap.css" %}">
<link rel="stylesheet" href="{% static "css/bootstrap-theme.min.css" %}">
<link rel="stylesheet" href="{% static "css/d3matrix.css" %}">
<style>
.background {
fill: #eee;
}
line {
stroke: #fff;
}
text.active {
fill: red;
}
</style>
{% endblock %}
......@@ -17,7 +33,11 @@
</div>
</div>
<script src="{% static "js/d3/d3.v2.min.js" %}></script>
<div id="graphid" style="visibility: hidden;">/corpus/{{ corpus.id }}/adjacency.json</div>
<script src="{% static "js/jquery/jquery.min.js" %}" type="text/javascript"></script>
<script src="{% static "js/d3/d3.v2.min.js" %}"></script>
<p>Order: <select id="order">
<option value="name">by Name</option>
......@@ -39,11 +59,12 @@ var x = d3.scale.ordinal().rangeBands([0, width]),
var svg = d3.select("body").append("svg")
.attr("width", width + margin.left + margin.right)
.attr("height", height + margin.top + margin.bottom)
.style("margin-left", -margin.left + "px")
//.style("margin-left", -margin.left + "px")
.append("g")
.attr("transform", "translate(" + margin.left + "," + margin.top + ")");
d3.json("{% static "img/miserables.json" %}, function(miserables) {
var filename = document.getElementById("graphid").innerHTML
d3.json(filename, function(miserables) {
var matrix = [],
nodes = miserables.nodes,
n = nodes.length;
......@@ -57,12 +78,12 @@ var svg = d3.select("body").append("svg")
// Convert links to matrix; count character occurrences.
miserables.links.forEach(function(link) {
matrix[link.source][link.target].z += link.value;
matrix[link.target][link.source].z += link.value;
matrix[link.source][link.source].z += link.value;
matrix[link.target][link.target].z += link.value;
nodes[link.source].count += link.value;
nodes[link.target].count += link.value;
matrix[link.source][link.target].z += link.weight;
matrix[link.target][link.source].z += link.weight;
matrix[link.source][link.source].z += link.weight;
matrix[link.target][link.target].z += link.weight;
nodes[link.source].count += link.weight;
nodes[link.target].count += link.weight;
});
// Precompute the orders.
......
......@@ -146,7 +146,7 @@
data: [
{% if donut %}
{% for part in donut %}
{label: '{{ part.source }}, {{part.count}} docs', value: {{ part.part }} },
{label: '{{ part.source }}', value: {{ part.part }} },
{% endfor %}
{% endif %}
......
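The value-to-weight rename in the matrix template above matches what networkx emits: json_graph.node_link_data copies edge attributes verbatim into each link object, so a weighted graph yields link.weight rather than the link.value used by the original miserables.json example. A quick sketch:

```python
import networkx as nx
from networkx.readwrite import json_graph

G = nx.Graph()
G.add_edge(0, 1, weight=3.0)
# prints a link like {'source': 0, 'target': 1, 'weight': 3.0},
# which is why the d3 code above now reads link.weight
print(json_graph.node_link_data(G)['links'])
```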