{
 "metadata": {
  "name": "",
  "signature": "sha256:65f487ee62067486e4f832ed088fe02ede3daa27052c9dcaf58b3edffa169245"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from node.models import Node, NodeType,\\\n",
      "                        Project, Corpus, Document,\\\n",
      "                        Ngram, Node_Ngram,\\\n",
      "                        User, Language, ResourceType\n",
      "            \n",
      "from parsing.Caches import NgramsCache\n",
      "            \n",
      "from django.db import connection\n",
      "cursor = connection.cursor()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "me = User.objects.get(username='alexandre')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 2
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import networkx as nx\n",
      "from networkx.readwrite import json_graph"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 2
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import csv"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 3
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "help(csv.writer)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Help on built-in function writer in module _csv:\n",
        "\n",
        "writer(...)\n",
        "    csv_writer = csv.writer(fileobj [, dialect='excel']\n",
        "                                [optional keyword args])\n",
        "        for row in sequence:\n",
        "            csv_writer.writerow(row)\n",
        "    \n",
        "        [or]\n",
        "    \n",
        "        csv_writer = csv.writer(fileobj [, dialect='excel']\n",
        "                                [optional keyword args])\n",
        "        csv_writer.writerows(rows)\n",
        "    \n",
        "    The \"fileobj\" argument can be any object that supports the file API.\n",
        "\n"
       ]
      }
     ],
     "prompt_number": 2
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "type(x)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 52,
       "text": [
        "_io.TextIOWrapper"
       ]
      }
     ],
     "prompt_number": 52
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "file = open('/tmp/test.graph', 'w')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": [
        "-c:1: ResourceWarning: unclosed file <_io.TextIOWrapper name='/tmp/test.graph' mode='w' encoding='UTF-8'>\n",
        "\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": [
        "WARNING:py.warnings:-c:1: ResourceWarning: unclosed file <_io.TextIOWrapper name='/tmp/test.graph' mode='w' encoding='UTF-8'>\n",
        "\n"
       ]
      }
     ],
     "prompt_number": 42
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "file.write('ici il fait beau')\n",
      "file.close()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 46
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "G = nx.complete_graph(30)\n",
      "f = open(\"graph.json\", \"w\")\n",
      "f.write(json_graph.node_link_graph(G))\n",
      "f.close()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": [
        "-c:2: ResourceWarning: unclosed file <_io.TextIOWrapper name='graph.json' mode='w' encoding='UTF-8'>\n",
        "\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": [
        "WARNING:py.warnings:-c:2: ResourceWarning: unclosed file <_io.TextIOWrapper name='graph.json' mode='w' encoding='UTF-8'>\n",
        "\n"
       ]
      },
      {
       "ename": "AttributeError",
       "evalue": "'Graph' object has no attribute 'get'",
       "output_type": "pyerr",
       "traceback": [
        "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
        "\u001b[1;32m<ipython-input-17-7d4aa550fd32>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[0mG\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcomplete_graph\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m30\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      2\u001b[0m \u001b[0mf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mopen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"graph.json\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"w\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0mf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mjson_graph\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnode_link_graph\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mG\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      4\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
        "\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/networkx/readwrite/json_graph/node_link.py\u001b[0m in \u001b[0;36mnode_link_graph\u001b[1;34m(data, directed, multigraph, attrs)\u001b[0m\n\u001b[0;32m    134\u001b[0m     \u001b[0mnode_link_data\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0madjacency_data\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtree_data\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    135\u001b[0m     \"\"\"\n\u001b[1;32m--> 136\u001b[1;33m     \u001b[0mmultigraph\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'multigraph'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmultigraph\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    137\u001b[0m     \u001b[0mdirected\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'directed'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdirected\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    138\u001b[0m     \u001b[1;32mif\u001b[0m \u001b[0mmultigraph\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
        "\u001b[1;31mAttributeError\u001b[0m: 'Graph' object has no attribute 'get'"
       ]
      }
     ],
     "prompt_number": 17
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "typeCorpus  = NodeType.objects.get(name='Corpus')\n",
      "for corpus in Node.objects.filter(type=typeCorpus):\n",
      "    print(\"#%d - %s\" % (corpus.id, corpus))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "#7 - Presse corpus\n"
       ]
      }
     ],
     "prompt_number": 33
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "try:\n",
      "    me = User.objects.get(username='alexandre')\n",
      "except:\n",
      "    me = User(username='alexandre')\n",
      "    me.save()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 34
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#\u00a0corpus = Node.objects.filter(type=typeCorpus).first()\n",
      "corpus = Node.objects.get(id=13064)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 3
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Occurences"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "cursor.execute(\"\"\"\n",
      "    SELECT\n",
      "        metadata -> 'publication_year' as year,\n",
      "        metadata -> 'publication_month' as month, \n",
      "        metadata -> 'publication_day' as day,\n",
      "        COUNT(*)\n",
      "    FROM\n",
      "        node_node AS n\n",
      "    WHERE\n",
      "        n.parent_id = %s\n",
      "    GROUP BY\n",
      "        day, month, year\n",
      "    ORDER BY\n",
      "        year, month, day ASC\n",
      "    LIMIT\n",
      "        20\n",
      "\"\"\", [corpus.id])\n",
      "\n",
      "\n",
      "while True:\n",
      "    row = cursor.fetchone()\n",
      "    if row is None:\n",
      "        break\n",
      "    print('\\'' + row[0] + '/' + row[1] + '/' + row[2] + '\\'' \n",
      "          + ',' + '\\'' + str(row[3]) + '\\'' )"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "'1954/11/18','2'\n",
        "'1958/11/18','1'\n",
        "'1959/11/18','1'\n",
        "'1968/11/18','1'\n",
        "'1969/11/18','2'\n",
        "'1971/11/18','4'\n",
        "'1972/11/18','1'\n",
        "'1974/11/18','2'\n",
        "'1975/11/18','3'\n",
        "'1976/11/18','1'\n",
        "'1977/11/18','6'\n",
        "'1978/11/18','11'\n",
        "'1979/11/18','9'\n",
        "'1980/11/18','6'\n",
        "'1981/11/18','4'\n",
        "'1982/11/18','7'\n",
        "'1983/11/18','14'\n",
        "'1984/11/18','17'\n",
        "'1985/11/18','18'\n",
        "'1986/02/21','1'\n"
       ]
      }
     ],
     "prompt_number": 6
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "cursor.execute(\"\"\"\n",
      "    SELECT\n",
      "        COUNT(*) AS occurrences,\n",
      "        ngX.terms\n",
      "    FROM\n",
      "        node_node AS n\n",
      "    INNER JOIN\n",
      "        node_node_ngram AS nngX ON nngX.node_id = n.id\n",
      "    INNER JOIN\n",
      "        node_ngram AS ngX ON ngX.id = nngX.ngram_id\n",
      "    WHERE\n",
      "        n.parent_id = %s\n",
      "    AND\n",
      "        ngX.n >= 2\n",
      "    GROUP BY\n",
      "        ngX.terms\n",
      "    Having\n",
      "        COUNT(*) > 7\n",
      "    ORDER BY\n",
      "        occurrences DESC\n",
      "    LIMIT\n",
      "        100\n",
      "    \n",
      "\"\"\", [corpus.id])\n",
      "\n",
      "while True:\n",
      "    row = cursor.fetchone()\n",
      "    if row is None:\n",
      "        break\n",
      "    print(row)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "(138, 'honey bees')\n",
        "(132, 'apis mellifera')\n",
        "(69, 'honey bee')\n",
        "(66, 'apis mellifera l')\n",
        "(45, 'pesticide residues')\n",
        "(39, 'gas chromatography')\n",
        "(36, 'varroa destructor')\n",
        "(36, 'honey bee colonies')\n",
        "(30, 'sublethal effects')\n",
        "(27, 'apidae )')\n",
        "(21, 'neonicotinoid insecticides')\n",
        "(21, 'honey bee ( hymenoptera')\n",
        "(18, 'bee products')\n",
        "(18, 'megachile rotundata')\n",
        "(18, 'solid-phase extraction')\n",
        "(18, 'simultaneous determination')\n",
        "(18, 'mass spectrometric')\n",
        "(15, 'case study')\n",
        "(15, 'honey samples')\n",
        "(15, 'liquid chromatography')\n",
        "(15, 'high performance liquid chromatography')\n",
        "(15, 'varroa mites')\n",
        "(12, 'organochlorine pesticides')\n",
        "(12, 'gas chromatography-mass spectrometry')\n",
        "(12, 'liquid chromatography-mass spectrometry')\n",
        "(12, 'colony health')\n",
        "(12, 'gas chromatographic')\n",
        "(12, 'colony collapse disorder')\n",
        "(12, 'bumble bees')\n",
        "(12, 'varroa jacobsoni')\n",
        "(9, 'chemiluminescent elisa')\n",
        "(9, 'diversionary plantings for reduction of pesticide related bee mortality')\n",
        "(9, 'pesticides and law')\n",
        "(9, 'plant protection products')\n",
        "(9, 'nomia melanderi')\n",
        "(9, 'electron-capture detection')\n",
        "(9, 'managed pollinator cap coordinated agricultural project a national research')\n",
        "(9, 'apis florea f')\n",
        "(9, 'solid-phase microextraction')\n",
        "(9, 'extension initiative')\n",
        "(9, 'crop pollination')\n",
        "(9, 'non-apis bees')\n",
        "(9, 'honey bees ( apis mellifera')\n",
        "(9, 'liquid chromatography-tandem mass spectrometry')\n",
        "(9, 'bee pollen')\n",
        "(9, 'foraging behavior')\n",
        "(9, 'biological control')\n",
        "(9, 'nosema ceranae')\n",
        "(9, 'organophosphorus pesticides')\n",
        "(9, 'field conditions')\n",
        "(9, 'honey bee apis mellifera l')\n",
        "(9, 'laboratory tests')\n",
        "(9, 'beauveria bassiana')\n",
        "(9, 'comparative toxicity')\n",
        "(9, 'high levels')\n",
        "(9, 'pesticide exposure')\n",
        "(9, 'fluvalinate residues')\n",
        "(9, 'insecticide residues')\n",
        "(9, 'osmia lignaria')\n",
        "(9, 'bombus impatiens')\n",
        "(9, 'honey bee health')\n",
        "(9, 'agricultural landscape')\n",
        "(9, 'dispersive liquid-liquid microextraction')\n",
        "(9, 'matrix solid-phase dispersion')\n"
       ]
      }
     ],
     "prompt_number": 28
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Cooccurrences par ann\u00e9e"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "cursor.execute(\"\"\"\n",
      "    SELECT\n",
      "        COUNT(*) AS occurrences,\n",
      "        n.metadata->'publication_year' AS year,\n",
      "        ngX.terms\n",
      "    FROM\n",
      "        node_node AS n\n",
      "    INNER JOIN\n",
      "        node_node_ngram AS nngX ON nngX.node_id = n.id\n",
      "    INNER JOIN\n",
      "        node_ngram AS ngX ON ngX.id = nngX.ngram_id\n",
      "    WHERE\n",
      "        n.parent_id = %s\n",
      "    GROUP BY\n",
      "        terms,\n",
      "        year\n",
      "    ORDER BY\n",
      "        occurrences DESC\n",
      "    LIMIT\n",
      "        20\n",
      "\"\"\", [corpus.id])\n",
      "\n",
      "while True:\n",
      "    row = cursor.fetchone()\n",
      "    if row is None:\n",
      "        break\n",
      "    print(row)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "(24, '2014', 'patients')\n",
        "(22, '2005', 'patients')\n",
        "(18, '2005', 'study')\n",
        "(15, '2014', 'voice')\n",
        "(14, '2002', 'disease')\n",
        "(14, '2013', 'patients')\n",
        "(14, '2006', 'study')\n",
        "(13, '2014', 'treatment')\n",
        "(12, '2011', 'patients')\n",
        "(12, '2004', 'voice')\n",
        "(12, '2012', 'patients')\n",
        "(12, '2003', 'patients')\n",
        "(12, '2005', 'voice')\n",
        "(11, '2002', 'patients')\n",
        "(11, '2014', 'study')\n",
        "(10, '2007', 'patients')\n",
        "(10, '2006', 'patients')\n",
        "(10, '2004', 'study')\n",
        "(10, '2001', 'patients')\n",
        "(10, '2014', 'phase')\n"
       ]
      }
     ],
     "prompt_number": 105
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Cr\u00e9ation d'une liste de synonymes"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ngramsCache = NgramsCache(Language.objects.get(iso2='fr'))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 17
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "synonymePairs = [\n",
      "    ['danger', 'risques'],\n",
      "    ['risque', 'risques'],\n",
      "    ['R\u00e9gent', 'R\u00e9gent TS']\n",
      "]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 18
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "try:\n",
      "    typeSynonyme = NodeType.objects.get(name='Synonyme')\n",
      "except:\n",
      "    typeSynonyme = NodeType(name='Synonyme')\n",
      "    typeSynonyme.save()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 19
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "listSynonymes = Node(name='Syno abeilles', type=typeSynonyme, user=me)\n",
      "listSynonymes.save()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 22
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "for synonymePair in synonymePairs:\n",
      "    NodeNgramNgram(\n",
      "        ngramx = ngramsCache[synonymePair[0]],\n",
      "        ngramy = ngramsCache[synonymePair[1]],\n",
      "        node = listSynonymes,\n",
      "        score = 1.\n",
      "    ).save()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 23
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "listSynonymes.id"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 24,
       "text": [
        "61297"
       ]
      }
     ],
     "prompt_number": 24
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Occurrences avec synonymes"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "'''cursor.execute(\"\"\"\n",
      "    SELECT\n",
      "        COUNT(*) AS occurrences,\n",
      "        ngx.terms\n",
      "    FROM\n",
      "        node_node AS n\n",
      "    INNER JOIN\n",
      "        node_node_ngram AS nngX ON nngX.node_id = n.id\n",
      "    INNER JOIN\n",
      "        node_ngram AS ngX ON ngX.id = nngX.ngram_id\n",
      "    WHERE\n",
      "        n.parent_id = %s\n",
      "    GROUP BY\n",
      "        ngX.terms\n",
      "    ORDER BY\n",
      "        occurrences DESC\n",
      "    LIMIT\n",
      "        20\n",
      "\"\"\")'''\n",
      "cursor.execute(\"\"\"\n",
      "    SELECT\n",
      "        n.id\n",
      "    FROM\n",
      "        node_node AS n\n",
      "    INNER JOIN\n",
      "        node_node_ngram AS nngx ON nngx.node_id = n.id\n",
      "    INNER JOIN\n",
      "        node_nodengramngram AS nngng ON nngng.ngramx_id = nngx.ngram_id\n",
      "    INNER JOIN\n",
      "        node_node_ngram AS nngy ON nngy.id = nngng.ngramy_id\n",
      "    WHERE\n",
      "        n.parent_id = %s\n",
      "\"\"\", [corpus.id])\n",
      "#\u00a0\"\"\" % [listSynonymes.id])\n",
      "\n",
      "while True:\n",
      "    row = cursor.fetchone()\n",
      "    if row is None:\n",
      "        break\n",
      "    print(row)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 26
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Cooccurrences"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "cursor.execute(\"\"\"\n",
      "    SELECT\n",
      "        COUNT(*) AS cooccurrences,\n",
      "        ngX.terms,\n",
      "        ngY.terms\n",
      "    FROM\n",
      "        node_node AS n\n",
      "        \n",
      "    INNER JOIN\n",
      "        node_node_ngram AS nngX ON nngX.node_id = n.id\n",
      "    INNER JOIN\n",
      "        node_ngram AS ngX ON ngX.id = nngX.ngram_id\n",
      "        \n",
      "    INNER JOIN\n",
      "        node_node_ngram AS nngY ON nngY.node_id = n.id\n",
      "    INNER JOIN\n",
      "        node_ngram AS ngY ON ngY.id = nngY.ngram_id\n",
      "        \n",
      "    WHERE\n",
      "        n.parent_id = %s\n",
      "    AND\n",
      "        nngX.ngram_id in (select id from node_node_ngram WHERE node_id = 61298 )\n",
      "    AND\n",
      "        nngY.ngram_id in (select id from node_node_ngram WHERE node_id = 61298 )\n",
      "    AND\n",
      "        nngX.ngram_id <> nngY.ngram_id\n",
      "        \n",
      "    GROUP BY\n",
      "        ngX.id,\n",
      "        ngX.terms,\n",
      "        ngY.id,\n",
      "        ngY.terms\n",
      "    ORDER BY\n",
      "        cooccurrences DESC\n",
      "    LIMIT\n",
      "        200\n",
      "\"\"\", [corpus.id])\n",
      "\n",
      "while True:\n",
      "    row = cursor.fetchone()\n",
      "    if row is None:\n",
      "        break\n",
      "    print(row)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 15
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "cursor.execute(\"\"\"\n",
      "    SELECT\n",
      "        COUNT(*) AS cooccurrences,\n",
      "        ngX.terms,\n",
      "        ngY.terms\n",
      "    FROM\n",
      "        node_node AS n\n",
      "        \n",
      "    INNER JOIN\n",
      "        node_node_ngram AS nngX ON nngX.node_id = n.id\n",
      "    INNER JOIN\n",
      "        node_ngram AS ngX ON ngX.id = nngX.ngram_id\n",
      "        \n",
      "    INNER JOIN\n",
      "        node_node_ngram AS nngY ON nngY.node_id = n.id\n",
      "    INNER JOIN\n",
      "        node_ngram AS ngY ON ngY.id = nngY.ngram_id\n",
      "\n",
      "    WHERE\n",
      "        n.parent_id = %s\n",
      "    AND\n",
      "        nngX.ngram_id <> nngY.ngram_id\n",
      "        \n",
      "    GROUP BY\n",
      "        ngX.id,\n",
      "        ngX.terms,\n",
      "        ngY.id,\n",
      "        ngY.terms\n",
      "    ORDER BY\n",
      "        cooccurrences DESC\n",
      "    LIMIT\n",
      "        20\n",
      "\"\"\", [corpus.id])\n",
      "\n",
      "while True:\n",
      "    row = cursor.fetchone()\n",
      "    if row is None:\n",
      "        break\n",
      "    print(row)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": [
        "ERROR: An unexpected error occurred while tokenizing input\n",
        "The following traceback may be corrupted or invalid\n",
        "The error message is: ('EOF in multi-line string', (1, 0))\n",
        "\n"
       ]
      },
      {
       "ename": "OperationalError",
       "evalue": "arr\u00eat des connexions suite \u00e0 la demande de l'administrateur\nSSL connection has been closed unexpectedly\n",
       "output_type": "pyerr",
       "traceback": [
        "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mOperationalError\u001b[0m                          Traceback (most recent call last)",
        "\u001b[1;32m<ipython-input-11-752593da5735>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m     31\u001b[0m     \u001b[0mLIMIT\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     32\u001b[0m         \u001b[1;36m20\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 33\u001b[1;33m \"\"\", [corpus.id])\n\u001b[0m\u001b[0;32m     34\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     35\u001b[0m \u001b[1;32mwhile\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
        "\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/backends/util.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, sql, params)\u001b[0m\n\u001b[0;32m     67\u001b[0m         \u001b[0mstart\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     68\u001b[0m         \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 69\u001b[1;33m             \u001b[1;32mreturn\u001b[0m \u001b[0msuper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mCursorDebugWrapper\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     70\u001b[0m         \u001b[1;32mfinally\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     71\u001b[0m             \u001b[0mstop\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
        "\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/backends/util.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, sql, params)\u001b[0m\n\u001b[0;32m     51\u001b[0m                 \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     52\u001b[0m             \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 53\u001b[1;33m                 \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     54\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     55\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mexecutemany\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparam_list\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
        "\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/utils.py\u001b[0m in \u001b[0;36m__exit__\u001b[1;34m(self, exc_type, exc_value, traceback)\u001b[0m\n\u001b[0;32m     97\u001b[0m                 \u001b[1;32mif\u001b[0m \u001b[0mdj_exc_type\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32min\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mDataError\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mIntegrityError\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     98\u001b[0m                     \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwrapper\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0merrors_occurred\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 99\u001b[1;33m                 \u001b[0msix\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreraise\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdj_exc_type\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdj_exc_value\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtraceback\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    100\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    101\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
        "\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/utils/six.py\u001b[0m in \u001b[0;36mreraise\u001b[1;34m(tp, value, tb)\u001b[0m\n\u001b[0;32m    547\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mreraise\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtp\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtb\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    548\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__traceback__\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mtb\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 549\u001b[1;33m             \u001b[1;32mraise\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwith_traceback\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtb\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    550\u001b[0m         \u001b[1;32mraise\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    551\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
        "\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/backends/util.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, sql, params)\u001b[0m\n\u001b[0;32m     51\u001b[0m                 \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     52\u001b[0m             \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 53\u001b[1;33m                 \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     54\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     55\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mexecutemany\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparam_list\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
        "\u001b[1;31mOperationalError\u001b[0m: arr\u00eat des connexions suite \u00e0 la demande de l'administrateur\nSSL connection has been closed unexpectedly\n"
       ]
      }
     ],
     "prompt_number": 11
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}