{
 "metadata": {
  "name": "",
  "signature": "sha256:7d01da7300982ebf3acd799b54b93beda7dec63ba8f164356465e08a34dc3311"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from node.models import Node"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 11
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "root1 = Node.add_root(name=\"Documents of user1\", user_id=1, type_id=2)\n",
      "root2 = Node.add_root(name=\"Documents of user2\", user_id=2, type_id=2)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 12
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "root1.add_child(name=\"Project for Alexandre\", user_id=1, type_id=1)\n",
      "root1.add_child(name=\"Other project for Alexandre\", user_id=1, type_id=1)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 13,
       "text": [
        "<Node: Other project for Alexandre>"
       ]
      }
     ],
     "prompt_number": 13
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "root2.add_child(name=\"Project for Mat\", user_id=2, type_id=1)\n",
      "root2.add_child(name=\"Other project for Mat\", user_id=2, type_id=1)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 14,
       "text": [
        "<Node: Other project for Mat>"
       ]
      }
     ],
     "prompt_number": 14
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "for node in Node.objects.filter(user_id__gt=0):\n",
      "    print(\"[%3d] \" % node.id + node.depth*\"    \" + node.name)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "[  1]     Documents of user1\n",
        "[  3]         Project for Alexandre\n",
        "[  4]         Other project for Alexandre\n",
        "[  2]     Documents of user2\n",
        "[  5]         Project for Mat\n",
        "[  6]         Other project for Mat\n"
       ]
      }
     ],
     "prompt_number": 15
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 15
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "root1.get_children().first().add_child(name=\"Un doc au pif\", user_id=1, type_id=1)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 16,
       "text": [
        "<Node: Un doc au pif>"
       ]
      }
     ],
     "prompt_number": 16
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "for node in root1.get_descendants():\n",
      "    print(node.depth*\"    \" + node.name)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "        Project for Alexandre\n",
        "            Un doc au pif\n",
        "        Other project for Alexandre\n"
       ]
      }
     ],
     "prompt_number": 17
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "Node.dump_bulk()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 18,
       "text": [
        "[{'data': {'file': '',\n",
        "   'type': 2,\n",
        "   'metadata': {},\n",
        "   'date': datetime.date(2014, 10, 8),\n",
        "   'name': 'Documents of user1',\n",
        "   'user': 1},\n",
        "  'children': [{'data': {'file': '',\n",
        "     'type': 1,\n",
        "     'metadata': {},\n",
        "     'date': datetime.date(2014, 10, 8),\n",
        "     'name': 'Project for Alexandre',\n",
        "     'user': 1},\n",
        "    'children': [{'data': {'file': '',\n",
        "       'type': 1,\n",
        "       'metadata': {},\n",
        "       'date': datetime.date(2014, 10, 8),\n",
        "       'name': 'Un doc au pif',\n",
        "       'user': 1},\n",
        "      'id': 7}],\n",
        "    'id': 3},\n",
        "   {'data': {'file': '',\n",
        "     'type': 1,\n",
        "     'metadata': {},\n",
        "     'date': datetime.date(2014, 10, 8),\n",
        "     'name': 'Other project for Alexandre',\n",
        "     'user': 1},\n",
        "    'id': 4}],\n",
        "  'id': 1},\n",
        " {'data': {'file': '',\n",
        "   'type': 2,\n",
        "   'metadata': {},\n",
        "   'date': datetime.date(2014, 10, 8),\n",
        "   'name': 'Documents of user2',\n",
        "   'user': 2},\n",
        "  'children': [{'data': {'file': '',\n",
        "     'type': 1,\n",
        "     'metadata': {},\n",
        "     'date': datetime.date(2014, 10, 8),\n",
        "     'name': 'Project for Mat',\n",
        "     'user': 2},\n",
        "    'id': 5},\n",
        "   {'data': {'file': '',\n",
        "     'type': 1,\n",
        "     'metadata': {},\n",
        "     'date': datetime.date(2014, 10, 8),\n",
        "     'name': 'Other project for Mat',\n",
        "     'user': 2},\n",
        "    'id': 6}],\n",
        "  'id': 2}]"
       ]
      }
     ],
     "prompt_number": 18
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "Node.load_bulk([{\"data\":{\"name\":\"test\",\"user_id\":1,\"type_id\":1}}])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 19,
       "text": [
        "[8]"
       ]
      }
     ],
     "prompt_number": 19
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 19
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "n = Node.objects.all()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "len(n)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 2,
       "text": [
        "0"
       ]
      }
     ],
     "prompt_number": 2
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "NodeType.objects.all()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 32,
       "text": [
        "[<NodeType: Root>, <NodeType: Project>, <NodeType: Document>]"
       ]
      }
     ],
     "prompt_number": 32
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "r = NodeType.objects.get(name='Root')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 34
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "Node.objects.filter(type=r)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "SELECT \"node_node\".\"id\", \"node_node\".\"path\", \"node_node\".\"depth\", \"node_node\".\"numchild\", \"node_node\".\"user_id\", \"node_node\".\"type_id\", \"node_node\".\"name\", \"node_node\".\"date\", \"node_node\".\"file\", \"node_node\".\"metadata\" FROM \"node_node\" WHERE \"node_node\".\"type_id\" = 26  ORDER BY \"node_node\".\"path\" ASC\n"
       ]
      }
     ],
     "prompt_number": 39
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from django.contrib.auth.models import User"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 21
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "User.objects.all()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 23,
       "text": [
        "[<User: alexandre>, <User: mat>, <User: User #0>, <User: User #1>]"
       ]
      }
     ],
     "prompt_number": 23
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "User.objects.filter(id=1)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 24,
       "text": [
        "[<User: alexandre>]"
       ]
      }
     ],
     "prompt_number": 24
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from django.contrib import admin"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 40
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "help(admin.site.register)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Help on method register in module django.contrib.admin.sites:\n",
        "\n",
        "register(model_or_iterable, admin_class=None, **options) method of django.contrib.admin.sites.AdminSite instance\n",
        "    Registers the given model(s) with the given admin class.\n",
        "    \n",
        "    The model(s) should be Model classes, not instances.\n",
        "    \n",
        "    If an admin class isn't given, it will use ModelAdmin (the default\n",
        "    admin options). If keyword arguments are given -- e.g., list_display --\n",
        "    they'll be applied as options to the admin class.\n",
        "    \n",
        "    If a model is already registered, this will raise AlreadyRegistered.\n",
        "    \n",
        "    If a model is abstract, this will raise ImproperlyConfigured.\n",
        "\n"
       ]
      }
     ],
     "prompt_number": 41
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 39
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 104,
       "text": [
        "[]"
       ]
      }
     ],
     "prompt_number": 104
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "root.add_child(name=\"Corpus Press\", user_id=1, type_id=2)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 107,
       "text": [
        "<Node: Corpus Press>"
       ]
      }
     ],
     "prompt_number": 107
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "parent = Node.add_root(name=\"Test parent\", user_id=1, type_id=1)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 113
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "child1 = Node(name=\"Test child\", user_id=1, type_id=2)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 114
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "parent.add_child(instance=child1)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 116,
       "text": [
        "<Node: Test child>"
       ]
      }
     ],
     "prompt_number": 116
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "parent.get_children()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 117,
       "text": [
        "[<Node: Corpus Presse>, <Node: Test child>]"
       ]
      }
     ],
     "prompt_number": 117
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "child2= Node(name=\"Test child\", user_id=1, type_id=2)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 119
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "child1.add_child(instance=child2)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 120,
       "text": [
        "<Node: Test child>"
       ]
      }
     ],
     "prompt_number": 120
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "child1.get_children()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 121,
       "text": [
        "[<Node: Test child>]"
       ]
      }
     ],
     "prompt_number": 121
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "root.get_children()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 112,
       "text": [
        "[<Node: Corpus Twitter>, <Node: Corpus Twitter>]"
       ]
      }
     ],
     "prompt_number": 112
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "root.save()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 111
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "s = root.get_siblings()\n",
      "s"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 67,
       "text": [
        "[<Node: Projet 4>, <Node: Projet 4>, <Node: Projet 5>, <Node: Projet 1>, <Node: Projet 2>, <Node: Projet 3>]"
       ]
      }
     ],
     "prompt_number": 67
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "project = s[0]\n",
      "project.get_siblings()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 99,
       "text": [
        "[<Node: Projet 2>, <Node: Projet 3>, <Node: Projet 4>, <Node: Projet 4>, <Node: Projet 5>, <Node: Corpus Press>, <Node: Corpus Twitter>, <Node: Projet 1>]"
       ]
      }
     ],
     "prompt_number": 99
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "project.add_sibling(name=\"Corpus Twitter\", user_id=1, type_id=1)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 90,
       "text": [
        "<Node: Corpus Twitter>"
       ]
      }
     ],
     "prompt_number": 90
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "project.add_child(name=\"Corpus Twitter\", user_id=1, type_id=1)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 101,
       "text": [
        "<Node: Corpus Twitter>"
       ]
      }
     ],
     "prompt_number": 101
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "project.save()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 102
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "project.get_children()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 103,
       "text": [
        "[<Node: Corpus 1>, <Node: Corpus 3>, <Node: Corpus 2>, <Node: Corpus 2>, <Node: Corpus 2>, <Node: Corpus 3>, <Node: Corpus Presse>]"
       ]
      }
     ],
     "prompt_number": 103
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "children = r.get_children()\n",
      "children"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 74,
       "text": [
        "[<Node: Corpus Twitter>]"
       ]
      }
     ],
     "prompt_number": 74
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "child = children[1]\n",
      "child"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 63,
       "text": [
        "<Node: Corpus Science>"
       ]
      }
     ],
     "prompt_number": 63
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "child.delete()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 64
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "child.save()\n",
      "child"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 65,
       "text": [
        "<Node: Corpus Science>"
       ]
      }
     ],
     "prompt_number": 65
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "Node.dump_bulk()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "ename": "KeyError",
       "evalue": "3",
       "output_type": "pyerr",
       "traceback": [
        "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
        "\u001b[1;32m<ipython-input-66-a6f171e65441>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mNode\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdump_bulk\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
        "\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/treebeard/ns_tree.py\u001b[0m in \u001b[0;36mdump_bulk\u001b[1;34m(cls, parent, keep_ids)\u001b[0m\n\u001b[0;32m    590\u001b[0m             \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    591\u001b[0m                 \u001b[0mparentobj\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpyobj\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_parent\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 592\u001b[1;33m                 \u001b[0mparentser\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mlnk\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mparentobj\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpk\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    593\u001b[0m                 \u001b[1;32mif\u001b[0m \u001b[1;34m'children'\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mparentser\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    594\u001b[0m                     \u001b[0mparentser\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'children'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
        "\u001b[1;31mKeyError\u001b[0m: 3"
       ]
      }
     ],
     "prompt_number": 66
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "help(root.get_siblings)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Help on method get_siblings in module treebeard.ns_tree:\n",
        "\n",
        "get_siblings() method of node.models.Node instance\n",
        "    :returns: A queryset of all the node's siblings, including the node\n",
        "        itself.\n",
        "\n"
       ]
      }
     ],
     "prompt_number": 32
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "n = s[0]\n",
      "n.get_parent()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 22
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "Node.get_root_nodes()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 28,
       "text": [
        "[<Node: Projet 3>, <Node: Projet 2>, <Node: Projet 1>]"
       ]
      }
     ],
     "prompt_number": 28
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "help(Node.get_root_nodes())"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Help on HStoreQuerySet in module django_hstore.query object:\n",
        "\n",
        "class HStoreQuerySet(django.db.models.query.QuerySet)\n",
        " |  Method resolution order:\n",
        " |      HStoreQuerySet\n",
        " |      django.db.models.query.QuerySet\n",
        " |      builtins.object\n",
        " |  \n",
        " |  Methods defined here:\n",
        " |  \n",
        " |  __init__(self, model=None, query=None, using=None, *args, **kwargs)\n",
        " |  \n",
        " |  hkeys = selector(self, *args, **params)\n",
        " |  \n",
        " |  hpeek = selector(self, *args, **params)\n",
        " |  \n",
        " |  hremove = updater(self, *args, **params)\n",
        " |  \n",
        " |  hslice = selector(self, *args, **params)\n",
        " |  \n",
        " |  hupdate = updater(self, *args, **params)\n",
        " |  \n",
        " |  ----------------------------------------------------------------------\n",
        " |  Methods inherited from django.db.models.query.QuerySet:\n",
        " |  \n",
        " |  __and__(self, other)\n",
        " |  \n",
        " |  __deepcopy__(self, memo)\n",
        " |      Deep copy of a QuerySet doesn't populate the cache\n",
        " |  \n",
        " |  __getitem__(self, k)\n",
        " |      Retrieves an item or slice from the set of results.\n",
        " |  \n",
        " |  __getstate__(self)\n",
        " |      Allows the QuerySet to be pickled.\n",
        " |  \n",
        " |  __iter__(self)\n",
        " |      The queryset iterator protocol uses three nested iterators in the\n",
        " |      default case:\n",
        " |          1. sql.compiler:execute_sql()\n",
        " |             - Returns 100 rows at time (constants.GET_ITERATOR_CHUNK_SIZE)\n",
        " |               using cursor.fetchmany(). This part is responsible for\n",
        " |               doing some column masking, and returning the rows in chunks.\n",
        " |          2. sql/compiler.results_iter()\n",
        " |             - Returns one row at time. At this point the rows are still just\n",
        " |               tuples. In some cases the return values are converted to\n",
        " |               Python values at this location (see resolve_columns(),\n",
        " |               resolve_aggregate()).\n",
        " |          3. self.iterator()\n",
        " |             - Responsible for turning the rows into model objects.\n",
        " |  \n",
        " |  __len__(self)\n",
        " |  \n",
        " |  __nonzero__(self)\n",
        " |  \n",
        " |  __or__(self, other)\n",
        " |  \n",
        " |  __repr__(self)\n",
        " |  \n",
        " |  aggregate(self, *args, **kwargs)\n",
        " |      Returns a dictionary containing the calculations (aggregation)\n",
        " |      over the current queryset\n",
        " |      \n",
        " |      If args is present the expression is passed as a kwarg using\n",
        " |      the Aggregate object's default alias.\n",
        " |  \n",
        " |  all(self)\n",
        " |      Returns a new QuerySet that is a copy of the current one. This allows a\n",
        " |      QuerySet to proxy for a model manager in some cases.\n",
        " |  \n",
        " |  annotate(self, *args, **kwargs)\n",
        " |      Return a query set in which the returned objects have been annotated\n",
        " |      with data aggregated from related fields.\n",
        " |  \n",
        " |  bulk_create(self, objs, batch_size=None)\n",
        " |      Inserts each of the instances into the database. This does *not* call\n",
        " |      save() on each of the instances, does not send any pre/post save\n",
        " |      signals, and does not set the primary key attribute if it is an\n",
        " |      autoincrement field.\n",
        " |  \n",
        " |  complex_filter(self, filter_obj)\n",
        " |      Returns a new QuerySet instance with filter_obj added to the filters.\n",
        " |      \n",
        " |      filter_obj can be a Q object (or anything with an add_to_query()\n",
        " |      method) or a dictionary of keyword lookup arguments.\n",
        " |      \n",
        " |      This exists to support framework features such as 'limit_choices_to',\n",
        " |      and usually it will be more natural to use other methods.\n",
        " |  \n",
        " |  count(self)\n",
        " |      Performs a SELECT COUNT() and returns the number of records as an\n",
        " |      integer.\n",
        " |      \n",
        " |      If the QuerySet is already fully cached this simply returns the length\n",
        " |      of the cached results set to avoid multiple SELECT COUNT(*) calls.\n",
        " |  \n",
        " |  create(self, **kwargs)\n",
        " |      Creates a new object with the given kwargs, saving it to the database\n",
        " |      and returning the created object.\n",
        " |  \n",
        " |  dates(self, field_name, kind, order='ASC')\n",
        " |      Returns a list of date objects representing all available dates for\n",
        " |      the given field_name, scoped to 'kind'.\n",
        " |  \n",
        " |  datetimes(self, field_name, kind, order='ASC', tzinfo=None)\n",
        " |      Returns a list of datetime objects representing all available\n",
        " |      datetimes for the given field_name, scoped to 'kind'.\n",
        " |  \n",
        " |  defer(self, *fields)\n",
        " |      Defers the loading of data for certain fields until they are accessed.\n",
        " |      The set of fields to defer is added to any existing set of deferred\n",
        " |      fields. The only exception to this is if None is passed in as the only\n",
        " |      parameter, in which case all deferrals are removed (None acts as a\n",
        " |      reset option).\n",
        " |  \n",
        " |  delete(self)\n",
        " |      Deletes the records in the current QuerySet.\n",
        " |  \n",
        " |  distinct(self, *field_names)\n",
        " |      Returns a new QuerySet instance that will select only distinct results.\n",
        " |  \n",
        " |  earliest(self, field_name=None)\n",
        " |  \n",
        " |  exclude(self, *args, **kwargs)\n",
        " |      Returns a new QuerySet instance with NOT (args) ANDed to the existing\n",
        " |      set.\n",
        " |  \n",
        " |  exists(self)\n",
        " |  \n",
        " |  extra(self, select=None, where=None, params=None, tables=None, order_by=None, select_params=None)\n",
        " |      Adds extra SQL fragments to the query.\n",
        " |  \n",
        " |  filter(self, *args, **kwargs)\n",
        " |      Returns a new QuerySet instance with the args ANDed to the existing\n",
        " |      set.\n",
        " |  \n",
        " |  first(self)\n",
        " |      Returns the first object of a query, returns None if no match is found.\n",
        " |  \n",
        " |  get(self, *args, **kwargs)\n",
        " |      Performs the query and returns a single object matching the given\n",
        " |      keyword arguments.\n",
        " |  \n",
        " |  get_or_create(self, **kwargs)\n",
        " |      Looks up an object with the given kwargs, creating one if necessary.\n",
        " |      Returns a tuple of (object, created), where created is a boolean\n",
        " |      specifying whether an object was created.\n",
        " |  \n",
        " |  in_bulk(self, id_list)\n",
        " |      Returns a dictionary mapping each of the given IDs to the object with\n",
        " |      that ID.\n",
        " |  \n",
        " |  iterator(self)\n",
        " |      An iterator over the results from applying this QuerySet to the\n",
        " |      database.\n",
        " |  \n",
        " |  last(self)\n",
        " |      Returns the last object of a query, returns None if no match is found.\n",
        " |  \n",
        " |  latest(self, field_name=None)\n",
        " |  \n",
        " |  none(self)\n",
        " |      Returns an empty QuerySet.\n",
        " |  \n",
        " |  only(self, *fields)\n",
        " |      Essentially, the opposite of defer. Only the fields passed into this\n",
        " |      method and that are not already specified as deferred are loaded\n",
        " |      immediately when the queryset is evaluated.\n",
        " |  \n",
        " |  order_by(self, *field_names)\n",
        " |      Returns a new QuerySet instance with the ordering changed.\n",
        " |  \n",
        " |  prefetch_related(self, *lookups)\n",
        " |      Returns a new QuerySet instance that will prefetch the specified\n",
        " |      Many-To-One and Many-To-Many related objects when the QuerySet is\n",
        " |      evaluated.\n",
        " |      \n",
        " |      When prefetch_related() is called more than once, the list of lookups to\n",
        " |      prefetch is appended to. If prefetch_related(None) is called, the\n",
        " |      the list is cleared.\n",
        " |  \n",
        " |  reverse(self)\n",
        " |      Reverses the ordering of the QuerySet.\n",
        " |  \n",
        " |  select_for_update(self, **kwargs)\n",
        " |      Returns a new QuerySet instance that will select objects with a\n",
        " |      FOR UPDATE lock.\n",
        " |  \n",
        " |  select_related(self, *fields, **kwargs)\n",
        " |      Returns a new QuerySet instance that will select related objects.\n",
        " |      \n",
        " |      If fields are specified, they must be ForeignKey fields and only those\n",
        " |      related objects are included in the selection.\n",
        " |      \n",
        " |      If select_related(None) is called, the list is cleared.\n",
        " |  \n",
        " |  update(self, **kwargs)\n",
        " |      Updates all elements in the current QuerySet, setting all the given\n",
        " |      fields to the appropriate values.\n",
        " |  \n",
        " |  using(self, alias)\n",
        " |      Selects which database this QuerySet should excecute its query against.\n",
        " |  \n",
        " |  values(self, *fields)\n",
        " |  \n",
        " |  values_list(self, *fields, **kwargs)\n",
        " |  \n",
        " |  ----------------------------------------------------------------------\n",
        " |  Data descriptors inherited from django.db.models.query.QuerySet:\n",
        " |  \n",
        " |  __dict__\n",
        " |      dictionary for instance variables (if defined)\n",
        " |  \n",
        " |  __weakref__\n",
        " |      list of weak references to the object (if defined)\n",
        " |  \n",
        " |  db\n",
        " |      Return the database that will be used if this query is executed now\n",
        " |  \n",
        " |  ordered\n",
        " |      Returns True if the QuerySet is ordered -- i.e. has an order_by()\n",
        " |      clause or a default ordering on the model.\n",
        " |  \n",
        " |  ----------------------------------------------------------------------\n",
        " |  Data and other attributes inherited from django.db.models.query.QuerySet:\n",
        " |  \n",
        " |  value_annotation = True\n",
        "\n"
       ]
      }
     ],
     "prompt_number": 29
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import nltk\n",
      "from nltk.stem.snowball import EnglishStemmer\n",
      "stemmer = EnglishStemmer()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 5
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from collections import defaultdict"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 6
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import zipfile"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "zipfile.is_zipfile(\"/tmp/date.zip\")"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 2,
       "text": [
        "False"
       ]
      }
     ],
     "prompt_number": 2
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "with zipfile.ZipFile(\"/tmp/date.zip\", 'r') as f:\n",
      "    for x in f.namelist():\n",
      "        print(x)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "ename": "FileNotFoundError",
       "evalue": "[Errno 2] No such file or directory: '/tmp/date.zip'",
       "output_type": "pyerr",
       "traceback": [
        "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
        "\u001b[1;32m<ipython-input-3-62bd2ffbe177>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[1;32mwith\u001b[0m \u001b[0mzipfile\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mZipFile\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"/tmp/date.zip\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'r'\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      2\u001b[0m     \u001b[1;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnamelist\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      3\u001b[0m         \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
        "\u001b[1;32m/usr/lib/python3.4/zipfile.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, file, mode, compression, allowZip64)\u001b[0m\n\u001b[0;32m    921\u001b[0m             \u001b[0mmodeDict\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m{\u001b[0m\u001b[1;34m'r'\u001b[0m \u001b[1;33m:\u001b[0m \u001b[1;34m'rb'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'w'\u001b[0m\u001b[1;33m:\u001b[0m \u001b[1;34m'wb'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'a'\u001b[0m \u001b[1;33m:\u001b[0m \u001b[1;34m'r+b'\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    922\u001b[0m             \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 923\u001b[1;33m                 \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfp\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mio\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfile\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmodeDict\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mmode\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    924\u001b[0m             \u001b[1;32mexcept\u001b[0m \u001b[0mOSError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    925\u001b[0m                 \u001b[1;32mif\u001b[0m \u001b[0mmode\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m'a'\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
        "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '/tmp/date.zip'"
       ]
      }
     ],
     "prompt_number": 3
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from lxml import etree"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 5
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "help(etree.parse)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Help on built-in function parse in module lxml.etree:\n",
        "\n",
        "parse(...)\n",
        "    parse(source, parser=None, base_url=None)\n",
        "    \n",
        "    Return an ElementTree object loaded with source elements.  If no parser\n",
        "    is provided as second argument, the default parser is used.\n",
        "    \n",
        "    The ``source`` can be any of the following:\n",
        "    \n",
        "    - a file name/path\n",
        "    - a file object\n",
        "    - a file-like object\n",
        "    - a URL using the HTTP or FTP protocol\n",
        "    \n",
        "    To parse from a string, use the ``fromstring()`` function instead.\n",
        "    \n",
        "    Note that it is generally faster to parse from a file path or URL\n",
        "    than from an open file object or file-like object.  Transparent\n",
        "    decompression from gzip compressed sources is supported (unless\n",
        "    explicitly disabled in libxml2).\n",
        "    \n",
        "    The ``base_url`` keyword allows setting a URL for the document\n",
        "    when parsing from a file-like object.  This is needed when looking\n",
        "    up external entities (DTD, XInclude, ...) with relative paths.\n",
        "\n"
       ]
      }
     ],
     "prompt_number": 6
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "# Importation\n",
      "## Europresse"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "c = gargantext.bdd.Europresse()\n",
      "c.add(\"/home/alexandre/projets/abeilles/documents/Europresse/html/\")"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 7
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "for doc in c:\n",
      "    d = documents.models.Document()\n",
      "    d.project_id = \"1\"\n",
      "    d.corpus_id = \"1\"\n",
      "    d.analyst_id = \"1\"\n",
      "    try:\n",
      "        d.uniqu_id = doc[\"object_id\"]\n",
      "        d.date = doc[\"date\"]\n",
      "        d.title = doc[\"title\"]\n",
      "        d.authors = doc[\"authors\"]\n",
      "        d.text = doc[\"text\"]\n",
      "        d.source = doc[\"source\"]\n",
      "        d.save()\n",
      "    except:\n",
      "        pass"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 88
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## ISI (todo)"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "projects = documents.models.Project.objects.all()\n",
      "for p in projects:\n",
      "    corpora = documents.models.Corpus.objects.filter(project_id=p.id)\n",
      "    print(p.id, p.title)\n",
      "    for c in corpora:\n",
      "        print(\"|_\", c.id,\":\", c.title)\n",
      "    print(\"\")"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "3 Hola Ebola\n",
        "\n",
        "2 Fukushima again\n",
        "|_ 7 : Test\n",
        "\n",
        "4 Thanks anthrax\n",
        "\n",
        "1 Bees swarm\n",
        "|_ 9 : Bees tweets\n",
        "|_ 4 : Health academic publications\n",
        "|_ 2 : bees and (pesticides or chemicals or neocotinoids)\n",
        "|_ 1 : Quand les abeilles meurent, les articles sont compt\u00e9s\n",
        "\n",
        "6 CIRDEM\n",
        "|_ 8 : Zotero fichier du 9 sept.\n",
        "\n"
       ]
      }
     ],
     "prompt_number": 7
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def extractNgrams(corpus_pk=1):\n",
      "    corpus = documents.models.Corpus.objects.get(pk=1)\n",
      "    data = gargantext.Corpus()\n",
      "    docs = data.query('''select *  from documents_document\n",
      "                        where corpus_id = %d\n",
      "                        limit 90;''' % corpus_pk)\n",
      "    words = gargantext.Ngrams()\n",
      "    words.get(docs, key='text', unique_id=\"unique_id\")\n",
      "    return(words)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 29
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "data = gargantext.Corpus()\n",
      "docs = data.query('''select *  from documents_document\n",
      "                        where corpus_id = %d\n",
      "                        limit 9;''' % 1)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 6
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def ngram(terms):\n",
      "    stems = stemmer.stem(terms)\n",
      "    n = len(stems.split(\" \"))\n",
      "    ngram = documents.models.Ngram.objects.get_or_create(terms = terms,\\\n",
      "                                                 stem = stems,\\\n",
      "                                                 n= n)\n",
      "    return(ngram[0])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 7
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "doc = documents.models.Document.objects.get(pk = 9103)\n",
      "doc.title"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 8,
       "text": [
        "'France/R\u00e9gent TS: contr\u00f4le judiciaire annul\u00e9 pour BASF, confirm\u00e9 pour Bayer'"
       ]
      }
     ],
     "prompt_number": 8
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "gram = ngram(\"de\")"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 41
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def addNgram2doc(gram, doc):\n",
      "    ngramDoc = documents.models.NgramDocument.objects.get_or_create(terms=gram,\\\n",
      "                                                           document = doc,\\\n",
      "                                                           defaults={'occurrences':0})[0]\n",
      "    ngramDoc.occurrences = F('occurrences') + 1\n",
      "    ngramDoc.save()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 40
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "# Insert ngrams"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "l = set()\n",
      "d = defaultdict(lambda : defaultdict(lambda: defaultdict(lambda: defaultdict(int))))\n",
      "\n",
      "docs = documents.models.Document.objects.all().filter(project_id=1)[:1000] \n",
      "\n",
      "for doc in docs:\n",
      "    sentences = nltk.sent_tokenize(doc.text)\n",
      "    for sentence in sentences:\n",
      "        words = nltk.wordpunct_tokenize(sentence)\n",
      "        #print(len(words))\n",
      "        for word in words:\n",
      "            stems = stemmer.stem(word)\n",
      "            new = (word, stems, len(stems.split(\" \")))\n",
      "            l.add(new)\n",
      "            \n",
      "            d[word][doc.id]['count'] = d[word][doc.id].get('count', 0) + 1\n",
      "            \n",
      "\n",
      "new_grams = [documents.models.Ngram(terms=x[0], stem=x[1], n=x[2]) for x in l]\n",
      "new_gramDoc = [ documents.models.NgramDocumentTemporary(terms=k, document=pk, occurrences=d[k][pk]['count']) \\\n",
      "               for k in d.keys() \\\n",
      "               for pk in d[k].keys()\\\n",
      "               ]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 13
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "documents.models.NgramTemporary.objects.bulk_create(new_grams)\n",
      "documents.models.NgramDocumentTemporary.objects.bulk_create(new_gramDoc)\n",
      "\n",
      "from django.db import connection\n",
      "cursor = connection.cursor()\n",
      "# LOCK TABLE documents_ngramtemporary IN EXCLUSIVE MODE;\n",
      "query_string = \"\"\"\n",
      "                 INSERT INTO documents_ngram \n",
      "                 SELECT * FROM documents_ngramtemporary WHERE NOT EXISTS \n",
      "                 ( SELECT 1 FROM documents_ngram WHERE \n",
      "                 documents_ngram.terms = documents_ngramtemporary.terms);\n",
      "                 \n",
      "                 delete from documents_ngramtemporary;\n",
      "                 \n",
      "                 INSERT INTO \n",
      "                 documents_ngramdocument (terms_id, document_id, occurrences)\n",
      "                 SELECT \n",
      "                 GT.id, DT.id, NDT.occurrences \n",
      "                 FROM \n",
      "                 documents_ngramdocumenttemporary as NDT \n",
      "                 INNER JOIN documents_document AS DT ON DT.id = NDT.document \n",
      "                 INNER JOIN documents_ngram AS GT ON GT.terms = NDT.terms ;\n",
      "                 \n",
      "                 delete from documents_ngramdocumenttemporary;\n",
      "             \"\"\"\n",
      "cursor.execute(query_string)\n",
      "\n",
      "try:\n",
      "    while True:\n",
      "        row = cursor.fetchone()\n",
      "        if row is None:\n",
      "            break\n",
      "        print(row)\n",
      "except:\n",
      "    pass"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 14
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "# Insert NgramsDoc"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "\n",
      "SELECT t1.terms_id, t2.terms_id, COUNT(*) AS c, t3.project_id\n",
      "FROM documents_ngramdocument AS t1\n",
      "\n",
      "INNER JOIN documents_ngramdocument AS t2\n",
      "ON t1.document_id = t2.document_id\n",
      "\n",
      "INNER JOIN documents_corpus\n",
      "\n",
      "GROUP BY t1.terms_id, t2.terms_id;\n",
      "\n",
      "# add corpus_id in column !"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 20
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#Coocurrences"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 75
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 76
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "curs.execute(\"select * from documents_project;\")\n",
      "curs.fetchone()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 77,
       "text": [
        "(3, datetime.date(2014, 9, 8), 1, 'Hola Ebola', 'Dance with the risks', {})"
       ]
      }
     ],
     "prompt_number": 77
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# anything can be used as a file if it has .read() and .readline() methods\n",
      "import io\n",
      "data = io.StringIO()\n",
      "data.write('\\n'.join(['Test\\tretest\\t2',\n",
      "                  'Madonna\\tMado\\t45',\n",
      "                  'Federico\\tDi Gregorio\\t3']))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 81,
       "text": [
        "52"
       ]
      }
     ],
     "prompt_number": 81
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "data.seek(0)\n",
      "curs.copy_from(data, 'documents_ngramtemporary', columns=('terms', 'stem', 'n'))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "ename": "InternalError",
       "evalue": "ERREUR:  la transaction est annul\u00e9e, les commandes sont ignor\u00e9es jusqu'\u00e0 la fin du bloc\nde la transaction\n",
       "output_type": "pyerr",
       "traceback": [
        "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mInternalError\u001b[0m                             Traceback (most recent call last)",
        "\u001b[1;32m<ipython-input-82-a9cf2ba7718f>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mseek\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mcurs\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcopy_from\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'documents_ngramtemporary'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'terms'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'stem'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'n'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
        "\u001b[1;31mInternalError\u001b[0m: ERREUR:  la transaction est annul\u00e9e, les commandes sont ignor\u00e9es jusqu'\u00e0 la fin du bloc\nde la transaction\n"
       ]
      }
     ],
     "prompt_number": 82
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 57
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 57
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 57
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 12
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 15
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 12
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 0
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 69
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 20
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 5
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 73
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 58
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 61
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 64
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 65
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 12
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 12
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 12
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 12
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 12
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 78
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 12
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "#REDIS"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "BROKER_URL = 'redis://localhost:6379/0'\n",
      "# redis://:password@hostname:port/db_number"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "BROKER_TRANSPORT_OPTIONS = {'visibility_timeout': 3600}  # 1 hour."
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 2
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "CELERY_RESULT_BACKEND = 'redis://localhost:6379/0'"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 3
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "BROKER_TRANSPORT_OPTIONS = {'fanout_prefix': True}"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 4
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "BROKER_TRANSPORT_OPTIONS = {'visibility_timeout': 43200}"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 5
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from __future__ import absolute_import\n",
      "\n",
      "from celery import Celery\n",
      "\n",
      "app = Celery('proj',\n",
      "             broker='redis://localhost:6379/0',\n",
      "             backend='redis://localhost:6379/0',\n",
      "             include=['proj.tasks'])\n",
      "\n",
      "# Optional configuration, see the application user guide.\n",
      "app.conf.update(\n",
      "    CELERY_TASK_RESULT_EXPIRES=3600,\n",
      ")\n",
      "\n",
      "if __name__ == '__main__':\n",
      "    pass#app.start()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 9
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from __future__ import absolute_import\n",
      "\n",
      "\n",
      "@app.task\n",
      "def add(x, y):\n",
      "    return x + y\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 10
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "app.send_task(add(3, 1000))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 15,
       "text": [
        "<AsyncResult: c4807752-eb28-4e0f-b8b9-fce8267bddd3>"
       ]
      }
     ],
     "prompt_number": 15
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "celery -A proj worker --loglevel=info"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "ename": "SyntaxError",
       "evalue": "invalid syntax (<ipython-input-16-5806eb0c4fe2>, line 1)",
       "output_type": "pyerr",
       "traceback": [
        "\u001b[1;36m  File \u001b[1;32m\"<ipython-input-16-5806eb0c4fe2>\"\u001b[1;36m, line \u001b[1;32m1\u001b[0m\n\u001b[1;33m    celery -A proj worker --loglevel=info\u001b[0m\n\u001b[1;37m                 ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n"
       ]
      }
     ],
     "prompt_number": 16
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}