Commit 3ffa1c51 authored by Administrator's avatar Administrator

[TESTS] SQL NUMPY array from nodes.

parent 3e12e57f
{
"metadata": {
"name": "",
"signature": "sha256:61ddb09ee5403d49059e3152719d000f65e90207d8cef75dd6d0dab23af8cd8b"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from django.contrib.auth.models import User\n",
"from django.db import transaction\n",
"\n",
"from node.models import Node, Ngram, Node_Ngram, NodeType, NodeNgramNgram"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"user = User.objects.get(username='alexandre')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"corpus = Node.objects.get(name='PubMed')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cr\u00e9ation des Listes"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Look up the two list node types, then create one list node of each kind under the corpus.\n",
"whitelist_type, blacklist_type = [\n",
"    NodeType.objects.get(name=type_name) for type_name in ('WhiteList', 'BlackList')\n",
"]\n",
"\n",
"white_node = Node.objects.create(\n",
"    name='WhiteList Pubmed', user=user, parent=corpus, type=whitelist_type\n",
")\n",
"black_node = Node.objects.create(\n",
"    name='BlackList Pubmed', user=user, parent=corpus, type=blacklist_type\n",
")"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Node_Ngram.objects.filter(node=white_node).count()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 97,
"text": [
"6111"
]
}
],
"prompt_number": 97
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cr\u00e9ation de la white list"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Seed the white list with the ngrams of the first 100 Node_Ngram rows,\n",
"# inserting them inside a single transaction.\n",
"with transaction.atomic():\n",
"    for node_ngram_object in Node_Ngram.objects.all()[:100]:\n",
"        Node_Ngram.objects.create(node=white_node, ngram=node_ngram_object.ngram, occurences=1)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 131
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Node_Ngram.objects.filter(node=white_node)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 132,
"text": [
"[<Node_Ngram: WhiteList Pubmed: infectious diseases>, <Node_Ngram: WhiteList Pubmed: animal kingdoms>, <Node_Ngram: WhiteList Pubmed: plant>, <Node_Ngram: WhiteList Pubmed: tobacco ringspot>, <Node_Ngram: WhiteList Pubmed: host populations>, <Node_Ngram: WhiteList Pubmed: bee hemolymph>, <Node_Ngram: WhiteList Pubmed: virions>, <Node_Ngram: WhiteList Pubmed: infections>, <Node_Ngram: WhiteList Pubmed: transkingdom host alteration>, <Node_Ngram: WhiteList Pubmed: virus>, <Node_Ngram: WhiteList Pubmed: phylogenetic analysis>, <Node_Ngram: WhiteList Pubmed: negative impact>, <Node_Ngram: WhiteList Pubmed: varroa mites>, <Node_Ngram: WhiteList Pubmed: significant source>, <Node_Ngram: WhiteList Pubmed: winter>, <Node_Ngram: WhiteList Pubmed: gastric cecum>, <Node_Ngram: WhiteList Pubmed: intracellular life cycle>, <Node_Ngram: WhiteList Pubmed: threat>, <Node_Ngram: WhiteList Pubmed: trsv>, <Node_Ngram: WhiteList Pubmed: spread>, '...(remaining elements truncated)...']"
]
}
],
"prompt_number": 132
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cr\u00e9ation de la black list"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Seed the black list with the ngrams of Node_Ngram rows 101..149,\n",
"# inserting them inside a single transaction.\n",
"# NOTE(review): the white list cell takes rows [:100], so row 100 ends up in\n",
"# neither list -- confirm whether [100:150] was intended.\n",
"with transaction.atomic():\n",
"    for node_ngram_object in Node_Ngram.objects.all()[101:150]:\n",
"        Node_Ngram.objects.create(node=black_node, ngram=node_ngram_object.ngram, occurences=1)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Node_Ngram.objects.filter(node=black_node)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 12,
"text": [
"[<Node_Ngram: BlackList Pubmed: complete cessation>, <Node_Ngram: BlackList Pubmed: bee viruses>, <Node_Ngram: BlackList Pubmed: honey bee colonies>, <Node_Ngram: BlackList Pubmed: virus resistance>, <Node_Ngram: BlackList Pubmed: several honey bee viruses>, <Node_Ngram: BlackList Pubmed: experimental protocol>, <Node_Ngram: BlackList Pubmed: triggers>, <Node_Ngram: BlackList Pubmed: rna viruses>, <Node_Ngram: BlackList Pubmed: molecular pattern>, <Node_Ngram: BlackList Pubmed: correlates>, <Node_Ngram: BlackList Pubmed: honey bees>, <Node_Ngram: BlackList Pubmed: ccd>, <Node_Ngram: BlackList Pubmed: colonies>, <Node_Ngram: BlackList Pubmed: pathogens>, <Node_Ngram: BlackList Pubmed: viral pathogen>, <Node_Ngram: BlackList Pubmed: numerous agricultural crops>, <Node_Ngram: BlackList Pubmed: our results>, <Node_Ngram: BlackList Pubmed: infection>, <Node_Ngram: BlackList Pubmed: administration>, <Node_Ngram: BlackList Pubmed: work>, '...(remaining elements truncated)...']"
]
}
],
"prompt_number": 12
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cr\u00e9ation des synonymes"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Create the 'Syno Pubmed' node of type 'Synonyme' under the corpus.\n",
"syno_type = NodeType.objects.get(name='Synonyme')\n",
"syno_node = Node.objects.create(name='Syno Pubmed', user=user, parent=corpus, type=syno_type)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"synonyme1, synonyme2 = Node_Ngram.objects.filter(node=white_node)[3:5]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 23
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"NodeNgramNgram.objects.create(node=syno_node, ngramX=synonyme1.ngram, ngramY=synonyme2.ngram)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 24,
"text": [
"<NodeNgramNgram: Syno Pubmed: onset / process>"
]
}
],
"prompt_number": 24
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cooccurrence"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"white_node.children.count()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 87,
"text": [
"0"
]
}
],
"prompt_number": 87
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"black_node.pk"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 33,
"text": [
"174"
]
}
],
"prompt_number": 33
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cooc_type = NodeType.objects.get(name='Cooccurrence')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 26
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cooc = Node.objects.create(user=user, parent=corpus, type=cooc_type, name=\"Cooccurrences calcul Alpha\")"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 28
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cooc.pk"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 29,
"text": [
"177"
]
}
],
"prompt_number": 29
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"white_node.children.all().delete()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 35
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from django.db import connection\n",
"\n",
"# Fill the cooccurrence node: one row per ordered pair of distinct ngrams found\n",
"# among the white list's Node_Ngram rows, scored by the number of joined rows.\n",
"# NOTE(review): both sides of the join are restricted to the white list node, so\n",
"# this pairs ngrams of that single node -- confirm this is the intended measure.\n",
"# `cooc` must still be the Cooccurrence Node created above: its pk is written to\n",
"# node_nodengramngram.node_id, which has a foreign key to node_node.\n",
"query_string = \"\"\"\n",
"INSERT INTO node_nodengramngram (node_id, \"ngramX_id\", \"ngramY_id\", score)\n",
"\n",
"SELECT\n",
"%s AS node_id, x.ngram_id, y.ngram_id, COUNT(*) AS score\n",
"\n",
"FROM\n",
"node_node_ngram AS x\n",
"\n",
"INNER JOIN\n",
"node_node_ngram AS y\n",
"ON x.node_id = y.node_id\n",
"\n",
"WHERE\n",
"x.id in (select id from node_node_ngram WHERE node_id = %s )\n",
"AND\n",
"y.id in (select id from node_node_ngram WHERE node_id = %s )\n",
"AND\n",
"x.ngram_id <> y.ngram_id\n",
"\n",
"GROUP BY\n",
"x.ngram_id, y.ngram_id\n",
"\n",
"LIMIT 100\n",
"\"\"\"\n",
"\n",
"cursor = connection.cursor()\n",
"# The ids are bound by the database driver instead of being formatted into the SQL text.\n",
"cursor.execute(query_string, [cooc.pk, white_node.pk, white_node.pk])\n",
"\n",
"# An INSERT without RETURNING produces no result set to fetch from,\n",
"# so show the number of inserted rows instead.\n",
"cursor.rowcount"
],
"language": "python",
"metadata": {},
"outputs": [
{
"ename": "IntegrityError",
"evalue": "ERREUR: une instruction insert ou update sur la table \u00ab node_nodengramngram \u00bb viole la contrainte de cl\u00e9\n\u00e9trang\u00e8re \u00ab node_nodengramngram_node_id_fkey \u00bb\nDETAIL: La cl\u00e9 (node_id)=(6409333) n'est pas pr\u00e9sente dans la table \u00ab node_node \u00bb.\n",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mIntegrityError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-133-26412084c03e>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 31\u001b[0m \"\"\" % (cooc.pk, white_node.pk, white_node.pk)\n\u001b[0;32m 32\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 33\u001b[1;33m \u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mquery_string\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 34\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 35\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/backends/util.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, sql, params)\u001b[0m\n\u001b[0;32m 67\u001b[0m \u001b[0mstart\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 68\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 69\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0msuper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mCursorDebugWrapper\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 70\u001b[0m \u001b[1;32mfinally\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 71\u001b[0m \u001b[0mstop\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/backends/util.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, sql, params)\u001b[0m\n\u001b[0;32m 51\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 52\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 53\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 54\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 55\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mexecutemany\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparam_list\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/utils.py\u001b[0m in \u001b[0;36m__exit__\u001b[1;34m(self, exc_type, exc_value, traceback)\u001b[0m\n\u001b[0;32m 97\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mdj_exc_type\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32min\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mDataError\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mIntegrityError\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 98\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwrapper\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0merrors_occurred\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 99\u001b[1;33m \u001b[0msix\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreraise\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdj_exc_type\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdj_exc_value\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtraceback\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 100\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 101\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/utils/six.py\u001b[0m in \u001b[0;36mreraise\u001b[1;34m(tp, value, tb)\u001b[0m\n\u001b[0;32m 547\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mreraise\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtp\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtb\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 548\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__traceback__\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mtb\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 549\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwith_traceback\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtb\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 550\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 551\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/backends/util.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, sql, params)\u001b[0m\n\u001b[0;32m 49\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdb\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwrap_database_errors\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 50\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mparams\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 51\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 52\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 53\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mIntegrityError\u001b[0m: ERREUR: une instruction insert ou update sur la table \u00ab node_nodengramngram \u00bb viole la contrainte de cl\u00e9\n\u00e9trang\u00e8re \u00ab node_nodengramngram_node_id_fkey \u00bb\nDETAIL: La cl\u00e9 (node_id)=(6409333) n'est pas pr\u00e9sente dans la table \u00ab node_node \u00bb.\n"
]
}
],
"prompt_number": 133
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 47
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 52
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import numpy as np\n",
"import pandas as pd\n",
"from collections import defaultdict\n",
"matrix = defaultdict(lambda : defaultdict(float))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 107
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 98,
"text": [
"[<NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / animal kingdoms>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / plant>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / tobacco ringspot>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / host populations>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / bee hemolymph>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / virions>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / infections>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / transkingdom host alteration>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / virus>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / phylogenetic analysis>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / animal kingdoms>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / plant>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / tobacco ringspot>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / host populations>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / bee hemolymph>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / virions>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / infections>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / transkingdom host alteration>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / virus>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / phylogenetic analysis>, '...(remaining elements truncated)...']"
]
}
],
"prompt_number": 98
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Keep only the pairs scored above 10, stored as matrix[ngramX terms][ngramY terms].\n",
"# The loop variable is deliberately not named `cooc`: rebinding that name would replace\n",
"# the Cooccurrence node with a NodeNgramNgram row for every later cell and every re-run.\n",
"for pair in NodeNgramNgram.objects.filter(node=cooc):\n",
"    if pair.score > 10:\n",
"        matrix[pair.ngramX.terms][pair.ngramY.terms] = pair.score"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 125
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"matrix = pd.DataFrame(matrix).T.fillna(0)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 126
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"matrix"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>animal kingdoms</th>\n",
" <th>infectious diseases</th>\n",
" <th>plant</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>animal kingdoms</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>apis mellifera</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>bees</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>cause</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ccd</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ceranae</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>collapse</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>colonies</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>colony collapse disorder</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>colony collapse disorder ( ccd )</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>deformed wing</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dwv</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>evidence</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>furthermore</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>health</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>hives</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>honey bee</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>honey bee colonies</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>honey bees</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>honeybees</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>iapv</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>in</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>infection</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>infections</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>infectious diseases</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>israeli acute paralysis</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>losses</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>n</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>nosema</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>nosema ceranae</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>pathogens</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>pesticides</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>plant</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>presence</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>prevalence</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>rna viruses</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>samples</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>study</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>transmission</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>united states</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>varroa</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>virus</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>viruses</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>winter</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 127,
"text": [
" animal kingdoms infectious diseases plant\n",
"animal kingdoms 0 0 0\n",
"apis mellifera 0 0 0\n",
"bees 0 0 0\n",
"cause 0 0 0\n",
"ccd 0 0 0\n",
"ceranae 0 0 0\n",
"collapse 0 0 0\n",
"colonies 0 0 0\n",
"colony collapse disorder 0 0 0\n",
"colony collapse disorder ( ccd ) 0 0 0\n",
"deformed wing 0 0 0\n",
"dwv 0 0 0\n",
"evidence 0 0 0\n",
"furthermore 0 0 0\n",
"health 0 0 0\n",
"hives 0 0 0\n",
"honey bee 0 0 0\n",
"honey bee colonies 0 0 0\n",
"honey bees 0 0 0\n",
"honeybees 0 0 0\n",
"iapv 0 0 0\n",
"in 0 0 0\n",
"infection 0 0 0\n",
"infections 0 0 0\n",
"infectious diseases 0 0 0\n",
"israeli acute paralysis 0 0 0\n",
"losses 0 0 0\n",
"n 0 0 0\n",
"nosema 0 0 0\n",
"nosema ceranae 0 0 0\n",
"pathogens 0 0 0\n",
"pesticides 0 0 0\n",
"plant 0 0 0\n",
"presence 0 0 0\n",
"prevalence 0 0 0\n",
"rna viruses 0 0 0\n",
"samples 0 0 0\n",
"study 0 0 0\n",
"transmission 0 0 0\n",
"united states 0 0 0\n",
"varroa 0 0 0\n",
"virus 0 0 0\n",
"viruses 0 0 0\n",
"winter 0 0 0"
]
}
],
"prompt_number": 127
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Normalise each row by its own total. A plain `matrix / matrix.sum(axis=1)` aligns the\n",
"# row totals on the column labels instead of dividing row by row, hence the explicit axis=0.\n",
"# Rows whose total is 0 give 0/0; they are reset to 0.\n",
"matrix = matrix.div(matrix.sum(axis=1), axis=0).fillna(0)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 123
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"matrix"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>animal kingdoms</th>\n",
" <th>apis mellifera</th>\n",
" <th>bees</th>\n",
" <th>cause</th>\n",
" <th>ccd</th>\n",
" <th>ceranae</th>\n",
" <th>collapse</th>\n",
" <th>colonies</th>\n",
" <th>colony collapse disorder</th>\n",
" <th>colony collapse disorder ( ccd )</th>\n",
" <th>...</th>\n",
" <th>prevalence</th>\n",
" <th>rna viruses</th>\n",
" <th>samples</th>\n",
" <th>study</th>\n",
" <th>transmission</th>\n",
" <th>united states</th>\n",
" <th>varroa</th>\n",
" <th>virus</th>\n",
" <th>viruses</th>\n",
" <th>winter</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>animal kingdoms</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>infectious diseases</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>plant</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3 rows \u00d7 44 columns</p>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 124,
"text": [
" animal kingdoms apis mellifera bees cause ccd \\\n",
"animal kingdoms NaN NaN NaN NaN NaN \n",
"infectious diseases NaN NaN NaN NaN NaN \n",
"plant NaN NaN NaN NaN NaN \n",
"\n",
" ceranae collapse colonies colony collapse disorder \\\n",
"animal kingdoms NaN NaN NaN NaN \n",
"infectious diseases NaN NaN NaN NaN \n",
"plant NaN NaN NaN NaN \n",
"\n",
" colony collapse disorder ( ccd ) \\\n",
"animal kingdoms NaN \n",
"infectious diseases NaN \n",
"plant NaN \n",
"\n",
" ... prevalence \\\n",
"animal kingdoms ... NaN \n",
"infectious diseases ... NaN \n",
"plant ... NaN \n",
"\n",
" rna viruses samples study transmission united states \\\n",
"animal kingdoms NaN NaN NaN NaN NaN \n",
"infectious diseases NaN NaN NaN NaN NaN \n",
"plant NaN NaN NaN NaN NaN \n",
"\n",
" varroa virus viruses winter \n",
"animal kingdoms NaN NaN NaN NaN \n",
"infectious diseases NaN NaN NaN NaN \n",
"plant NaN NaN NaN NaN \n",
"\n",
"[3 rows x 44 columns]"
]
}
],
"prompt_number": 124
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
\ No newline at end of file
{
"metadata": {
"name": "",
"signature": "sha256:3f50b37062cddfbe31615a30e24c87da3b196cf7a8ee08a57c80ab210e8bd205"
"signature": "sha256:61ddb09ee5403d49059e3152719d000f65e90207d8cef75dd6d0dab23af8cd8b"
},
"nbformat": 3,
"nbformat_minor": 0,
......@@ -64,6 +64,26 @@
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Node_Ngram.objects.filter(node=white_node).count()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 97,
"text": [
"6111"
]
}
],
"prompt_number": 97
},
{
"cell_type": "markdown",
"metadata": {},
......@@ -82,7 +102,7 @@
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
"prompt_number": 131
},
{
"cell_type": "code",
......@@ -96,13 +116,13 @@
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 9,
"prompt_number": 132,
"text": [
"[<Node_Ngram: WhiteList Pubmed: information>, <Node_Ngram: WhiteList Pubmed: agricultural species>, <Node_Ngram: WhiteList Pubmed: number>, <Node_Ngram: WhiteList Pubmed: onset>, <Node_Ngram: WhiteList Pubmed: process>, <Node_Ngram: WhiteList Pubmed: relationship>, <Node_Ngram: WhiteList Pubmed: health>, <Node_Ngram: WhiteList Pubmed: \" possible>, <Node_Ngram: WhiteList Pubmed: \" neonicotinoid>, <Node_Ngram: WhiteList Pubmed: criteria>, <Node_Ngram: WhiteList Pubmed: several candidate causes>, <Node_Ngram: WhiteList Pubmed: effort>, <Node_Ngram: WhiteList Pubmed: research>, <Node_Ngram: WhiteList Pubmed: full causal analysis>, <Node_Ngram: WhiteList Pubmed: formal causal analysis approach>, <Node_Ngram: WhiteList Pubmed: article>, <Node_Ngram: WhiteList Pubmed: \" probable>, <Node_Ngram: WhiteList Pubmed: contributing factor>, <Node_Ngram: WhiteList Pubmed: colony collapse disorder>, <Node_Ngram: WhiteList Pubmed: apis mellifera>, '...(remaining elements truncated)...']"
"[<Node_Ngram: WhiteList Pubmed: infectious diseases>, <Node_Ngram: WhiteList Pubmed: animal kingdoms>, <Node_Ngram: WhiteList Pubmed: plant>, <Node_Ngram: WhiteList Pubmed: tobacco ringspot>, <Node_Ngram: WhiteList Pubmed: host populations>, <Node_Ngram: WhiteList Pubmed: bee hemolymph>, <Node_Ngram: WhiteList Pubmed: virions>, <Node_Ngram: WhiteList Pubmed: infections>, <Node_Ngram: WhiteList Pubmed: transkingdom host alteration>, <Node_Ngram: WhiteList Pubmed: virus>, <Node_Ngram: WhiteList Pubmed: phylogenetic analysis>, <Node_Ngram: WhiteList Pubmed: negative impact>, <Node_Ngram: WhiteList Pubmed: varroa mites>, <Node_Ngram: WhiteList Pubmed: significant source>, <Node_Ngram: WhiteList Pubmed: winter>, <Node_Ngram: WhiteList Pubmed: gastric cecum>, <Node_Ngram: WhiteList Pubmed: intracellular life cycle>, <Node_Ngram: WhiteList Pubmed: threat>, <Node_Ngram: WhiteList Pubmed: trsv>, <Node_Ngram: WhiteList Pubmed: spread>, '...(remaining elements truncated)...']"
]
}
],
"prompt_number": 9
"prompt_number": 132
},
{
"cell_type": "markdown",
......@@ -148,7 +168,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cr\u00e9ation des syonymes"
"# Cr\u00e9ation des synonymes"
]
},
{
......@@ -208,21 +228,750 @@
"cell_type": "code",
"collapsed": false,
"input": [
"white_node.pk"
"white_node.children.count()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 87,
"text": [
"0"
]
}
],
"prompt_number": 87
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"black_node.pk"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 33,
"text": [
"174"
]
}
],
"prompt_number": 33
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cooc_type = NodeType.objects.get(name='Cooccurrence')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 26
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cooc = Node.objects.create(user=user, parent=corpus, type=cooc_type, name=\"Cooccurrences calcul Alpha\")"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 28
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cooc.pk"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 29,
"text": [
"177"
]
}
],
"prompt_number": 29
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"white_node.children.all().delete()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 35
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from django.db import connection\n",
"cursor = connection.cursor()\n",
"# LOCK TABLE documents_ngramtemporary IN EXCLUSIVE MODE;\n",
"query_string = \"\"\"\n",
"INSERT INTO node_nodengramngram (node_id, \"ngramX_id\", \"ngramY_id\", score)\n",
"\n",
"SELECT \n",
"%d as node_id, x.ngram_id, y.ngram_id, COUNT(*) AS score\n",
"\n",
"FROM\n",
"node_node_ngram AS x\n",
"\n",
"INNER JOIN \n",
"node_node_ngram AS y \n",
"ON x.node_id = y.node_id\n",
"\n",
"\n",
"WHERE\n",
"x.id in (select id from node_node_ngram WHERE node_id = %d )\n",
"AND\n",
"y.id in (select id from node_node_ngram WHERE node_id = %d )\n",
"AND\n",
"x.ngram_id <> y.ngram_id\n",
"\n",
"\n",
"GROUP BY\n",
"x.ngram_id, y.ngram_id\n",
"\n",
"LIMIT 100\n",
"\n",
" \"\"\" % (cooc.pk, white_node.pk, white_node.pk)\n",
"\n",
"cursor.execute(query_string)\n",
"\n",
"try:\n",
" while True:\n",
" row = cursor.fetchone()\n",
" if row is None:\n",
" break\n",
" print(row)\n",
"except:\n",
" pass"
],
"language": "python",
"metadata": {},
"outputs": [
{
"ename": "IntegrityError",
"evalue": "ERREUR: une instruction insert ou update sur la table \u00ab node_nodengramngram \u00bb viole la contrainte de cl\u00e9\n\u00e9trang\u00e8re \u00ab node_nodengramngram_node_id_fkey \u00bb\nDETAIL: La cl\u00e9 (node_id)=(6409333) n'est pas pr\u00e9sente dans la table \u00ab node_node \u00bb.\n",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mIntegrityError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-133-26412084c03e>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 31\u001b[0m \"\"\" % (cooc.pk, white_node.pk, white_node.pk)\n\u001b[0;32m 32\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 33\u001b[1;33m \u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mquery_string\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 34\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 35\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/backends/util.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, sql, params)\u001b[0m\n\u001b[0;32m 67\u001b[0m \u001b[0mstart\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 68\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 69\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0msuper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mCursorDebugWrapper\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 70\u001b[0m \u001b[1;32mfinally\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 71\u001b[0m \u001b[0mstop\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/backends/util.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, sql, params)\u001b[0m\n\u001b[0;32m 51\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 52\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 53\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 54\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 55\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mexecutemany\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparam_list\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/utils.py\u001b[0m in \u001b[0;36m__exit__\u001b[1;34m(self, exc_type, exc_value, traceback)\u001b[0m\n\u001b[0;32m 97\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mdj_exc_type\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32min\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mDataError\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mIntegrityError\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 98\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwrapper\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0merrors_occurred\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 99\u001b[1;33m \u001b[0msix\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreraise\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdj_exc_type\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdj_exc_value\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtraceback\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 100\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 101\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/utils/six.py\u001b[0m in \u001b[0;36mreraise\u001b[1;34m(tp, value, tb)\u001b[0m\n\u001b[0;32m 547\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mreraise\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtp\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtb\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 548\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__traceback__\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mtb\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 549\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwith_traceback\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtb\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 550\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 551\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/backends/util.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, sql, params)\u001b[0m\n\u001b[0;32m 49\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdb\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwrap_database_errors\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 50\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mparams\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 51\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 52\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 53\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mIntegrityError\u001b[0m: ERREUR: une instruction insert ou update sur la table \u00ab node_nodengramngram \u00bb viole la contrainte de cl\u00e9\n\u00e9trang\u00e8re \u00ab node_nodengramngram_node_id_fkey \u00bb\nDETAIL: La cl\u00e9 (node_id)=(6409333) n'est pas pr\u00e9sente dans la table \u00ab node_node \u00bb.\n"
]
}
],
"prompt_number": 133
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 47
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 52
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import numpy as np\n",
"import pandas as pd\n",
"from collections import defaultdict\n",
"matrix = defaultdict(lambda : defaultdict(float))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 107
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 98,
"text": [
"[<NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / animal kingdoms>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / plant>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / tobacco ringspot>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / host populations>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / bee hemolymph>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / virions>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / infections>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / transkingdom host alteration>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / virus>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / phylogenetic analysis>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / animal kingdoms>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / plant>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / tobacco ringspot>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / host populations>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / bee hemolymph>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / virions>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / infections>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / transkingdom host alteration>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / virus>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / phylogenetic analysis>, '...(remaining elements truncated)...']"
]
}
],
"prompt_number": 98
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for cooc in NodeNgramNgram.objects.filter(node=cooc):\n",
" if cooc.score > 10:\n",
" #print(x.ngramX.terms, x.ngramY.terms)\n",
" matrix[cooc.ngramX.terms][cooc.ngramY.terms] = x.score\n",
" "
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 125
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"matrix = pd.DataFrame(matrix).T.fillna(0)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 126
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"matrix"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>animal kingdoms</th>\n",
" <th>infectious diseases</th>\n",
" <th>plant</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>animal kingdoms</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>apis mellifera</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>bees</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>cause</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ccd</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ceranae</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>collapse</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>colonies</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>colony collapse disorder</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>colony collapse disorder ( ccd )</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>deformed wing</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dwv</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>evidence</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>furthermore</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>health</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>hives</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>honey bee</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>honey bee colonies</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>honey bees</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>honeybees</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>iapv</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>in</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>infection</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>infections</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>infectious diseases</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>israeli acute paralysis</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>losses</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>n</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>nosema</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>nosema ceranae</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>pathogens</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>pesticides</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>plant</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>presence</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>prevalence</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>rna viruses</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>samples</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>study</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>transmission</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>united states</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>varroa</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>virus</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>viruses</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>winter</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 127,
"text": [
" animal kingdoms infectious diseases plant\n",
"animal kingdoms 0 0 0\n",
"apis mellifera 0 0 0\n",
"bees 0 0 0\n",
"cause 0 0 0\n",
"ccd 0 0 0\n",
"ceranae 0 0 0\n",
"collapse 0 0 0\n",
"colonies 0 0 0\n",
"colony collapse disorder 0 0 0\n",
"colony collapse disorder ( ccd ) 0 0 0\n",
"deformed wing 0 0 0\n",
"dwv 0 0 0\n",
"evidence 0 0 0\n",
"furthermore 0 0 0\n",
"health 0 0 0\n",
"hives 0 0 0\n",
"honey bee 0 0 0\n",
"honey bee colonies 0 0 0\n",
"honey bees 0 0 0\n",
"honeybees 0 0 0\n",
"iapv 0 0 0\n",
"in 0 0 0\n",
"infection 0 0 0\n",
"infections 0 0 0\n",
"infectious diseases 0 0 0\n",
"israeli acute paralysis 0 0 0\n",
"losses 0 0 0\n",
"n 0 0 0\n",
"nosema 0 0 0\n",
"nosema ceranae 0 0 0\n",
"pathogens 0 0 0\n",
"pesticides 0 0 0\n",
"plant 0 0 0\n",
"presence 0 0 0\n",
"prevalence 0 0 0\n",
"rna viruses 0 0 0\n",
"samples 0 0 0\n",
"study 0 0 0\n",
"transmission 0 0 0\n",
"united states 0 0 0\n",
"varroa 0 0 0\n",
"virus 0 0 0\n",
"viruses 0 0 0\n",
"winter 0 0 0"
]
}
],
"prompt_number": 127
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"matrix /= matrix.sum(axis=1)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 123
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"matrix"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>animal kingdoms</th>\n",
" <th>apis mellifera</th>\n",
" <th>bees</th>\n",
" <th>cause</th>\n",
" <th>ccd</th>\n",
" <th>ceranae</th>\n",
" <th>collapse</th>\n",
" <th>colonies</th>\n",
" <th>colony collapse disorder</th>\n",
" <th>colony collapse disorder ( ccd )</th>\n",
" <th>...</th>\n",
" <th>prevalence</th>\n",
" <th>rna viruses</th>\n",
" <th>samples</th>\n",
" <th>study</th>\n",
" <th>transmission</th>\n",
" <th>united states</th>\n",
" <th>varroa</th>\n",
" <th>virus</th>\n",
" <th>viruses</th>\n",
" <th>winter</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>animal kingdoms</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>infectious diseases</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>plant</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3 rows \u00d7 44 columns</p>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 25,
"prompt_number": 124,
"text": [
"173"
" animal kingdoms apis mellifera bees cause ccd \\\n",
"animal kingdoms NaN NaN NaN NaN NaN \n",
"infectious diseases NaN NaN NaN NaN NaN \n",
"plant NaN NaN NaN NaN NaN \n",
"\n",
" ceranae collapse colonies colony collapse disorder \\\n",
"animal kingdoms NaN NaN NaN NaN \n",
"infectious diseases NaN NaN NaN NaN \n",
"plant NaN NaN NaN NaN \n",
"\n",
" colony collapse disorder ( ccd ) \\\n",
"animal kingdoms NaN \n",
"infectious diseases NaN \n",
"plant NaN \n",
"\n",
" ... prevalence \\\n",
"animal kingdoms ... NaN \n",
"infectious diseases ... NaN \n",
"plant ... NaN \n",
"\n",
" rna viruses samples study transmission united states \\\n",
"animal kingdoms NaN NaN NaN NaN NaN \n",
"infectious diseases NaN NaN NaN NaN NaN \n",
"plant NaN NaN NaN NaN NaN \n",
"\n",
" varroa virus viruses winter \n",
"animal kingdoms NaN NaN NaN NaN \n",
"infectious diseases NaN NaN NaN NaN \n",
"plant NaN NaN NaN NaN \n",
"\n",
"[3 rows x 44 columns]"
]
}
],
"prompt_number": 25
"prompt_number": 124
},
{
"cell_type": "code",
......
{
"metadata": {
"name": "",
"signature": "sha256:92c58fbc1aad2501dd486f1bfd40ad7fbe605f697fea30c71f3c0a31a36766cf"
"signature": "sha256:d0ac96b232bdca40d2b67ddfc85c941e41c3760733e29c981ec727196317e1a1"
},
"nbformat": 3,
"nbformat_minor": 0,
......@@ -1619,6 +1619,16 @@
"outputs": [],
"prompt_number": 20
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"select t1.terms_id , \n"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
......
......@@ -2,7 +2,7 @@ from django.contrib import admin
from django.forms import ModelForm, ModelChoiceField
from nested_inlines.admin import NestedModelAdmin, NestedStackedInline, NestedTabularInline
from node.models import NodeType, Language, Node, Project, Corpus, Document, DatabaseType, Resource
from node.models import NodeType, Language, Node, Project, Corpus, Document, DatabaseType, Resource, Node_Ngram
class ResourceInLine(admin.TabularInline):
model = Resource
......@@ -137,4 +137,5 @@ admin.site.register(Project, ProjectAdmin)
admin.site.register(Corpus, CorpusAdmin)
admin.site.register(Document, DocumentAdmin)
admin.site.register(Node_Ngram)
SELECT
177 as node_id, x.ngram_id as ngramX_id, y.ngram_id as ngramY_id, COUNT(*) AS score
id, 177 as node_id, x.ngram_id as ngramX_id, y.ngram_id as ngramY_id, COUNT(*) AS score
FROM
node_node_ngram AS x
......
INSERT INTO node_nodengramngram (node_id, "ngramX_id", "ngramY_id", score)
SELECT
177 as node_id, x.ngram_id as ngramX_id, y.ngram_id as ngramY_id, COUNT(*) AS score
177 as node_id, x.ngram_id, y.ngram_id, COUNT(*) AS score
FROM
node_node_ngram AS x
......@@ -21,4 +23,4 @@ x.ngram_id <> y.ngram_id
GROUP BY
x.ngram_id, y.ngram_id
LIMIT 10
LIMIT 1000
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment