Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
6b0e0d41
Commit
6b0e0d41
authored
Nov 20, 2014
by
Administrator
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[TEST DEV] Towards the graphs.
parent
339886ab
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
1653 additions
and
80 deletions
+1653
-80
SQL_TESTS.ipynb
SQL_TESTS.ipynb
+200
-80
WorkFlow2.ipynb
WorkFlow2.ipynb
+1453
-0
No files found.
SQL_TESTS.ipynb
View file @
6b0e0d41
{
"metadata": {
"name": "",
"signature": "sha256:
33c2f41e3ea5983e768350b4012544242c5df9b394091647362f00929812a921
"
"signature": "sha256:
65f487ee62067486e4f832ed088fe02ede3daa27052c9dcaf58b3edffa169245
"
},
"nbformat": 3,
"nbformat_minor": 0,
...
...
@@ -27,6 +27,17 @@
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"me = User.objects.get(username='alexandre')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
...
...
@@ -37,7 +48,7 @@
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number":
15
"prompt_number":
2
},
{
"cell_type": "code",
...
...
@@ -48,7 +59,7 @@
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number":
1
"prompt_number":
3
},
{
"cell_type": "code",
...
...
@@ -227,12 +238,12 @@
"collapsed": false,
"input": [
"#\u00a0corpus = Node.objects.filter(type=typeCorpus).first()\n",
"corpus = Node.objects.get(id=
44338
)"
"corpus = Node.objects.get(id=
13064
)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number":
4
"prompt_number":
3
},
{
"cell_type": "heading",
...
...
@@ -279,30 +290,30 @@
"output_type": "stream",
"stream": "stdout",
"text": [
"'
2004/01/02
','2'\n",
"'
2004/01/03
','1'\n",
"'
2004/01/06','8
'\n",
"'
2004/01/07','5
'\n",
"'
2004/01/08','8
'\n",
"'
2004/01/09','1
'\n",
"'
2004/01/10','3
'\n",
"'
2004/01/12
','2'\n",
"'
2004/01/13','6
'\n",
"'
2004/01/15','2
'\n",
"'
2004/01/16','1
'\n",
"'
2004/01/17','5
'\n",
"'
2004/01/19','2
'\n",
"'
2004/01/20','2
'\n",
"'
2004/01/21','7
'\n",
"'
2004/01/23','1
'\n",
"'
2004/01/24','
4'\n",
"'
2004/01/25','4
'\n",
"'
2004/01/26','5
'\n",
"'
2004/01/27','2
'\n"
"'
1954/11/18
','2'\n",
"'
1958/11/18
','1'\n",
"'
1959/11/18','1
'\n",
"'
1968/11/18','1
'\n",
"'
1969/11/18','2
'\n",
"'
1971/11/18','4
'\n",
"'
1972/11/18','1
'\n",
"'
1974/11/18
','2'\n",
"'
1975/11/18','3
'\n",
"'
1976/11/18','1
'\n",
"'
1977/11/18','6
'\n",
"'
1978/11/18','11
'\n",
"'
1979/11/18','9
'\n",
"'
1980/11/18','6
'\n",
"'
1981/11/18','4
'\n",
"'
1982/11/18','7
'\n",
"'
1983/11/18','1
4'\n",
"'
1984/11/18','17
'\n",
"'
1985/11/18','18
'\n",
"'
1986/02/21','1
'\n"
]
}
],
"prompt_number":
37
"prompt_number":
6
},
{
"cell_type": "code",
...
...
@@ -320,12 +331,17 @@
" node_ngram AS ngX ON ngX.id = nngX.ngram_id\n",
" WHERE\n",
" n.parent_id = %s\n",
" AND\n",
" ngX.n >= 2\n",
" GROUP BY\n",
" ngX.terms\n",
" Having\n",
" COUNT(*) > 7\n",
" ORDER BY\n",
" occurrences DESC\n",
" LIMIT\n",
" 20\n",
" 100\n",
" \n",
"\"\"\", [corpus.id])\n",
"\n",
"while True:\n",
...
...
@@ -341,30 +357,74 @@
"output_type": "stream",
"stream": "stdout",
"text": [
"(196, 'patients')\n",
"(135, 'voice')\n",
"(129, 'study')\n",
"(111, 'disease')\n",
"(69, 'treatment')\n",
"(66, 'life')\n",
"(58, 'patient')\n",
"(53, 'quality')\n",
"(49, 'care')\n",
"(45, 'use')\n",
"(44, 'Patients')\n",
"(43, 'people')\n",
"(41, 'development')\n",
"(41, 'purpose')\n",
"(40, 's disease')\n",
"(39, 's')\n",
"(38, 'results')\n",
"(37, 'diagnosis')\n",
"(36, 'years')\n",
"(34, 'women')\n"
"(138, 'honey bees')\n",
"(132, 'apis mellifera')\n",
"(69, 'honey bee')\n",
"(66, 'apis mellifera l')\n",
"(45, 'pesticide residues')\n",
"(39, 'gas chromatography')\n",
"(36, 'varroa destructor')\n",
"(36, 'honey bee colonies')\n",
"(30, 'sublethal effects')\n",
"(27, 'apidae )')\n",
"(21, 'neonicotinoid insecticides')\n",
"(21, 'honey bee ( hymenoptera')\n",
"(18, 'bee products')\n",
"(18, 'megachile rotundata')\n",
"(18, 'solid-phase extraction')\n",
"(18, 'simultaneous determination')\n",
"(18, 'mass spectrometric')\n",
"(15, 'case study')\n",
"(15, 'honey samples')\n",
"(15, 'liquid chromatography')\n",
"(15, 'high performance liquid chromatography')\n",
"(15, 'varroa mites')\n",
"(12, 'organochlorine pesticides')\n",
"(12, 'gas chromatography-mass spectrometry')\n",
"(12, 'liquid chromatography-mass spectrometry')\n",
"(12, 'colony health')\n",
"(12, 'gas chromatographic')\n",
"(12, 'colony collapse disorder')\n",
"(12, 'bumble bees')\n",
"(12, 'varroa jacobsoni')\n",
"(9, 'chemiluminescent elisa')\n",
"(9, 'diversionary plantings for reduction of pesticide related bee mortality')\n",
"(9, 'pesticides and law')\n",
"(9, 'plant protection products')\n",
"(9, 'nomia melanderi')\n",
"(9, 'electron-capture detection')\n",
"(9, 'managed pollinator cap coordinated agricultural project a national research')\n",
"(9, 'apis florea f')\n",
"(9, 'solid-phase microextraction')\n",
"(9, 'extension initiative')\n",
"(9, 'crop pollination')\n",
"(9, 'non-apis bees')\n",
"(9, 'honey bees ( apis mellifera')\n",
"(9, 'liquid chromatography-tandem mass spectrometry')\n",
"(9, 'bee pollen')\n",
"(9, 'foraging behavior')\n",
"(9, 'biological control')\n",
"(9, 'nosema ceranae')\n",
"(9, 'organophosphorus pesticides')\n",
"(9, 'field conditions')\n",
"(9, 'honey bee apis mellifera l')\n",
"(9, 'laboratory tests')\n",
"(9, 'beauveria bassiana')\n",
"(9, 'comparative toxicity')\n",
"(9, 'high levels')\n",
"(9, 'pesticide exposure')\n",
"(9, 'fluvalinate residues')\n",
"(9, 'insecticide residues')\n",
"(9, 'osmia lignaria')\n",
"(9, 'bombus impatiens')\n",
"(9, 'honey bee health')\n",
"(9, 'agricultural landscape')\n",
"(9, 'dispersive liquid-liquid microextraction')\n",
"(9, 'matrix solid-phase dispersion')\n"
]
}
],
"prompt_number":
104
"prompt_number":
28
},
{
"cell_type": "heading",
...
...
@@ -455,7 +515,7 @@
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number":
21
"prompt_number":
17
},
{
"cell_type": "code",
...
...
@@ -470,7 +530,7 @@
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number":
22
"prompt_number":
18
},
{
"cell_type": "code",
...
...
@@ -485,7 +545,7 @@
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number":
23
"prompt_number":
19
},
{
"cell_type": "code",
...
...
@@ -497,7 +557,7 @@
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
4
"prompt_number": 2
2
},
{
"cell_type": "code",
...
...
@@ -514,7 +574,7 @@
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
5
"prompt_number": 2
3
},
{
"cell_type": "code",
...
...
@@ -528,13 +588,13 @@
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 2
6
,
"prompt_number": 2
4
,
"text": [
"6"
"6
1297
"
]
}
],
"prompt_number": 2
6
"prompt_number": 2
4
},
{
"cell_type": "heading",
...
...
@@ -592,7 +652,7 @@
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number":
47
"prompt_number":
26
},
{
"cell_type": "heading",
...
...
@@ -627,7 +687,60 @@
" WHERE\n",
" n.parent_id = %s\n",
" AND\n",
" nngX.ngram_id > nngY.ngram_id\n",
" nngX.ngram_id in (select id from node_node_ngram WHERE node_id = 61298 )\n",
" AND\n",
" nngY.ngram_id in (select id from node_node_ngram WHERE node_id = 61298 )\n",
" AND\n",
" nngX.ngram_id <> nngY.ngram_id\n",
" \n",
" GROUP BY\n",
" ngX.id,\n",
" ngX.terms,\n",
" ngY.id,\n",
" ngY.terms\n",
" ORDER BY\n",
" cooccurrences DESC\n",
" LIMIT\n",
" 200\n",
"\"\"\", [corpus.id])\n",
"\n",
"while True:\n",
" row = cursor.fetchone()\n",
" if row is None:\n",
" break\n",
" print(row)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 15
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cursor.execute(\"\"\"\n",
" SELECT\n",
" COUNT(*) AS cooccurrences,\n",
" ngX.terms,\n",
" ngY.terms\n",
" FROM\n",
" node_node AS n\n",
" \n",
" INNER JOIN\n",
" node_node_ngram AS nngX ON nngX.node_id = n.id\n",
" INNER JOIN\n",
" node_ngram AS ngX ON ngX.id = nngX.ngram_id\n",
" \n",
" INNER JOIN\n",
" node_node_ngram AS nngY ON nngY.node_id = n.id\n",
" INNER JOIN\n",
" node_ngram AS ngY ON ngY.id = nngY.ngram_id\n",
"\n",
" WHERE\n",
" n.parent_id = %s\n",
" AND\n",
" nngX.ngram_id <> nngY.ngram_id\n",
" \n",
" GROUP BY\n",
" ngX.id,\n",
...
...
@@ -651,32 +764,39 @@
"outputs": [
{
"output_type": "stream",
"stream": "std
out
",
"stream": "std
err
",
"text": [
"(98, 'patients', 'study')\n",
"(88, 'patients', 'disease')\n",
"(78, 'voice', 'patients')\n",
"(76, 'Parkinson', 's disease')\n",
"(64, 'life', 'patients')\n",
"(62, 'life', 'quality')\n",
"(60, 'treatment', 'patients')\n",
"(56, 'patient', 'patients')\n",
"(56, 'voice', 'study')\n",
"(54, 'Patients', 'patients')\n",
"(54, 'purpose', 'study')\n",
"(54, 'voice', 'disease')\n",
"(52, 'study', 'disease')\n",
"(48, 'voice', 'treatment')\n",
"(46, 'treatment', 'disease')\n",
"(42, 'quality', 'patients')\n",
"(42, 'life', 'study')\n",
"(40, 'care', 'patients')\n",
"(40, 'PD', 'Parkinson')\n",
"(40, 'PD', 's disease')\n"
"ERROR: An unexpected error occurred while tokenizing input\n",
"The following traceback may be corrupted or invalid\n",
"The error message is: ('EOF in multi-line string', (1, 0))\n",
"\n"
]
},
{
"ename": "OperationalError",
"evalue": "arr\u00eat des connexions suite \u00e0 la demande de l'administrateur\nSSL connection has been closed unexpectedly\n",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mOperationalError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-11-752593da5735>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 31\u001b[0m \u001b[0mLIMIT\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 32\u001b[0m \u001b[1;36m20\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 33\u001b[1;33m \"\"\", [corpus.id])\n\u001b[0m\u001b[0;32m 34\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 35\u001b[0m \u001b[1;32mwhile\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/backends/util.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, sql, params)\u001b[0m\n\u001b[0;32m 67\u001b[0m \u001b[0mstart\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 68\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 69\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0msuper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mCursorDebugWrapper\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 70\u001b[0m \u001b[1;32mfinally\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 71\u001b[0m \u001b[0mstop\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/backends/util.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, sql, params)\u001b[0m\n\u001b[0;32m 51\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 52\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 53\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 54\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 55\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mexecutemany\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparam_list\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/utils.py\u001b[0m in \u001b[0;36m__exit__\u001b[1;34m(self, exc_type, exc_value, traceback)\u001b[0m\n\u001b[0;32m 97\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mdj_exc_type\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32min\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mDataError\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mIntegrityError\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 98\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwrapper\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0merrors_occurred\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 99\u001b[1;33m \u001b[0msix\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreraise\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdj_exc_type\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdj_exc_value\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtraceback\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 100\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 101\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/utils/six.py\u001b[0m in \u001b[0;36mreraise\u001b[1;34m(tp, value, tb)\u001b[0m\n\u001b[0;32m 547\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mreraise\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtp\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtb\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 548\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__traceback__\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mtb\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 549\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwith_traceback\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtb\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 550\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 551\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/backends/util.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, sql, params)\u001b[0m\n\u001b[0;32m 51\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 52\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 53\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 54\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 55\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mexecutemany\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparam_list\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mOperationalError\u001b[0m: arr\u00eat des connexions suite \u00e0 la demande de l'administrateur\nSSL connection has been closed unexpectedly\n"
]
}
],
"prompt_number": 108
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
...
...
WorkFlow2.ipynb
0 → 100644
View file @
6b0e0d41
{
"metadata": {
"name": "",
"signature": "sha256:225dd3e49010ec98ad91be828e48d40276edd9a4242811d192ef4667286a0611"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from node.models import Node, NodeType,\\\n",
" Project, Corpus, Document,\\\n",
" Ngram, Node_Ngram,\\\n",
" User, Language, ResourceType\n",
"from django.db import connection\n",
"cursor = connection.cursor()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"me = User.objects.get(username='alexandre')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print(me.id)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"1\n"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"corpus = Corpus(id=13064)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"document = NodeType.objects.get(name='Document')\n",
"# note cr\u00e9er un type Document_Poubelle (et une fonction vider la poubelle)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"corpus.children.count()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 5,
"text": [
"916"
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Node_Ngram.objects.all().delete()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 27
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"corpus.children.filter(type_id=document.pk).extract_ngrams(keys=['title',])\n",
"\n",
"# bug\n",
"#corpus.children.filter(type_id=document.pk).extract_ngrams(keys=['title','abstract'])\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Warning: parsing empty text\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
}
],
"prompt_number": 28
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"children = corpus.children.all()[:10]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 46
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for child in children:\n",
" for n in Node_Ngram.objects.filter(node=child):\n",
" print(n.ngram.terms)\n",
" print(\"=\" * 10)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"equipment\n",
"technique used in laboratory evaluation of pesticide dusts in toxicological studies with honeybees\n",
"==========\n",
"pesticide dusts to honeybees\n",
"toxicity\n",
"==========\n",
"pesticides to honey bees in laboratory and field tests in southern california\n",
"toxicity\n",
"==========\n",
"field applications of some of the newer pesticides on honey bees\n",
"effects\n",
"==========\n",
"protecting honeybees\n",
"pesticides\n",
"==========\n",
"pollen gathering of honey bees reduced by pesticide sprays\n",
"==========\n",
"pesticide toxicity and honey bees\n",
"==========\n",
"newer pesticides dont harm environment\n",
"where have all bees gone\n",
"==========\n",
"honeybees\n",
"pesticides and law\n",
"==========\n",
"honeybees\n",
"pesticides and law\n",
"==========\n"
]
}
],
"prompt_number": 48
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#liste_ordered = collections.OrderedDict(sorted(liste.items()), key=lambda t: t[1])"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#Cr\u00e9ation des listes"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import collections\n",
"liste = collections.defaultdict(int)\n",
"try:\n",
" whitelist_type = NodeType.objects.get(name='WhiteList')\n",
" blacklist_type = NodeType.objects.get(name='BlackList')\n",
"except:\n",
" whitelist_type = NodeType(name='WhiteList')\n",
" whitelist_type.save()\n",
" \n",
" blacklist_type = NodeType(name='BlackList')\n",
" blacklist_type.save()\n",
"\n",
"white_list = Node.objects.create(name='WhiteList Pubmed', user=me, parent=corpus, type=whitelist_type)\n",
"black_list = Node.objects.create(name='BlackList Pubmed', user=me, parent=corpus, type=blacklist_type)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 37
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Node_Ngram.objects.filter(node=white_list).count()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 38,
"text": [
"0"
]
}
],
"prompt_number": 38
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cr\u00e9ation de la white list (simple sans syn ni black plus tard...)"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# delete avant pour \u00e9viter les doublons\n",
"Node_Ngram.objects.filter(node=white_list).all().delete()\n",
"cursor.execute(\"\"\"\n",
" SELECT\n",
" ngX.id,\n",
" ngX.terms,\n",
" COUNT(*) AS occurrences\n",
" FROM\n",
" node_node AS n\n",
" INNER JOIN\n",
" node_node_ngram AS nngX ON nngX.node_id = n.id\n",
" INNER JOIN\n",
" node_ngram AS ngX ON ngX.id = nngX.ngram_id\n",
" WHERE\n",
" n.parent_id = %s\n",
" AND\n",
" ngX.n >= 1\n",
" GROUP BY\n",
" ngX.id\n",
" Having\n",
" COUNT(*) >= 3\n",
" ORDER BY\n",
" occurrences DESC\n",
" LIMIT\n",
" 100\n",
" \n",
"\"\"\", [corpus.id])\n",
"\n",
"# \u00e0 optimiser avec un insert dans la requ\u00eate SQL\n",
"with transaction.atomic():\n",
" while True:\n",
" row = cursor.fetchone()\n",
" if row is None:\n",
" break\n",
" #print(row)\n",
" Node_Ngram.objects.create(node=white_list, ngram=Ngram.objects.get(id=row[0]), weight=row[2])"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 43
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Node_Ngram.objects.filter(node=white_list).count()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 44,
"text": [
"100"
]
}
],
"prompt_number": 44
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"white_list.id"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 45,
"text": [
"61305"
]
}
],
"prompt_number": 45
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cursor.execute(\"\"\"\n",
" SELECT\n",
" COUNT(*) AS cooccurrences,\n",
" ngX.terms,\n",
" ngY.terms\n",
" FROM\n",
" node_node AS n\n",
" \n",
" INNER JOIN\n",
" node_node_ngram AS nngX ON nngX.node_id = n.id\n",
" INNER JOIN\n",
" node_ngram AS ngX ON ngX.id = nngX.ngram_id\n",
" \n",
" INNER JOIN\n",
" node_node_ngram AS nngY ON nngY.node_id = n.id\n",
" INNER JOIN\n",
" node_ngram AS ngY ON ngY.id = nngY.ngram_id\n",
" \n",
" WHERE\n",
" n.parent_id = %s\n",
" AND\n",
" nngX.ngram_id in (select id from node_node_ngram WHERE node_id = %s )\n",
" AND\n",
" nngY.ngram_id in (select id from node_node_ngram WHERE node_id = %s )\n",
" AND\n",
" nngX.ngram_id <> nngY.ngram_id\n",
" \n",
" GROUP BY\n",
" ngX.id,\n",
" ngX.terms,\n",
" ngY.id,\n",
" ngY.terms\n",
" ORDER BY\n",
" cooccurrences DESC\n",
" LIMIT\n",
" 200\n",
"\"\"\", [corpus.id, white_list.id, white_list.id])\n",
"\n",
"while True:\n",
" row = cursor.fetchone()\n",
" if row is None:\n",
" break\n",
" print(row)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 42
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 42
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 42
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cr\u00e9ation de la black list"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"with transaction.atomic():\n",
" for node_ngram_object in Node_Ngram.objects.all()[101:150]:\n",
" Node_Ngram.objects.create(node=black_node, ngram=node_ngram_object.ngram, score=1)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "'score' is an invalid keyword argument for this function",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-24-b9a499432911>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mtransaction\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0matomic\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mnode_ngram_object\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mNode_Ngram\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mobjects\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mall\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m101\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;36m150\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0mNode_Ngram\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mobjects\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcreate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mblack_node\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mngram\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mnode_ngram_object\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mngram\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mscore\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/models/manager.py\u001b[0m in \u001b[0;36mcreate\u001b[1;34m(self, **kwargs)\u001b[0m\n\u001b[0;32m 155\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 156\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mcreate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 157\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_queryset\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcreate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 158\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 159\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mbulk_create\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/models/query.py\u001b[0m in \u001b[0;36mcreate\u001b[1;34m(self, **kwargs)\u001b[0m\n\u001b[0;32m 318\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mreturning\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mcreated\u001b[0m \u001b[0mobject\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 319\u001b[0m \"\"\"\n\u001b[1;32m--> 320\u001b[1;33m \u001b[0mobj\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 321\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_for_write\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 322\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mforce_insert\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0musing\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdb\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/models/base.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 415\u001b[0m \u001b[1;32mpass\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 416\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 417\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"'%s' is an invalid keyword argument for this function\"\u001b[0m \u001b[1;33m%\u001b[0m \u001b[0mlist\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 418\u001b[0m \u001b[0msuper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mModel\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 419\u001b[0m \u001b[0msignals\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpost_init\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msender\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__class__\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minstance\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mTypeError\u001b[0m: 'score' is an invalid keyword argument for this function"
]
}
],
"prompt_number": 24
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Node_Ngram.objects.filter(node=black_node)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cr\u00e9ation des synonymes"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"syno_type = NodeType.objects.get(name='Synonyme')\n",
"syno_node = Node.objects.create(name='Syno Pubmed',\n",
" user=user, \n",
" parent=corpus, \n",
" type=syno_type)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"synonyme1, synonyme2 = Node_Ngram.objects.filter(node=white_node)[3:5]"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"NodeNgramNgram.objects.create(node=syno_node, ngramx=synonyme1.ngram, ngramy=synonyme2.ngram)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cooccurrence"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 25,
"text": [
"0"
]
}
],
"prompt_number": 25
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"black_node.pk"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 18,
"text": [
"61295"
]
}
],
"prompt_number": 18
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"try:\n",
"    cooc_type = NodeType.objects.get(name='Cooccurrence')\n",
"except NodeType.DoesNotExist:\n",
"    cooc_type = NodeType(name='Cooccurrence')\n",
"    cooc_type.save()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 26
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cooc = Node.objects.create(user=me,\\\n",
" parent=corpus,\\\n",
" type=cooc_type,\\\n",
" name=\"Cooccurrences calcul Alpha\")"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 27
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cooc.pk"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 28,
"text": [
"61300"
]
}
],
"prompt_number": 28
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"white_node.pk"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 42,
"text": [
"61298"
]
}
],
"prompt_number": 42
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from django.db import connection\n",
"cursor = connection.cursor()\n",
"# LOCK TABLE documents_ngramtemporary IN EXCLUSIVE MODE;\n",
"query_string = \"\"\"\n",
"\n",
"INSERT INTO node_nodengramngram (node_id, \"ngramx_id\", \"ngramy_id\", score)\n",
"\n",
"SELECT \n",
"%d as node_id, x.ngram_id, y.ngram_id, COUNT(*) AS score\n",
"\n",
"FROM\n",
"node_node_ngram AS x\n",
"\n",
"INNER JOIN \n",
"node_node_ngram AS y ON x.node_id = y.node_id\n",
"\n",
"WHERE\n",
" x.id in (select id from node_node_ngram WHERE node_id = %d )\n",
"AND\n",
" y.id in (select id from node_node_ngram WHERE node_id = %d )\n",
"AND\n",
" x.id > y.id\n",
"\n",
"\n",
"GROUP BY\n",
"x.ngram_id, y.ngram_id\n",
"\n",
"HAVING count(*) > 1\n",
"\n",
"ORDER BY score\n",
"\n",
"LIMIT 300\n",
"\n",
" \"\"\" % (cooc.pk, white_node.pk, white_node.pk)\n",
"\n",
"cursor.execute(query_string)\n",
"\n",
"try:\n",
" while True:\n",
" row = cursor.fetchone()\n",
" if row is None:\n",
" break\n",
" print(row)\n",
"except:\n",
" pass"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 76
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from copy import copy\n",
"import numpy as np\n",
"import pandas as pd\n",
"import networkx as nx\n",
"from collections import defaultdict\n",
"from analysis.louvain import *\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 44
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"matrix = \"\""
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 57
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"matrix = defaultdict(lambda : defaultdict(float))\n",
"for cooccurrence in NodeNgramNgram.objects.filter(node=cooc):\n",
"        if cooccurrence.score >= 1 :\n",
"            #print(cooccurrence.ngramx.terms, cooccurrence.ngramy.terms)\n",
"            matrix[cooccurrence.ngramx.terms][cooccurrence.ngramy.terms] = cooccurrence.score\n",
"            matrix[cooccurrence.ngramy.terms][cooccurrence.ngramx.terms] = cooccurrence.score"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 58
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df = pd.DataFrame(matrix).T.fillna(0)\n",
"x = copy(df.values)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 59
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"x = np.where((x.sum(axis=1) > x.shape[0] / 2), 0, x )\n",
"x = np.where((x.sum(axis=1) > x.shape[0] / 10), 0, x )"
],
"language": "python",
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "shape mismatch: objects cannot be broadcast to a single shape",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-39-28332fbcc3a3>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mx\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwhere\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msum\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m>\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m/\u001b[0m \u001b[1;36m2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mx\u001b[0m \u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mx\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwhere\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msum\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m>\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m/\u001b[0m \u001b[1;36m10\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mx\u001b[0m \u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mValueError\u001b[0m: shape mismatch: objects cannot be broadcast to a single shape"
]
}
],
"prompt_number": 39
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"x = x / x.sum(axis=1)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "operands could not be broadcast together with shapes (87,74) (87,) ",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-60-044d62562031>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mx\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mx\u001b[0m \u001b[1;33m/\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msum\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;31mValueError\u001b[0m: operands could not be broadcast together with shapes (87,74) (87,) "
]
}
],
"prompt_number": 60
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"matrix_filtered = np.where(x > .4, 1, 0)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 50
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"matrix_filtered"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 51,
"text": [
"array([[1, 0, 0, ..., 0, 0, 0],\n",
" [0, 1, 0, ..., 0, 0, 0],\n",
" [0, 0, 1, ..., 0, 0, 0],\n",
" ..., \n",
" [0, 0, 0, ..., 0, 0, 0],\n",
" [0, 0, 0, ..., 0, 1, 0],\n",
" [0, 0, 0, ..., 0, 0, 1]])"
]
}
],
"prompt_number": 51
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"G = nx.from_numpy_matrix(matrix_filtered)\n",
"G = nx.relabel_nodes(G, dict(enumerate(df.columns)))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"ename": "NetworkXError",
"evalue": "('Adjacency matrix is not square.', 'nx,ny=(87, 74)')",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mNetworkXError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-52-a97ac71e981e>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mG\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfrom_numpy_matrix\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmatrix_filtered\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mG\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrelabel_nodes\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mG\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0menumerate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/networkx/convert_matrix.py\u001b[0m in \u001b[0;36mfrom_numpy_matrix\u001b[1;34m(A, create_using)\u001b[0m\n\u001b[0;32m 267\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mn\u001b[0m\u001b[1;33m!=\u001b[0m\u001b[0mm\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 268\u001b[0m raise nx.NetworkXError(\"Adjacency matrix is not square.\",\n\u001b[1;32m--> 269\u001b[1;33m \"nx,ny=%s\"%(A.shape,))\n\u001b[0m\u001b[0;32m 270\u001b[0m \u001b[0mdt\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mA\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 271\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mNetworkXError\u001b[0m: ('Adjacency matrix is not square.', 'nx,ny=(87, 74)')"
]
}
],
"prompt_number": 52
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"nx.draw(G, with_labels=True)\n",
"plt.show()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"partition = best_partition(G)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#partition"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pos = nx.spring_layout(G)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"count = 0.0\n",
"node_min = 3\n",
"for com in set(partition.values()) :\n",
" count = count + 1\n",
" list_nodes = [nodes for nodes in partition.keys() if partition[nodes] == com]\n",
" \n",
" if len(list_nodes) > node_min:\n",
" nx.draw_networkx_nodes(G, pos, list_nodes, node_size = 20, with_labels=True)#, node_color = str(count / size))\n",
" nx.draw_networkx_edges(G, pos, alpha=0.5)\n",
" plt.title(\"Clique \" + str(count))\n",
" \n",
" for node in list_nodes: \n",
" print(node)\n",
" plt.show()\n",
" print(\"-\" * 30)\n"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 145
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 146
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 147
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 0
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 0
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 0
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 0
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 0
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment