Commit a677c8b5 authored by Administrator

removing Gargantext_web tutorial (going to ..)

parent 806b9f9f
{
"metadata": {
"name": "",
"signature": "sha256:7a09b299e7a4e13d4edf362d485d580a9e5c59830ddf8c459a309fc220773c77"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"cd .."
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"/home/alexandre/projets/gargantext.py\n"
]
}
],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import gargantext"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cd gargantext_web/"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"/home/alexandre/projets/gargantext.py/gargantext_web\n"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import documents"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c = gargantext.bdd.Europresse()\n",
"c.add(\"/home/alexandre/projets/abeilles/documents/Europresse/html/\")"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Copy every record yielded by the Europresse importer `c` into a Document row.\n",
"for doc in c:\n",
"    d = documents.models.Document()\n",
"    d.project_id = \"1\"\n",
"    d.corpus_id = \"1\"\n",
"    d.analyst_id = \"1\"\n",
"    try:\n",
"        # Was `d.uniqu_id`, which only sets a stray attribute on the instance.\n",
"        # NOTE(review): assumes the model field is `unique_id` (the name used by\n",
"        # words.get(..., unique_id=\"unique_id\")) - confirm against documents/models.py.\n",
"        d.unique_id = doc[\"object_id\"]\n",
"        d.date = doc[\"date\"]\n",
"        d.title = doc[\"title\"]\n",
"        d.authors = doc[\"authors\"]\n",
"        d.text = doc[\"text\"]\n",
"        d.source = doc[\"source\"]\n",
"        d.save()\n",
"    except Exception as error:\n",
"        # Still best effort (one bad record must not stop the import), but report\n",
"        # what was skipped instead of hiding it; KeyboardInterrupt now propagates.\n",
"        print(\"skipped document: %r\" % (error,))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 88
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"documents.models.Project.objects.all()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 7,
"text": [
"[<Project: Hola Ebola>, <Project: Fukushima again>, <Project: Thanks anthrax>, <Project: Bees swarm>, <Project: CIRDEM>]"
]
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"corpus = gargantext.Corpus()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"corpus_dict = corpus.query('''select * from documents_document\n",
" where corpus_id = %d\n",
" limit 9;''' % 1)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Loop over `row`, not `c`: `c` is the Europresse importer used by the import cell,\n",
"# and rebinding it here would break that cell when it is run again.\n",
"for row in corpus_dict:\n",
"    print(row['id'])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"9103\n",
"10718\n",
"10719\n",
"10720\n",
"6516\n",
"7605\n",
"9104\n",
"9105\n",
"9106\n"
]
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"words = gargantext.Ngrams()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"words.get(corpus_dict, unique_id=\"unique_id\")"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 12
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"words.count()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stderr",
"text": [
"/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/numpy/core/_methods.py:55: RuntimeWarning: Mean of empty slice.\n",
" warnings.warn(\"Mean of empty slice.\", RuntimeWarning)\n",
"\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
"WARNING:py.warnings:/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/numpy/core/_methods.py:55: RuntimeWarning: Mean of empty slice.\n",
" warnings.warn(\"Mean of empty slice.\", RuntimeWarning)\n",
"\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"0 occurrences, 0 forms, nan avg\n"
]
}
],
"prompt_number": 15
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 23
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 24
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"words = gargantext.Ngrams()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 25,
"text": [
"[<Document: Notebook test>, <Document: Titre d'un article>]"
]
}
],
"prompt_number": 25
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Number of documents per source.\n",
"# Manager.raw() needs the primary key in every row (otherwise it raises\n",
"# InvalidQuery), so one id per group is exposed; the extra `count` column is\n",
"# attached to each returned instance as an attribute.\n",
"for p in documents.models.Document.objects.raw(\n",
"        'select min(id) as id, source, count(*) as count '\n",
"        'from documents_document group by source;'):\n",
"    print(p.source, p.count)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"ename": "InvalidQuery",
"evalue": "Raw query must include the primary key",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mInvalidQuery\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-45-5495b08ea061>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[1;32mfor\u001b[0m \u001b[0mp\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mdocuments\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmodels\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDocument\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mobjects\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mraw\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'select count(*),source from documents_document group by source;'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mp\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/models/query.py\u001b[0m in \u001b[0;36m__iter__\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1413\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mskip\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1414\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_meta\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpk\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mattname\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mskip\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1415\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mInvalidQuery\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'Raw query must include the primary key'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1416\u001b[0m \u001b[0mmodel_cls\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdeferred_class_factory\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mskip\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1417\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mInvalidQuery\u001b[0m: Raw query must include the primary key"
]
}
],
"prompt_number": 45
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"{'count': 34, 'source': 'Sud Ouest'}\n",
"{'count': 26, 'source': 'Le Progr\u00e8s - Lyon'}\n",
"{'count': 9, 'source': 'Le Figaro'}\n",
"{'count': 9, 'source': 'AFP Infos Fran\u00e7aises'}\n",
"{'count': 8, 'source': 'Les Echos'}\n",
"{'count': 8, 'source': 'Le Monde'}\n",
"{'count': 6, 'source': 'Le Parisien'}\n",
"{'count': 5, 'source': 'Le T\u00e9l\u00e9gramme (Bretagne)'}\n",
"{'count': 4, 'source': 'La Presse'}\n"
]
}
],
"prompt_number": 82
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 78
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"result = query_to_dicts('''select to_char(date, 'YYYY-MM'), count(*) \n",
" from documents_document\n",
" group by to_char(date, 'YYYY-MM')\n",
" order by 1 DESC;''')\n",
"for i in result:\n",
" print(i)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"{'to_char': '2001-12', 'count': 9}\n",
"{'to_char': '2001-11', 'count': 10}\n",
"{'to_char': '2001-10', 'count': 2}\n",
"{'to_char': '2001-09', 'count': 10}\n",
"{'to_char': '2001-08', 'count': 36}\n",
"{'to_char': '2001-07', 'count': 15}\n",
"{'to_char': '2001-06', 'count': 12}\n",
"{'to_char': '2001-05', 'count': 13}\n",
"{'to_char': '2001-04', 'count': 13}\n",
"{'to_char': '2001-03', 'count': 16}\n",
"{'to_char': '2001-02', 'count': 14}\n",
"{'to_char': '2001-01', 'count': 4}\n"
]
}
],
"prompt_number": 61
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"result = query_to_dicts('''select to_char(date, 'YYYY'), count(*) \n",
" from documents_document \n",
" group by to_char(date, 'YYYY')\n",
" order by 1 DESC;''')\n",
"for i in result:\n",
" print(i)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"{'to_char': '2001', 'count': 154}\n"
]
}
],
"prompt_number": 62
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"result = query_to_dicts('''select to_char(date, 'YYYY-MM'), count(*) \n",
" from documents_document \n",
" group by to_char(date, 'YYYY-MM')\n",
" order by 1 DESC;''')\n",
"for i in result:\n",
" print(i)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"{'to_char': '2001-12', 'count': 9}\n",
"{'to_char': '2001-11', 'count': 10}\n",
"{'to_char': '2001-10', 'count': 2}\n",
"{'to_char': '2001-09', 'count': 10}\n",
"{'to_char': '2001-08', 'count': 36}\n",
"{'to_char': '2001-07', 'count': 15}\n",
"{'to_char': '2001-06', 'count': 12}\n",
"{'to_char': '2001-05', 'count': 13}\n",
"{'to_char': '2001-04', 'count': 13}\n",
"{'to_char': '2001-03', 'count': 16}\n",
"{'to_char': '2001-02', 'count': 14}\n",
"{'to_char': '2001-01', 'count': 4}\n"
]
}
],
"prompt_number": 83
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 91
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment