Commit 0df9b00c authored by PkSM3's avatar PkSM3

Merge branch 'master' of ssh://delanoe.org:1979/gargantext into samuel

parents 64ac97bc 6c403f1f
{
"metadata": {
"name": "",
"signature": "sha256:0e63832a6b33d476c8b284b72b0740bd9ade357e5ebb1f73bdc399bbd2824a16"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from node.models import Node, NodeType,\\\n",
" Project, Corpus, Document,\\\n",
" Ngram, Node_Ngram,\\\n",
" User, Language, ResourceType"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Node.objects.all().delete()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pycountry\n",
"\n",
"for language in pycountry.languages:\n",
" try:\n",
" implemented = 1 if language.alpha2 in ['en', 'fr'] else 0\n",
" Language(iso2=language.alpha2, iso3=language.terminology, fullname=language.name, implemented=implemented).save()\n",
" except:\n",
" pass"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 24
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"english = Language.objects.get(iso2='en')\n",
"french = Language.objects.get(iso2='fr')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 25
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"try:\n",
" me = User.objects.get(username='alexandre')\n",
"except:\n",
" me = User(username='alexandre')\n",
" me.save()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"try:\n",
" typeProject = NodeType.objects.get(name='Project')\n",
"except Exception as error:\n",
" print(error)\n",
" typeProject = NodeType(name='Project')\n",
" typeProject.save() \n",
"\n",
"try:\n",
" typeCorpus = NodeType.objects.get(name='Corpus')\n",
"except Exception as error:\n",
" print(error)\n",
" typeCorpus = NodeType(name='Corpus')\n",
" typeCorpus.save()\n",
" \n",
"try:\n",
" typeDoc = NodeType.objects.get(name='Document')\n",
"except Exception as error:\n",
" print(error)\n",
" typeDoc = NodeType(name='Document')\n",
" typeDoc.save()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"try:\n",
" typePubmed = ResourceType.objects.get(name='pubmed')\n",
" typeIsi = ResourceType.objects.get(name='isi')\n",
" typeRis = ResourceType.objects.get(name='ris')\n",
" typePresseFrench = ResourceType.objects.get(name='europress_french')\n",
" typePresseEnglish = ResourceType.objects.get(name='europress_english')\n",
"\n",
"except Exception as error:\n",
" print(error)\n",
" \n",
" typePubmed = ResourceType(name='pubmed')\n",
" typePubmed.save() \n",
" \n",
" typeIsi = ResourceType(name='isi')\n",
" typeIsi.save()\n",
" \n",
" typeRis = ResourceType(name='ris')\n",
" typeRis.save()\n",
" \n",
" typePresseFrench = ResourceType(name='europress_french')\n",
" typePresseFrench.save()\n",
" \n",
" typePresseEnglish = ResourceType(name='europress_english')\n",
" typePresseEnglish.save()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"get() returned more than one ResourceType -- it returned 2!\n"
]
}
],
"prompt_number": 33
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Node.objects.all().delete()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 34
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"try:\n",
" project = Node.objects.get(name='Bees project')\n",
"except:\n",
" project = Node(name='Bees project', type=typeProject, user=me)\n",
" project.save()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 9
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Pubmed"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"try:\n",
" corpus_pubmed = Node.objects.get(name='PubMed corpus')\n",
"except:\n",
" corpus_pubmed = Node(parent=project, name='PubMed corpus', type=typeCorpus, user=me)\n",
" corpus_pubmed.save()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"corpus_pubmed.add_resource(file='/srv/gargantext_lib/data_samples/pubmedBig.zip', type=typePubmed)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 11,
"text": [
"<Resource: Resource object>"
]
}
],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#corpus_abeille.add_resource(file='/srv/gargantext_lib/data_samples/pubmed.zip', type=typePubmed)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 12
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"corpus_pubmed.parse_resources()\n",
"corpus_pubmed.children.count()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 13,
"text": [
"600"
]
}
],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"corpus_pubmed.id"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 14,
"text": [
"3131"
]
}
],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"corpus_pubmed.children.all().extract_ngrams(['title', 'abstract'])\n",
"#Node_Ngram.objects.filter(node=corpus_pubmed.children.all()[0]).count()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Warning: parsing empty text\n"
]
}
],
"prompt_number": 15
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### RIS"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"try:\n",
" corpus_ris = Node.objects.get(name='RIS corpus')\n",
"except:\n",
" corpus_ris = Node(parent=project, name='RIS corpus', type=typeCorpus, user=me)\n",
" corpus_ris.save()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"corpus_ris.add_resource(file='/srv/gargantext_lib/data_samples/risUnix.zip', type=typeRis)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"corpus_ris.parse_resources()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"corpus_ris.children.count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"corpus_ris.children.all()[15].metadata"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"corpus_ris.name = \"ZOTERO CORPUS (CIRDEM)\"\n",
"corpus_ris.save()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Science"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"try:\n",
" science = Node.objects.get(name='WOS corpus')\n",
"except:\n",
" science = Node(parent=project, name='WOS corpus', type=typeCorpus, user=me)\n",
" science.save()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"science.add_resource(file='/srv/gargantext_lib/data_samples/isi.zip', type=typeIsi)\n",
"science.parse_resources()\n",
"science.children.count()"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#[n.metadata for n in science.children.all()]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 10
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"science.children.all().extract_ngrams(['title',])\n",
"Node_Ngram.objects.filter(node=science.children.all()[0]).count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Press"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"try:\n",
" presse = Node.objects.get(name='Presse corpus')\n",
"except:\n",
" presse = Node(parent=project, name='Presse corpus', type=typeCorpus, user=me)\n",
" presse.save()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 12
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"presse.language = Language.objects.get(iso2='fr')\n",
"presse.save()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"presse.add_resource(file='/srv/gargantext_lib/data_samples/html/html_french.zip', type=typePresse)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 14,
"text": [
"<Resource: Resource object>"
]
}
],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"presse.parse_resources()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 15
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"presse.children.count()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 16,
"text": [
"88"
]
}
],
"prompt_number": 16
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for child in presse.children.all():\n",
" print(child.metadata['title'])\n",
" child.extract_ngrams(['title',])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Conf\u00e9d\u00e9ration paysanne : \" retrait imm\u00e9diat \" du R\u00e9gent\n",
"defaultdict(<class 'float'>, {'retrait imm\u00e9diat': 1.0, 'R\u00e9gent': 1.0, 'Conf\u00e9d\u00e9ration paysanne': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Gaucho, R\u00e9gent : la mobilisation continue\n",
"defaultdict(<class 'float'>, {'R\u00e9gent': 1.0, 'mobilisation continue': 1.0, 'Gaucho': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"GB/rapport: \"oui mais\" au ma\u00efs OGM, \"non mais\" pour colza et betterave\n",
"defaultdict(<class 'float'>, {'betterave': 1.0, 'ma\u00efs': 1.0, 'GB rapport': 1.0, 'colza': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Rapport: \"oui mais\" au ma\u00efs OGM, \"non mais\" pour colza et betterave \u00e0 sucre\n",
"defaultdict(<class 'float'>, {'ma\u00efs': 1.0, 'betterave': 1.0, 'Rapport': 1.0, 'sucre': 1.0, 'colza': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Rapport: \"oui mais\" au ma\u00efs OGM, \"non mais\" pour colza et betterave \u00e0 sucre\n",
"defaultdict(<class 'float'>, {'ma\u00efs': 1.0, 'betterave': 1.0, 'Rapport': 1.0, 'sucre': 1.0, 'colza': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La Conf\u00e9d\u00e9ration exige le retrait du R\u00e9gent\n",
"defaultdict(<class 'float'>, {'R\u00e9gent': 1.0, 'Conf\u00e9d\u00e9ration exige': 1.0, 'retrait': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La Conf\u00e9d\u00e9ration exige le retrait du R\u00e9gent\n",
"defaultdict(<class 'float'>, {'R\u00e9gent': 1.0, 'Conf\u00e9d\u00e9ration exige': 1.0, 'retrait': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La Conf\u00e9d\u00e9ration paysanne demande le retrait du R\u00e9gent\n",
"defaultdict(<class 'float'>, {'R\u00e9gent': 1.0, 'retrait': 1.0, 'Conf\u00e9d\u00e9ration paysanne demande': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Enqu\u00eate R\u00e9gent: BASF demande le statut de \"t\u00e9moin assist\u00e9\"\n",
"defaultdict(<class 'float'>, {'t\u00e9moin assist\u00e9': 1.0, 'statut': 1.0, 'Enqu\u00eate R\u00e9gent': 1.0, 'BASF demande': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Enqu\u00eate R\u00e9gent: BASF demande le statut de \"t\u00e9moin assist\u00e9\"\n",
"defaultdict(<class 'float'>, {'t\u00e9moin assist\u00e9': 1.0, 'statut': 1.0, 'Enqu\u00eate R\u00e9gent': 1.0, 'BASF demande': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Un film-enqu\u00eate\n",
"defaultdict(<class 'float'>, {'film-enqu\u00eate': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La Conf\u00e9d\u00e9ration Paysanne demande le \"retrait imm\u00e9diat\" du R\u00e9gent TS\n",
"defaultdict(<class 'float'>, {'retrait imm\u00e9diat': 1.0, 'R\u00e9gent TS': 1.0, 'Conf\u00e9d\u00e9ration Paysanne demande': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La Conf\u00e9d\u00e9ration Paysanne demande le \"retrait imm\u00e9diat\" du R\u00e9gent TS\n",
"defaultdict(<class 'float'>, {'retrait imm\u00e9diat': 1.0, 'R\u00e9gent TS': 1.0, 'Conf\u00e9d\u00e9ration Paysanne demande': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Insecticide R\u00e9gent TS: un juge souhaite enqu\u00eater sur la mise en danger d'autrui\n",
"defaultdict(<class 'float'>, {'juge souhaite enqu\u00eater': 1.0, 'mise': 1.0, 'Insecticide R\u00e9gent TS': 1.0, 'danger d': 1.0, 'autrui': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Sous-estimation des risques li\u00e9s \u00e0 l'utilisation du R\u00e9gent TS\n",
"defaultdict(<class 'float'>, {'l': 1.0, 'utilisation': 1.0, 'Sous-estimation': 1.0, 'risques li\u00e9s': 1.0, 'R\u00e9gent TS': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"L'affaire de l'insecticide rebondit\n",
"defaultdict(<class 'float'>, {'l': 1.0, 'affaire': 1.0, 'insecticide rebondit': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Coup d'oeil sur 2003 : les faits marquants\n",
"defaultdict(<class 'float'>, {'faits marquants': 1.0, 'Coup d': 1.0, 'oeil': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La Conf\u00e9d\u00e9ration exige le retrait du R\u00e9gent\n",
"defaultdict(<class 'float'>, {'R\u00e9gent': 1.0, 'Conf\u00e9d\u00e9ration exige': 1.0, 'retrait': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Le juge veut enqu\u00eater sur la mise en danger d'autrui\n",
"defaultdict(<class 'float'>, {'mise': 1.0, 'juge veut enqu\u00eater': 1.0, 'danger d': 1.0, 'autrui': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La Conf\u00e9d\u00e9ration paysanne demande le retrait du R\u00e9gent TS"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"defaultdict(<class 'float'>, {'R\u00e9gent TS': 1.0, 'retrait': 1.0, 'Conf\u00e9d\u00e9ration paysanne demande': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Maires anti-Gaucho devant le tribunal\n",
"defaultdict(<class 'float'>, {'Maires anti-Gaucho': 1.0, 'tribunal': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"D\u00e9chets m\u00e9nagers, abeilles, OGM... Nature Avenir fait le point\n",
"defaultdict(<class 'float'>, {'D\u00e9chets m\u00e9nagers': 1.0, 'point': 1.0, 'abeilles': 1.0, 'Nature Avenir fait': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"D\u00e9chets m\u00e9nagers, abeilles, OGM... Nature Avenir fait le point\n",
"defaultdict(<class 'float'>, {'D\u00e9chets m\u00e9nagers': 1.0, 'point': 1.0, 'abeilles': 1.0, 'Nature Avenir fait': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La banlieue par la bande\n",
"defaultdict(<class 'float'>, {'banlieue': 1.0, 'bande': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Insecticide R\u00e9gent TS .\n",
"defaultdict(<class 'float'>, {'Insecticide R\u00e9gent TS': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Chimie : une nouvelle expertise affirme la toxicit\u00e9 de l'insecticide R\u00e9gent TS\n",
"defaultdict(<class 'float'>, {'nouvelle expertise affirme': 1.0, 'insecticide R\u00e9gent TS': 1.0, 'Chimie': 1.0, 'l': 1.0, 'toxicit\u00e9': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"[Une expertise judiciaire affirme que les risques pour l'homme et pour l'environnement li\u00e9s \u00e0 l'utilisation de l'insecticide R\u00e9gent TS ont \u00e9t\u00e9 sous-estim\u00e9s.]\n",
"defaultdict(<class 'float'>, {'insecticide R\u00e9gent TS ont \u00e9t\u00e9 sous-estim\u00e9s': 1.0, 'expertise judiciaire affirme': 1.0, 'utilisation': 1.0, 'l': 4.0, 'environnement li\u00e9s': 1.0, 'risques': 1.0, 'homme': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Un insecticide \u00e0 risque\n",
"defaultdict(<class 'float'>, {'risque': 1.0, 'insecticide': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La cuv\u00e9e des miels 2003 est plus que rare, s\u00e9cheresse oblige\n",
"defaultdict(<class 'float'>, {'miels': 1.0, 's\u00e9cheresse oblige': 1.0, 'cuv\u00e9e': 1.0, 'rare': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Les semences Gaucho, des d\u00e9chets banals\u00a0?\n",
"defaultdict(<class 'float'>, {'semences Gaucho': 1.0, 'd\u00e9chets banals': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Inqui\u00e9tudes des apiculteurs finist\u00e9riens (Lire en page 8)\n",
"defaultdict(<class 'float'>, {'Inqui\u00e9tudes': 1.0, 'Lire': 1.0, 'apiculteurs finist\u00e9riens': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Inqui\u00e9tudes des apiculteurs finist\u00e9riens\n",
"defaultdict(<class 'float'>, {'Inqui\u00e9tudes': 1.0, 'apiculteurs finist\u00e9riens': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"2003 dans le r\u00e9tro\n",
"defaultdict(<class 'float'>, {'r\u00e9tro': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"A. David, apiculteur : \u00ab Rien ne change \u00bb\n",
"defaultdict(<class 'float'>, {'David': 1.0, 'Rien': 1.0, 'apiculteur': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"R\u00e9gent TS: selon une nouvelle expertise, les risques ont \u00e9t\u00e9 sous-estim\u00e9s\n",
"defaultdict(<class 'float'>, {'nouvelle expertise': 1.0, 'R\u00e9gent TS': 1.0, 'risques ont \u00e9t\u00e9 sous-estim\u00e9s': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"R\u00e9gent TS: selon une nouvelle expertise, les risques ont \u00e9t\u00e9 sous-estim\u00e9s"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"defaultdict(<class 'float'>, {'nouvelle expertise': 1.0, 'R\u00e9gent TS': 1.0, 'risques ont \u00e9t\u00e9 sous-estim\u00e9s': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"R\u00e9gent TS: les risques pour l'homme auraient \u00e9t\u00e9 sous-estim\u00e9s (expertise)\n",
"defaultdict(<class 'float'>, {'l': 1.0, 'expertise': 1.0, 'risques': 1.0, 'R\u00e9gent TS': 1.0, 'homme auraient \u00e9t\u00e9 sous-estim\u00e9s': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"R\u00e9gent TS: les risques pour l'homme auraient \u00e9t\u00e9 sous-estim\u00e9s (expertise)\n",
"defaultdict(<class 'float'>, {'l': 1.0, 'expertise': 1.0, 'risques': 1.0, 'R\u00e9gent TS': 1.0, 'homme auraient \u00e9t\u00e9 sous-estim\u00e9s': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"R\u00e9gent TS: un troisi\u00e8me expert \u00e9voque des risques pour la sant\u00e9 humaine\n",
"defaultdict(<class 'float'>, {'expert \u00e9voque': 1.0, 'risques': 1.0, 'R\u00e9gent TS': 1.0, 'sant\u00e9 humaine': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"R\u00e9gent TS: un troisi\u00e8me expert \u00e9voque des risques pour la sant\u00e9 humaine\n",
"defaultdict(<class 'float'>, {'expert \u00e9voque': 1.0, 'risques': 1.0, 'R\u00e9gent TS': 1.0, 'sant\u00e9 humaine': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Les vrais ennemis des abeilles\n",
"defaultdict(<class 'float'>, {'abeilles': 1.0, 'vrais ennemis': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Un expert d\u00e9nonce les dangers d'un pesticide\n",
"defaultdict(<class 'float'>, {'expert d\u00e9nonce': 1.0, 'pesticide': 1.0, 'dangers d': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Huissier ind\u00e9sirable : le maire de Murs-\u00c9rign\u00e9 \u00e9crit au ministre de l'Int\u00e9rieur\n",
"defaultdict(<class 'float'>, {'Murs-\u00c9rign\u00e9 \u00e9crit': 1.0, 'l': 1.0, 'Huissier ind\u00e9sirable': 1.0, 'Int\u00e9rieur': 1.0, 'maire': 1.0, 'ministre': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"R\u00e9gent TS : nouvelles accusations\n",
"defaultdict(<class 'float'>, {'nouvelles accusations': 1.0, 'R\u00e9gent TS': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Un cocotier dans votre salon ?\n",
"defaultdict(<class 'float'>, {'cocotier': 1.0, 'salon': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Pratiques commerciales: L'autre malbouffe\n",
"defaultdict(<class 'float'>, {'Pratiques commerciales': 1.0, 'autre malbouffe': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Quel avenir pour le XXIe si\u00e8cle ?"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"defaultdict(<class 'float'>, {'avenir': 1.0, 'si\u00e8cle': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Des abeilles, du miel et du pain d'\u00e9pice\n",
"defaultdict(<class 'float'>, {'pain d': 1.0, 'abeilles': 1.0, 'miel': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Abeilles et fipronil : un dossier \" empoisonnant \"\n",
"defaultdict(<class 'float'>, {'fipronil': 1.0, 'dossier': 1.0, 'Abeilles': 1.0, 'empoisonnant': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Des apiculteurs manifestent \u00e0 Angers contre \"une tentative d'intimidation\"\n",
"defaultdict(<class 'float'>, {'tentative d': 1.0, 'apiculteurs manifestent': 1.0, 'Angers': 1.0, 'intimidation': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Insecticides : manifestation apr\u00e8s \" l'intimidation \"\n",
"defaultdict(<class 'float'>, {'l': 1.0, 'manifestation': 1.0, 'Insecticides': 1.0, 'intimidation': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Apiculteurs : non \u00e0 l'atteinte aux libert\u00e9s"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"defaultdict(<class 'float'>, {'libert\u00e9s': 1.0, 'l': 1.0, 'Apiculteurs': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Une ruche politique... consensuelle\n",
"defaultdict(<class 'float'>, {'ruche politique': 1.0, 'consensuelle': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Les apiculteurs manifestent\n",
"defaultdict(<class 'float'>, {'apiculteurs manifestent': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"[Les apiculteurs locaux invitent la population vendredi 13 f\u00e9vrier \u00e0 20 heures \u00e0 la mairie pour assister \u00e0 une projection de cassettes vid\u00e9os sur la vie des abeilles et les cons\u00e9quences de l'utilisation de certains insecticides.]\n",
"defaultdict(<class 'float'>, {'vie': 1.0, 'abeilles': 1.0, 'heures': 1.0, 'l': 1.0, 'f\u00e9vrier': 1.0, 'cassettes vid\u00e9os': 1.0, 'insecticides': 1.0, 'population vendredi': 1.0, 'projection': 1.0, 'mairie': 1.0, 'utilisation': 1.0, 'cons\u00e9quences': 1.0, 'apiculteurs locaux invitent': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"\" Les abeilles du coche \""
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"defaultdict(<class 'float'>, {'abeilles': 1.0, 'coche': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Gaucho : une r\u00e9action de Philippe Bodard\n",
"defaultdict(<class 'float'>, {'Philippe Bodard': 1.0, 'r\u00e9action': 1.0, 'Gaucho': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Baisse de production des apiculteurs fran\u00e7ais\n",
"defaultdict(<class 'float'>, {'apiculteurs fran\u00e7ais': 1.0, 'production': 1.0, 'Baisse': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Une esp\u00e8ce en danger\n",
"defaultdict(<class 'float'>, {'danger': 1.0, 'esp\u00e8ce': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Progression du taux de mortalit\u00e9 chez les abeilles\n",
"defaultdict(<class 'float'>, {'mortalit\u00e9': 1.0, 'abeilles': 1.0, 'Progression': 1.0, 'taux': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Un huissier chez des apiculteurs Visite impromptue, vendredi, dans le Maine-et-Loire lors d'une r\u00e9union.\n",
"defaultdict(<class 'float'>, {'apiculteurs Visite impromptue': 1.0, 'huissier': 1.0, 'Maine-et-Loire': 1.0, 'vendredi': 1.0, 'd': 1.0, 'r\u00e9union': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Un huissier chez des apiculteurs Visite surprise \u00e0 Blaison-Gohier (49) lors d'une r\u00e9union sur les insecticides.\n",
"defaultdict(<class 'float'>, {'Blaison-Gohier': 1.0, 'huissier': 1.0, 'insecticides': 1.0, 'r\u00e9union': 1.0, 'apiculteurs Visite surprise': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Un huissier chez des apiculteurs Visite impromptue \u00e0 Blaison-Gohier, lors d'une r\u00e9union sur les insecticides.\n",
"defaultdict(<class 'float'>, {'Blaison-Gohier': 1.0, 'huissier': 1.0, 'insecticides': 1.0, 'apiculteurs Visite impromptue': 1.0, 'r\u00e9union': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Un huissier chez des apiculteurs Visite impromptue, pr\u00e8s d'Angers, lors d'une r\u00e9union sur les insecticides.\n",
"defaultdict(<class 'float'>, {'r\u00e9union': 1.0, 'huissier': 1.0, 'insecticides': 1.0, 'apiculteurs Visite impromptue': 1.0, 'Angers': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Intimidation d'un huissier apr\u00e8s une r\u00e9union d'information\n",
"defaultdict(<class 'float'>, {'Intimidation d': 1.0, 'huissier': 1.0, 'r\u00e9union d': 1.0, 'information': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Les apiculteurs interpellent les citoyens\n",
"defaultdict(<class 'float'>, {'citoyens': 1.0, 'apiculteurs interpellent': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Insecticides : \" T\u00e9moins g\u00eanants \", huissier g\u00ean\u00e9\n",
"defaultdict(<class 'float'>, {'T\u00e9moins g\u00eanants': 1.0, 'huissier g\u00ean\u00e9': 1.0, 'Insecticides': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Professeur Narbonne : \" Certaines personnes n'ont pas bien fait leur boulot \"\n",
"defaultdict(<class 'float'>, {'boulot': 1.0, 'personnes n': 1.0, 'Professeur Narbonne': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La Conf\u00e9d\u00e9ration interpelle l'agriculture raisonn\u00e9e"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"defaultdict(<class 'float'>, {'agriculture raisonn\u00e9e': 1.0, 'Conf\u00e9d\u00e9ration interpelle l': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La Conf\u00e9d\u00e9ration interpelle l'agriculture raisonn\u00e9e\n",
"defaultdict(<class 'float'>, {'agriculture raisonn\u00e9e': 1.0, 'Conf\u00e9d\u00e9ration interpelle l': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La Conf\u00e9d\u00e9ration interpelle l'agriculture raisonn\u00e9e\n",
"defaultdict(<class 'float'>, {'agriculture raisonn\u00e9e': 1.0, 'Conf\u00e9d\u00e9ration interpelle l': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La Conf\u00e9d\u00e9ration interpelle l'agriculture raisonn\u00e9e\n",
"defaultdict(<class 'float'>, {'agriculture raisonn\u00e9e': 1.0, 'Conf\u00e9d\u00e9ration interpelle l': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La Conf\u00e9d\u00e9ration interpelle l'agriculture raisonn\u00e9e\n",
"defaultdict(<class 'float'>, {'agriculture raisonn\u00e9e': 1.0, 'Conf\u00e9d\u00e9ration interpelle l': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La Conf\u00e9d\u00e9ration interpelle l'agriculture raisonn\u00e9e\n",
"defaultdict(<class 'float'>, {'agriculture raisonn\u00e9e': 1.0, 'Conf\u00e9d\u00e9ration interpelle l': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La Conf\u00e9d\u00e9ration interpelle l'agriculture raisonn\u00e9e\n",
"defaultdict(<class 'float'>, {'agriculture raisonn\u00e9e': 1.0, 'Conf\u00e9d\u00e9ration interpelle l': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Homologation bloqu\u00e9e pour le Regent .\n",
"defaultdict(<class 'float'>, {'Homologation bloqu\u00e9e': 1.0, 'Regent': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Insecticides : un film saisi pr\u00e8s d'Angers\n",
"defaultdict(<class 'float'>, {'film saisi': 1.0, 'Angers': 1.0, 'Insecticides': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Un huissier saisit un film sur des insecticides lors d'une r\u00e9union publique\n",
"defaultdict(<class 'float'>, {'huissier saisit': 1.0, 'film': 1.0, 'insecticides': 1.0, 'r\u00e9union publique': 1.0, 'd': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Les apiculteurs ont le bourdon\n",
"defaultdict(<class 'float'>, {'apiculteurs ont': 1.0, 'bourdon': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"R\u00e9gent : BASF demande \u00e0 \u00eatre t\u00e9moin assist\u00e9\n",
"defaultdict(<class 'float'>, {'R\u00e9gent': 1.0, '\u00eatre t\u00e9moin assist\u00e9': 1.0, 'BASF demande': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"R\u00e9gent : BASF demande \u00e0 \u00eatre t\u00e9moin assist\u00e9\n",
"defaultdict(<class 'float'>, {'R\u00e9gent': 1.0, '\u00eatre t\u00e9moin assist\u00e9': 1.0, 'BASF demande': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"R\u00e9gent : BASF demande \u00e0 \u00eatre t\u00e9moin assist\u00e9\n",
"defaultdict(<class 'float'>, {'R\u00e9gent': 1.0, '\u00eatre t\u00e9moin assist\u00e9': 1.0, 'BASF demande': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"R\u00e9gent : BASF demande \u00e0 \u00eatre t\u00e9moin assist\u00e9\n",
"defaultdict(<class 'float'>, {'R\u00e9gent': 1.0, '\u00eatre t\u00e9moin assist\u00e9': 1.0, 'BASF demande': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"BASF demande le statut de \" t\u00e9moin assist\u00e9 \"\n",
"defaultdict(<class 'float'>, {'t\u00e9moin assist\u00e9': 1.0, 'statut': 1.0, 'BASF demande': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Le Gaucho et le R\u00e9gent TS toujours en accusation\n",
"defaultdict(<class 'float'>, {'R\u00e9gent TS': 1.0, 'Gaucho': 1.0, 'accusation': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Attention, abeilles en danger"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"defaultdict(<class 'float'>, {'danger': 1.0, 'Attention': 1.0, 'abeilles': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"R\u00e9gent : BASF demande \u00e0 \u00eatre t\u00e9moin assist\u00e9\n",
"defaultdict(<class 'float'>, {'R\u00e9gent': 1.0, '\u00eatre t\u00e9moin assist\u00e9': 1.0, 'BASF demande': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"La Conf\u00e9d\u00e9ration interpelle l'agriculture raisonn\u00e9e\n",
"defaultdict(<class 'float'>, {'agriculture raisonn\u00e9e': 1.0, 'Conf\u00e9d\u00e9ration interpelle l': 1.0})"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
}
],
"prompt_number": 17
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"firstchild = presse.children.first()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for n in Node_Ngram.objects.filter(node=firstchild):\n",
" print(n.ngram.terms)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"liste_ordered = collections.OrderedDict(sorted(liste.items()), key=lambda t: t[1])"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#liste_ordered"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cr\u00e9ation des Listes"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import collections"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"liste = collections.defaultdict(int)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"try:\n",
" whitelist_type = NodeType.objects.get(name='WhiteList')\n",
" blacklist_type = NodeType.objects.get(name='BlackList')\n",
"except:\n",
" whitelist_type = NodeType(name='WhiteList')\n",
" whitelist_type.save()\n",
" \n",
" blacklist_type = NodeType(name='BlackList')\n",
" blacklist_type.save()\n",
"\n",
"white_node = Node.objects.create(name='WhiteList Pubmed', user=me, parent=corpus_pubmed, type=whitelist_type)\n",
"black_node = Node.objects.create(name='BlackList Pubmed', user=me, parent=corpus_pubmed, type=blacklist_type)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Node_Ngram.objects.filter(node=white_node).count()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cr\u00e9ation de la white list"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"with transaction.atomic():\n",
" for node in presse.children.all():\n",
" for node_ngram in Node_Ngram.objects.filter(node=node):\n",
" if node_ngram.ngram.n > 1:\n",
" #liste[node_ngram.ngram.terms] += node_ngram.weight\n",
" Node_Ngram.objects.create(node=white_node, ngram=node_ngram.ngram, weight=1)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"white_node.pk"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Node_Ngram.objects.filter(node=white_node).count()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cr\u00e9ation de la black list"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"with transaction.atomic():\n",
" for node_ngram_object in Node_Ngram.objects.all()[101:150]:\n",
" Node_Ngram.objects.create(node=black_node, ngram=node_ngram_object.ngram, occurences=1)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Node_Ngram.objects.filter(node=black_node)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cr\u00e9ation des synonymes"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"syno_type = NodeType.objects.get(name='Synonyme')\n",
"syno_node = Node.objects.create(name='Syno Pubmed',\n",
" user=user, \n",
" parent=corpus, \n",
" type=syno_type)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"synonyme1, synonyme2 = Node_Ngram.objects.filter(node=white_node)[3:5]"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"NodeNgramNgram.objects.create(node=syno_node, ngramX=synonyme1.ngram, ngramY=synonyme2.ngram)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cooccurrence"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"white_node.children.count()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"black_node.pk"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"try:\n",
" cooc_type = NodeType.objects.get(name='Cooccurrence')\n",
"except:\n",
" cooc_type = NodeType(name='Cooccurrence')\n",
" cooc_type.save()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cooc = Node.objects.create(user=me,\\\n",
" parent=corpus_pubmed,\\\n",
" type=cooc_type,\\\n",
" name=\"Cooccurrences calcul Alpha\")"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cooc.pk"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"white_node.children.all().delete()"
],
"language": "python",
"metadata": {},
"outputs": []
},
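{
"cell_type": "markdown",
"metadata": {},
"source": [
"The SQL in the next cell self-joins node_node_ngram to count, for each pair of distinct ngrams, how many white-list entries contain both. A minimal pure-Python sketch of the same computation, assuming hypothetical rows of (node_id, ngram_id) pairs (ordering and LIMIT omitted):"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Sketch only, not the actual implementation: equivalent of the SQL self-join below,\n",
"# assuming an iterable of (node_id, ngram_id) pairs like the node_node_ngram rows.\n",
"from collections import defaultdict\n",
"from itertools import permutations\n",
"\n",
"def cooccurrence_scores(rows):\n",
"    ngrams_by_node = defaultdict(set)\n",
"    for node_id, ngram_id in rows:\n",
"        ngrams_by_node[node_id].add(ngram_id)\n",
"    scores = defaultdict(int)\n",
"    for ngram_ids in ngrams_by_node.values():\n",
"        # ordered pairs with x != y, like the x.ngram_id <> y.ngram_id condition\n",
"        for x, y in permutations(ngram_ids, 2):\n",
"            scores[(x, y)] += 1\n",
"    # keep pairs seen in more than one document, like HAVING count(*) > 1\n",
"    return {pair: score for pair, score in scores.items() if score > 1}"
],
"language": "python",
"metadata": {},
"outputs": []
},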
{
"cell_type": "code",
"collapsed": false,
"input": [
"from django.db import connection\n",
"cursor = connection.cursor()\n",
"# LOCK TABLE documents_ngramtemporary IN EXCLUSIVE MODE;\n",
"query_string = \"\"\"\n",
"INSERT INTO node_nodengramngram (node_id, \"ngramX_id\", \"ngramY_id\", score)\n",
"\n",
"SELECT \n",
"%d as node_id, x.ngram_id, y.ngram_id, COUNT(*) AS score\n",
"\n",
"FROM\n",
"node_node_ngram AS x\n",
"\n",
"INNER JOIN \n",
"node_node_ngram AS y \n",
"ON x.node_id = y.node_id\n",
"\n",
"\n",
"WHERE\n",
"x.id in (select id from node_node_ngram WHERE node_id = %d )\n",
"AND\n",
"y.id in (select id from node_node_ngram WHERE node_id = %d )\n",
"AND\n",
"x.ngram_id <> y.ngram_id\n",
"\n",
"\n",
"GROUP BY\n",
"x.ngram_id, y.ngram_id\n",
"\n",
"HAVING count(*) > 1\n",
"\n",
"ORDER BY score\n",
"\n",
"LIMIT 300\n",
"\n",
" \"\"\" % (cooc.pk, white_node.pk, white_node.pk)\n",
"\n",
"cursor.execute(query_string)\n",
"\n",
"try:\n",
" while True:\n",
" row = cursor.fetchone()\n",
" if row is None:\n",
" break\n",
" print(row)\n",
"except:\n",
" pass"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from copy import copy\n",
"import numpy as np\n",
"import pandas as pd\n",
"import networkx as nx\n",
"from collections import defaultdict\n",
"from analysis.louvain import *\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"matrix = \"\""
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"matrix = defaultdict(lambda : defaultdict(float))\n",
"for cooccurrence in NodeNgramNgram.objects.filter(node=cooc):\n",
" if cooccurrence.score > 1 :\n",
" #print(x.ngramX.terms, x.ngramY.terms)\n",
" matrix[cooccurrence.ngramX.terms][cooccurrence.ngramY.terms] = cooccurrence.score\n",
" matrix[cooccurrence.ngramY.terms][cooccurrence.ngramX.terms] = cooccurrence.score"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df = pd.DataFrame(matrix).T.fillna(0)\n",
"x = copy(df.values)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"x = np.where((x.sum(axis=1) > x.shape[0] / 2), 0, x )\n",
"x = np.where((x.sum(axis=1) > x.shape[0] / 10), 0, x )"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"x = x / x.sum(axis=1)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"matrix_filtered = np.where(x > .4, 1, 0)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"matrix_filtered"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"G = nx.from_numpy_matrix(matrix_filtered)\n",
"G = nx.relabel_nodes(G, dict(enumerate(df.columns)))"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"nx.draw(G, with_labels=True)\n",
"plt.show()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"partition = best_partition(G)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#partition"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pos = nx.spring_layout(G)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"count = 0.0\n",
"node_min = 3\n",
"for com in set(partition.values()) :\n",
" count = count + 1\n",
" list_nodes = [nodes for nodes in partition.keys() if partition[nodes] == com]\n",
" \n",
" if len(list_nodes) > node_min:\n",
" nx.draw_networkx_nodes(G, pos, list_nodes, node_size = 20, with_labels=True)#, node_color = str(count / size))\n",
" nx.draw_networkx_edges(G, pos, alpha=0.5)\n",
" plt.title(\"Clique \" + str(count))\n",
" \n",
" for node in list_nodes: \n",
" print(node)\n",
" plt.show()\n",
" print(\"-\" * 30)\n"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Asynchrone"
]
},
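{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal sketch of the Celery calling conventions exercised below, assuming celerytest.tasks defines a plain task add(x, y) (the task definition itself is not part of this notebook):"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Sketch only: assumes a task add(x, y) registered in celerytest.tasks.\n",
"from celerytest.tasks import add\n",
"\n",
"result = add.delay(2, 2)                       # positional arguments passed directly\n",
"result = add.apply_async((2, 2), countdown=2)  # or as an args tuple, with options\n",
"result.get(timeout=10)                         # blocks until the worker returns 4"
],
"language": "python",
"metadata": {},
"outputs": []
},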
{
"cell_type": "code",
"collapsed": false,
"input": [
"from celerytest.tasks import add"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"add."
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 3,
"text": [
"<AsyncResult: c7df5232-b80a-4dd4-b615-432a6fb206e4>"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from celerytest.tasks import Test"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"t = Test()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"res = t.addition.delay((2,2))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"res = t.addition.apply_async((2,2), countdown=2)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"res.get()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "addition() takes 2 positional arguments but 3 were given",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-5-8bb969b0b8af>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mres\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/celery/result.py\u001b[0m in \u001b[0;36mget\u001b[1;34m(self, timeout, propagate, interval, no_ack, follow_parents, EXCEPTION_STATES, PROPAGATE_STATES)\u001b[0m\n\u001b[0;32m 173\u001b[0m \u001b[0mstatus\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmeta\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'status'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 174\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mstatus\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mPROPAGATE_STATES\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mpropagate\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 175\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbackend\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexception_to_python\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmeta\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'result'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 176\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mstatus\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mEXCEPTION_STATES\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 177\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbackend\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexception_to_python\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmeta\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'result'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mTypeError\u001b[0m: addition() takes 2 positional arguments but 3 were given"
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from celery.contrib.methods import current_app"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"app."
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 6,
"text": [
"False"
]
}
],
"prompt_number": 6
}
],
"metadata": {}
}
]
}
\ No newline at end of file
# Without this, we couldn't use the Django environment
import os
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "gargantext_web.settings")
os.environ.setdefault("DJANGO_HSTORE_GLOBAL_REGISTER", "False")
# We're gonna use all the models!
from node.models import *
# Node.objects.get(id=26514).children.all().make_metadata_filterable()
# exit()
# Reset: all data
tables_to_empty = [
    Node,
    Node_Metadata,
    Metadata,
    NodeType,
    ResourceType,
    Resource,
]
for table in tables_to_empty:
    print('Empty table "%s"...' % (table._meta.db_table, ))
    table.objects.all().delete()
# Integration: metadata types
print('Initialize metadata...')
metadata = {
'publication_date': 'datetime',
'authors': 'string',
'language_fullname': 'string',
'abstract': 'text',
'title': 'string',
'source': 'string',
'volume': 'string',
'text': 'text',
'date': 'datetime',
'page': 'string',
'doi': 'string',
'journal': 'string',
}
for name, type in metadata.items():
Metadata(name=name, type=type).save()
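# A quick sanity check could presumably be run at this point (illustrative,
# not part of the initialization itself):
# for m in Metadata.objects.all():
#     print('Metadata "%s" -> type "%s"' % (m.name, m.type))
# The 'type' strings above must match the typed value columns of
# Node_Metadata (e.g. value_datetime), which the API layer reads elsewhere.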
# Integration: languages
print('Initialize languages...')
import pycountry
Language.objects.all().delete()
for language in pycountry.languages:
if 'alpha2' in language.__dict__:
Language(
iso2 = language.alpha2,
iso3 = language.bibliographic,
fullname = language.name,
implemented = 1 if language.alpha2 in ['en', 'fr'] else 0,
).save()
english = Language.objects.get(iso2='en')
french = Language.objects.get(iso2='fr')
# Integration: users
print('Initialize users...')
try:
me = User.objects.get(username='mat')
except:
me = User(username='mat')
me.save()
# Integration: node types
print('Initialize node types...')
try:
typeProject = NodeType.objects.get(name='Project')
except Exception as error:
print(error)
typeProject = NodeType(name='Project')
typeProject.save()
try:
typeCorpus = NodeType.objects.get(name='Corpus')
except Exception as error:
print(error)
typeCorpus = NodeType(name='Corpus')
typeCorpus.save()
try:
typeDoc = NodeType.objects.get(name='Document')
except Exception as error:
print(error)
typeDoc = NodeType(name='Document')
typeDoc.save()
# Integration: resource types
print('Initialize resource...')
try:
typePubmed = ResourceType.objects.get(name='pubmed')
typeIsi = ResourceType.objects.get(name='isi')
typeRis = ResourceType.objects.get(name='ris')
typePresse = ResourceType.objects.get(name='europress')
except Exception as error:
print(error)
typePubmed = ResourceType(name='pubmed')
typePubmed.save()
typeIsi = ResourceType(name='isi')
typeIsi.save()
typeRis = ResourceType(name='ris')
typeRis.save()
typePresse = ResourceType(name='europress')
typePresse.save()
# Integration: project
print('Initialize project...')
try:
project = Node.objects.get(name='Bees project')
except:
project = Node(name='Bees project', type=typeProject, user=me)
project.save()
# Integration: corpus
print('Initialize corpus...')
try:
corpus_pubmed = Node.objects.get(name='PubMed corpus')
except:
corpus_pubmed = Node(parent=project, name='PubMed corpus', type=typeCorpus, user=me)
corpus_pubmed.save()
print('Initialize resource...')
corpus_pubmed.add_resource(
# file='./data_samples/pubmed.zip',
file='./data_samples/pubmed_2013-04-01_HoneyBeesBeeBees.xml',
type=typePubmed,
user=me
)
for resource in corpus_pubmed.get_resources():
print('Resource #%d - %s - %s' % (resource.id, resource.digest, resource.file))
print('Parse corpus #%d...' % (corpus_pubmed.id, ))
corpus_pubmed.parse_resources(verbose=True)
print('Extract corpus #%d...' % (corpus_pubmed.id, ))
corpus_pubmed.children.all().extract_ngrams(['title',])
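# Extraction runs on titles only here; other text fields declared in the
# metadata dict above could presumably be passed as well, e.g. (not run):
# corpus_pubmed.children.all().extract_ngrams(['title', 'abstract'])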
print('Extracted corpus #%d.' % (corpus_pubmed.id, ))
exit()
\ No newline at end of file
{
"metadata": {
"name": "",
"signature": "sha256:01c37f613503c408d979ba9eb9172cbd9b6b3be2ff0d7d35089d705cebc989c2"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from node.models import Node, NodeType,\\\n",
" Project, Corpus, Document,\\\n",
" Ngram, Node_Ngram,\\\n",
" User, Language, ResourceType\n",
" \n",
"from parsing.Caches import NgramsCache\n",
" \n",
"from django.db import connection\n",
"cursor = connection.cursor()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"me = User.objects.get(username='alexandre')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import networkx as nx\n",
"from networkx.readwrite import json_graph"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import csv"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"help(csv.writer)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Help on built-in function writer in module _csv:\n",
"\n",
"writer(...)\n",
" csv_writer = csv.writer(fileobj [, dialect='excel']\n",
" [optional keyword args])\n",
" for row in sequence:\n",
" csv_writer.writerow(row)\n",
" \n",
" [or]\n",
" \n",
" csv_writer = csv.writer(fileobj [, dialect='excel']\n",
" [optional keyword args])\n",
" csv_writer.writerows(rows)\n",
" \n",
" The \"fileobj\" argument can be any object that supports the file API.\n",
"\n"
]
}
],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"type(x)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 52,
"text": [
"_io.TextIOWrapper"
]
}
],
"prompt_number": 52
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"file = open('/tmp/test.graph', 'w')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stderr",
"text": [
"-c:1: ResourceWarning: unclosed file <_io.TextIOWrapper name='/tmp/test.graph' mode='w' encoding='UTF-8'>\n",
"\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
"WARNING:py.warnings:-c:1: ResourceWarning: unclosed file <_io.TextIOWrapper name='/tmp/test.graph' mode='w' encoding='UTF-8'>\n",
"\n"
]
}
],
"prompt_number": 42
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"file.write('ici il fait beau')\n",
"file.close()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 46
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"G = nx.complete_graph(30)\n",
"f = open(\"graph.json\", \"w\")\n",
"f.write(json_graph.node_link_graph(G))\n",
"f.close()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stderr",
"text": [
"-c:2: ResourceWarning: unclosed file <_io.TextIOWrapper name='graph.json' mode='w' encoding='UTF-8'>\n",
"\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
"WARNING:py.warnings:-c:2: ResourceWarning: unclosed file <_io.TextIOWrapper name='graph.json' mode='w' encoding='UTF-8'>\n",
"\n"
]
},
{
"ename": "AttributeError",
"evalue": "'Graph' object has no attribute 'get'",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-17-7d4aa550fd32>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[0mG\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcomplete_graph\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m30\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[0mf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mopen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"graph.json\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"w\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0mf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mjson_graph\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnode_link_graph\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mG\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/networkx/readwrite/json_graph/node_link.py\u001b[0m in \u001b[0;36mnode_link_graph\u001b[1;34m(data, directed, multigraph, attrs)\u001b[0m\n\u001b[0;32m 134\u001b[0m \u001b[0mnode_link_data\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0madjacency_data\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtree_data\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 135\u001b[0m \"\"\"\n\u001b[1;32m--> 136\u001b[1;33m \u001b[0mmultigraph\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'multigraph'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmultigraph\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 137\u001b[0m \u001b[0mdirected\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'directed'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdirected\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 138\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mmultigraph\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mAttributeError\u001b[0m: 'Graph' object has no attribute 'get'"
]
}
],
"prompt_number": 17
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"typeCorpus = NodeType.objects.get(name='Corpus')\n",
"for corpus in Node.objects.filter(type=typeCorpus):\n",
" print(\"#%d - %s\" % (corpus.id, corpus))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"#7 - Presse corpus\n"
]
}
],
"prompt_number": 33
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"try:\n",
" me = User.objects.get(username='alexandre')\n",
"except:\n",
" me = User(username='alexandre')\n",
" me.save()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 34
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#\u00a0corpus = Node.objects.filter(type=typeCorpus).first()\n",
"corpus = Node.objects.get(id=13064)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"Occurences"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"query_date = \"\"\"\n",
" SELECT\n",
" id,\n",
" metadata -> 'publication_year' as year,\n",
" metadata -> 'publication_month' as month, \n",
" metadata -> 'publication_day' as day,\n",
" metadata -> 'title',\n",
" FROM\n",
" node_node AS n\n",
" WHERE\n",
" n.parent_id = %d\n",
" ORDER BY\n",
" year, month, day ASC\n",
" LIMIT\n",
" 20\n",
" OFFSET\n",
" %d\n",
"\"\"\" % (corpus.id, 0)\n",
"\n",
"cursor.execute(query_date)\n",
"\n",
"while True:\n",
" row = cursor.fetchone()\n",
" if row is None:\n",
" break\n",
" print(row)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"(13066, '1954', '11', '18', 'TOXICITY OF PESTICIDE DUSTS TO HONEYBEES', None)\n",
"(13065, '1954', '11', '18', 'EQUIPMENT AND TECHNIQUE USED IN LABORATORY EVALUATION OF PESTICIDE DUSTS IN TOXICOLOGICAL STUDIES WITH HONEYBEES', None)\n",
"(13067, '1958', '11', '18', 'TOXICITY OF PESTICIDES TO HONEY BEES IN LABORATORY AND FIELD TESTS IN SOUTHERN CALIFORNIA, 1955-1956', None)\n",
"(13068, '1959', '11', '18', 'THE EFFECTS OF FIELD APPLICATIONS OF SOME OF THE NEWER PESTICIDES ON HONEY BEES', None)\n",
"(13069, '1968', '11', '18', 'PROTECTING HONEYBEES FROM PESTICIDES', None)\n",
"(13071, '1969', '11', '18', 'PESTICIDE TOXICITY AND HONEY BEES', None)\n",
"(13070, '1969', '11', '18', 'POLLEN GATHERING OF HONEY BEES REDUCED BY PESTICIDE SPRAYS', None)\n",
"(13072, '1971', '11', '18', 'NEWER PESTICIDES DONT HARM ENVIRONMENT, BUT WHERE HAVE ALL BEES GONE', None)\n",
"(13075, '1971', '11', '18', 'HONEYBEES, PESTICIDES AND LAW .3.', None)\n",
"(13074, '1971', '11', '18', 'HONEYBEES, PESTICIDES AND LAW .2.', None)\n",
"(13073, '1971', '11', '18', 'HONEYBEES, PESTICIDES AND LAW .1.', None)\n",
"(13076, '1972', '11', '18', 'RICE FIELD MOSQUITO-CONTROL STUDIES WITH LOW VOLUME DURSBAN SPRAYS IN COLUSA COUNTY, CALIFORNIA .5. EFFECTS UPON HONEY BEES', None)\n",
"(13078, '1974', '11', '18', 'MICROSOMAL OXIDASES IN HONEY BEE, APIS-MELLIFERA (L)', None)\n",
"(13077, '1974', '11', '18', 'ISOLATED HONEY BEE ABDOMENS FOR MONITORING EFFECTS OF STRESS IN AMERICAN COCKROACH', None)\n",
"(13079, '1975', '11', '18', 'INHIBITOR OF MICROSOMAL OXIDATION FROM GUT TISSUES OF HONEY BEE (APIS-MELLIFERA)', None)\n",
"(13080, '1975', '11', '18', 'REPELLENT ADDITIVES TO REDUCE PESTICIDE HAZARDS TO HONEY BEES HYMENOPTERA-APIDAE, APIS-MELLIFERA-L - FIELD-TESTS', None)\n",
"(13081, '1975', '11', '18', 'HONEYBEE ABDOMEN ASSAYS OF HEMOLYMPH FROM STRESSED AND EXTERNALLY POISONED AMERICAN COCKROACHES', None)\n",
"(13082, '1976', '11', '18', 'PROBLEM OF PESTICIDES NOT DANGEROUS TO BEES', None)\n",
"(13084, '1977', '11', '18', 'EFFECT OF SOME PESTICIDES ON A SOLITARY BEE (MEGACHILE-PACIFICA-PANZ) - (HYMENOPTERA, MEGACHILIDAE)', None)\n",
"(13085, '1977', '11', '18', 'METHOD FOR TESTING PESTICIDE TOXICITY WHICH IS SUITABLE FOR SOLITARY BEES AND ESPECIALLY FOR MEGACHILE-PACIFICA-PANZ - (HYMENOPTERA, MEGACHILIDAE)', None)\n"
]
}
],
"prompt_number": 35
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"query_date = \"\"\"\n",
" SELECT\n",
" metadata -> 'publication_year' as year,\n",
" metadata -> 'publication_month' as month, \n",
" metadata -> 'publication_day' as day,\n",
" COUNT(*)\n",
" FROM\n",
" node_node AS n\n",
" WHERE\n",
" n.parent_id = %d\n",
" GROUP BY\n",
" day, month, year\n",
" ORDER BY\n",
" year, month, day ASC\n",
" LIMIT\n",
" 20\n",
" OFFSET\n",
" %d\n",
"\"\"\" % (corpus.id, 0)\n",
"\n",
"cursor.execute(query_date)\n",
"\n",
"while True:\n",
" row = cursor.fetchone()\n",
" if row is None:\n",
" break\n",
" print('\\'' + row[0] + '/' + row[1] + '/' + row[2] + '\\'' \n",
" + ',' + '\\'' + str(row[3]) + '\\'' )"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"'1954/11/18','2'\n",
"'1958/11/18','1'\n",
"'1959/11/18','1'\n",
"'1968/11/18','1'\n",
"'1969/11/18','2'\n",
"'1971/11/18','4'\n",
"'1972/11/18','1'\n",
"'1974/11/18','2'\n",
"'1975/11/18','3'\n",
"'1976/11/18','1'\n",
"'1977/11/18','6'\n",
"'1978/11/18','11'\n",
"'1979/11/18','9'\n",
"'1980/11/18','6'\n",
"'1981/11/18','4'\n",
"'1982/11/18','7'\n",
"'1983/11/18','14'\n",
"'1984/11/18','17'\n",
"'1985/11/18','18'\n",
"'1986/02/21','1'\n"
]
}
],
"prompt_number": 28
},
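{
"cell_type": "code",
"collapsed": false,
"input": [
"# Variant of the query above (a sketch, not executed here): binding\n",
"# corpus.id and the offset as query parameters, instead of building the\n",
"# SQL string with the % operator, avoids quoting and injection issues.\n",
"query_date_params = \"\"\"\n",
"    SELECT\n",
"        metadata -> 'publication_year' as year,\n",
"        metadata -> 'publication_month' as month,\n",
"        metadata -> 'publication_day' as day,\n",
"        COUNT(*)\n",
"    FROM\n",
"        node_node AS n\n",
"    WHERE\n",
"        n.parent_id = %s\n",
"    GROUP BY\n",
"        day, month, year\n",
"    ORDER BY\n",
"        year, month, day ASC\n",
"    LIMIT 20 OFFSET %s\n",
"\"\"\"\n",
"cursor.execute(query_date_params, [corpus.id, 0])\n",
"for row in cursor.fetchall():\n",
"    print(row)"
],
"language": "python",
"metadata": {},
"outputs": []
},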
{
"cell_type": "code",
"collapsed": false,
"input": [
"cursor.execute(\"\"\"\n",
" SELECT\n",
" COUNT(*) AS occurrences,\n",
" ngX.terms\n",
" FROM\n",
" node_node AS n\n",
" INNER JOIN\n",
" node_node_ngram AS nngX ON nngX.node_id = n.id\n",
" INNER JOIN\n",
" node_ngram AS ngX ON ngX.id = nngX.ngram_id\n",
" WHERE\n",
" n.parent_id = %s\n",
" AND\n",
" ngX.n >= 2\n",
" GROUP BY\n",
" ngX.terms\n",
" Having\n",
" COUNT(*) > 7\n",
" ORDER BY\n",
" occurrences DESC\n",
" LIMIT\n",
" 100\n",
" \n",
"\"\"\", [corpus.id])\n",
"\n",
"while True:\n",
" row = cursor.fetchone()\n",
" if row is None:\n",
" break\n",
" print(row)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"(138, 'honey bees')\n",
"(132, 'apis mellifera')\n",
"(69, 'honey bee')\n",
"(66, 'apis mellifera l')\n",
"(45, 'pesticide residues')\n",
"(39, 'gas chromatography')\n",
"(36, 'varroa destructor')\n",
"(36, 'honey bee colonies')\n",
"(30, 'sublethal effects')\n",
"(27, 'apidae )')\n",
"(21, 'neonicotinoid insecticides')\n",
"(21, 'honey bee ( hymenoptera')\n",
"(18, 'bee products')\n",
"(18, 'megachile rotundata')\n",
"(18, 'solid-phase extraction')\n",
"(18, 'simultaneous determination')\n",
"(18, 'mass spectrometric')\n",
"(15, 'case study')\n",
"(15, 'honey samples')\n",
"(15, 'liquid chromatography')\n",
"(15, 'high performance liquid chromatography')\n",
"(15, 'varroa mites')\n",
"(12, 'organochlorine pesticides')\n",
"(12, 'gas chromatography-mass spectrometry')\n",
"(12, 'liquid chromatography-mass spectrometry')\n",
"(12, 'colony health')\n",
"(12, 'gas chromatographic')\n",
"(12, 'colony collapse disorder')\n",
"(12, 'bumble bees')\n",
"(12, 'varroa jacobsoni')\n",
"(9, 'chemiluminescent elisa')\n",
"(9, 'diversionary plantings for reduction of pesticide related bee mortality')\n",
"(9, 'pesticides and law')\n",
"(9, 'plant protection products')\n",
"(9, 'nomia melanderi')\n",
"(9, 'electron-capture detection')\n",
"(9, 'managed pollinator cap coordinated agricultural project a national research')\n",
"(9, 'apis florea f')\n",
"(9, 'solid-phase microextraction')\n",
"(9, 'extension initiative')\n",
"(9, 'crop pollination')\n",
"(9, 'non-apis bees')\n",
"(9, 'honey bees ( apis mellifera')\n",
"(9, 'liquid chromatography-tandem mass spectrometry')\n",
"(9, 'bee pollen')\n",
"(9, 'foraging behavior')\n",
"(9, 'biological control')\n",
"(9, 'nosema ceranae')\n",
"(9, 'organophosphorus pesticides')\n",
"(9, 'field conditions')\n",
"(9, 'honey bee apis mellifera l')\n",
"(9, 'laboratory tests')\n",
"(9, 'beauveria bassiana')\n",
"(9, 'comparative toxicity')\n",
"(9, 'high levels')\n",
"(9, 'pesticide exposure')\n",
"(9, 'fluvalinate residues')\n",
"(9, 'insecticide residues')\n",
"(9, 'osmia lignaria')\n",
"(9, 'bombus impatiens')\n",
"(9, 'honey bee health')\n",
"(9, 'agricultural landscape')\n",
"(9, 'dispersive liquid-liquid microextraction')\n",
"(9, 'matrix solid-phase dispersion')\n"
]
}
],
"prompt_number": 28
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"Cooccurrences par ann\u00e9e"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cursor.execute(\"\"\"\n",
" SELECT\n",
" COUNT(*) AS occurrences,\n",
" n.metadata->'publication_year' AS year,\n",
" ngX.terms\n",
" FROM\n",
" node_node AS n\n",
" INNER JOIN\n",
" node_node_ngram AS nngX ON nngX.node_id = n.id\n",
" INNER JOIN\n",
" node_ngram AS ngX ON ngX.id = nngX.ngram_id\n",
" WHERE\n",
" n.parent_id = %s\n",
" GROUP BY\n",
" terms,\n",
" year\n",
" ORDER BY\n",
" occurrences DESC\n",
" LIMIT\n",
" 20\n",
"\"\"\", [corpus.id])\n",
"\n",
"while True:\n",
" row = cursor.fetchone()\n",
" if row is None:\n",
" break\n",
" print(row)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"(24, '2014', 'patients')\n",
"(22, '2005', 'patients')\n",
"(18, '2005', 'study')\n",
"(15, '2014', 'voice')\n",
"(14, '2002', 'disease')\n",
"(14, '2013', 'patients')\n",
"(14, '2006', 'study')\n",
"(13, '2014', 'treatment')\n",
"(12, '2011', 'patients')\n",
"(12, '2004', 'voice')\n",
"(12, '2012', 'patients')\n",
"(12, '2003', 'patients')\n",
"(12, '2005', 'voice')\n",
"(11, '2002', 'patients')\n",
"(11, '2014', 'study')\n",
"(10, '2007', 'patients')\n",
"(10, '2006', 'patients')\n",
"(10, '2004', 'study')\n",
"(10, '2001', 'patients')\n",
"(10, '2014', 'phase')\n"
]
}
],
"prompt_number": 105
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"Cr\u00e9ation d'une liste de synonymes"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"ngramsCache = NgramsCache(Language.objects.get(iso2='fr'))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 17
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"synonymePairs = [\n",
" ['danger', 'risques'],\n",
" ['risque', 'risques'],\n",
" ['R\u00e9gent', 'R\u00e9gent TS']\n",
"]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 18
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"try:\n",
" typeSynonyme = NodeType.objects.get(name='Synonyme')\n",
"except:\n",
" typeSynonyme = NodeType(name='Synonyme')\n",
" typeSynonyme.save()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 19
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"listSynonymes = Node(name='Syno abeilles', type=typeSynonyme, user=me)\n",
"listSynonymes.save()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 22
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for synonymePair in synonymePairs:\n",
" NodeNgramNgram(\n",
" ngramx = ngramsCache[synonymePair[0]],\n",
" ngramy = ngramsCache[synonymePair[1]],\n",
" node = listSynonymes,\n",
" score = 1.\n",
" ).save()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 23
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"listSynonymes.id"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 24,
"text": [
"61297"
]
}
],
"prompt_number": 24
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"Occurrences avec synonymes"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"'''cursor.execute(\"\"\"\n",
" SELECT\n",
" COUNT(*) AS occurrences,\n",
" ngx.terms\n",
" FROM\n",
" node_node AS n\n",
" INNER JOIN\n",
" node_node_ngram AS nngX ON nngX.node_id = n.id\n",
" INNER JOIN\n",
" node_ngram AS ngX ON ngX.id = nngX.ngram_id\n",
" WHERE\n",
" n.parent_id = %s\n",
" GROUP BY\n",
" ngX.terms\n",
" ORDER BY\n",
" occurrences DESC\n",
" LIMIT\n",
" 20\n",
"\"\"\")'''\n",
"cursor.execute(\"\"\"\n",
" SELECT\n",
" n.id\n",
" FROM\n",
" node_node AS n\n",
" INNER JOIN\n",
" node_node_ngram AS nngx ON nngx.node_id = n.id\n",
" INNER JOIN\n",
" node_nodengramngram AS nngng ON nngng.ngramx_id = nngx.ngram_id\n",
" INNER JOIN\n",
" node_node_ngram AS nngy ON nngy.id = nngng.ngramy_id\n",
" WHERE\n",
" n.parent_id = %s\n",
"\"\"\", [corpus.id])\n",
"#\u00a0\"\"\" % [listSynonymes.id])\n",
"\n",
"while True:\n",
" row = cursor.fetchone()\n",
" if row is None:\n",
" break\n",
" print(row)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 26
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"Cooccurrences"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cursor.execute(\"\"\"\n",
" SELECT\n",
" COUNT(*) AS cooccurrences,\n",
" ngX.terms,\n",
" ngY.terms\n",
" FROM\n",
" node_node AS n\n",
" \n",
" INNER JOIN\n",
" node_node_ngram AS nngX ON nngX.node_id = n.id\n",
" INNER JOIN\n",
" node_ngram AS ngX ON ngX.id = nngX.ngram_id\n",
" \n",
" INNER JOIN\n",
" node_node_ngram AS nngY ON nngY.node_id = n.id\n",
" INNER JOIN\n",
" node_ngram AS ngY ON ngY.id = nngY.ngram_id\n",
" \n",
" WHERE\n",
" n.parent_id = %s\n",
" AND\n",
" nngX.ngram_id in (select id from node_node_ngram WHERE node_id = 61298 )\n",
" AND\n",
" nngY.ngram_id in (select id from node_node_ngram WHERE node_id = 61298 )\n",
" AND\n",
" nngX.ngram_id <> nngY.ngram_id\n",
" \n",
" GROUP BY\n",
" ngX.id,\n",
" ngX.terms,\n",
" ngY.id,\n",
" ngY.terms\n",
" ORDER BY\n",
" cooccurrences DESC\n",
" LIMIT\n",
" 200\n",
"\"\"\", [corpus.id])\n",
"\n",
"while True:\n",
" row = cursor.fetchone()\n",
" if row is None:\n",
" break\n",
" print(row)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 15
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cursor.execute(\"\"\"\n",
" SELECT\n",
" COUNT(*) AS cooccurrences,\n",
" ngX.terms,\n",
" ngY.terms\n",
" FROM\n",
" node_node AS n\n",
" \n",
" INNER JOIN\n",
" node_node_ngram AS nngX ON nngX.node_id = n.id\n",
" INNER JOIN\n",
" node_ngram AS ngX ON ngX.id = nngX.ngram_id\n",
" \n",
" INNER JOIN\n",
" node_node_ngram AS nngY ON nngY.node_id = n.id\n",
" INNER JOIN\n",
" node_ngram AS ngY ON ngY.id = nngY.ngram_id\n",
"\n",
" WHERE\n",
" n.parent_id = %s\n",
" AND\n",
" nngX.ngram_id <> nngY.ngram_id\n",
" \n",
" GROUP BY\n",
" ngX.id,\n",
" ngX.terms,\n",
" ngY.id,\n",
" ngY.terms\n",
" ORDER BY\n",
" cooccurrences DESC\n",
" LIMIT\n",
" 20\n",
"\"\"\", [corpus.id])\n",
"\n",
"while True:\n",
" row = cursor.fetchone()\n",
" if row is None:\n",
" break\n",
" print(row)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stderr",
"text": [
"ERROR: An unexpected error occurred while tokenizing input\n",
"The following traceback may be corrupted or invalid\n",
"The error message is: ('EOF in multi-line string', (1, 0))\n",
"\n"
]
},
{
"ename": "OperationalError",
"evalue": "arr\u00eat des connexions suite \u00e0 la demande de l'administrateur\nSSL connection has been closed unexpectedly\n",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mOperationalError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-11-752593da5735>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 31\u001b[0m \u001b[0mLIMIT\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 32\u001b[0m \u001b[1;36m20\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 33\u001b[1;33m \"\"\", [corpus.id])\n\u001b[0m\u001b[0;32m 34\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 35\u001b[0m \u001b[1;32mwhile\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/backends/util.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, sql, params)\u001b[0m\n\u001b[0;32m 67\u001b[0m \u001b[0mstart\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 68\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 69\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0msuper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mCursorDebugWrapper\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 70\u001b[0m \u001b[1;32mfinally\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 71\u001b[0m \u001b[0mstop\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/backends/util.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, sql, params)\u001b[0m\n\u001b[0;32m 51\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 52\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 53\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 54\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 55\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mexecutemany\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparam_list\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/utils.py\u001b[0m in \u001b[0;36m__exit__\u001b[1;34m(self, exc_type, exc_value, traceback)\u001b[0m\n\u001b[0;32m 97\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mdj_exc_type\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32min\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mDataError\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mIntegrityError\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 98\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwrapper\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0merrors_occurred\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 99\u001b[1;33m \u001b[0msix\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreraise\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdj_exc_type\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdj_exc_value\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtraceback\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 100\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 101\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/utils/six.py\u001b[0m in \u001b[0;36mreraise\u001b[1;34m(tp, value, tb)\u001b[0m\n\u001b[0;32m 547\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mreraise\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtp\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtb\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 548\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__traceback__\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mtb\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 549\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwith_traceback\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtb\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 550\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 551\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/backends/util.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, sql, params)\u001b[0m\n\u001b[0;32m 51\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 52\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 53\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 54\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 55\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mexecutemany\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparam_list\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mOperationalError\u001b[0m: arr\u00eat des connexions suite \u00e0 la demande de l'administrateur\nSSL connection has been closed unexpectedly\n"
]
}
],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
\ No newline at end of file
@@ -11,7 +11,7 @@ def create_blacklist(user, corpus):
def create_synonymes(user, corpus):
    pass

-def create_whitelist(user, corpus):
def create_whitelist(user, corpus, size=100):
    cursor = connection.cursor()
    try:
@@ -60,16 +60,16 @@ def create_whitelist(user, corpus):
        ORDER BY
            occurrences DESC
        LIMIT
-            100
            %d
        ;
-    """ % (white_list.id, corpus.id, type_document.id)
    """ % (white_list.id, corpus.id, type_document.id, size)
    cursor.execute(query_whitelist)
    return white_list

#def create_cooc(user, corpus, whitelist, blacklist, synonymes):
-def create_cooc(user=None, corpus=None, whitelist=None, size=400):
def create_cooc(user=None, corpus=None, whitelist=None, size=150):
    cursor = connection.cursor()
    try:
@@ -133,7 +133,7 @@ def create_cooc(user=None, corpus=None, whitelist=None, size=150):
    cursor.execute(query_cooc)
    return cooc

-def get_cooc(request=None, corpus_id=None, cooc_id=None, type="node_link"):
def get_cooc(request=None, corpus_id=None, cooc_id=None, type="node_link", n=150):
    import pandas as pd
    from copy import copy
    import numpy as np
@@ -152,9 +152,9 @@ def get_cooc(request=None, corpus_id=None, cooc_id=None, type="node_link", n=150):
    if Node.objects.filter(type=type_cooc, parent=corpus).first() is None:
        print("Coocurrences do not exist yet, create it.")
-        whitelist = create_whitelist(request.user, corpus)
-        cooccurrence_node = create_cooc(user=request.user, corpus=corpus, whitelist=whitelist)
-        print(cooccurrence_matrix.id, "Cooc created")
        whitelist = create_whitelist(request.user, corpus, size=n)
        cooccurrence_node = create_cooc(user=request.user, corpus=corpus, whitelist=whitelist, size=n)
        print(cooccurrence_node.id, "Cooc created")
    else:
        cooccurrence_node = Node.objects.filter(type=type_cooc, parent=corpus).first()
@@ -177,7 +177,7 @@ def get_cooc(request=None, corpus_id=None, cooc_id=None, type="node_link", n=150):
    threshold = min(x.max(axis=1))
    matrix_filtered = np.where(x >= threshold, 1, 0)
    #matrix_filtered = np.where(x > threshold, x, 0)
-    #matrix_filtered = matrix_filtered.resize((90,90))
    G = nx.from_numpy_matrix(matrix_filtered)
    G = nx.relabel_nodes(G, dict(enumerate([ labels[label] for label in list(df.columns)])))
    #G = nx.relabel_nodes(G, dict(enumerate(df.columns)))
@@ -189,22 +189,37 @@ def get_cooc(request=None, corpus_id=None, cooc_id=None, type="node_link", n=150):
    partition = best_partition(G)

-    for node in G.nodes():
-        try:
-            #node,type(labels[node])
-            G.node[node]['label'] = node
-            G.node[node]['name'] = node
-            G.node[node]['size'] = weight[node]
-            G.node[node]['group'] = partition[node]
-            # G.node[node]['color'] = '19,180,300'
-            G.add_edge(node, partition[node], weight=3)
-        except Exception as error:
-            print(error)
    if type == "node_link":
        for node in G.nodes():
            try:
                #node,type(labels[node])
                G.node[node]['label'] = node
                G.node[node]['name'] = node
                G.node[node]['size'] = weight[node]
                G.node[node]['group'] = partition[node]
                #G.add_edge(node, partition[node], weight=3)
                # G.node[node]['color'] = '19,180,300'
            except Exception as error:
                print(error)
        data = json_graph.node_link_data(G)
    elif type == "adjacency":
-        data = json_graph.adjacency_data(G)
        for node in G.nodes():
            try:
                #node,type(labels[node])
                #G.node[node]['label'] = node
                G.node[node]['name'] = node
                #G.node[node]['size'] = weight[node]
                G.node[node]['group'] = partition[node]
                #G.add_edge(node, partition[node], weight=3)
                # G.node[node]['color'] = '19,180,300'
            except Exception as error:
                print(error)
        data = json_graph.node_link_data(G)
    # data = json_graph.node_link_data(G, attrs={\
    #     'source':'source',\
    #     'target':'target',\
...
@@ -3,33 +3,104 @@ from django.core.exceptions import PermissionDenied, SuspiciousOperation
from django.core.exceptions import ValidationError
from django.db.models import Avg, Max, Min, Count, Sum

-from node.models import NodeType, Node, Node_Ngram, Ngram
-from django.db import connection

# from node.models import Language, ResourceType, Resource
# from node.models import Node, NodeType, Node_Resource, Project, Corpus
-# from node.admin import CorpusForm, ProjectForm, ResourceForm

-_sql_cte = '''
-    WITH RECURSIVE cte ("depth", "path", "ordering", "id") AS (
-        SELECT 1 AS depth,
-            array[T."id"] AS path,
-            array[T."id"] AS ordering,
-            T."id"
-        FROM %s T
-        WHERE T."parent_id" IS NULL
-
-        UNION ALL
-
-        SELECT cte.depth + 1 AS depth,
-            cte.path || T."id",
-            cte.ordering || array[T."id"],
-            T."id"
-        FROM %s T
-        JOIN cte ON T."parent_id" = cte."id"
-    )
-''' % (Node._meta.db_table, Node._meta.db_table, )

from sqlalchemy import text, distinct
from sqlalchemy.sql import func
from sqlalchemy.orm import aliased

import node.models
NodeType = node.models.NodeType.sa
Node = node.models.Node.sa
Node_Ngram = node.models.Node_Ngram.sa
Ngram = node.models.Ngram.sa
Metadata = node.models.Metadata.sa
Node_Metadata = node.models.Node_Metadata.sa

# for debugging only
def literalquery(statement, dialect=None):
    """Generate an SQL expression string with bound parameters rendered inline
    for the given SQLAlchemy statement.

    WARNING: This method of escaping is insecure, incomplete, and for debugging
    purposes only. Executing SQL statements with inline-rendered user values is
    extremely insecure.
"""
from datetime import datetime
import sqlalchemy.orm
if isinstance(statement, sqlalchemy.orm.Query):
if dialect is None:
dialect = statement.session.get_bind(
statement._mapper_zero_or_none()
).dialect
statement = statement.statement
if dialect is None:
dialect = getattr(statement.bind, 'dialect', None)
if dialect is None:
from sqlalchemy.dialects import mysql
dialect = mysql.dialect()
Compiler = type(statement._compiler(dialect))
class LiteralCompiler(Compiler):
visit_bindparam = Compiler.render_literal_bindparam
def render_literal_value(self, value, type_):
return "'" + str(value) + "'"
# if isinstance(value, (float, int)):
# return str(value)
# elif isinstance(value, datetime):
# return repr(str(value))
# else: # fallback
# value = super(LiteralCompiler, self).render_literal_value(
# value, type_,
# )
# if isinstance(value, unicode):
# return value.encode('UTF-8')
# else:
# return value
return LiteralCompiler(dialect, statement)
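# Hypothetical usage, for debugging only (assuming `query` is a SQLAlchemy
# Query object like the ones built below):
# print(literalquery(query))   # renders the SQL with bound values inlined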
# these might be used for SQLAlchemy
def get_session():
import sqlalchemy.orm
from django.db import connections
from sqlalchemy.orm import sessionmaker
from aldjemy.core import get_engine
alias = 'default'
connection = connections[alias]
engine = get_engine()
Session = sessionmaker(bind=engine)
return Session()
def get_connection():
from aldjemy.core import get_engine
engine = get_engine()
return engine.connect()
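# Usage sketch (illustrative): the session supports ORM-style queries, the
# engine connection raw SQL.
# session = get_session()
# print(session.query(Node).count())
# connection = get_connection()
# print(connection.execute('SELECT COUNT(*) FROM node_node').scalar())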
# for recursive queries
# _sql_cte = '''
# WITH RECURSIVE cte ("depth", "path", "ordering", "id") AS (
# SELECT 1 AS depth,
# array[T."id"] AS path,
# array[T."id"] AS ordering,
# T."id"
# FROM %s T
# WHERE T."parent_id" IS NULL
# UNION ALL
# SELECT cte.depth + 1 AS depth,
# cte.path || T."id",
# cte.ordering || array[T."id"],
# T."id"
# FROM %s T
# JOIN cte ON T."parent_id" = cte."id"
# )
# ''' % (Node._meta.db_table, Node._meta.db_table, )
def DebugHttpResponse(data):
    return HttpResponse('<html><body style="background:#000;color:#FFF"><pre>%s</pre></body></html>' % (str(data), ))
@@ -38,7 +109,7 @@ import json
def JsonHttpResponse(data, status=200):
    return HttpResponse(
        content = json.dumps(data, indent=4),
-        content_type = "application/json",
        content_type = 'application/json; charset=utf-8',
        status = status
    )
Http400 = SuspiciousOperation
@@ -57,15 +128,364 @@ def CsvHttpResponse(data, headers=None, status=200):
        writer.writerow(row)
    return response

Http400 = SuspiciousOperation
Http403 = PermissionDenied

_ngrams_order_columns = {
    "frequency" : "-count",
    "alphabetical" : "terms"
}
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework.exceptions import APIException
_APIException = APIException
class APIException(_APIException):
def __init__(self, message, code=500):
self.status_code = code
self.detail = message
_operators = {
"=": lambda field, value: (field == value),
"!=": lambda field, value: (field != value),
"<": lambda field, value: (field < value),
">": lambda field, value: (field > value),
"<=": lambda field, value: (field <= value),
">=": lambda field, value: (field >= value),
"in": lambda field, value: (field.in_(value)),
"contains": lambda field, value: (field.contains(value)),
"startswith": lambda field, value: (field.startswith(value)),
}
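# Each entry maps an operator name to a callable that builds a SQLAlchemy
# criterion from a column and a value; for instance (illustrative):
# criterion = _operators['>='](Node.id, 42)   # same as (Node.id >= 42)
# some_query = some_query.filter(criterion)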
from rest_framework.decorators import api_view
@api_view(('GET',))
def Root(request, format=None):
return Response({
'users': reverse('user-list', request=request, format=format),
'snippets': reverse('snippet-list', request=request, format=format)
})
class NodesChildrenMetatadata(APIView):
def get(self, request, node_id):
# query metadata keys
ParentNode = aliased(Node)
metadata_query = (Metadata
.query(Metadata)
.join(Node_Metadata, Node_Metadata.metadata_id == Metadata.id)
.join(Node, Node.id == Node_Metadata.node_id)
.filter(Node.parent_id == node_id)
.group_by(Metadata)
)
# build a collection with the metadata keys
collection = []
for metadata in metadata_query:
valuesCount = 0
values = None
# count values and determine their span
values_count = None
values_from = None
values_to = None
if metadata.type != 'text':
value_column = getattr(Node_Metadata, 'value_' + metadata.type)
node_metadata_query = (Node_Metadata
.query(value_column)
.join(Node, Node.id == Node_Metadata.node_id)
.filter(Node.parent_id == node_id)
.filter(Node_Metadata.metadata_id == metadata.id)
.group_by(value_column)
.order_by(value_column)
)
values_count = node_metadata_query.count()
# values_count, values_from, values_to = node_metadata_query.first()
# if there is less than 32 values, retrieve them
values = None
if isinstance(values_count, int) and values_count <= 48:
values = [row[0] for row in node_metadata_query.all()]
if metadata.type == 'datetime':
    values = list(map(lambda x: x.isoformat(), values))
# adding this metadata to the collection
collection.append({
'key': metadata.name,
'type': metadata.type,
'values': values,
'valuesFrom': values_from,
'valuesTo': values_to,
'valuesCount': values_count,
})
return JsonHttpResponse({
'collection': collection,
})
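# Illustrative exchange for the view above (made-up values), routed in
# urls.py as GET /api/nodes/<node_id>/children/metadata; response shape:
#     {"collection": [{"key": "publication_date", "type": "datetime",
#                      "values": ["2013-04-01T00:00:00"], "valuesFrom": null,
#                      "valuesTo": null, "valuesCount": 1}]}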
class NodesChildrenQueries(APIView):
def _parse_filter(self, filter):
# validate filter keys
filter_keys = {'field', 'operator', 'value'}
if set(filter) != filter_keys:
raise APIException('Every filter should have exactly %d keys: "%s"'% (len(filter_keys), '", "'.join(filter_keys)), 400)
field, operator, value = filter['field'], filter['operator'], filter['value']
# validate operator
if operator not in _operators:
raise APIException('Invalid operator: "%s"'% (operator, ), 400)
# validate value, depending on the operator
if operator == 'in':
if not isinstance(value, list):
raise APIException('Parameter "value" should be an array when using operator "%s"'% (operator, ), 400)
for v in value:
if not isinstance(v, (int, float, str)):
raise APIException('Parameter "value" should be an array of numbers or strings when using operator "%s"'% (operator, ), 400)
else:
if not isinstance(value, (int, float, str)):
raise APIException('Parameter "value" should be a number or string when using operator "%s"'% (operator, ), 400)
# parse field
field_objects = {
'metadata': None,
'ngrams': ['terms', 'n'],
}
field = field.split('.')
if len(field) < 2 or field[0] not in field_objects:
raise APIException('Parameter "field" should be a in the form "object.key", where "object" takes one of the following values: "%s". "%s" was found instead' % ('", "'.join(field_objects), '.'.join(field)), 400)
if field_objects[field[0]] is not None and field[1] not in field_objects[field[0]]:
raise APIException('Invalid key for "%s" in parameter "field", should be one of the following values: "%s". "%s" was found instead' % (field[0], '", "'.join(field_objects[field[0]]), field[1]), 400)
# return value
return field, _operators[operator], value
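    # For instance (illustrative), the filter
    #     {"field": "ngrams.terms", "operator": "in", "value": ["bee", "bees"]}
    # is returned as (['ngrams', 'terms'], _operators['in'], ['bee', 'bees']).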
def post(self, request, node_id):
""" Query the children of the given node.
Example #1
----------
Input:
{
"pagination": {
"offset": 0,
"limit": 10
},
"retrieve": {
"type": "fields",
"list": ["name", "metadata.publication_date"]
},
"filters": [
{"field": "metadata.publication_date", "operator": ">", "value": "2010-01-01 00:00:00"},
{"field": "ngrams.terms", "operator": "in", "value": ["bee", "bees"]}
],
"sort": ["name"]
}
Output:
{
"pagination": {
"offset": 0,
"limit": 10
},
"retrieve": {
"type": "fields",
"list": ["name", "metadata.publication_date"]
},
"results": [
{"id": 12, "name": "A document about bees", "publication_date": "2014-12-03 10:00:00"},
...,
]
}
"""
metadata_aliases = {}
# validate query
query_fields = {'pagination', 'retrieve', 'sort', 'filters'}
for key in request.DATA:
if key not in query_fields:
raise APIException('Unrecognized field "%s" in query object. Accepted fields are: "%s"' % (key, '", "'.join(query_fields)), 400)
# selecting info
if 'retrieve' not in request.DATA:
raise APIException('The query should have a "retrieve" parameter.', 400)
retrieve = request.DATA['retrieve']
retrieve_types = {'fields', 'aggregates'}
if 'type' not in retrieve:
raise APIException('In the query\'s "retrieve" parameter, a "type" should be specified. Possible values are: "%s".' % ('", "'.join(retrieve_types), ), 400)
if 'list' not in retrieve or not isinstance(retrieve['list'], list):
raise APIException('In the query\'s "retrieve" parameter, a "list" should be provided as an array', 400)
if retrieve['type'] not in retrieve_types:
raise APIException('Unrecognized "type": "%s" in the query\'s "retrieve" parameter. Possible values are: "%s".' % (retrieve['type'], '", "'.join(retrieve_types), ), 400)
if retrieve['type'] == 'fields':
fields_names = ['id'] + list(set(retrieve['list']) - {'id'})
elif retrieve['type'] == 'aggregates':
fields_names = list(set(retrieve['list']))
fields_list = []
for field_name in fields_names:
split_field_name = field_name.split('.')
if split_field_name[0] == 'metadata':
metadata = Metadata.query(Metadata).filter(Metadata.name == split_field_name[1]).first()
if metadata is None:
metadata_query = Metadata.query(Metadata.name).order_by(Metadata.name)
metadata_names = [metadata.name for metadata in metadata_query.all()]
raise APIException('Invalid key for "%s" in parameter "field", should be one of the following values: "%s". "%s" was found instead' % (field[0], '", "'.join(metadata_names), field[1]), 400)
# check or create Node_Metadata alias; join if necessary
if metadata.id in metadata_aliases:
metadata_alias = metadata_aliases[metadata.id]
else:
metadata_alias = metadata_aliases[metadata.id] = aliased(Node_Metadata)
field = getattr(metadata_alias, 'value_' + metadata.type)
# operation on field
if len(split_field_name) > 2:
# datetime truncation
if metadata.type == 'datetime':
datepart = split_field_name[2]
accepted_dateparts = ['year', 'month', 'day', 'hour', 'minute']
if datepart not in accepted_dateparts:
raise APIException('Invalid date truncation for "%s": "%s". Accepted values are: "%s".' % (split_field_name[1], split_field_name[2], '", "'.join(accepted_dateparts), ), 400)
# field = extract(datepart, field)
field = func.date_trunc(datepart, field)
# field = func.date_trunc(text('"%s"'% (datepart,)), field)
else:
authorized_field_names = {'id', 'name', }
if retrieve['type'] == 'aggregates' and field_name == 'count':
field = func.count(Node.id)
elif field_name not in authorized_field_names:
raise APIException('Unrecognized "field": "%s" in the query\'s "retrieve" parameter. Possible values are: "%s".' % (field_name, '", "'.join(authorized_field_names), ))
else:
field = getattr(Node, field_name)
fields_list.append(
field.label(field_name)
)
# starting the query!
document_type_id = NodeType.query(NodeType.id).filter(NodeType.name == 'Document').scalar()
query = (get_session()
.query(*fields_list)
.select_from(Node)
.filter(Node.type_id == document_type_id)
.filter(Node.parent_id == node_id)
)
# join aliases
for metadata_id, metadata_alias in metadata_aliases.items():
query = (query
.join(metadata_alias, metadata_alias.node_id == Node.id)
.filter(metadata_alias.metadata_id == metadata_id)
)
# filtering
for filter in request.DATA.get('filters', []):
# parameters extraction & validation
field, operator, value = self._parse_filter(filter)
#
if field[0] == 'metadata':
# which metadata?
metadata = Metadata.query(Metadata).filter(Metadata.name == field[1]).first()
if metadata is None:
metadata_query = Metadata.query(Metadata.name).order_by(Metadata.name)
metadata_names = [metadata.name for metadata in metadata_query.all()]
raise APIException('Invalid key for "%s" in parameter "field", should be one of the following values: "%s". "%s" was found instead' % (field[0], '", "'.join(metadata_names), field[1]), 400)
# check or create Node_Metadata alias; join if necessary
if metadata.id in metadata_aliases:
metadata_alias = metadata_aliases[metadata.id]
else:
metadata_alias = metadata_aliases[metadata.id] = aliased(Node_Metadata)
query = (query
.join(metadata_alias, metadata_alias.node_id == Node.id)
.filter(metadata_alias.metadata_id == metadata.id)
)
# filter query
query = query.filter(operator(
getattr(metadata_alias, 'value_' + metadata.type),
value
))
elif field[0] == 'ngrams':
query = query.filter(
Node.id.in_(Node_Metadata
.query(Node_Ngram.node_id)
.filter(Node_Ngram.ngram_id == Ngram.id)
.filter(operator(
getattr(Ngram, field[1]),
value
))
)
)
# TODO: date_trunc (psql) -> index also
# groupping
authorized_aggregates = {'count': func.count(Node.id)}
for field_name in fields_names:
if field_name not in authorized_aggregates:
# query = query.group_by(text(field_name))
query = query.group_by('"%s"' % (field_name, ))
# sorting
sort_fields_names = request.DATA.get('sort', ['id'])
if not isinstance(sort_fields_names, list):
raise APIException('The query\'s "sort" parameter should be an array', 400)
sort_fields_list = []
for sort_field_name in sort_fields_names:
try:
desc = sort_field_name[0] == '-'
if sort_field_name[0] in {'-', '+'}:
sort_field_name = sort_field_name[1:]
field = fields_list[fields_names.index(sort_field_name)]
if desc:
field = field.desc()
sort_fields_list.append(field)
except:
raise APIException('Unrecognized field "%s" in the query\'s "sort" parameter. Accepted values are: "%s"' % (sort_field_name, '", "'.join(fields_names)), 400)
query = query.order_by(*sort_fields_list)
# pagination
pagination = request.DATA.get('pagination', {})
for key, value in pagination.items():
if key not in {'limit', 'offset'}:
raise APIException('Unrecognized parameter in "pagination": "%s"' % (key, ), 400)
if not isinstance(value, int):
raise APIException('In "pagination", "%s" should be an integer.' % (key, ), 400)
if 'offset' not in pagination:
pagination['offset'] = 0
if 'limit' not in pagination:
pagination['limit'] = 0
# respond to client!
# return DebugHttpResponse(str(query))
# return DebugHttpResponse(literalquery(query))
results = [
dict(zip(fields_names, row))
for row in (
query[pagination["offset"]:pagination["offset"]+pagination["limit"]]
if pagination['limit']
else query[pagination["offset"]:]
)
]
pagination["total"] = query.count()
return Response({
"pagination": pagination,
"retrieve": fields_names,
"sorted": sort_fields_names,
"results": results,
}, 201)
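    # Client-side sketch (hypothetical, using the `requests` package):
    # import requests
    # r = requests.post('http://localhost:8000/api/nodes/12345/children/queries',
    #                   json={'retrieve': {'type': 'fields', 'list': ['name']},
    #                         'filters': [],
    #                         'sort': ['name'],
    #                         'pagination': {'offset': 0, 'limit': 10}})
    # print(r.json()['results'])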
class NodesController:

    @classmethod
@@ -111,122 +531,41 @@ class CorpusController:

    @classmethod
-    def ngrams(cls, request, corpus_id):
    def ngrams(cls, request, node_id):
        # parameters retrieval and validation
-        corpus = cls.get(corpus_id)
-        order = request.GET.get('order', 'frequency')
-        if order not in _ngrams_order_columns:
-            raise ValidationError('The order parameter should take one of the following values: ' + ', '.join(_ngrams_order_columns), 400)
-        order_column = _ngrams_order_columns[order]
-        # query building
-        cursor = connection.cursor()
-        cursor.execute(_sql_cte + '''
-            SELECT ngram.terms, COUNT(*) AS occurrences
-            FROM cte
-            INNER JOIN %s AS node ON node.id = cte.id
-            INNER JOIN %s AS nodetype ON nodetype.id = node.type_id
-            INNER JOIN %s AS node_ngram ON node_ngram.node_id = node.id
-            INNER JOIN %s AS ngram ON ngram.id = node_ngram.ngram_id
-            WHERE (NOT cte.id = \'%d\') AND (\'%d\' = ANY(cte."path"))
-            AND nodetype.name = 'Document'
-            AND ngram.terms LIKE '%s%%'
-            GROUP BY ngram.terms
-            ORDER BY occurrences DESC
-        ''' % (
-            Node._meta.db_table,
-            NodeType._meta.db_table,
-            Node_Ngram._meta.db_table,
-            Ngram._meta.db_table,
-            corpus.id,
-            corpus.id,
-            request.GET.get('startwith', '').replace("'", "\\'"),
-        ))
-        # # response building
-        # return JsonHttpResponse({
-        #     "list" : [row[0] for row in cursor.fetchall()],
-        # })
        startwith = request.GET.get('startwith', '').replace("'", "\\'")
        # build query
        ParentNode = aliased(Node)
        query = (Ngram
            .query(Ngram.terms, func.count('*'))
            .join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
            .join(Node, Node.id == Node_Ngram.node_id)
            .join(ParentNode, ParentNode.id == Node.parent_id)
            .filter(ParentNode.id == node_id)
            .filter(Ngram.terms.like('%s%%' % (startwith, )))
            .group_by(Ngram.terms)
            .order_by(func.count('*').desc())
        )
        # response building
        format = request.GET.get('format', 'json')
        if format == 'json':
            return JsonHttpResponse({
-                "list": [{
                "collection": [{
                    'terms': row[0],
                    'occurrences': row[1]
-                } for row in cursor.fetchall()],
                } for row in query.all()],
            })
        elif format == 'csv':
            return CsvHttpResponse(
-                [['terms', 'occurences']] + [row for row in cursor.fetchall()]
                [['terms', 'occurences']] + [row for row in query.all()]
            )
        else:
            raise ValidationError('Unrecognized "format=%s", should be "csv" or "json"' % (format, ))
-    @classmethod
-    def metadata(cls, request, corpus_id):
-        # parameters retrieval and validation
-        corpus = cls.get(corpus_id)
-        # query building
-        cursor = connection.cursor()
-        # cursor.execute(_sql_cte + '''
-        #     SELECT key
-        #     FROM (
-        #         SELECT skeys(metadata) AS key, COUNT(*)
-        #         FROM cte
-        #         INNER JOIN %s AS node ON node.id = cte.id
-        #         WHERE (NOT cte.id = \'%d\') AND (\'%d\' = ANY(cte."path"))
-        #     ) AS keys
-        #     GROUP BY key
-        #     ORDER BY COUNT(*) DESC
-        # ''' % (Node._meta.db_table, corpus.id, corpus.id, ))
-        cursor.execute('''
-            SELECT key, COUNT(*) AS count, (
-                SELECT COUNT(DISTINCT metadata->key) FROM %s
-            ) AS values
-            FROM (
-                SELECT skeys(metadata) AS key
-                FROM %s
-                WHERE parent_id = \'%d\'
-            ) AS keys
-            GROUP BY key
-            ORDER BY count
-        ''' % (Node._meta.db_table, Node._meta.db_table, corpus.id, ))
-        # response building
-        collection = []
-        for row in cursor.fetchall():
-            type = 'string'
-            key = row[0]
-            split_key = key.split('_')
-            name = split_key[0]
-            if len(split_key) == 2:
-                if split_key[1] == 'date':
-                    name = split_key[0]
-                    type = 'datetime'
-                elif row[0] == 'language_fullname':
-                    name = 'language'
-                    type = 'string'
-                else:
-                    continue
-            values = None
-            if row[2] < 32:
-                cursor.execute('''
-                    SELECT DISTINCT metadata->'%s'
-                    FROM %s
-                    WHERE parent_id = %s
-                    AND metadata ? '%s'
-                    ORDER BY metadata->'%s'
-                ''' % (key, Node._meta.db_table, corpus.id, key, key, ))
-                values = [row[0] for row in cursor.fetchall()]
-            collection.append({
-                'key': key,
-                'text': name,
-                'documents': row[1],
-                'valuesCount': row[2],
-                'values': values,
-                'type': type,
-            })
-        return JsonHttpResponse(collection)
     @classmethod
     def data(cls, request, corpus_id):
         # parameters retrieval and validation
...
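Above, the rewritten `ngrams` controller replaces the raw CTE SQL with an aldjemy-backed SQLAlchemy query and renames the JSON response key from "list" to "collection". A minimal client-side sketch (not part of the commit; the host, the corpus id 42 and the prefix are placeholders) of how the reworked endpoint could be exercised:

    import json
    from urllib.request import urlopen

    # route per the urlpatterns change below: /api/nodes/<id>/ngrams
    url = 'http://localhost:8000/api/nodes/42/ngrams?format=json&startwith=bio'
    with urlopen(url) as response:
        data = json.loads(response.read().decode('utf-8'))
    for row in data['collection']:
        print(row['terms'], row['occurrences'])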
@@ -68,6 +68,8 @@ INSTALLED_APPS = (
     'ngram',
     'django_hstore',
     'djcelery',
+    'aldjemy',
+    'rest_framework',
 )
 MIDDLEWARE_CLASSES = (
...
@@ -35,19 +35,21 @@ urlpatterns = patterns('',
     # Getting data
     url(r'^chart/corpus/(\d+)/data.csv$', views.send_csv),
     url(r'^corpus/(\d+)/node_link.json$', views.node_link),
-    url(r'^corpus/(\d+)/adjacency.json$', views.adjacency),
+    url(r'^corpus/(\d+)/adjacency.json$', views.node_link),
+    url(r'^api$', gargantext_web.api.Root),
+    url(r'^api/nodes/(\d+)/children/metadata$', gargantext_web.api.NodesChildrenMetatadata.as_view()),
+    url(r'^api/nodes/(\d+)/children/queries$', gargantext_web.api.NodesChildrenQueries.as_view()),
+    # REST views
     url(r'^api/nodes$', gargantext_web.api.NodesController.get),
-    url(r'^api/corpus/(\d+)/ngrams$', gargantext_web.api.CorpusController.ngrams),
-    url(r'^api/corpus/(\d+)/metadata$', gargantext_web.api.CorpusController.metadata),
-    url(r'^api/corpus/(\d+)/data$', gargantext_web.api.CorpusController.data),
+    url(r'^api/nodes/(\d+)/ngrams$', gargantext_web.api.CorpusController.ngrams),
+    url(r'^api/nodes/(\d+)/data$', gargantext_web.api.CorpusController.data),
+    # Tests (to be removed soon)
     url(r'^graph-it$', views.graph_it),
     url(r'^ngrams$', views.ngrams),
 )
 from django.conf import settings
 if settings.DEBUG:
     urlpatterns += patterns('',
...
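Several API routes move here from `/api/corpus/...` to `/api/nodes/...`. A quick resolver check (not part of the commit; assumes Django settings are loaded and uses a placeholder id) to confirm the renamed patterns dispatch to the expected views:

    import gargantext_web.api
    from django.core.urlresolvers import resolve

    match = resolve('/api/nodes/42/ngrams')
    print(match.func)  # should point at gargantext_web.api.CorpusController.ngrams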
@@ -367,7 +367,6 @@ def corpus(request, project_id, corpus_id):
     return HttpResponse(html)
 def delete_project(request, node_id):
     Node.objects.filter(id=node_id).all().delete()
     return HttpResponseRedirect('/projects/')
...
@@ -30,3 +30,5 @@ sudo apt-get install libopenblas-dev
 sudo apt-get install liblapack-dev
+source /srv/gargantext_env/bin/activate
+pip install git+https://github.com/mathieurodic/aldjemy.git
@@ -6,7 +6,7 @@ Pillow==2.5.3
 Pygments==1.6
 SQLAlchemy==0.9.8
 South==1.0
-aldjemy==0.3.51
+aldjemy==0.3.10
 amqp==1.4.6
 anyjson==0.3.3
 billiard==3.3.0.18
@@ -26,13 +26,14 @@ django-hstore==1.3.1
 django-mptt==0.6.1
 django-nested-inlines==0.1
 django-treebeard==2.0
+djangorestframework==3.0.0
 graphviz==0.4
 ipython==2.2.0
 kombu==3.0.23
 lxml==3.3.6
-#matplotlib==1.4.0
+matplotlib==1.4.0
 networkx==1.9
-#nltk==3.0a4
+nltk==3.0a4
 nose==1.3.4
 numpy==1.8.2
 pandas==0.14.1
...
@@ -75,6 +75,23 @@ class NodeQuerySet(CTENodeManager.CTEQuerySet):
         ngramscaches = NgramsCaches()
         for node in self:
             node.extract_ngrams(keys, ngramsextractorscache, ngramscaches)
+    def make_metadata_filterable(self):
+        metadata_cache = {metadata.name: metadata for metadata in Metadata.objects.all()}
+        data = []
+        for node in self:
+            print(node.id)
+            for key, value in node.metadata.items():
+                if key in metadata_cache:
+                    metadata = metadata_cache[key]
+                    if metadata.type == 'string':
+                        value = value[:255]
+                    data.append(Node_Metadata(**{
+                        'node_id' : node.id,
+                        'metadata_id' : metadata.id,
+                        ('value_'+metadata.type) : value,
+                    }))
+        Node_Metadata.objects.bulk_create(data)
 class NodeManager(CTENodeManager):
     """Methods available from Node.object."""
@@ -85,6 +102,10 @@ class NodeManager(CTENodeManager):
         if name.startswith("_"):
             raise AttributeError
         return getattr(self.get_queryset(), name, *args)
+class Metadata(models.Model):
+    name = models.CharField(max_length=32, db_index=True)
+    type = models.CharField(max_length=16, db_index=True)
 class Node(CTENode):
     """The node."""
@@ -137,7 +158,7 @@ class Node(CTENode):
         return resource
     @current_app.task(filter=task_method)
-    def parse_resources(self):
+    def parse_resources(self, verbose=False):
         # parse all resources into a list of metadata
         metadata_list = []
         for node_resource in self.node_resource.filter(parsed=False):
@@ -151,22 +172,33 @@ class Node(CTENode):
             'europress_english' : EuropressFileParser,
         })[resource.type.name]()
         metadata_list += parser.parse(str(resource.file))
-        # insert the new resources in the database!
-        type = NodeType.objects.get(name='Document')
+        # retrieve info from the database
+        type_id = NodeType.objects.get(name='Document').id
         langages_cache = LanguagesCache()
-        Node.objects.bulk_create([
-            Node(
-                user = self.user,
-                type = type,
-                name = metadata['title'][0:199] if 'title' in metadata else '',
-                parent = self,
-                language = langages_cache[metadata['language_iso2']] if 'language_iso2' in metadata else None,
-                metadata = metadata,
-            )
-            for metadata in metadata_list
-        ])
+        user_id = self.user.id
+        # insert the new resources in the database!
+        for i, metadata_values in enumerate(metadata_list):
+            if verbose:
+                print(i, end='\r', flush=True)
+            name = metadata_values.get('title', '')[:200]
+            language = langages_cache[metadata_values['language_iso2']] if 'language_iso2' in metadata_values else None,
+            if isinstance(language, tuple):
+                language = language[0]
+            Node(
+                user_id = user_id,
+                type_id = type_id,
+                name = name,
+                parent = self,
+                language_id = language.id if language else None,
+                metadata = metadata_values
+            ).save()
+        # make metadata filterable
+        self.children.all().make_metadata_filterable()
         # mark the resources as parsed for this node
         self.node_resource.update(parsed=True)
     @current_app.task(filter=task_method)
     def extract_ngrams(self, keys, ngramsextractorscache=None, ngramscaches=None):
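The new `parse_resources` loop trades the old single `bulk_create` for one `save()` per document: slower, but it allows row-by-row progress reporting through the new `verbose` flag, and `make_metadata_filterable()` then indexes the freshly created children. A hypothetical driver (the corpus id is a placeholder, not part of the commit):

    # parse any resources still pending on a corpus node,
    # printing a running counter thanks to the verbose flag
    corpus = Node.objects.get(id=42)
    corpus.parse_resources(verbose=True)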
@@ -203,6 +235,15 @@ class Node(CTENode):
             for ngram_text, weight in associations.items()
         ])
+class Node_Metadata(models.Model):
+    node = models.ForeignKey(Node)
+    metadata = models.ForeignKey(Metadata)
+    value_int = models.IntegerField(null=True, db_index=True)
+    value_float = models.FloatField(null=True, db_index=True)
+    value_string = models.CharField(max_length=255, null=True, db_index=True)
+    value_datetime = models.DateTimeField(null=True, db_index=True)
+    value_text = models.TextField(null=True)
 class Node_Resource(models.Model):
     node = models.ForeignKey(Node, related_name='node_resource')
     resource = models.ForeignKey(Resource)
...
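`Metadata` and `Node_Metadata` together form a typed key/value index over each node's hstore `metadata` field: one row per (node, key), with the value stored in the column matching the key's declared type. A sketch of the kind of filter this enables (the 'publication_date' key is a hypothetical `Metadata` row of type 'datetime', not something the commit guarantees):

    import datetime

    def documents_between(corpus, start, end):
        metadata = Metadata.objects.get(name='publication_date')  # hypothetical key
        return Node.objects.filter(
            parent=corpus,
            node_metadata__metadata=metadata,            # reverse FK from Node_Metadata
            node_metadata__value_datetime__range=(start, end),
        )

    docs = documents_between(corpus, datetime.datetime(2014, 1, 1), datetime.datetime(2014, 12, 31))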
@@ -75,9 +75,10 @@ class LanguagesCache(defaultdict):
             self[str(language.iso2.lower())] = language
             self[str(language.iso3.lower())] = language
             self[str(language.fullname.lower())] = language
-        betterKey = key.strip().lower()
-        self[key] = self[betterKey] if betterKey in self.keys() else None
-        return self[betterKey]
+        if key not in self.keys():
+            betterKey = key.strip().lower()
+            self[key] = self[betterKey] if betterKey in self.keys() else None
+        return self[key]
...
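The `LanguagesCache` fix above guards the normalization with `if key not in self.keys():` and returns the value cached under the caller's own key, so a key that is already cached is returned as-is rather than re-derived. Illustration (assumes a populated Language table with a French entry):

    languages = LanguagesCache()
    lang = languages['FR ']          # first miss: normalized to 'fr', cached under 'FR '
    assert languages['FR '] is lang  # second lookup is served from the cache directly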
No preview for this file type
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
width="48px"
height="48px"
id="svg4362"
version="1.1"
inkscape:version="0.48.5 r10040"
sodipodi:docname="logo.svg">
<defs
id="defs4364" />
<sodipodi:namedview
id="base"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:pageopacity="0.0"
inkscape:pageshadow="2"
inkscape:zoom="5.6897594"
inkscape:cx="-11.235831"
inkscape:cy="3.8560006"
inkscape:current-layer="layer1"
showgrid="true"
inkscape:grid-bbox="true"
inkscape:document-units="px"
inkscape:window-width="1360"
inkscape:window-height="762"
inkscape:window-x="0"
inkscape:window-y="0"
inkscape:window-maximized="0" />
<metadata
id="metadata4367">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:title></dc:title>
</cc:Work>
</rdf:RDF>
</metadata>
<g
id="layer1"
inkscape:label="Layer 1"
inkscape:groupmode="layer">
<rect
style="fill:#fffcfc;fill-opacity:1;stroke:none"
id="rect3755"
width="29.70249"
height="31.108515"
x="0"
y="-0.1566938"
inkscape:export-filename="/srv/gargantext/static/img/logo.png"
inkscape:export-xdpi="53"
inkscape:export-ydpi="53" />
<g
inkscape:export-ydpi="53.799999"
inkscape:export-xdpi="53.799999"
inkscape:export-filename="/srv/gargantext/static/img/logo.png"
style="fill:#ff8080;fill-opacity:0.82014388"
id="g3835"
transform="matrix(0.2422549,0,0,0.23374214,-49.789462,-7.9055988)">
<path
inkscape:export-ydpi="100"
inkscape:export-xdpi="100"
inkscape:export-filename="/home/alexandre/projets/gargantext.py/gargantext_core/shared/LogoSimple.png"
id="path3837"
d="m 206.24721,35.28586 0,129.5 67.78125,0 0,-8.625 c -9.86526,-0.47262 -18.57934,-2.63259 -25.5625,-6.28125 -18.65918,-9.74237 -29.875,-28.26535 -29.875,-49.1875 0,-31.71741 21.11877,-52.8149 55.4375,-55.1875 l 0,-10.21875 -67.78125,0 z m 67.78125,10.21875 0,8.5 c 1.74191,-0.16369 3.53543,-0.28125 5.37499,-0.28125 6.91081,0 13.295,1.44116 19.6875,4.15625 l 2.40625,2.875 2.59375,14.53125 9.6875,0 0,-25.375 c -11.40283,-3.03451 -22.61727,-4.65625 -33.15625,-4.65625 -2.24526,0 -4.44959,0.10177 -6.59374,0.25 z m 0,8.5 c -23.28864,2.18852 -37.65625,18.81513 -37.65625,45.562503 0,27.600037 14.44681,45.025437 37.65625,47.812497 l 0,-93.375 z m 0,93.375 0,8.78125 c 1.36224,0.0653 2.75177,0.0937 4.15624,0.0937 10.19344,0 22.1324,-1.88915 35.78125,-5.5625 l 0,-38.1875 2.9375,-2.21875 9.5,-0.8125 0,-6.5625 -43.21875,0 0,6.5625 12.28125,0.8125 2.9375,2.21875 0,33.21875 c -6.73804,1.4374 -12.61466,2.09375 -17.625,2.09375 -2.32322,0 -4.57592,-0.17643 -6.74999,-0.4375 z"
style="font-size:166.11251831px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#ff8080;fill-opacity:0.82014388;stroke:none;font-family:Bitstream Charter;-inkscape-font-specification:Bitstream Charter"
inkscape:connector-curvature="0" />
<path
inkscape:export-ydpi="100"
inkscape:export-xdpi="100"
transform="translate(611.62306,-400.10238)"
sodipodi:open="true"
sodipodi:end="6.1660663"
sodipodi:start="0"
d="m -312.87112,480.17926 c 0,4.97881 -4.03612,9.01493 -9.01493,9.01493 -4.97881,0 -9.01493,-4.03612 -9.01493,-9.01493 0,-4.97881 4.03612,-9.01493 9.01493,-9.01493 4.57131,0 8.41901,3.42153 8.95317,7.96152"
sodipodi:ry="9.0149298"
sodipodi:rx="9.0149298"
sodipodi:cy="480.17926"
sodipodi:cx="-321.88605"
id="path3839"
style="fill:#ff8080;fill-opacity:0.82014388;stroke:none"
sodipodi:type="arc" />
</g>
</g>
</svg>
{"nodes":[{"name":"Myriel","group":1},{"name":"Napoleon","group":1},{"name":"Mlle.Baptistine","group":1},{"name":"Mme.Magloire","group":1},{"name":"CountessdeLo","group":1},{"name":"Geborand","group":1},{"name":"Champtercier","group":1},{"name":"Cravatte","group":1},{"name":"Count","group":1},{"name":"OldMan","group":1},{"name":"Labarre","group":2},{"name":"Valjean","group":2},{"name":"Marguerite","group":3},{"name":"Mme.deR","group":2},{"name":"Isabeau","group":2},{"name":"Gervais","group":2},{"name":"Tholomyes","group":3},{"name":"Listolier","group":3},{"name":"Fameuil","group":3},{"name":"Blacheville","group":3},{"name":"Favourite","group":3},{"name":"Dahlia","group":3},{"name":"Zephine","group":3},{"name":"Fantine","group":3},{"name":"Mme.Thenardier","group":4},{"name":"Thenardier","group":4},{"name":"Cosette","group":5},{"name":"Javert","group":4},{"name":"Fauchelevent","group":0},{"name":"Bamatabois","group":2},{"name":"Perpetue","group":3},{"name":"Simplice","group":2},{"name":"Scaufflaire","group":2},{"name":"Woman1","group":2},{"name":"Judge","group":2},{"name":"Champmathieu","group":2},{"name":"Brevet","group":2},{"name":"Chenildieu","group":2},{"name":"Cochepaille","group":2},{"name":"Pontmercy","group":4},{"name":"Boulatruelle","group":6},{"name":"Eponine","group":4},{"name":"Anzelma","group":4},{"name":"Woman2","group":5},{"name":"MotherInnocent","group":0},{"name":"Gribier","group":0},{"name":"Jondrette","group":7},{"name":"Mme.Burgon","group":7},{"name":"Gavroche","group":8},{"name":"Gillenormand","group":5},{"name":"Magnon","group":5},{"name":"Mlle.Gillenormand","group":5},{"name":"Mme.Pontmercy","group":5},{"name":"Mlle.Vaubois","group":5},{"name":"Lt.Gillenormand","group":5},{"name":"Marius","group":8},{"name":"BaronessT","group":5},{"name":"Mabeuf","group":8},{"name":"Enjolras","group":8},{"name":"Combeferre","group":8},{"name":"Prouvaire","group":8},{"name":"Feuilly","group":8},{"name":"Courfeyrac","group":8},{"name":"Bahorel","group":8},{"name":"Bossuet","group":8},{"name":"Joly","group":8},{"name":"Grantaire","group":8},{"name":"MotherPlutarch","group":9},{"name":"Gueulemer","group":4},{"name":"Babet","group":4},{"name":"Claquesous","group":4},{"name":"Montparnasse","group":4},{"name":"Toussaint","group":5},{"name":"Child1","group":10},{"name":"Child2","group":10},{"name":"Brujon","group":4},{"name":"Mme.Hucheloup","group":8}],"links":[{"source":1,"target":0,"value":1},{"source":2,"target":0,"value":8},{"source":3,"target":0,"value":10},{"source":3,"target":2,"value":6},{"source":4,"target":0,"value":1},{"source":5,"target":0,"value":1},{"source":6,"target":0,"value":1},{"source":7,"target":0,"value":1},{"source":8,"target":0,"value":2},{"source":9,"target":0,"value":1},{"source":11,"target":10,"value":1},{"source":11,"target":3,"value":3},{"source":11,"target":2,"value":3},{"source":11,"target":0,"value":5},{"source":12,"target":11,"value":1},{"source":13,"target":11,"value":1},{"source":14,"target":11,"value":1},{"source":15,"target":11,"value":1},{"source":17,"target":16,"value":4},{"source":18,"target":16,"value":4},{"source":18,"target":17,"value":4},{"source":19,"target":16,"value":4},{"source":19,"target":17,"value":4},{"source":19,"target":18,"value":4},{"source":20,"target":16,"value":3},{"source":20,"target":17,"value":3},{"source":20,"target":18,"value":3},{"source":20,"target":19,"value":4},{"source":21,"target":16,"value":3},{"source":21,"target":17,"value":3},{"source":21,"target":18,"value":3},{"source":21,"target":19,"value":3},{"source":21,"target":20,
"value":5},{"source":22,"target":16,"value":3},{"source":22,"target":17,"value":3},{"source":22,"target":18,"value":3},{"source":22,"target":19,"value":3},{"source":22,"target":20,"value":4},{"source":22,"target":21,"value":4},{"source":23,"target":16,"value":3},{"source":23,"target":17,"value":3},{"source":23,"target":18,"value":3},{"source":23,"target":19,"value":3},{"source":23,"target":20,"value":4},{"source":23,"target":21,"value":4},{"source":23,"target":22,"value":4},{"source":23,"target":12,"value":2},{"source":23,"target":11,"value":9},{"source":24,"target":23,"value":2},{"source":24,"target":11,"value":7},{"source":25,"target":24,"value":13},{"source":25,"target":23,"value":1},{"source":25,"target":11,"value":12},{"source":26,"target":24,"value":4},{"source":26,"target":11,"value":31},{"source":26,"target":16,"value":1},{"source":26,"target":25,"value":1},{"source":27,"target":11,"value":17},{"source":27,"target":23,"value":5},{"source":27,"target":25,"value":5},{"source":27,"target":24,"value":1},{"source":27,"target":26,"value":1},{"source":28,"target":11,"value":8},{"source":28,"target":27,"value":1},{"source":29,"target":23,"value":1},{"source":29,"target":27,"value":1},{"source":29,"target":11,"value":2},{"source":30,"target":23,"value":1},{"source":31,"target":30,"value":2},{"source":31,"target":11,"value":3},{"source":31,"target":23,"value":2},{"source":31,"target":27,"value":1},{"source":32,"target":11,"value":1},{"source":33,"target":11,"value":2},{"source":33,"target":27,"value":1},{"source":34,"target":11,"value":3},{"source":34,"target":29,"value":2},{"source":35,"target":11,"value":3},{"source":35,"target":34,"value":3},{"source":35,"target":29,"value":2},{"source":36,"target":34,"value":2},{"source":36,"target":35,"value":2},{"source":36,"target":11,"value":2},{"source":36,"target":29,"value":1},{"source":37,"target":34,"value":2},{"source":37,"target":35,"value":2},{"source":37,"target":36,"value":2},{"source":37,"target":11,"value":2},{"source":37,"target":29,"value":1},{"source":38,"target":34,"value":2},{"source":38,"target":35,"value":2},{"source":38,"target":36,"value":2},{"source":38,"target":37,"value":2},{"source":38,"target":11,"value":2},{"source":38,"target":29,"value":1},{"source":39,"target":25,"value":1},{"source":40,"target":25,"value":1},{"source":41,"target":24,"value":2},{"source":41,"target":25,"value":3},{"source":42,"target":41,"value":2},{"source":42,"target":25,"value":2},{"source":42,"target":24,"value":1},{"source":43,"target":11,"value":3},{"source":43,"target":26,"value":1},{"source":43,"target":27,"value":1},{"source":44,"target":28,"value":3},{"source":44,"target":11,"value":1},{"source":45,"target":28,"value":2},{"source":47,"target":46,"value":1},{"source":48,"target":47,"value":2},{"source":48,"target":25,"value":1},{"source":48,"target":27,"value":1},{"source":48,"target":11,"value":1},{"source":49,"target":26,"value":3},{"source":49,"target":11,"value":2},{"source":50,"target":49,"value":1},{"source":50,"target":24,"value":1},{"source":51,"target":49,"value":9},{"source":51,"target":26,"value":2},{"source":51,"target":11,"value":2},{"source":52,"target":51,"value":1},{"source":52,"target":39,"value":1},{"source":53,"target":51,"value":1},{"source":54,"target":51,"value":2},{"source":54,"target":49,"value":1},{"source":54,"target":26,"value":1},{"source":55,"target":51,"value":6},{"source":55,"target":49,"value":12},{"source":55,"target":39,"value":1},{"source":55,"target":54,"value":1},{"source":55,"target":26,"value":21},{"source":
55,"target":11,"value":19},{"source":55,"target":16,"value":1},{"source":55,"target":25,"value":2},{"source":55,"target":41,"value":5},{"source":55,"target":48,"value":4},{"source":56,"target":49,"value":1},{"source":56,"target":55,"value":1},{"source":57,"target":55,"value":1},{"source":57,"target":41,"value":1},{"source":57,"target":48,"value":1},{"source":58,"target":55,"value":7},{"source":58,"target":48,"value":7},{"source":58,"target":27,"value":6},{"source":58,"target":57,"value":1},{"source":58,"target":11,"value":4},{"source":59,"target":58,"value":15},{"source":59,"target":55,"value":5},{"source":59,"target":48,"value":6},{"source":59,"target":57,"value":2},{"source":60,"target":48,"value":1},{"source":60,"target":58,"value":4},{"source":60,"target":59,"value":2},{"source":61,"target":48,"value":2},{"source":61,"target":58,"value":6},{"source":61,"target":60,"value":2},{"source":61,"target":59,"value":5},{"source":61,"target":57,"value":1},{"source":61,"target":55,"value":1},{"source":62,"target":55,"value":9},{"source":62,"target":58,"value":17},{"source":62,"target":59,"value":13},{"source":62,"target":48,"value":7},{"source":62,"target":57,"value":2},{"source":62,"target":41,"value":1},{"source":62,"target":61,"value":6},{"source":62,"target":60,"value":3},{"source":63,"target":59,"value":5},{"source":63,"target":48,"value":5},{"source":63,"target":62,"value":6},{"source":63,"target":57,"value":2},{"source":63,"target":58,"value":4},{"source":63,"target":61,"value":3},{"source":63,"target":60,"value":2},{"source":63,"target":55,"value":1},{"source":64,"target":55,"value":5},{"source":64,"target":62,"value":12},{"source":64,"target":48,"value":5},{"source":64,"target":63,"value":4},{"source":64,"target":58,"value":10},{"source":64,"target":61,"value":6},{"source":64,"target":60,"value":2},{"source":64,"target":59,"value":9},{"source":64,"target":57,"value":1},{"source":64,"target":11,"value":1},{"source":65,"target":63,"value":5},{"source":65,"target":64,"value":7},{"source":65,"target":48,"value":3},{"source":65,"target":62,"value":5},{"source":65,"target":58,"value":5},{"source":65,"target":61,"value":5},{"source":65,"target":60,"value":2},{"source":65,"target":59,"value":5},{"source":65,"target":57,"value":1},{"source":65,"target":55,"value":2},{"source":66,"target":64,"value":3},{"source":66,"target":58,"value":3},{"source":66,"target":59,"value":1},{"source":66,"target":62,"value":2},{"source":66,"target":65,"value":2},{"source":66,"target":48,"value":1},{"source":66,"target":63,"value":1},{"source":66,"target":61,"value":1},{"source":66,"target":60,"value":1},{"source":67,"target":57,"value":3},{"source":68,"target":25,"value":5},{"source":68,"target":11,"value":1},{"source":68,"target":24,"value":1},{"source":68,"target":27,"value":1},{"source":68,"target":48,"value":1},{"source":68,"target":41,"value":1},{"source":69,"target":25,"value":6},{"source":69,"target":68,"value":6},{"source":69,"target":11,"value":1},{"source":69,"target":24,"value":1},{"source":69,"target":27,"value":2},{"source":69,"target":48,"value":1},{"source":69,"target":41,"value":1},{"source":70,"target":25,"value":4},{"source":70,"target":69,"value":4},{"source":70,"target":68,"value":4},{"source":70,"target":11,"value":1},{"source":70,"target":24,"value":1},{"source":70,"target":27,"value":1},{"source":70,"target":41,"value":1},{"source":70,"target":58,"value":1},{"source":71,"target":27,"value":1},{"source":71,"target":69,"value":2},{"source":71,"target":68,"value":2},{"source":71,"target":70,"value
":2},{"source":71,"target":11,"value":1},{"source":71,"target":48,"value":1},{"source":71,"target":41,"value":1},{"source":71,"target":25,"value":1},{"source":72,"target":26,"value":2},{"source":72,"target":27,"value":1},{"source":72,"target":11,"value":1},{"source":73,"target":48,"value":2},{"source":74,"target":48,"value":2},{"source":74,"target":73,"value":3},{"source":75,"target":69,"value":3},{"source":75,"target":68,"value":3},{"source":75,"target":25,"value":3},{"source":75,"target":48,"value":1},{"source":75,"target":41,"value":1},{"source":75,"target":70,"value":1},{"source":75,"target":71,"value":1},{"source":76,"target":64,"value":1},{"source":76,"target":65,"value":1},{"source":76,"target":66,"value":1},{"source":76,"target":63,"value":1},{"source":76,"target":62,"value":1},{"source":76,"target":48,"value":1},{"source":76,"target":58,"value":1}]}
\ No newline at end of file
@@ -116,9 +116,9 @@
 <div class="jumbotron">
     <h3><a href="/ngrams">Dictionaries</a></h3>
     <ol>
-        <li>White Lists</li>
-        <li>Black Lists</li>
         <li>Synonyms</li>
+        <li>Black Lists</li>
+        <li>White Lists</li>
     </ol>
 </div>
 </div>
@@ -129,7 +129,7 @@
     <ol>
         <li><a href="/corpus/{{ corpus.id }}/matrix">Adjacency matrix</a></li>
         <li><a href="/corpus/{{ corpus.id }}/explorer">Static maps</a></li>
-        <li>Dynamics maps</li>
+        <li>Dynamic maps</li>
     </ol>
 </div>
 </div>
...
@@ -33,7 +33,9 @@ text.active {
 </div>
 </div>
-<div id="graphid" style="visibility: hidden;">/corpus/{{ corpus.id }}/node_link.json</div>
+<div class="container">
+<div id="graphid" style="visibility: hidden;">/corpus/{{ corpus.id }}/adjacency.json</div>
 <script src="{% static "js/jquery/jquery.min.js" %}" type="text/javascript"></script>
@@ -46,6 +48,8 @@ text.active {
         </select>
     </p>
+</div>
 <script>
 var margin = {top: 80, right: 0, bottom: 10, left: 80},
...
@@ -6,6 +6,7 @@
 </head>
+{% load staticfiles %}
 <body>
 <!-- Fixed navbar -->
 <div id="dafixedtop" class="navbar navbar-inverse navbar-fixed-top" role="navigation">
@@ -16,7 +17,7 @@
     <span class="icon-bar"></span>
     <span class="icon-bar"></span>
 </button>
-<a class="navbar-brand" href="/">Gargantext</a>
+<a class="navbar-brand" style="line-height:15px; height:10px; padding: 10px 10px;" href="/"><img src="{% static "img/logo.svg" %}"></a>
 </div>
 <div class="navbar-collapse collapse">
...