Commit 3abddbc1 authored by Mathieu Rodic

Merge branch 'master' of ssh://delanoe.org:1979/gargantext

parents a0adfd88 21417615
__pycache__/
parsing/Taggers/treetagger/
{
"metadata": {
"name": "",
"signature": "sha256:7c80ed9f4b088e13444efb451a1ee46e5727247be14aaf30ddf0236a49ac461b"
"signature": "sha256:0383da299037d14e20f4be4cd7703cfddbdf0f947ee8f93f051f2ed6b7fe0cb5"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": []
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pycountry\n",
"from node.models import Language"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pycountry\n",
"\n",
"for language in pycountry.languages:\n",
" try:\n",
" implemented = 1 if language.alpha2 in ['en', 'fr'] else 0\n",
" Language(iso2=language.alpha2, iso3=language.terminology, fullname=language.name, implemented=implemented).save()\n",
" except:\n",
" pass\n",
" "
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
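{
"cell_type": "markdown",
"metadata": {},
"source": [
"The bare `except: pass` above also hides database errors. The cell below is a sketch of a stricter variant that only skips languages lacking a two-letter code; it assumes `iso2` identifies a `Language` row uniquely and reuses the field names from the cell above."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Sketch only: skip languages without an alpha2 code instead of swallowing every exception\n",
"for language in pycountry.languages:\n",
"    alpha2 = getattr(language, 'alpha2', None)\n",
"    if alpha2 is None:\n",
"        continue\n",
"    Language.objects.get_or_create(\n",
"        iso2=alpha2,\n",
"        defaults={'iso3': getattr(language, 'terminology', ''),\n",
"                  'fullname': language.name,\n",
"                  'implemented': 1 if alpha2 in ('en', 'fr') else 0})"
],
"language": "python",
"metadata": {},
"outputs": []
},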
{
"cell_type": "code",
"collapsed": false,
"input": [
"Language.objects.filter(implemented=1)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 3,
"text": [
"[<Language: English>, <Language: French>]"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for language in Language.objects.all():\n",
" print(language)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Afar\n",
"Abkhazian\n",
"Afrikaans\n",
"Akan\n",
"Albanian\n",
"Amharic\n",
"Arabic\n",
"Aragonese\n",
"Armenian\n",
"Assamese\n",
"Avaric\n",
"Avestan\n",
"Aymara\n",
"Azerbaijani\n",
"Bashkir\n",
"Bambara\n",
"Basque\n",
"Belarusian\n",
"Bengali\n",
"Bihari languages\n",
"Bislama\n",
"Bosnian\n",
"Breton\n",
"Bulgarian\n",
"Burmese\n",
"Catalan; Valencian\n",
"Chamorro\n",
"Chechen\n",
"Chinese\n",
"Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic\n",
"Chuvash\n",
"Cornish\n",
"Corsican\n",
"Cree\n",
"Czech\n",
"Danish\n",
"Divehi; Dhivehi; Maldivian\n",
"Dutch; Flemish\n",
"Dzongkha\n",
"English\n",
"Esperanto\n",
"Estonian\n",
"Ewe\n",
"Faroese\n",
"Fijian\n",
"Finnish\n",
"French\n",
"Western Frisian\n",
"Fulah\n",
"Georgian\n",
"German\n",
"Gaelic; Scottish Gaelic\n",
"Irish\n",
"Galician\n",
"Manx\n",
"Greek, Modern (1453-)\n",
"Guarani\n",
"Gujarati\n",
"Haitian; Haitian Creole\n",
"Hausa\n",
"Hebrew\n",
"Herero\n",
"Hindi\n",
"Hiri Motu\n",
"Croatian\n",
"Hungarian\n",
"Igbo\n",
"Icelandic\n",
"Ido\n",
"Sichuan Yi; Nuosu\n",
"Inuktitut\n",
"Interlingue; Occidental\n",
"Interlingua (International Auxiliary Language Association)\n",
"Indonesian\n",
"Inupiaq\n",
"Italian\n",
"Javanese\n",
"Japanese\n",
"Kalaallisut; Greenlandic\n",
"Kannada\n",
"Kashmiri\n",
"Kanuri\n",
"Kazakh\n",
"Central Khmer\n",
"Kikuyu; Gikuyu\n",
"Kinyarwanda\n",
"Kirghiz; Kyrgyz\n",
"Komi\n",
"Kongo\n",
"Korean\n",
"Kuanyama; Kwanyama\n",
"Kurdish\n",
"Lao\n",
"Latin\n",
"Latvian\n",
"Limburgan; Limburger; Limburgish\n",
"Lingala\n",
"Lithuanian\n",
"Luxembourgish; Letzeburgesch\n",
"Luba-Katanga\n",
"Ganda\n",
"Macedonian\n",
"Marshallese\n",
"Malayalam\n",
"Maori\n",
"Marathi\n",
"Malay\n",
"Malagasy\n",
"Maltese\n",
"Moldavian; Moldovan\n",
"Mongolian\n",
"Nauru\n",
"Navajo; Navaho\n",
"Ndebele, South; South Ndebele\n",
"Ndebele, North; North Ndebele\n",
"Ndonga\n",
"Nepali\n",
"Norwegian Nynorsk; Nynorsk, Norwegian\n",
"Bokm\u00e5l, Norwegian; Norwegian Bokm\u00e5l\n",
"Norwegian\n",
"Chichewa; Chewa; Nyanja\n",
"Occitan (post 1500)\n",
"Ojibwa\n",
"Oriya\n",
"Oromo\n",
"Ossetian; Ossetic\n",
"Panjabi; Punjabi\n",
"Persian\n",
"Pali\n",
"Polish\n",
"Portuguese\n",
"Pushto; Pashto\n",
"Quechua\n",
"Romansh\n",
"Romanian\n",
"Rundi\n",
"Russian\n",
"Sango\n",
"Sanskrit\n",
"Sinhala; Sinhalese\n",
"Slovak\n",
"Slovenian\n",
"Northern Sami\n",
"Samoan\n",
"Shona\n",
"Sindhi\n",
"Somali\n",
"Sotho, Southern\n",
"Spanish; Castilian\n",
"Sardinian\n",
"Serbian\n",
"Swati\n",
"Sundanese\n",
"Swahili\n",
"Swedish\n",
"Tahitian\n",
"Tamil\n",
"Tatar\n",
"Telugu\n",
"Tajik\n",
"Tagalog\n",
"Thai\n",
"Tibetan\n",
"Tigrinya\n",
"Tonga (Tonga Islands)\n",
"Tswana\n",
"Tsonga\n",
"Turkmen\n",
"Turkish\n",
"Twi\n",
"Uighur; Uyghur\n",
"Ukrainian\n",
"Urdu\n",
"Uzbek\n",
"Venda\n",
"Vietnamese\n",
"Volap\u00fck\n",
"Welsh\n",
"Walloon\n",
"Wolof\n",
"Xhosa\n",
"Yiddish\n",
"Yoruba\n",
"Zhuang; Chuang\n",
"Zulu\n"
]
}
],
"prompt_number": 11
}
],
"metadata": {}
}
]
}
\ No newline at end of file
{
"metadata": {
"name": "",
"signature": "sha256:61ddb09ee5403d49059e3152719d000f65e90207d8cef75dd6d0dab23af8cd8b"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from node.models import Node, Ngram, Node_Ngram, NodeType, NodeNgramNgram\n",
"from django.contrib.auth.models import User"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"user = User.objects.get(username='alexandre')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"corpus = Node.objects.get(name='PubMed')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Creating the Lists"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"whitelist_type = NodeType.objects.get(name='WhiteList')\n",
"blacklist_type = NodeType.objects.get(name='BlackList')\n",
"\n",
"white_node = Node.objects.create(name='WhiteList Pubmed', user=user, parent=corpus, type=whitelist_type)\n",
"black_node = Node.objects.create(name='BlackList Pubmed', user=user, parent=corpus, type=blacklist_type)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Node_Ngram.objects.filter(node=white_node).count()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 97,
"text": [
"6111"
]
}
],
"prompt_number": 97
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Creating the white list"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from django.db import transaction\n",
"\n",
"with transaction.atomic():\n",
" for node_ngram_object in Node_Ngram.objects.all()[:100]:\n",
" Node_Ngram.objects.create(node=white_node, ngram=node_ngram_object.ngram, occurences=1)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 131
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Node_Ngram.objects.filter(node=white_node)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 132,
"text": [
"[<Node_Ngram: WhiteList Pubmed: infectious diseases>, <Node_Ngram: WhiteList Pubmed: animal kingdoms>, <Node_Ngram: WhiteList Pubmed: plant>, <Node_Ngram: WhiteList Pubmed: tobacco ringspot>, <Node_Ngram: WhiteList Pubmed: host populations>, <Node_Ngram: WhiteList Pubmed: bee hemolymph>, <Node_Ngram: WhiteList Pubmed: virions>, <Node_Ngram: WhiteList Pubmed: infections>, <Node_Ngram: WhiteList Pubmed: transkingdom host alteration>, <Node_Ngram: WhiteList Pubmed: virus>, <Node_Ngram: WhiteList Pubmed: phylogenetic analysis>, <Node_Ngram: WhiteList Pubmed: negative impact>, <Node_Ngram: WhiteList Pubmed: varroa mites>, <Node_Ngram: WhiteList Pubmed: significant source>, <Node_Ngram: WhiteList Pubmed: winter>, <Node_Ngram: WhiteList Pubmed: gastric cecum>, <Node_Ngram: WhiteList Pubmed: intracellular life cycle>, <Node_Ngram: WhiteList Pubmed: threat>, <Node_Ngram: WhiteList Pubmed: trsv>, <Node_Ngram: WhiteList Pubmed: spread>, '...(remaining elements truncated)...']"
]
}
],
"prompt_number": 132
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Creating the black list"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"with transaction.atomic():\n",
" for node_ngram_object in Node_Ngram.objects.all()[101:150]:\n",
" Node_Ngram.objects.create(node=black_node, ngram=node_ngram_object.ngram, occurences=1)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Node_Ngram.objects.filter(node=black_node)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 12,
"text": [
"[<Node_Ngram: BlackList Pubmed: complete cessation>, <Node_Ngram: BlackList Pubmed: bee viruses>, <Node_Ngram: BlackList Pubmed: honey bee colonies>, <Node_Ngram: BlackList Pubmed: virus resistance>, <Node_Ngram: BlackList Pubmed: several honey bee viruses>, <Node_Ngram: BlackList Pubmed: experimental protocol>, <Node_Ngram: BlackList Pubmed: triggers>, <Node_Ngram: BlackList Pubmed: rna viruses>, <Node_Ngram: BlackList Pubmed: molecular pattern>, <Node_Ngram: BlackList Pubmed: correlates>, <Node_Ngram: BlackList Pubmed: honey bees>, <Node_Ngram: BlackList Pubmed: ccd>, <Node_Ngram: BlackList Pubmed: colonies>, <Node_Ngram: BlackList Pubmed: pathogens>, <Node_Ngram: BlackList Pubmed: viral pathogen>, <Node_Ngram: BlackList Pubmed: numerous agricultural crops>, <Node_Ngram: BlackList Pubmed: our results>, <Node_Ngram: BlackList Pubmed: infection>, <Node_Ngram: BlackList Pubmed: administration>, <Node_Ngram: BlackList Pubmed: work>, '...(remaining elements truncated)...']"
]
}
],
"prompt_number": 12
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Creating the synonyms"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"syno_type = NodeType.objects.get(name='Synonyme')\n",
"syno_node = Node.objects.create(name='Syno Pubmed',\n",
" user=user, \n",
" parent=corpus, \n",
" type=syno_type)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"synonyme1, synonyme2 = Node_Ngram.objects.filter(node=white_node)[3:5]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 23
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"NodeNgramNgram.objects.create(node=syno_node, ngramX=synonyme1.ngram, ngramY=synonyme2.ngram)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 24,
"text": [
"<NodeNgramNgram: Syno Pubmed: onset / process>"
]
}
],
"prompt_number": 24
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cooccurrence"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"white_node.children.count()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 87,
"text": [
"0"
]
}
],
"prompt_number": 87
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"black_node.pk"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 33,
"text": [
"174"
]
}
],
"prompt_number": 33
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cooc_type = NodeType.objects.get(name='Cooccurrence')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 26
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cooc = Node.objects.create(user=user, parent=corpus, type=cooc_type, name=\"Cooccurrences calcul Alpha\")"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 28
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cooc.pk"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 29,
"text": [
"177"
]
}
],
"prompt_number": 29
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"white_node.children.all().delete()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 35
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from django.db import connection\n",
"cursor = connection.cursor()\n",
"# LOCK TABLE documents_ngramtemporary IN EXCLUSIVE MODE;\n",
"query_string = \"\"\"\n",
"INSERT INTO node_nodengramngram (node_id, \"ngramX_id\", \"ngramY_id\", score)\n",
"\n",
"SELECT \n",
"%d as node_id, x.ngram_id, y.ngram_id, COUNT(*) AS score\n",
"\n",
"FROM\n",
"node_node_ngram AS x\n",
"\n",
"INNER JOIN \n",
"node_node_ngram AS y \n",
"ON x.node_id = y.node_id\n",
"\n",
"\n",
"WHERE\n",
"x.id in (select id from node_node_ngram WHERE node_id = %d )\n",
"AND\n",
"y.id in (select id from node_node_ngram WHERE node_id = %d )\n",
"AND\n",
"x.ngram_id <> y.ngram_id\n",
"\n",
"\n",
"GROUP BY\n",
"x.ngram_id, y.ngram_id\n",
"\n",
"LIMIT 100\n",
"\n",
" \"\"\" % (cooc.pk, white_node.pk, white_node.pk)\n",
"\n",
"cursor.execute(query_string)\n",
"\n",
"try:\n",
" while True:\n",
" row = cursor.fetchone()\n",
" if row is None:\n",
" break\n",
" print(row)\n",
"except:\n",
" pass"
],
"language": "python",
"metadata": {},
"outputs": [
{
"ename": "IntegrityError",
"evalue": "ERREUR: une instruction insert ou update sur la table \u00ab node_nodengramngram \u00bb viole la contrainte de cl\u00e9\n\u00e9trang\u00e8re \u00ab node_nodengramngram_node_id_fkey \u00bb\nDETAIL: La cl\u00e9 (node_id)=(6409333) n'est pas pr\u00e9sente dans la table \u00ab node_node \u00bb.\n",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mIntegrityError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-133-26412084c03e>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 31\u001b[0m \"\"\" % (cooc.pk, white_node.pk, white_node.pk)\n\u001b[0;32m 32\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 33\u001b[1;33m \u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mquery_string\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 34\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 35\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/backends/util.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, sql, params)\u001b[0m\n\u001b[0;32m 67\u001b[0m \u001b[0mstart\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 68\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 69\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0msuper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mCursorDebugWrapper\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 70\u001b[0m \u001b[1;32mfinally\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 71\u001b[0m \u001b[0mstop\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/backends/util.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, sql, params)\u001b[0m\n\u001b[0;32m 51\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 52\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 53\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 54\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 55\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mexecutemany\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparam_list\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/utils.py\u001b[0m in \u001b[0;36m__exit__\u001b[1;34m(self, exc_type, exc_value, traceback)\u001b[0m\n\u001b[0;32m 97\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mdj_exc_type\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32min\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mDataError\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mIntegrityError\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 98\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwrapper\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0merrors_occurred\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 99\u001b[1;33m \u001b[0msix\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreraise\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdj_exc_type\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdj_exc_value\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtraceback\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 100\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 101\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/utils/six.py\u001b[0m in \u001b[0;36mreraise\u001b[1;34m(tp, value, tb)\u001b[0m\n\u001b[0;32m 547\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mreraise\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtp\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtb\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 548\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__traceback__\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mtb\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 549\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwith_traceback\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtb\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 550\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 551\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/backends/util.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self, sql, params)\u001b[0m\n\u001b[0;32m 49\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdb\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwrap_database_errors\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 50\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mparams\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 51\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 52\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 53\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcursor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msql\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mIntegrityError\u001b[0m: ERREUR: une instruction insert ou update sur la table \u00ab node_nodengramngram \u00bb viole la contrainte de cl\u00e9\n\u00e9trang\u00e8re \u00ab node_nodengramngram_node_id_fkey \u00bb\nDETAIL: La cl\u00e9 (node_id)=(6409333) n'est pas pr\u00e9sente dans la table \u00ab node_node \u00bb.\n"
]
}
],
"prompt_number": 133
},
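{
"cell_type": "markdown",
"metadata": {},
"source": [
"For reference, the same `INSERT ... SELECT` can be written with bound parameters instead of `%d` string interpolation, letting the database driver handle the values; the two `id IN (...)` sub-selects are equivalent to filtering on `node_id` directly. The sketch below reuses the table and column names from the query above; it is an illustrative alternative, not the cell that produced the error."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Sketch: parameterised version of the cooccurrence insert above\n",
"from django.db import connection\n",
"\n",
"query_string = \"\"\"\n",
"INSERT INTO node_nodengramngram (node_id, \"ngramX_id\", \"ngramY_id\", score)\n",
"SELECT %s AS node_id, x.ngram_id, y.ngram_id, COUNT(*) AS score\n",
"FROM node_node_ngram AS x\n",
"INNER JOIN node_node_ngram AS y ON x.node_id = y.node_id\n",
"WHERE x.node_id = %s\n",
"  AND y.node_id = %s\n",
"  AND x.ngram_id <> y.ngram_id\n",
"GROUP BY x.ngram_id, y.ngram_id\n",
"LIMIT 100\n",
"\"\"\"\n",
"\n",
"cursor = connection.cursor()\n",
"cursor.execute(query_string, [cooc.pk, white_node.pk, white_node.pk])"
],
"language": "python",
"metadata": {},
"outputs": []
},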
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 47
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 52
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import numpy as np\n",
"import pandas as pd\n",
"from collections import defaultdict\n",
"matrix = defaultdict(lambda : defaultdict(float))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 107
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 98,
"text": [
"[<NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / animal kingdoms>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / plant>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / tobacco ringspot>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / host populations>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / bee hemolymph>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / virions>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / infections>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / transkingdom host alteration>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / virus>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / phylogenetic analysis>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / animal kingdoms>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / plant>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / tobacco ringspot>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / host populations>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / bee hemolymph>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / virions>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / infections>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / transkingdom host alteration>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / virus>, <NodeNgramNgram: Cooccurrences calcul Alpha: infectious diseases / phylogenetic analysis>, '...(remaining elements truncated)...']"
]
}
],
"prompt_number": 98
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for node_ngram_ngram in NodeNgramNgram.objects.filter(node=cooc):\n",
"    if node_ngram_ngram.score > 10:\n",
"        #print(node_ngram_ngram.ngramX.terms, node_ngram_ngram.ngramY.terms)\n",
"        matrix[node_ngram_ngram.ngramX.terms][node_ngram_ngram.ngramY.terms] = node_ngram_ngram.score\n",
" "
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 125
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"matrix = pd.DataFrame(matrix).T.fillna(0)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 126
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"matrix"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>animal kingdoms</th>\n",
" <th>infectious diseases</th>\n",
" <th>plant</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>animal kingdoms</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>apis mellifera</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>bees</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>cause</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ccd</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ceranae</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>collapse</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>colonies</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>colony collapse disorder</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>colony collapse disorder ( ccd )</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>deformed wing</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dwv</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>evidence</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>furthermore</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>health</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>hives</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>honey bee</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>honey bee colonies</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>honey bees</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>honeybees</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>iapv</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>in</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>infection</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>infections</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>infectious diseases</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>israeli acute paralysis</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>losses</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>n</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>nosema</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>nosema ceranae</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>pathogens</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>pesticides</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>plant</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>presence</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>prevalence</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>rna viruses</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>samples</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>study</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>transmission</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>united states</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>varroa</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>virus</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>viruses</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>winter</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 127,
"text": [
" animal kingdoms infectious diseases plant\n",
"animal kingdoms 0 0 0\n",
"apis mellifera 0 0 0\n",
"bees 0 0 0\n",
"cause 0 0 0\n",
"ccd 0 0 0\n",
"ceranae 0 0 0\n",
"collapse 0 0 0\n",
"colonies 0 0 0\n",
"colony collapse disorder 0 0 0\n",
"colony collapse disorder ( ccd ) 0 0 0\n",
"deformed wing 0 0 0\n",
"dwv 0 0 0\n",
"evidence 0 0 0\n",
"furthermore 0 0 0\n",
"health 0 0 0\n",
"hives 0 0 0\n",
"honey bee 0 0 0\n",
"honey bee colonies 0 0 0\n",
"honey bees 0 0 0\n",
"honeybees 0 0 0\n",
"iapv 0 0 0\n",
"in 0 0 0\n",
"infection 0 0 0\n",
"infections 0 0 0\n",
"infectious diseases 0 0 0\n",
"israeli acute paralysis 0 0 0\n",
"losses 0 0 0\n",
"n 0 0 0\n",
"nosema 0 0 0\n",
"nosema ceranae 0 0 0\n",
"pathogens 0 0 0\n",
"pesticides 0 0 0\n",
"plant 0 0 0\n",
"presence 0 0 0\n",
"prevalence 0 0 0\n",
"rna viruses 0 0 0\n",
"samples 0 0 0\n",
"study 0 0 0\n",
"transmission 0 0 0\n",
"united states 0 0 0\n",
"varroa 0 0 0\n",
"virus 0 0 0\n",
"viruses 0 0 0\n",
"winter 0 0 0"
]
}
],
"prompt_number": 127
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"matrix /= matrix.sum(axis=1)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 123
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"matrix"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>animal kingdoms</th>\n",
" <th>apis mellifera</th>\n",
" <th>bees</th>\n",
" <th>cause</th>\n",
" <th>ccd</th>\n",
" <th>ceranae</th>\n",
" <th>collapse</th>\n",
" <th>colonies</th>\n",
" <th>colony collapse disorder</th>\n",
" <th>colony collapse disorder ( ccd )</th>\n",
" <th>...</th>\n",
" <th>prevalence</th>\n",
" <th>rna viruses</th>\n",
" <th>samples</th>\n",
" <th>study</th>\n",
" <th>transmission</th>\n",
" <th>united states</th>\n",
" <th>varroa</th>\n",
" <th>virus</th>\n",
" <th>viruses</th>\n",
" <th>winter</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>animal kingdoms</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>infectious diseases</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>plant</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3 rows \u00d7 44 columns</p>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 124,
"text": [
" animal kingdoms apis mellifera bees cause ccd \\\n",
"animal kingdoms NaN NaN NaN NaN NaN \n",
"infectious diseases NaN NaN NaN NaN NaN \n",
"plant NaN NaN NaN NaN NaN \n",
"\n",
" ceranae collapse colonies colony collapse disorder \\\n",
"animal kingdoms NaN NaN NaN NaN \n",
"infectious diseases NaN NaN NaN NaN \n",
"plant NaN NaN NaN NaN \n",
"\n",
" colony collapse disorder ( ccd ) \\\n",
"animal kingdoms NaN \n",
"infectious diseases NaN \n",
"plant NaN \n",
"\n",
" ... prevalence \\\n",
"animal kingdoms ... NaN \n",
"infectious diseases ... NaN \n",
"plant ... NaN \n",
"\n",
" rna viruses samples study transmission united states \\\n",
"animal kingdoms NaN NaN NaN NaN NaN \n",
"infectious diseases NaN NaN NaN NaN NaN \n",
"plant NaN NaN NaN NaN NaN \n",
"\n",
" varroa virus viruses winter \n",
"animal kingdoms NaN NaN NaN NaN \n",
"infectious diseases NaN NaN NaN NaN \n",
"plant NaN NaN NaN NaN \n",
"\n",
"[3 rows x 44 columns]"
]
}
],
"prompt_number": 124
},
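{
"cell_type": "markdown",
"metadata": {},
"source": [
"The in-place division above aligns `matrix.sum(axis=1)` against the column labels, which is why the result is full of `NaN`. A sketch of the intended row-wise normalisation (applied to the cooccurrence counts before that division) uses `DataFrame.div` with `axis=0`; rows whose total is zero still come out as `NaN`."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Sketch: divide each row of the cooccurrence matrix by its row total\n",
"row_sums = matrix.sum(axis=1)\n",
"matrix.div(row_sums, axis=0)"
],
"language": "python",
"metadata": {},
"outputs": []
},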
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
\ No newline at end of file
{
"metadata": {
"name": "",
"signature": "sha256:01cc276bb358d5a00a128d39a90a02b0b45e8e9a43ce5670d06fd8b0657bdab5"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Testing if the ISI file parser works"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from parsing.FileParsers import *"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"parser = IsiFileParser(filepath='/home/mat/projects/gargantext/data_samples/isi.txt')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"parser.parse()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"{'publication_month': '10', 'authors': 'Rust, J, Singh, H, Rana, RS, McCann, T, Singh, L, Anderson, K, Sarkar, N, Nascimbene, PC, Stebner, F, Thomas, JC, Kraemer, MS, Williams, CJ, Engel, MS, Sahni, A, Grimaldi, D', 'publication_year': '2010', 'publication_minute': '00', 'language': 'English', 'title': 'Biogeographic and evolutionary implications of a diverse paleobiota in amber from the early Eocene of India', 'abstract': 'For nearly 100 million years, the India subcontinent drifted from Gondwana until its collision with Asia some 50 Ma, during which time the landmass presumably evolved a highly endemic biota. Recent excavations of rich outcrops of 50-52-million-year-old amber with diverse inclusions from the Cambay Shale of Gujarat, western India address this issue. Cambay amber occurs in lignitic and muddy sediments concentrated by near-shore chenier systems; its chemistry and the anatomy of associated fossil wood indicates a definitive source of Dipterocarpaceae. The amber is very partially polymerized and readily dissolves in organic solvents, thus allowing extraction of whole insects whose cuticle retains microscopic fidelity. Fourteen orders and more than 55 families and 100 species of arthropod inclusions have been discovered thus far, which have affinities to taxa from the Eocene of northern Europe, to the Recent of Australasia, and the Miocene to Recent of tropical America. Thus, India just prior to or immediately following contact shows little biological insularity. A significant diversity of eusocial insects are fossilized, including corbiculate bees, rhinotermitid termites, and modern subfamilies of ants (Formicidae), groups that apparently radiated during the contemporaneous Early Eocene Climatic Optimum or just prior to it during the Paleocene-Eocene Thermal Maximum. Cambay amber preserves a uniquely diverse and early biota of a modern-type of broad-leaf tropical forest, revealing 50 Ma of stasis and change in biological communities of the dipterocarp primary forests that dominate southeastern Asia today.', 'doi': '10.1073/pnas.1007407107', 'publication_day': '26', 'publication_date': '2010-10-26 00:00:00', 'publication_hour': '00', 'publication_second': '00'}\n",
"\n",
"{'publication_month': '10', 'authors': 'Cornman, SR, Schatz, MC, Johnston, SJ, Chen, YP, Pettis, J, Hunt, G, Bourgeois, L, Elsik, C, Anderson, D, Grozinger, CM, Evans, JD', 'publication_year': '2010', 'publication_minute': '00', 'language': 'English', 'title': 'Genomic survey of the ectoparasitic mite Varroa destructor, a major pest of the honey bee Apis mellifera', 'abstract': 'Background: The ectoparasitic mite Varroa destructor has emerged as the primary pest of domestic honey bees (Apis mellifera). Here we present an initial survey of the V. destructor genome carried out to advance our understanding of Varroa biology and to identify new avenues for mite control. This sequence survey provides immediate resources for molecular and population-genetic analyses of Varroa-Apis interactions and defines the challenges ahead for a comprehensive Varroa genome project. Results: The genome size was estimated by flow cytometry to be 565 Mbp, larger than most sequenced insects but modest relative to some other Acari. Genomic DNA pooled from similar to 1,000 mites was sequenced to 4.3x coverage with 454 pyrosequencing. The 2.4 Gbp of sequencing reads were assembled into 184,094 contigs with an N50 of 2,262 bp, totaling 294 Mbp of sequence after filtering. Genic sequences with homology to other eukaryotic genomes were identified on 13,031 of these contigs, totaling 31.3 Mbp. Alignment of protein sequence blocks conserved among V. destructor and four other arthropod genomes indicated a higher level of sequence divergence within this mite lineage relative to the tick Ixodes scapularis. A number of microbes potentially associated with V. destructor were identified in the sequence survey, including similar to 300 Kbp of sequence deriving from one or more bacterial species of the Actinomycetales. The presence of this bacterium was confirmed in individual mites by PCR assay, but varied significantly by age and sex of mites. Fragments of a novel virus related to the Baculoviridae were also identified in the survey. The rate of single nucleotide polymorphisms (SNPs) in the pooled mites was estimated to be 6.2 x 10(-5)per bp, a low rate consistent with the historical demography and life history of the species. Conclusions: This survey has provided general tools for the research community and novel directions for investigating the biology and control of Varroa mites. Ongoing development of Varroa genomic resources will be a boon for comparative genomics of under-represented arthropods, and will further enhance the honey bee and its associated pathogens as a model system for studying host-pathogen interactions.', 'doi': '10.1186/1471-2164-11-602', 'publication_day': '25', 'publication_date': '2010-10-25 00:00:00', 'publication_hour': '00', 'publication_second': '00'}\n",
"\n",
"{'publication_month': '10', 'authors': 'Gadagkar, R', 'publication_day': '25', 'publication_year': '2010', 'publication_minute': '00', 'language': 'English', 'title': 'Sociobiology in turmoil again', 'publication_date': '2010-10-25 00:00:00', 'abstract': \"Altruism is defined as any behaviour that lowers the Darwinian fitness of the actor while increasing that of the recipient. Such altruism (especially in the form of lifetime sterility exhibited by sterile workers in eusocial insects such as ants, bees, wasps and termites) has long been considered a major difficulty for the theory of natural selection. In the 1960s W. D. Hamilton potentially solved this problem by defining a new measure of fitness that he called inclusive fitness, which also included the effect of an individual's action on the fitness of genetic relatives. This has come to be known as inclusive fitness theory, Hamilton's rule or kin selection. E. O. Wilson almost single-handedly popularized this new approach in the 1970s and thus helped create a large body of new empirical research and a large community of behavioural ecologists and kin selectionists. Adding thrill and drama to our otherwise sombre lives, Wilson is now leading a frontal attack on Hamilton's approach, claiming that the inclusive fitness theory is not as mathematically general as the standard natural selection theory, has led to no additional biological insights and should therefore be abandoned. The world cannot but sit up and take notice.\", 'publication_hour': '00', 'publication_second': '00'}\n",
"\n",
"{'publication_month': '10', 'authors': 'Nemesio, A', 'publication_day': '25', 'publication_year': '2010', 'publication_minute': '00', 'language': 'English', 'title': 'The orchid-bee fauna (Hymenoptera: Apidae) of a forest remnant in northeastern Brazil, with new geographic records and an identification key to the known species of the Atlantic Forest of northeastern Brazil', 'publication_date': '2010-10-25 00:00:00', 'abstract': 'The orchid bee fauna of Estacao Ecologica de Murici (ESEC Murici), in the state of Alagoas, one of the largest remnants of the Atlantic Rain Forest in northeastern Brazil, was surveyed for the first time. Seven hundred and twenty-one orchid-bee males belonging to 17 species were collected from the 3(rd) to the 10(th) of September, 2009. Besides the recently described Eulaema (Apeulaema) felipei Nemesio, 2010, three other species recorded at ESEC Murici deserve further attention: Euglossa amazonica Dressler, 1982b, recorded for the first time outside the Amazon Basin; Euglossa milenae Bembe, 2007 and Euglossa analis Westwood, 1840, both recorded for the first time in the Atlantic Forest of northeastern Brazil north to Sao Francisco river. These results together with previous samplings in the state of Alagoas reveal that at least 22 orchid-bee species are now known to occur there. Three other species not recorded for Alagoas yet are known from the neighbor states of Sergipe, Pernambuco, and Paraiba. An identification key to all 25 species of Euglossina known to occur in the states of Alagoas, Sergipe, Pernambuco, Paraiba, and Rio Grande do Norte is provided.', 'publication_hour': '00', 'publication_second': '00'}\n",
"\n",
"{'publication_month': '10', 'authors': 'Rozen, JG', 'publication_day': '22', 'publication_year': '2010', 'publication_minute': '00', 'language': 'English', 'title': 'Immatures of the Old World Oil-Collecting Bee Ctenoplectra cornuta (Apoidea: Apidae: Apinae: Ctenoplectrini)', 'publication_date': '2010-10-22 00:00:00', 'abstract': 'The mature oocyte, all five larval instars, and the pupa of Ctenoplectra cornuta Gribodo are described based upon specimens from Taiwan. Its mature larva though larger is compared with, and found similar to, that of the African Ctenoplectra armata Magretti, the only other larval ctenoplectrine studied to date. The egg index was similar to that of the African C. albolimbata Magretti. Although Ctenoplectra shares certain larval and pupal similarities with Tetrapedia (Tetrapediini), a broader study including representatives of all apine tribes needs to be considered for evaluating tribal relationships.', 'publication_hour': '00', 'publication_second': '00'}\n",
"\n",
"{'publication_month': '10', 'authors': 'Maisonnasse, A, Lenoir, JC, Beslay, D, Crauser, D, Le Conte, Y', 'publication_year': '2010', 'publication_minute': '00', 'language': 'English', 'title': 'E-beta-Ocimene, a Volatile Brood Pheromone Involved in Social Regulation in the Honey Bee Colony (Apis mellifera)', 'abstract': 'Background: In honey bee colony, the brood is able to manipulate and chemically control the workers in order to sustain their own development. A brood ester pheromone produced primarily by old larvae (4 and 5 days old larvae) was first identified as acting as a contact pheromone with specific effects on nurses in the colony. More recently a new volatile brood pheromone has been identified: E-beta-ocimene, which partially inhibits ovary development in workers. Methodology and Principal Finding: Our analysis of E-beta-ocimene production revealed that young brood (newly hatched to 3 days old) produce the highest quantity of E-beta-ocimene relative to their body weight. By testing the potential action of this molecule as a non-specific larval signal, due to its high volatility in the colony, we demonstrated that in the presence of E-beta-ocimene nest workers start to forage earlier in life, as seen in the presence of real brood. Conclusions/Significance: In this way, young larvae are able to assign precedence to the task of foraging by workers in order to increase food stores for their own development. Thus, in the complexity of honey bee chemical communication, E-beta-ocimene, a pheromone of young larvae, provides the brood with the means to express their nutritional needs to the workers.', 'doi': '10.1371/journal.pone.0013531', 'publication_day': '21', 'publication_date': '2010-10-21 00:00:00', 'publication_hour': '00', 'publication_second': '00'}\n",
"\n",
"{'publication_month': '10', 'authors': 'Li, JK, Wu, J, Rundassa, DB, Song, FF, Zheng, AJ, Fang, Y', 'publication_year': '2010', 'publication_minute': '00', 'language': 'English', 'title': 'Differential Protein Expression in Honeybee (Apis mellifera L.) Larvae: Underlying Caste Differentiation', 'abstract': 'Honeybee (Apis mellifera) exhibits divisions in both morphology and reproduction. The queen is larger in size and fully developed sexually, while the worker bees are smaller in size and nearly infertile. To better understand the specific time and underlying molecular mechanisms of caste differentiation, the proteomic profiles of larvae intended to grow into queen and worker castes were compared at 72 and 120 hours using two dimensional electrophoresis (2-DE), network, enrichment and quantitative PCR analysis. There were significant differences in protein expression between the two larvae castes at 72 and 120 hours, suggesting the queen and the worker larvae have already decided their fate before 72 hours. Specifically, at 72 hours, queen intended larvae over-expressed transketolase, aldehyde reductase, and enolase proteins which are involved in carbohydrate metabolism and energy production, imaginal disc growth factor 4 which is a developmental related protein, long-chain-fatty-acid CoA ligase and proteasome subunit alpha type 5 which metabolize fatty and amino acids, while worker intended larvae over-expressed ATP synthase beta subunit, aldehyde dehydrogenase, thioredoxin peroxidase 1 and peroxiredoxin 2540, lethal (2) 37 and 14-3-3 protein epsilon, fatty acid binding protein, and translational controlled tumor protein. This differential protein expression between the two caste intended larvae was more pronounced at 120 hours, with particular significant differences in proteins associated with carbohydrate metabolism and energy production. Functional enrichment analysis suggests that carbohydrate metabolism and energy production and anti-oxidation proteins play major roles in the formation of caste divergence. The constructed network and validated gene expression identified target proteins for further functional study. This new finding is in contrast to the existing notion that 72 hour old larvae has bipotential and can develop into either queen or worker based on epigenetics and can help us to gain new insight into the time of departure as well as caste trajectory influencing elements at the molecular level.', 'doi': '10.1371/journal.pone.0013455', 'publication_day': '20', 'publication_date': '2010-10-20 00:00:00', 'publication_hour': '00', 'publication_second': '00'}\n",
"\n",
"{'publication_month': '10', 'authors': 'Ramirez, GP, Martinez, AS, Fernandez, VM, Bielsa, GC, Farina, WM', 'publication_year': '2010', 'publication_minute': '00', 'language': 'English', 'title': 'The Influence of Gustatory and Olfactory Experiences on Responsiveness to Reward in the Honeybee', 'abstract': 'Background: Honeybees (Apis mellifera) exhibit an extraordinarily tuned division of labor that depends on age polyethism. This adjustment is generally associated with the fact that individuals of different ages display different response thresholds to given stimuli, which determine specific behaviors. For instance, the sucrose-response threshold (SRT) which largely depends on genetic factors may also be affected by the nectar sugar content. However, it remains unknown whether SRTs in workers of different ages and tasks can differ depending on gustatory and olfactory experiences. Methodology: Groups of worker bees reared either in an artificial environment or else in a queen-right colony, were exposed to different reward conditions at different adult ages. Gustatory response scores (GRSs) and odor-memory retrieval were measured in bees that were previously exposed to changes in food characteristics. Principal Findings: Results show that the gustatory responses of pre-foraging-aged bees are affected by changes in sucrose solution concentration and also to the presence of an odor provided it is presented as scented sucrose solution. In contrast no differences in worker responses were observed when presented with odor only in the rearing environment. Fast modulation of GRSs was observed in older bees (12-16 days of age) which are commonly involved in food processing tasks within the hive, while slower modulation times were observed in younger bees (commonly nurse bees, 6-9 days of age). This suggests that older food-processing bees have a higher plasticity when responding to fluctuations in resource information than younger hive bees. Adjustments in the number of trophallaxis events were also found when scented food circulated inside the nest, and this was positively correlated with the differences in timing observed in gustatory responsiveness and memory retention for hive bees of different age classes. Conclusions: This work demonstrates the accessibility of chemosensory information in the honeybee colonies with respect to incoming nectar. The modulation of the sensory-response systems within the hive can have important effects on the dynamics of food transfer and information propagation.', 'doi': '10.1371/journal.pone.0013498', 'publication_day': '20', 'publication_date': '2010-10-20 00:00:00', 'publication_hour': '00', 'publication_second': '00'}\n",
"\n",
"{'publication_month': '10', 'authors': 'Munch, D, Baker, N, Kreibich, CD, Braten, AT, Amdam, GV', 'publication_year': '2010', 'publication_minute': '00', 'language': 'English', 'title': 'In the Laboratory and during Free-Flight: Old Honey Bees Reveal Learning and Extinction Deficits that Mirror Mammalian Functional Decline', 'abstract': 'Loss of brain function is one of the most negative and feared aspects of aging. Studies of invertebrates have taught us much about the physiology of aging and how this progression may be slowed. Yet, how aging affects complex brain functions, e.g., the ability to acquire new memory when previous experience is no longer valid, is an almost exclusive question of studies in humans and mammalian models. In these systems, age related cognitive disorders are assessed through composite paradigms that test different performance tasks in the same individual. Such studies could demonstrate that afflicted individuals show the loss of several and often-diverse memory faculties, and that performance usually varies more between aged individuals, as compared to conspecifics from younger groups. No comparable composite surveying approaches are established yet for invertebrate models in aging research. Here we test whether an insect can share patterns of decline similar to those that are commonly observed during mammalian brain aging. Using honey bees, we combine restrained learning with free-flight assays. We demonstrate that reduced olfactory learning performance correlates with a reduced ability to extinguish the spatial memory of an abandoned nest location ( spatial memory extinction). Adding to this, we show that learning performance is more variable in old honey bees. Taken together, our findings point to generic features of brain aging and provide the prerequisites to model individual aspects of learning dysfunction with insect models.', 'doi': '10.1371/journal.pone.0013504', 'publication_day': '19', 'publication_date': '2010-10-19 00:00:00', 'publication_hour': '00', 'publication_second': '00'}\n",
"\n"
]
}
],
"prompt_number": 4
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Basic tests"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print(\"RE abcdefgh\\n\"[3:-1])\n",
"print(b\"english\".decode())"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"abcdefgh\n",
"english\n"
]
}
],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print(\"publication_date\"[-5:])\n",
"print(\"publication_date\"[:-5])"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import dateutil.parser"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"d = dateutil.parser.parse(\"2014 OCT 11 1:2:3\")"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"d.strftime(\"%Y-%m-%d %H:%M:%S\")"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"t = d.timetuple()"
],
"language": "python",
"metadata": {},
"outputs": []
},
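{
"cell_type": "markdown",
"metadata": {},
"source": [
"A sketch of how such a parsed datetime can be expanded into the `publication_*` fields seen in the parser output above; the field names are taken from that output, and the actual FileParser code is not shown in this notebook."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Sketch: expand a parsed datetime into the publication_* fields used above\n",
"import dateutil.parser\n",
"\n",
"d = dateutil.parser.parse(\"2014 OCT 11 1:2:3\")  # same example date as above\n",
"{\n",
"    'publication_date': d.strftime('%Y-%m-%d %H:%M:%S'),\n",
"    'publication_year': d.strftime('%Y'),\n",
"    'publication_month': d.strftime('%m'),\n",
"    'publication_day': d.strftime('%d'),\n",
"    'publication_hour': d.strftime('%H'),\n",
"    'publication_minute': d.strftime('%M'),\n",
"    'publication_second': d.strftime('%S'),\n",
"}"
],
"language": "python",
"metadata": {},
"outputs": []
},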
{
"cell_type": "code",
"collapsed": false,
"input": [
"d.strftime(\"%H\")\n"
],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
\ No newline at end of file
{
"metadata": {
"name": "",
"signature": "sha256:e0c3b2efe7c205a29dc4e028b10ffb7b9d0569f35c4b426febdf523069abffdb"
"signature": "sha256:d03d3f5dbf9a1dbfc43deb947718f31529d3d67b0901f8e743b23ce28a9f3205"
},
"nbformat": 3,
"nbformat_minor": 0,
@@ -12,11 +12,7 @@
"cell_type": "code",
"collapsed": false,
"input": [
"from pprint import pprint\n",
"from node.models import Node, NodeType, Language, Ngram\n",
"from django.contrib.auth.models import User\n",
"import parsing\n",
"from parsing.FileParsers import *"
"from parsing.NgramsExtractors import NgramsExtractorsCache"
],
"language": "python",
"metadata": {},
@@ -27,21 +23,7 @@
"cell_type": "code",
"collapsed": false,
"input": [
"# Define user\n",
"try:\n",
" user = User.objects.get(username='Mat')\n",
"except:\n",
" user = User(username='Mat', password='0123', email='mathieu@rodic.fr')\n",
" user.save()\n",
"\n",
"# Define document types\n",
"nodetypes = {}\n",
"for name in ['Corpus', 'Document']:\n",
" try:\n",
" nodetypes[name] = NodeType.objects.get(name=name)\n",
" except:\n",
" nodetypes[name] = NodeType(name=name)\n",
" nodetypes[name].save()"
"c = NgramsExtractorsCache()"
],
"language": "python",
"metadata": {},
@@ -52,70 +34,159 @@
"cell_type": "code",
"collapsed": false,
"input": [
"Node.objects.all().delete()\n",
"corpus = Node(name='PubMed corpus', user=user, type=nodetypes['Corpus'])\n",
"corpus.save()"
"c[\"en\"]"
],
"language": "python",
"metadata": {},
"outputs": [],
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 3,
"text": [
"<parsing.NgramsExtractors.EnglishNgramsExtractor.EnglishNgramsExtractor at 0x7fc3aa431f98>"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"fileparser = PubmedFileParser.PubmedFileParser(file='/home/mat/projects/gargantext/data_samples/pubmed.zip')"
"c[\"fre\"]"
],
"language": "python",
"metadata": {},
"outputs": []
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 5,
"text": [
"<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f0e1fb0c978>"
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"p = c[\"fr\"]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"fileparser.parse(corpus)\n",
"print('Ok!')"
"p.extract_ngrams(\"En voil\u00e0 un beau parseur !\")"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"metadata": {},
"output_type": "pyout",
"prompt_number": 8,
"text": [
"Warning: parsing empty text\n",
"Warning: parsing empty text\n",
"Warning: parsing empty text"
"[[('beau', 'NN'), ('parseur', 'NN')]]"
]
},
}
],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"fr\"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"metadata": {},
"output_type": "pyout",
"prompt_number": 9,
"text": [
"\n",
"Warning: parsing empty text\n",
"Warning: parsing empty text"
"<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f0e1fb0c978>"
]
}
]
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for node_ngram in corpus.children.first().node_ngram_set.all():\n",
" print(node_ngram.ngram.terms)"
"c[\"french\"]"
],
"language": "python",
"metadata": {},
"outputs": []
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 10,
"text": [
"<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f0e1fb0c978>"
]
}
],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"german\"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 11,
"text": [
"<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f0e1fb0c978>"
]
}
],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"dutch\"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 12,
"text": [
"<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f0e1fb0c978>"
]
}
],
"prompt_number": 12
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"input": [
"c[\"]"
],
"language": "python",
"metadata": {},
"outputs": []
......
{
"metadata": {
"name": "",
"signature": "sha256:3345ac991b0346b1dfd82386fdc2a59f39b2de9bf32d03ddfbeb565927cfe7ab"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from node.models import Node, NodeType, Language\n",
"import parsing\n",
"from parsing.FileParsers import *"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#from node.models import Language\n",
"#import pycountry\n",
"#for lang in pycountry.languages:\n",
"# try:\n",
"# Language(iso2=lang.alpha2, iso3=lang.terminology, fullname=lang.name, implemented=1).save()\n",
"# except:\n",
"# pass\n"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"node = Node.objects.get(name=\"PubMed corpus\")"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"fileparser = PubmedFileParser.PubmedFileParser(file='/var/www/gargantext/media/' + node.fichier.name)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"fileparser.parse(node)\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Warning: parsing empty text\n",
"Warning: parsing empty text\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 5,
"text": [
"[<Node: Systemic spread and propagation of a plant-pathogenic virus in European honeybees, Apis mellifera.>,\n",
" <Node: A Causal Analysis of Observed Declines in Managed Honey Bees (Apis mellifera).>,\n",
" <Node: Non-specific dsRNA-mediated antiviral response in the honey bee.>,\n",
" <Node: In vitro infection of pupae with Israeli acute paralysis virus suggests disturbance of transcriptional homeostasis in honey bees (Apis mellifera).>,\n",
" <Node: Nosema ceranae has been present in Brazil for more than three decades infecting Africanized honey bees.>,\n",
" <Node: Return of the natives.>,\n",
" <Node: The road to pollinator health.>,\n",
" <Node: Do the honeybee pathogens Nosema ceranae and deformed wing virus act synergistically?>,\n",
" <Node: Essential oil from Eupatorium buniifolium leaves as potential varroacide.>,\n",
" <Node: Animal behaviour: brain food.>,\n",
" <Node: What's the buzz?>,\n",
" <Node: Nosema ceranae induced mortality in honey bees (Apis mellifera) depends on infection methods.>,\n",
" <Node: Rates of honeybee sting hypersensitivity in San Antonio during honeybee colony collapse disorder.>,\n",
" <Node: Healing power of honey.>,\n",
" <Node: Honey constituents up-regulate detoxification and immunity genes in the western honey bee Apis mellifera.>,\n",
" <Node: Nosema spp. infection and its negative effects on honey bees (Apis mellifera iberiensis) at the colony level.>,\n",
" <Node: Flight behavior and pheromone changes associated to Nosema ceranae infection of honey bee workers (Apis mellifera) in field conditions.>,\n",
" <Node: Clinical signs of deformed wing virus infection are predictive markers for honey bee colony losses.>,\n",
" <Node: The microsporidian parasites Nosema ceranae and Nosema apis are widespread in honeybee (Apis mellifera) colonies across Scotland.>,\n",
" <Node: A potential link among biogenic amines-based pesticides, learning and memory, and colony collapse disorder: a unique hypothesis.>,\n",
" <Node: Comment on \"A common pesticide decreases foraging success and survival in honey bees\".>,\n",
" <Node: Idiopathic brood disease syndrome and queen events as precursors of colony mortality in migratory beekeeping operations in the eastern United States.>,\n",
" <Node: Pathogen webs in collapsing honey bee colonies.>,\n",
" <Node: Asymptomatic presence of Nosema spp. in Spanish commercial apiaries.>,\n",
" <Node: Synergistic parasite-pathogen interactions mediated by host immunity can drive the collapse of honeybee colonies.>,\n",
" <Node: Global honey bee viral landscape altered by a parasitic mite.>,\n",
" <Node: Paratransgenesis: an approach to improve colony health and molecular insight in honey bees (Apis mellifera)?>,\n",
" <Node: Agriculture. Field research on bees raises concern about low-dose pesticides.>,\n",
" <Node: A common pesticide decreases foraging success and survival in honey bees.>,\n",
" <Node: Symbionts as major modulators of insect health: lactic acid bacteria and honeybees.>,\n",
" <Node: The habitat disruption induces immune-suppression and oxidative stress in honey bees.>,\n",
" <Node: Predictive markers of honey bee colony collapse.>,\n",
" <Node: Colony collapse disorder in Europe.>,\n",
" <Node: Pesticide exposure in honey bees results in increased levels of the gut pathogen Nosema.>,\n",
" <Node: Bromenshenk et al (PLoS One, 2011, 5(10):e13181) have claimed to have found peptides from an invertebrate iridovirus in bees.>,\n",
" <Node: A new threat to honey bees, the parasitic phorid fly Apocephalus borealis.>,\n",
" <Node: Detection of pesticides in active and depopulated beehives in Uruguay.>,\n",
" <Node: Bees brought to their knees: microbes affecting honey bee health.>,\n",
" <Node: From elephants to bees.>,\n",
" <Node: Evidence of a novel immune responsive protein in the Hymenoptera.>,\n",
" <Node: Lack of evidence for an association between Iridovirus and colony collapse disorder.>,\n",
" <Node: First report of Israeli acute paralysis virus in asymptomatic hives of Argentina.>,\n",
" <Node: Temporal analysis of the honey bee microbiome reveals four novel viruses and seasonal prevalence of known viruses, Nosema, and Crithidia.>,\n",
" <Node: A quantitative model of honey bee colony population dynamics.>,\n",
" <Node: Detection of honey bee (Apis mellifera) viruses with an oligonucleotide microarray.>,\n",
" <Node: Interpretation of data underlying the link between colony collapse disorder (CCD) and an invertebrate iridescent virus.>,\n",
" <Node: RNA viruses in hymenopteran pollinators: evidence of inter-Taxa virus transmission via pollen and potential impact on non-Apis hymenopteran species.>,\n",
" <Node: Large-scale field application of RNAi technology reducing Israeli acute paralysis virus disease in honey bees (Apis mellifera, Hymenoptera: Apidae).>,\n",
" <Node: Weighing risk factors associated with bee colony collapse disorder by classification and regression tree analysis.>,\n",
" <Node: Iridovirus and microsporidian linked to honey bee colony decline.>,\n",
" <Node: Varroa destructor is an effective vector of Israeli acute paralysis virus in the honeybee, Apis mellifera.>,\n",
" <Node: The plight of the bees.>,\n",
" <Node: Sudden deaths and colony population decline in Greek honey bee colonies.>,\n",
" <Node: Colony Collapse Disorder in context.>,\n",
" <Node: Vanishing honey bees: Is the dying of adult worker bees a consequence of short telomeres and premature aging?>,\n",
" <Node: Prevention of Chinese sacbrood virus infection in Apis cerana using RNA interference.>,\n",
" <Node: Refined methodology for the determination of neonicotinoid pesticides and their metabolites in honey bees and bee products by liquid chromatography-tandem mass spectrometry (LC-MS/MS).>,\n",
" <Node: Ecology. Clarity on honey bee collapse?>,\n",
" <Node: Medium for development of bee cell cultures (Apis mellifera: Hymenoptera: Apidae).>,\n",
" <Node: Bee mystery continues.>,\n",
" <Node: Deformed wing virus.>,\n",
" <Node: The Acute bee paralysis virus-Kashmir bee virus-Israeli acute paralysis virus complex.>,\n",
" <Node: Translocation of neonicotinoid insecticides from coated seeds to seedling guttation drops: a novel way of intoxication for bees.>,\n",
" <Node: Deformed wing virus implicated in overwintering honeybee colony losses.>,\n",
" <Node: Changes in transcript abundance relating to colony collapse disorder in honey bees (Apis mellifera).>,\n",
" <Node: Colony collapse disorder: a descriptive study.>,\n",
" <Node: A PCR method of detecting American Foulbrood (Paenibacillus larvae) in winter beehive wax debris.>,\n",
" <Node: Honeybee colony collapse due to Nosema ceranae in professional apiaries.>,\n",
" <Node: IAPV, a bee-affecting virus associated with Colony Collapse Disorder can be silenced by dsRNA ingestion.>,\n",
" <Node: Energetic stress in the honeybee Apis mellifera from Nosema ceranae infection.>,\n",
" <Node: A survey of honey bee colony losses in the U.S., fall 2007 to spring 2008.>,\n",
" <Node: A qualitative model of mortality in honey bee (Apis mellifera) colonies infested with tracheal mites (Acarapis woodi).>,\n",
" <Node: First detection of Israeli acute paralysis virus (IAPV) in France, a dicistrovirus affecting honeybees (Apis mellifera).>,\n",
" <Node: Does pathogen spillover from commercially reared bumble bees threaten wild pollinators?>,\n",
" <Node: How natural infection by Nosema ceranae causes honeybee colony collapse.>,\n",
" <Node: Genetic analysis of Israel acute paralysis virus: distinct clusters are circulating in the United States.>,\n",
" <Node: The latest buzz about colony collapse disorder.>,\n",
" <Node: Native bees provide insurance against ongoing honey bee losses.>,\n",
" <Node: A metagenomic survey of microbes in honey bee colony collapse disorder.>,\n",
" <Node: Vertical-transmission routes for deformed wing virus of honeybees (Apis mellifera).>,\n",
" <Node: RT-PCR analysis of Deformed wing virus in honeybees (Apis mellifera) and mites (Varroa destructor).>]"
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from node.models import Ngram"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" Ng"
]
},
{
"cell_type": "raw",
"metadata": {},
"source": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"n = node.children.first()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for node_ngram in n.node_ngram_set.all():\n",
" print(node_ngram.ngram.terms)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n"
]
}
],
"prompt_number": 26
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
\ No newline at end of file
{
"metadata": {
"name": "",
"signature": "sha256:0383da299037d14e20f4be4cd7703cfddbdf0f947ee8f93f051f2ed6b7fe0cb5"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pycountry\n",
"from node.models import Language"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pycountry\n",
"\n",
"for language in pycountry.languages:\n",
" try:\n",
" implemented = 1 if language.alpha2 in ['en', 'fr'] else 0\n",
" Language(iso2=language.alpha2, iso3=language.terminology, fullname=language.name, implemented=implemented).save()\n",
" except:\n",
" pass\n",
" "
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Language.objects.filter(implemented=1)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 3,
"text": [
"[<Language: English>, <Language: French>]"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for language in Language.objects.all():\n",
" print(language)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Afar\n",
"Abkhazian\n",
"Afrikaans\n",
"Akan\n",
"Albanian\n",
"Amharic\n",
"Arabic\n",
"Aragonese\n",
"Armenian\n",
"Assamese\n",
"Avaric\n",
"Avestan\n",
"Aymara\n",
"Azerbaijani\n",
"Bashkir\n",
"Bambara\n",
"Basque\n",
"Belarusian\n",
"Bengali\n",
"Bihari languages\n",
"Bislama\n",
"Bosnian\n",
"Breton\n",
"Bulgarian\n",
"Burmese\n",
"Catalan; Valencian\n",
"Chamorro\n",
"Chechen\n",
"Chinese\n",
"Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic\n",
"Chuvash\n",
"Cornish\n",
"Corsican\n",
"Cree\n",
"Czech\n",
"Danish\n",
"Divehi; Dhivehi; Maldivian\n",
"Dutch; Flemish\n",
"Dzongkha\n",
"English\n",
"Esperanto\n",
"Estonian\n",
"Ewe\n",
"Faroese\n",
"Fijian\n",
"Finnish\n",
"French\n",
"Western Frisian\n",
"Fulah\n",
"Georgian\n",
"German\n",
"Gaelic; Scottish Gaelic\n",
"Irish\n",
"Galician\n",
"Manx\n",
"Greek, Modern (1453-)\n",
"Guarani\n",
"Gujarati\n",
"Haitian; Haitian Creole\n",
"Hausa\n",
"Hebrew\n",
"Herero\n",
"Hindi\n",
"Hiri Motu\n",
"Croatian\n",
"Hungarian\n",
"Igbo\n",
"Icelandic\n",
"Ido\n",
"Sichuan Yi; Nuosu\n",
"Inuktitut\n",
"Interlingue; Occidental\n",
"Interlingua (International Auxiliary Language Association)\n",
"Indonesian\n",
"Inupiaq\n",
"Italian\n",
"Javanese\n",
"Japanese\n",
"Kalaallisut; Greenlandic\n",
"Kannada\n",
"Kashmiri\n",
"Kanuri\n",
"Kazakh\n",
"Central Khmer\n",
"Kikuyu; Gikuyu\n",
"Kinyarwanda\n",
"Kirghiz; Kyrgyz\n",
"Komi\n",
"Kongo\n",
"Korean\n",
"Kuanyama; Kwanyama\n",
"Kurdish\n",
"Lao\n",
"Latin\n",
"Latvian\n",
"Limburgan; Limburger; Limburgish\n",
"Lingala\n",
"Lithuanian\n",
"Luxembourgish; Letzeburgesch\n",
"Luba-Katanga\n",
"Ganda\n",
"Macedonian\n",
"Marshallese\n",
"Malayalam\n",
"Maori\n",
"Marathi\n",
"Malay\n",
"Malagasy\n",
"Maltese\n",
"Moldavian; Moldovan\n",
"Mongolian\n",
"Nauru\n",
"Navajo; Navaho\n",
"Ndebele, South; South Ndebele\n",
"Ndebele, North; North Ndebele\n",
"Ndonga\n",
"Nepali\n",
"Norwegian Nynorsk; Nynorsk, Norwegian\n",
"Bokm\u00e5l, Norwegian; Norwegian Bokm\u00e5l\n",
"Norwegian\n",
"Chichewa; Chewa; Nyanja\n",
"Occitan (post 1500)\n",
"Ojibwa\n",
"Oriya\n",
"Oromo\n",
"Ossetian; Ossetic\n",
"Panjabi; Punjabi\n",
"Persian\n",
"Pali\n",
"Polish\n",
"Portuguese\n",
"Pushto; Pashto\n",
"Quechua\n",
"Romansh\n",
"Romanian\n",
"Rundi\n",
"Russian\n",
"Sango\n",
"Sanskrit\n",
"Sinhala; Sinhalese\n",
"Slovak\n",
"Slovenian\n",
"Northern Sami\n",
"Samoan\n",
"Shona\n",
"Sindhi\n",
"Somali\n",
"Sotho, Southern\n",
"Spanish; Castilian\n",
"Sardinian\n",
"Serbian\n",
"Swati\n",
"Sundanese\n",
"Swahili\n",
"Swedish\n",
"Tahitian\n",
"Tamil\n",
"Tatar\n",
"Telugu\n",
"Tajik\n",
"Tagalog\n",
"Thai\n",
"Tibetan\n",
"Tigrinya\n",
"Tonga (Tonga Islands)\n",
"Tswana\n",
"Tsonga\n",
"Turkmen\n",
"Turkish\n",
"Twi\n",
"Uighur; Uyghur\n",
"Ukrainian\n",
"Urdu\n",
"Uzbek\n",
"Venda\n",
"Vietnamese\n",
"Volap\u00fck\n",
"Welsh\n",
"Walloon\n",
"Wolof\n",
"Xhosa\n",
"Yiddish\n",
"Yoruba\n",
"Zhuang; Chuang\n",
"Zulu\n"
]
}
],
"prompt_number": 11
}
],
"metadata": {}
}
]
}
\ No newline at end of file
{
"metadata": {
"name": "",
"signature": "sha256:eac7c9b22e240bb0ef6d0aeec21261194d84a3f0ba53cd02af69f80d30ec5a17"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Testing if the ISI file parser works"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from parsing.FileParsers import *"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"parser = IsiFileParser(filepath='/home/mat/projects/gargantext/data_samples/isi.txt')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"parser.parse()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"{'title': 'Biogeographic and evolutionary implications of a diverse paleobiota in amber from the early Eocene of India', 'authors': 'Rust, J, Singh, H, Rana, RS, McCann, T, Singh, L, Anderson, K, Sarkar, N, Nascimbene, PC, Stebner, F, Thomas, JC, Kraemer, MS, Williams, CJ, Engel, MS, Sahni, A, Grimaldi, D', 'publication_year': '2010', 'publication_day': '26', 'doi': '10.1073/pnas.1007407107', 'abstract': 'For nearly 100 million years, the India subcontinent drifted from Gondwana until its collision with Asia some 50 Ma, during which time the landmass presumably evolved a highly endemic biota. Recent excavations of rich outcrops of 50-52-million-year-old amber with diverse inclusions from the Cambay Shale of Gujarat, western India address this issue. Cambay amber occurs in lignitic and muddy sediments concentrated by near-shore chenier systems; its chemistry and the anatomy of associated fossil wood indicates a definitive source of Dipterocarpaceae. The amber is very partially polymerized and readily dissolves in organic solvents, thus allowing extraction of whole insects whose cuticle retains microscopic fidelity. Fourteen orders and more than 55 families and 100 species of arthropod inclusions have been discovered thus far, which have affinities to taxa from the Eocene of northern Europe, to the Recent of Australasia, and the Miocene to Recent of tropical America. Thus, India just prior to or immediately following contact shows little biological insularity. A significant diversity of eusocial insects are fossilized, including corbiculate bees, rhinotermitid termites, and modern subfamilies of ants (Formicidae), groups that apparently radiated during the contemporaneous Early Eocene Climatic Optimum or just prior to it during the Paleocene-Eocene Thermal Maximum. Cambay amber preserves a uniquely diverse and early biota of a modern-type of broad-leaf tropical forest, revealing 50 Ma of stasis and change in biological communities of the dipterocarp primary forests that dominate southeastern Asia today.', 'fields': 'Multidisciplinary Sciences', 'publication_minute': '00', 'publication_month': '10', 'publication_hour': '00', 'publication_date': '2010-10-26 00:00:00', 'publication_second': '00', 'language': 'English'}\n",
"\n",
"{'title': 'Genomic survey of the ectoparasitic mite Varroa destructor, a major pest of the honey bee Apis mellifera', 'authors': 'Cornman, SR, Schatz, MC, Johnston, SJ, Chen, YP, Pettis, J, Hunt, G, Bourgeois, L, Elsik, C, Anderson, D, Grozinger, CM, Evans, JD', 'publication_year': '2010', 'publication_day': '25', 'doi': '10.1186/1471-2164-11-602', 'abstract': 'Background: The ectoparasitic mite Varroa destructor has emerged as the primary pest of domestic honey bees (Apis mellifera). Here we present an initial survey of the V. destructor genome carried out to advance our understanding of Varroa biology and to identify new avenues for mite control. This sequence survey provides immediate resources for molecular and population-genetic analyses of Varroa-Apis interactions and defines the challenges ahead for a comprehensive Varroa genome project. Results: The genome size was estimated by flow cytometry to be 565 Mbp, larger than most sequenced insects but modest relative to some other Acari. Genomic DNA pooled from similar to 1,000 mites was sequenced to 4.3x coverage with 454 pyrosequencing. The 2.4 Gbp of sequencing reads were assembled into 184,094 contigs with an N50 of 2,262 bp, totaling 294 Mbp of sequence after filtering. Genic sequences with homology to other eukaryotic genomes were identified on 13,031 of these contigs, totaling 31.3 Mbp. Alignment of protein sequence blocks conserved among V. destructor and four other arthropod genomes indicated a higher level of sequence divergence within this mite lineage relative to the tick Ixodes scapularis. A number of microbes potentially associated with V. destructor were identified in the sequence survey, including similar to 300 Kbp of sequence deriving from one or more bacterial species of the Actinomycetales. The presence of this bacterium was confirmed in individual mites by PCR assay, but varied significantly by age and sex of mites. Fragments of a novel virus related to the Baculoviridae were also identified in the survey. The rate of single nucleotide polymorphisms (SNPs) in the pooled mites was estimated to be 6.2 x 10(-5)per bp, a low rate consistent with the historical demography and life history of the species. Conclusions: This survey has provided general tools for the research community and novel directions for investigating the biology and control of Varroa mites. Ongoing development of Varroa genomic resources will be a boon for comparative genomics of under-represented arthropods, and will further enhance the honey bee and its associated pathogens as a model system for studying host-pathogen interactions.', 'fields': 'Biotechnology & Applied Microbiology; Genetics & Heredity', 'publication_minute': '00', 'publication_month': '10', 'publication_hour': '00', 'publication_date': '2010-10-25 00:00:00', 'publication_second': '00', 'language': 'English'}\n",
"\n",
"{'authors': 'Gadagkar, R', 'publication_year': '2010', 'publication_day': '25', 'title': 'Sociobiology in turmoil again', 'abstract': \"Altruism is defined as any behaviour that lowers the Darwinian fitness of the actor while increasing that of the recipient. Such altruism (especially in the form of lifetime sterility exhibited by sterile workers in eusocial insects such as ants, bees, wasps and termites) has long been considered a major difficulty for the theory of natural selection. In the 1960s W. D. Hamilton potentially solved this problem by defining a new measure of fitness that he called inclusive fitness, which also included the effect of an individual's action on the fitness of genetic relatives. This has come to be known as inclusive fitness theory, Hamilton's rule or kin selection. E. O. Wilson almost single-handedly popularized this new approach in the 1970s and thus helped create a large body of new empirical research and a large community of behavioural ecologists and kin selectionists. Adding thrill and drama to our otherwise sombre lives, Wilson is now leading a frontal attack on Hamilton's approach, claiming that the inclusive fitness theory is not as mathematically general as the standard natural selection theory, has led to no additional biological insights and should therefore be abandoned. The world cannot but sit up and take notice.\", 'fields': 'Multidisciplinary Sciences', 'publication_minute': '00', 'publication_month': '10', 'publication_hour': '00', 'publication_date': '2010-10-25 00:00:00', 'publication_second': '00', 'language': 'English'}\n",
"\n",
"{'authors': 'Nemesio, A', 'publication_year': '2010', 'publication_day': '25', 'title': 'The orchid-bee fauna (Hymenoptera: Apidae) of a forest remnant in northeastern Brazil, with new geographic records and an identification key to the known species of the Atlantic Forest of northeastern Brazil', 'abstract': 'The orchid bee fauna of Estacao Ecologica de Murici (ESEC Murici), in the state of Alagoas, one of the largest remnants of the Atlantic Rain Forest in northeastern Brazil, was surveyed for the first time. Seven hundred and twenty-one orchid-bee males belonging to 17 species were collected from the 3(rd) to the 10(th) of September, 2009. Besides the recently described Eulaema (Apeulaema) felipei Nemesio, 2010, three other species recorded at ESEC Murici deserve further attention: Euglossa amazonica Dressler, 1982b, recorded for the first time outside the Amazon Basin; Euglossa milenae Bembe, 2007 and Euglossa analis Westwood, 1840, both recorded for the first time in the Atlantic Forest of northeastern Brazil north to Sao Francisco river. These results together with previous samplings in the state of Alagoas reveal that at least 22 orchid-bee species are now known to occur there. Three other species not recorded for Alagoas yet are known from the neighbor states of Sergipe, Pernambuco, and Paraiba. An identification key to all 25 species of Euglossina known to occur in the states of Alagoas, Sergipe, Pernambuco, Paraiba, and Rio Grande do Norte is provided.', 'fields': 'Zoology', 'publication_minute': '00', 'publication_month': '10', 'publication_hour': '00', 'publication_date': '2010-10-25 00:00:00', 'publication_second': '00', 'language': 'English'}\n",
"\n",
"{'authors': 'Rozen, JG', 'publication_year': '2010', 'publication_day': '22', 'title': 'Immatures of the Old World Oil-Collecting Bee Ctenoplectra cornuta (Apoidea: Apidae: Apinae: Ctenoplectrini)', 'abstract': 'The mature oocyte, all five larval instars, and the pupa of Ctenoplectra cornuta Gribodo are described based upon specimens from Taiwan. Its mature larva though larger is compared with, and found similar to, that of the African Ctenoplectra armata Magretti, the only other larval ctenoplectrine studied to date. The egg index was similar to that of the African C. albolimbata Magretti. Although Ctenoplectra shares certain larval and pupal similarities with Tetrapedia (Tetrapediini), a broader study including representatives of all apine tribes needs to be considered for evaluating tribal relationships.', 'fields': 'Biodiversity Conservation; Zoology', 'publication_minute': '00', 'publication_month': '10', 'publication_hour': '00', 'publication_date': '2010-10-22 00:00:00', 'publication_second': '00', 'language': 'English'}\n",
"\n",
"{'title': 'E-beta-Ocimene, a Volatile Brood Pheromone Involved in Social Regulation in the Honey Bee Colony (Apis mellifera)', 'authors': 'Maisonnasse, A, Lenoir, JC, Beslay, D, Crauser, D, Le Conte, Y', 'publication_year': '2010', 'publication_day': '21', 'doi': '10.1371/journal.pone.0013531', 'abstract': 'Background: In honey bee colony, the brood is able to manipulate and chemically control the workers in order to sustain their own development. A brood ester pheromone produced primarily by old larvae (4 and 5 days old larvae) was first identified as acting as a contact pheromone with specific effects on nurses in the colony. More recently a new volatile brood pheromone has been identified: E-beta-ocimene, which partially inhibits ovary development in workers. Methodology and Principal Finding: Our analysis of E-beta-ocimene production revealed that young brood (newly hatched to 3 days old) produce the highest quantity of E-beta-ocimene relative to their body weight. By testing the potential action of this molecule as a non-specific larval signal, due to its high volatility in the colony, we demonstrated that in the presence of E-beta-ocimene nest workers start to forage earlier in life, as seen in the presence of real brood. Conclusions/Significance: In this way, young larvae are able to assign precedence to the task of foraging by workers in order to increase food stores for their own development. Thus, in the complexity of honey bee chemical communication, E-beta-ocimene, a pheromone of young larvae, provides the brood with the means to express their nutritional needs to the workers.', 'fields': 'Multidisciplinary Sciences', 'publication_minute': '00', 'publication_month': '10', 'publication_hour': '00', 'publication_date': '2010-10-21 00:00:00', 'publication_second': '00', 'language': 'English'}\n",
"\n",
"{'title': 'Differential Protein Expression in Honeybee (Apis mellifera L.) Larvae: Underlying Caste Differentiation', 'authors': 'Li, JK, Wu, J, Rundassa, DB, Song, FF, Zheng, AJ, Fang, Y', 'publication_year': '2010', 'publication_day': '20', 'doi': '10.1371/journal.pone.0013455', 'abstract': 'Honeybee (Apis mellifera) exhibits divisions in both morphology and reproduction. The queen is larger in size and fully developed sexually, while the worker bees are smaller in size and nearly infertile. To better understand the specific time and underlying molecular mechanisms of caste differentiation, the proteomic profiles of larvae intended to grow into queen and worker castes were compared at 72 and 120 hours using two dimensional electrophoresis (2-DE), network, enrichment and quantitative PCR analysis. There were significant differences in protein expression between the two larvae castes at 72 and 120 hours, suggesting the queen and the worker larvae have already decided their fate before 72 hours. Specifically, at 72 hours, queen intended larvae over-expressed transketolase, aldehyde reductase, and enolase proteins which are involved in carbohydrate metabolism and energy production, imaginal disc growth factor 4 which is a developmental related protein, long-chain-fatty-acid CoA ligase and proteasome subunit alpha type 5 which metabolize fatty and amino acids, while worker intended larvae over-expressed ATP synthase beta subunit, aldehyde dehydrogenase, thioredoxin peroxidase 1 and peroxiredoxin 2540, lethal (2) 37 and 14-3-3 protein epsilon, fatty acid binding protein, and translational controlled tumor protein. This differential protein expression between the two caste intended larvae was more pronounced at 120 hours, with particular significant differences in proteins associated with carbohydrate metabolism and energy production. Functional enrichment analysis suggests that carbohydrate metabolism and energy production and anti-oxidation proteins play major roles in the formation of caste divergence. The constructed network and validated gene expression identified target proteins for further functional study. This new finding is in contrast to the existing notion that 72 hour old larvae has bipotential and can develop into either queen or worker based on epigenetics and can help us to gain new insight into the time of departure as well as caste trajectory influencing elements at the molecular level.', 'fields': 'Multidisciplinary Sciences', 'publication_minute': '00', 'publication_month': '10', 'publication_hour': '00', 'publication_date': '2010-10-20 00:00:00', 'publication_second': '00', 'language': 'English'}\n",
"\n",
"{'title': 'The Influence of Gustatory and Olfactory Experiences on Responsiveness to Reward in the Honeybee', 'authors': 'Ramirez, GP, Martinez, AS, Fernandez, VM, Bielsa, GC, Farina, WM', 'publication_year': '2010', 'publication_day': '20', 'doi': '10.1371/journal.pone.0013498', 'abstract': 'Background: Honeybees (Apis mellifera) exhibit an extraordinarily tuned division of labor that depends on age polyethism. This adjustment is generally associated with the fact that individuals of different ages display different response thresholds to given stimuli, which determine specific behaviors. For instance, the sucrose-response threshold (SRT) which largely depends on genetic factors may also be affected by the nectar sugar content. However, it remains unknown whether SRTs in workers of different ages and tasks can differ depending on gustatory and olfactory experiences. Methodology: Groups of worker bees reared either in an artificial environment or else in a queen-right colony, were exposed to different reward conditions at different adult ages. Gustatory response scores (GRSs) and odor-memory retrieval were measured in bees that were previously exposed to changes in food characteristics. Principal Findings: Results show that the gustatory responses of pre-foraging-aged bees are affected by changes in sucrose solution concentration and also to the presence of an odor provided it is presented as scented sucrose solution. In contrast no differences in worker responses were observed when presented with odor only in the rearing environment. Fast modulation of GRSs was observed in older bees (12-16 days of age) which are commonly involved in food processing tasks within the hive, while slower modulation times were observed in younger bees (commonly nurse bees, 6-9 days of age). This suggests that older food-processing bees have a higher plasticity when responding to fluctuations in resource information than younger hive bees. Adjustments in the number of trophallaxis events were also found when scented food circulated inside the nest, and this was positively correlated with the differences in timing observed in gustatory responsiveness and memory retention for hive bees of different age classes. Conclusions: This work demonstrates the accessibility of chemosensory information in the honeybee colonies with respect to incoming nectar. The modulation of the sensory-response systems within the hive can have important effects on the dynamics of food transfer and information propagation.', 'fields': 'Multidisciplinary Sciences', 'publication_minute': '00', 'publication_month': '10', 'publication_hour': '00', 'publication_date': '2010-10-20 00:00:00', 'publication_second': '00', 'language': 'English'}\n",
"\n",
"{'title': 'In the Laboratory and during Free-Flight: Old Honey Bees Reveal Learning and Extinction Deficits that Mirror Mammalian Functional Decline', 'authors': 'Munch, D, Baker, N, Kreibich, CD, Braten, AT, Amdam, GV', 'publication_year': '2010', 'publication_day': '19', 'doi': '10.1371/journal.pone.0013504', 'abstract': 'Loss of brain function is one of the most negative and feared aspects of aging. Studies of invertebrates have taught us much about the physiology of aging and how this progression may be slowed. Yet, how aging affects complex brain functions, e.g., the ability to acquire new memory when previous experience is no longer valid, is an almost exclusive question of studies in humans and mammalian models. In these systems, age related cognitive disorders are assessed through composite paradigms that test different performance tasks in the same individual. Such studies could demonstrate that afflicted individuals show the loss of several and often-diverse memory faculties, and that performance usually varies more between aged individuals, as compared to conspecifics from younger groups. No comparable composite surveying approaches are established yet for invertebrate models in aging research. Here we test whether an insect can share patterns of decline similar to those that are commonly observed during mammalian brain aging. Using honey bees, we combine restrained learning with free-flight assays. We demonstrate that reduced olfactory learning performance correlates with a reduced ability to extinguish the spatial memory of an abandoned nest location ( spatial memory extinction). Adding to this, we show that learning performance is more variable in old honey bees. Taken together, our findings point to generic features of brain aging and provide the prerequisites to model individual aspects of learning dysfunction with insect models.', 'fields': 'Multidisciplinary Sciences', 'publication_minute': '00', 'publication_month': '10', 'publication_hour': '00', 'publication_date': '2010-10-19 00:00:00', 'publication_second': '00', 'language': 'English'}\n",
"\n"
]
}
],
"prompt_number": 3
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Basic tests"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print(\"RE abcdefgh\\n\"[3:-1])\n",
"print(b\"english\".decode())"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"abcdefgh\n",
"english\n"
]
}
],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print(\"publication_date\"[-5:])\n",
"print(\"publication_date\"[:-5])"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import dateutil.parser"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"d = dateutil.parser.parse(\"2014 OCT 11 1:2:3\")"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"d.strftime(\"%Y-%m-%d %H:%M:%S\")"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"t = d.timetuple()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"d.strftime(\"%H\")\n"
],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
\ No newline at end of file
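Aside: the scratch cells at the end of this notebook (slicing "publication_date", dateutil.parser, strftime, timetuple) read like groundwork for folding the separate ISI date fields printed above into the single publication_date string the parser emits. Below is a minimal, self-contained sketch of that combination; normalize_publication_date is an illustrative name, not a function from this codebase.
import dateutil.parser

# Illustrative only: combine the separate ISI fields (publication_year,
# publication_month, publication_day, ...) into one normalized string,
# the way the scratch cells above experiment with dateutil and strftime.
def normalize_publication_date(year, month, day, hour="0", minute="0", second="0"):
    raw = "%s %s %s %s:%s:%s" % (year, month, day, hour, minute, second)
    date = dateutil.parser.parse(raw)
    return date.strftime("%Y-%m-%d %H:%M:%S")

print(normalize_publication_date("2014", "OCT", "11"))  # 2014-10-11 00:00:00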
{
"metadata": {
"name": "",
"signature": "sha256:cb74945a57bed4d2ec124c7c05411b9346c7601e8339e613ddbc37fb950c4d86"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from parsing.NgramsExtractors import NgramsExtractorsCache"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c = NgramsExtractorsCache()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"en\"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 3,
"text": [
"<parsing.NgramsExtractors.EnglishNgramsExtractor.EnglishNgramsExtractor at 0x7f8d14947c88>"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"fre\"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 4,
"text": [
"<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f8d24a97d30>"
]
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"p = c[\"fr\"]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"p.extract_ngrams(\"En voil\u00e0 un beau parseur !\")"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 6,
"text": [
"[[('beau', 'NN'), ('parseur', 'NN')]]"
]
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"fr\"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 7,
"text": [
"<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f8d24a97d30>"
]
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"french\"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 8,
"text": [
"<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f8d24a97d30>"
]
}
],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"german\"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 9,
"text": [
"<parsing.NgramsExtractors.NgramsExtractor.NgramsExtractor at 0x7f8d24a979e8>"
]
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"dutch\"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 10,
"text": [
"<parsing.NgramsExtractors.NgramsExtractor.NgramsExtractor at 0x7f8d24a979e8>"
]
}
],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"italian\"].extract_ngrams(\"Est-ce un texte ?\")"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Warning: parsing empty text\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 14,
"text": [
"[]"
]
}
],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
\ No newline at end of file
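Aside: what the cells above demonstrate is that NgramsExtractorsCache builds one extractor per language lazily, lets ISO codes and full names ("fr", "fre", "french") share the same instance, and hands languages without a dedicated extractor a common generic NgramsExtractor. Below is a self-contained sketch of that caching pattern, with stand-in extractor classes rather than the project's real implementations.
# Sketch of the lazy, alias-aware cache exercised above; the real class is
# parsing.NgramsExtractors.NgramsExtractorsCache, and the extractor classes
# below are stand-ins for EnglishNgramsExtractor / FrenchNgramsExtractor.
class NgramsExtractor:                      # generic fallback
    pass

class EnglishNgramsExtractor(NgramsExtractor):
    pass

class FrenchNgramsExtractor(NgramsExtractor):
    pass

class ExtractorsCacheSketch(dict):
    _classes = {"english": EnglishNgramsExtractor, "french": FrenchNgramsExtractor}
    _aliases = {"en": "english", "eng": "english",
                "fr": "french", "fre": "french", "fra": "french"}

    def __missing__(self, key):
        # normalize aliases so "fr", "fre" and "french" share one instance;
        # languages without a dedicated extractor share one generic fallback
        name = self._aliases.get(key.lower(), key.lower())
        if name not in self._classes:
            name = "default"
        if name not in self:
            self[name] = self._classes.get(name, NgramsExtractor)()
        self[key] = self[name]
        return self[key]

c = ExtractorsCacheSketch()
assert c["fr"] is c["fre"] is c["french"]   # one French extractor, as above
assert c["german"] is c["dutch"]            # shared generic fallback, as above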
......@@ -3,23 +3,27 @@ from django.conf.urls import patterns, include, url
from django.contrib import admin
from gargantext_web.views import home, projects, project, corpus
from gargantext_web.views import add_corpus
from gargantext_web.views import add_corpus, delete_project, delete_corpus
admin.autodiscover()
urlpatterns = patterns('',
# Examples:
# url(r'^$', 'gargantext_web.views.home', name='home'),
# url(r'^blog/', include('blog.urls')),
url(r'^admin/', include(admin.site.urls)),
url(r'^login/', include(admin.site.urls)),
url(r'^grappelli/', include('grappelli.urls')), # grappelli URLS
url(r'^grappelli/', include('grappelli.urls')),
url(r'^$', home),
url(r'^add/corpus/$', add_corpus),
url(r'^projects/$', projects),
url(r'^project/(\d+)/delete/$', delete_project),
url(r'^project/(\d+)/$', project),
url(r'^project/(\d+)/add/$', add_corpus),
url(r'^project/(\d+)/corpus/(\d+)/$', corpus),
url(r'^project/(\d+)/corpus/(\d+)/delete$', delete_corpus),
)
from django.conf import settings
......
......@@ -6,7 +6,10 @@ from django.template.loader import get_template
from django.template import Context
#from documents.models import Project, Corpus, Document
from node.models import Node, NodeType
from node.models import Language, DatabaseType, Resource
from node.models import Node, NodeType, Project, Corpus
from node.admin import CorpusForm, ProjectForm, ResourceForm
from django.contrib.auth.models import User
......@@ -18,6 +21,8 @@ from django import forms
from collections import defaultdict
from parsing.FileParsers import *
# SOME FUNCTIONS
def query_to_dicts(query_string, *query_args):
......@@ -82,18 +87,27 @@ def projects(request):
user = request.user
date = datetime.datetime.now()
project = NodeType.objects.get(name='Project')
projects = Node.objects.filter(user=user, type_id = project.id).order_by("-date")
project_type = NodeType.objects.get(name='Project')
projects = Node.objects.filter(user=user, type_id = project_type.id).order_by("-date")
number = len(projects)
form = ProjectForm()
if request.method == 'POST':
# form = ProjectForm(request.POST)
# TODO : protect from sql injection here (see the ProjectForm sketch after this file's diff)
name = str(request.POST['name'])
if name != "" :
Project(name=name, type=project_type, user=user).save()
return HttpResponseRedirect('/projects/')
else:
form = ProjectForm()
html = t.render(Context({\
'user': user,\
'date': date,\
'projects': projects,\
'number': number,\
}))
return HttpResponse(html)
return render(request, 'projects.html', {
'date': date,
'form': form,
'number': number,
'projects': projects
})
def project(request, project_id):
if not request.user.is_authenticated():
......@@ -104,9 +118,7 @@ def project(request, project_id):
except ValueError:
raise Http404()
t = get_template('project.html')
user = request.user
date = datetime.datetime.now()
project = Node.objects.get(id=project_id)
......@@ -121,15 +133,68 @@ def project(request, project_id):
dashboard['count'] = corpus.children.count()
board.append(dashboard)
html = t.render(Context({\
'user': user,\
'date': date,\
'project': project,\
'board' : board,\
'number': number,\
}))
return HttpResponse(html)
if request.method == 'POST':
#form = CorpusForm(request.POST, request.FILES)
name = str(request.POST['name'])
try:
language = Language.objects.get(id=str(request.POST['language']))
except:
language = None
try:
bdd_type = DatabaseType.objects.get(id=str(request.POST['bdd_type']))
except:
bdd_type = None
try:
file = request.FILES['file']
except:
file = None
if language is not None and name != "" and bdd_type is not None and file is not None:
resource = Resource(user=request.user, guid=str(date), bdd_type=bdd_type, file=file)
resource.save()
node_type = NodeType.objects.get(name='Corpus')
parent = Node.objects.get(id=project_id)
node = Node(parent=parent, type=node_type, name=name, user=request.user, language=language)
node.save()
node.resource.add(resource)
try:
for resource in node.resource.all():
print(resource.bdd_type.name)
if resource.bdd_type.name == "PubMed":
fileparser = PubmedFileParser(file='/var/www/gargantext/media/' + str(resource.file))
fileparser.parse(node)
elif resource.bdd_type.name == "Web Of Science (WOS), ISI format":
fileparser = IsiParser(file='/var/www/gargantext/media/' + str(resource.file))
fileparser.parse(node)
elif resource.bdd_type.name == "Europresse":
pass
except Exception as error:
print(error)
return HttpResponseRedirect('/project/' + str(project_id))
else:
form = CorpusForm(request=request)
formResource = ResourceForm()
else:
form = CorpusForm(request=request)
formResource = ResourceForm()
return render(request, 'project.html', {
'form': form,
'formResource': formResource,
'user': user,
'date': date,
'project': project,
'board' : board,
'number': number,
})
def corpus(request, project_id, corpus_id):
if not request.user.is_authenticated():
......@@ -191,6 +256,7 @@ def corpus(request, project_id, corpus_id):
try:
dates = dict()
# query_to_dicts('''select to_char(t1.date, '%s'), count(*)
# from documents_document as t1
# INNER JOIN documents_document_corpus as t2
......@@ -227,42 +293,45 @@ def corpus(request, project_id, corpus_id):
return HttpResponse(html)
from node.admin import CorpusForm
class NameForm(forms.Form):
your_name = forms.CharField(label='Your name', max_length=100)
sender = forms.EmailField()
message = forms.CharField(widget=forms.Textarea)
fichier = forms.FileField()
def add_corpus(request):
# if this is a POST request we need to process the form data
#print(request.method)
form = CorpusForm(request=request)
if request.method == 'POST':
# create a form instance and populate it with data from the request:
form = CorpusForm(request.POST, request.FILES)
# check whether it's valid:
if form.is_valid():
form.save()
# process the data in form.cleaned_data as required
# corpus.user = request.user
# print(form.cleaned_data['name'])
try:
print(type(form.cleaned_data['fichier']))
print("here we parse" + str(form.cleaned_data['fichier']))
except Exception as error:
print(error)
# redirect to a new URL:
return HttpResponseRedirect('/projects/')
#form = CorpusForm(request.POST, request.FILES)
name = str(request.POST['name'])
try:
#language = Language.objects.get(name=str(request.POST['language']))
language = Language.objects.get(name='French')
except Exception as e:
print(e)
language = None
if name != "" :
project_id = 1047
node_type = NodeType.objects.get(name='Corpus')
parent = Node.objects.get(id=project_id)
Corpus(parent=parent, type=node_type, name=name, user=request.user, language=language).save()
# try:
# for resource in node.resource.all():
# fileparser = PubmedFileParser.PubmedFileParser(file='/var/www/gargantext/media/' + str(resource.file))
# fileparser.parse(node)
#
# except Exception as error:
# print(error)
return HttpResponseRedirect('/project/' + str(project_id))
# if a GET (or any other method) we'll create a blank form
else:
form = CorpusForm(request=request)
return render(request, 'add_corpus.html', {'form': form})
print("5")
def delete_project(request, node_id):
Node.objects.filter(id=node_id).all().delete()
return HttpResponseRedirect('/projects/')
def delete_corpus(request, project_id, corpus_id):
Node.objects.filter(id=corpus_id).all().delete()
return HttpResponseRedirect('/project/' + project_id)
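Aside, on the TODO about input validation flagged in projects() above: the view already imports ProjectForm, so the POST branch could delegate validation and saving to it instead of reading request.POST['name'] directly. A hedged sketch of that variant follows; it is not the code in this commit.
# Sketch only (not this commit's code): let ProjectForm validate and save.
from django.http import HttpResponseRedirect
from node.admin import ProjectForm

def projects_post_sketch(request, user, project_type):
    form = ProjectForm(request.POST)
    if form.is_valid():
        project = form.save(commit=False)   # only 'name' comes from the form
        project.user = user
        project.type = project_type
        project.save()
        return HttpResponseRedirect('/projects/')
    return None   # caller re-renders the page with form.errors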
{
"metadata": {
"name": "",
"signature": "sha256:7d01da7300982ebf3acd799b54b93beda7dec63ba8f164356465e08a34dc3311"
"signature": "sha256:d0ac96b232bdca40d2b67ddfc85c941e41c3760733e29c981ec727196317e1a1"
},
"nbformat": 3,
"nbformat_minor": 0,
......@@ -1602,7 +1602,6 @@
"cell_type": "code",
"collapsed": false,
"input": [
"\n",
"SELECT t1.terms_id, t2.terms_id, COUNT(*) AS c, t3.project_id\n",
"FROM documents_ngramdocument AS t1\n",
"\n",
......@@ -1620,6 +1619,16 @@
"outputs": [],
"prompt_number": 20
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"select t1.terms_id , \n"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
......
......@@ -55,7 +55,8 @@ python manage.py syncdb
Start the Python Notebook server
--------------------------------
1) In Pyvenv: python manage.py shell_plus --notebook
1) In Pyvenv:
python manage.py shell_plus --notebook
2) Work from your browser!
......@@ -63,4 +64,5 @@ Start the Python Notebook server
Start the Django server
-----------------------
In Pyvenv:
python manage.py runserver
\ No newline at end of file
from django.contrib import admin
from ngram.models import Ngram, NodeNgram, NodeNgramNgram
admin.site.register(Ngram)
admin.site.register(NodeNgram)
admin.site.register(NodeNgramNgram)
#from ngram.models import Ngram, NodeNgram, NodeNgramNgram
#admin.site.register(Ngram)
#admin.site.register(NodeNgram)
#admin.site.register(NodeNgramNgram)
#
......@@ -3,30 +3,30 @@ from django.utils import timezone
from django.contrib.auth.models import User
from node.models import Node
class Ngram(models.Model):
terms = models.TextField(unique=True)
n = models.IntegerField()
def __str__(self):
return "[%d] %s" % (self.pk, self.terms)
class NodeNgram(models.Model):
node = models.ForeignKey(Node)
ngram = models.ForeignKey(Ngram, related_name="nodengram")
def __str__(self):
return "%s: %s" % (self.node.name, self.ngram.terms)
class NodeNgramNgram(models.Model):
node = models.ForeignKey(Node)
ngramX = models.ForeignKey(Ngram, related_name="nodengramngramx")
ngramY = models.ForeignKey(Ngram, related_name="nodengramngramy")
score = models.FloatField(default=0)
def __str__(self):
return "%s: %s / %s" % (self.node.name, self.ngramX.terms, self.ngramY.terms)
from node.models import Node, Language
#class Ngram(models.Model):
# language = models.ForeignKey(Language, blank=True, null=True, on_delete=models.SET_NULL)
# n = models.IntegerField()
# terms = models.CharField(max_length=255)
# def __str__(self):
# return "[%d] %s" % (self.pk, self.terms)
#
#class NodeNgram(models.Model):
# node = models.ForeignKey(Node)
# ngram = models.ForeignKey(Ngram, related_name="nodengram")
# def __str__(self):
# return "%s: %s" % (self.node.name, self.ngram.terms)
#
#class NodeNgramNgram(models.Model):
# node = models.ForeignKey(Node)
#
# ngramX = models.ForeignKey(Ngram, related_name="nodengramngramx")
# ngramY = models.ForeignKey(Ngram, related_name="nodengramngramy")
#
# score = models.FloatField(default=0)
#
# def __str__(self):
# return "%s: %s / %s" % (self.node.name, self.ngramX.terms, self.ngramY.terms)
#
#
......@@ -2,14 +2,14 @@ from django.contrib import admin
from django.forms import ModelForm, ModelChoiceField
from nested_inlines.admin import NestedModelAdmin, NestedStackedInline, NestedTabularInline
from node.models import NodeType, Language, Node, Project, Corpus, Document, DatabaseType, Resource
from node.models import NodeType, Language, Node, Project, Corpus, Document, DatabaseType, Resource, Node_Ngram
class ResourceInLine(admin.TabularInline):
model = Resource
extra = 0
class NodeAdmin(admin.ModelAdmin):
exclude = ('user', 'path', 'depth', 'numchild')
exclude = ('user', 'path', 'depth', 'numchild', 'ngrams')
list_display = ('name', 'date')
search_fields = ('name',)
# list_filter = ('type',)
......@@ -75,21 +75,35 @@ class ProjectAdmin(NodeAdmin):
from django.db.models.query import EmptyQuerySet
class ProjectForm(ModelForm):
class Meta:
model = Project
exclude = ['ngrams', 'metadata', 'resource', 'parent', 'user', 'type', 'language', 'date']
class ResourceForm(ModelForm):
class Meta:
model = Resource
exclude = ['user', 'guid']
class CorpusForm(ModelForm):
#parent = ModelChoiceField(EmptyQuerySet)
def __init__(self, *args, **kwargs):
try:
self.request = kwargs.pop('request', None)
super(CorpusForm, self).__init__(*args, **kwargs)
parent_type = NodeType.objects.get(name="Project")
#parent_type = NodeType.objects.get(name=self._parent_nodetype_name)
self.fields['parent'].queryset = Node.objects.filter(user_id=self.request.user.id, type_id=parent_type.id)
except:
pass
# self.fields['parent'].queryset = Node.objects.filter(
# user_id=self.request.user.id,
# type_id=parent_type.id
# )
self.fields['language'].queryset = Language.objects.filter(implemented=1)
except Exception as error:
print("Error with", error)
class Meta:
model = Corpus
model = Corpus
exclude = ['parent', 'user', 'type', 'ngrams', 'metadata', 'resource', 'date']
class CorpusAdmin(NodeAdmin):
_parent_nodetype_name = 'Project'
......@@ -123,4 +137,5 @@ admin.site.register(Project, ProjectAdmin)
admin.site.register(Corpus, CorpusAdmin)
admin.site.register(Document, DocumentAdmin)
admin.site.register(Node_Ngram)
......@@ -33,22 +33,28 @@ class DatabaseType(models.Model):
def __str__(self):
return self.name
class Ngram(models.Model):
language = models.ForeignKey(Language, blank=True, null=True, on_delete=models.SET_NULL)
n = models.IntegerField()
terms = models.CharField(max_length=255)
class Resource(models.Model):
user = models.ForeignKey(User)
guid = models.CharField(max_length=255)
bdd_type = models.ForeignKey(DatabaseType, blank=True, null=True)
file = models.FileField(upload_to=upload_to, blank=True)
def __str__(self):
return "%s => %s" % (self.bdd_type, self.file)
class NodeType(models.Model):
name = models.CharField(max_length=200)
def __str__(self):
return self.name
class Ngram(models.Model):
language = models.ForeignKey(Language, blank=True, null=True, on_delete=models.SET_NULL)
n = models.IntegerField()
terms = models.CharField(max_length=255)
def __str__(self):
return "[%d] %s" % (self.pk, self.terms)
class Node(CTENode):
objects = Manager()
......@@ -62,7 +68,7 @@ class Node(CTENode):
metadata = hstore.DictionaryField(blank=True)
resource = models.ManyToManyField(Resource, blank=True)
ngrams = models.ManyToManyField(Ngram, blank=True)
ngrams = models.ManyToManyField(Ngram, blank=True, help_text="Hold down")
def __str__(self):
......@@ -72,16 +78,18 @@ class Node(CTENode):
for noeud in Node.objects.filter(user=user):
print(noeud.depth * " " + "[%d] %s" % (noeud.pk, noeud.name))
class Node_Ngram(models.Model):
node = models.ForeignKey(Node, on_delete=models.CASCADE)
ngram = models.ForeignKey(Ngram, on_delete=models.CASCADE)
occurences = models.IntegerField()
class Project(Node):
class Meta:
proxy=True
class CorpusManager(models.Manager):
def get_query_set(self):
corpus_type = NodeType.objects.get(name='Corpus')
return super(CorpusManager, self).get_query_set().filter(type=corpus_type)
class Corpus(Node):
objects = CorpusManager()
class Meta:
proxy=True
verbose_name_plural = 'Corpora'
......@@ -90,4 +98,28 @@ class Document(Node):
class Meta:
proxy=True
############################
# NGRAMS
############################
class Node_Ngram(models.Model):
node = models.ForeignKey(Node, on_delete=models.CASCADE)
ngram = models.ForeignKey(Ngram, on_delete=models.CASCADE)
occurences = models.IntegerField()
def __str__(self):
return "%s: %s" % (self.node.name, self.ngram.terms)
class NodeNgramNgram(models.Model):
node = models.ForeignKey(Node)
ngramX = models.ForeignKey(Ngram, related_name="nodengramngramx", on_delete=models.CASCADE)
ngramY = models.ForeignKey(Ngram, related_name="nodengramngramy", on_delete=models.CASCADE)
score = models.FloatField(default=0)
def __str__(self):
return "%s: %s / %s" % (self.node.name, self.ngramX.terms, self.ngramY.terms)
#import FileParser
#
#class EuropressFileParser(FileParser, contents):
#
# def parse():
# pass
#
from parsing.FileParsers.FileParser import FileParser
class EuropressFileParser(FileParser):
def parse(self):
pass
import collections
from node.models import Node, NodeType, Language, Ngram, Node_Ngram
from parsing.NgramsExtractors import *
import collections
import dateutil.parser
class NgramCache:
"""
This allows the fast retrieval of ngram ids
......@@ -18,7 +21,7 @@ class NgramCache:
try:
ngram = Ngram.objects.get(terms=terms, language=self._language)
except Ngram.DoesNotExist:
ngram = Ngram(terms=terms, n=len(terms), language=self._language)
ngram = Ngram(terms=terms, n=len(terms.split()), language=self._language)
ngram.save()
self._cache[terms] = ngram
return self._cache[terms]
......@@ -48,6 +51,7 @@ class FileParser:
self._extractors = dict()
self._document_nodetype = NodeType.objects.get(name='Document')
languages = Language.objects.all()
self._languages_fullname = {language.fullname.lower(): language for language in languages}
self._languages_iso2 = {language.iso2.lower(): language for language in languages}
self._languages_iso3 = {language.iso3.lower(): language for language in languages}
#self.parse()
......@@ -85,6 +89,7 @@ class FileParser:
"""Add a document to the database.
"""
def create_document(self, parentNode, title, contents, language, metadata, guid=None):
metadata = self.format_metadata(metadata)
# create or retrieve a resource for that document, based on its user id
# if guid is None:
# resource = Resource(guid=guid)
......@@ -98,6 +103,10 @@ class FileParser:
# if parentNode.descendants().filter(resource=resource).exists():
# return None
# create the document itself
if len(title) > 200:
title = title[:200]
childNode = Node(
user = parentNode.user,
type = self._document_nodetype,
......@@ -137,3 +146,51 @@ class FileParser:
def parse(self):
return list()
def format_metadata_dates(self, metadata):
"""Format the dates found in the metadata.
Example: {"publication_date": "2014-10-23 09:57:42"} -> {...}
"""
# First, check the split dates...
prefixes = [key[:-5] for key in metadata.keys() if key[-5:] == "_year"]
for prefix in prefixes:
date_string = metadata[prefix + "_year"]
key = prefix + "_month"
if key in metadata:
date_string += " " + metadata[key]
key = prefix + "_day"
if key in metadata:
date_string += " " + metadata[key]
key = prefix + "_hour"
if key in metadata:
date_string += " " + metadata[key]
key = prefix + "_minute"
if key in metadata:
date_string += ":" + metadata[key]
key = prefix + "_second"
if key in metadata:
date_string += ":" + metadata[key]
try:
metadata[prefix + "_date"] = dateutil.parser.parse(date_string).strftime("%Y-%m-%d %H:%M:%S")
except:
pass
# ...then split every "_date" field back into separate year, month, day, hour, minute and second elements
prefixes = [key[:-5] for key in metadata.keys() if key[-5:] == "_date"]
for prefix in prefixes:
date = dateutil.parser.parse(metadata[prefix + "_date"])
metadata[prefix + "_year"] = date.strftime("%Y")
metadata[prefix + "_month"] = date.strftime("%m")
metadata[prefix + "_day"] = date.strftime("%d")
metadata[prefix + "_hour"] = date.strftime("%H")
metadata[prefix + "_minute"] = date.strftime("%M")
metadata[prefix + "_second"] = date.strftime("%S")
# finally, return the result!
return metadata
def format_metadata(self, metadata):
"""Format the metadata."""
metadata = self.format_metadata_dates(metadata)
return metadata
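As a quick illustration of format_metadata_dates (a sketch, not from the commit; `parser` stands for any FileParser subclass instance and the field values are made up):
# assume `parser` is an instance of a FileParser subclass
metadata = {"publication_year": "2014", "publication_month": "10", "publication_day": "23"}
metadata = parser.format_metadata_dates(metadata)
# the split fields are joined and parsed into a full date string...
# metadata["publication_date"] == "2014-10-23 00:00:00"
# ...then the "_date" field is expanded back into its components:
# metadata["publication_hour"] == "00", metadata["publication_minute"] == "00", etc.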
from django.db import transaction
from FileParser import FileParser
from parsing.FileParsers.RisFileParser import RisFileParser
class IsiFileParser(FileParser):
class IsiFileParser(RisFileParser):
def parse(self, parentNode):
# read the file, line by line
for line in self.__file:
# open the file as XML
xml_parser = etree.XMLParser(resolve_entities=False, recover=True)
xml = etree.parse(self._file, parser=xml_parser)
# parse all the articles, one by one
# all database operations should be performed within one transaction
xml_articles = xml.findall('PubmedArticle')
with transaction.atomic():
for xml_article in xml_articles:
# extract data from the document
date_year = int(xml_article.find('MedlineCitation/DateCreated/Year').text)
date_month = int(xml_article.find('MedlineCitation/DateCreated/Month').text)
date_day = int(xml_article.find('MedlineCitation/DateCreated/Day').text)
metadata = {
# other metadata should also be included:
# authors, submission date, etc.
"date_pub": datetime.date(year, month, day),
"journal": xml_article.find('MedlineCitation/Article/Journal/Title').text
"title": xml_article.find('MedlineCitation/Article/ArticleTitle').text
"language_iso3": xml_article.find('MedlineCitation/Article/Language').text
"doi": xml_article.find('PubmedData/ArticleIdList/ArticleId[type=doi]').text
}
contents = xml_article.find('MedlineCitation/Article/Abstract/AbstractText').text
# create the document in the database
yield self.create_document(
parentNode = parentNode,
title = metadata["title"],
contents = contents,
language = self._languages_iso3[metadata["language_iso3"].lower()],
metadata = metadata,
guid = metadata["doi"],
)
_parameters = {
b"ER": {"type": "delimiter"},
b"TI": {"type": "metadata", "key": "title", "separator": " "},
b"AU": {"type": "metadata", "key": "authors", "separator": ", "},
b"DI": {"type": "metadata", "key": "doi"},
b"PY": {"type": "metadata", "key": "publication_year"},
b"PD": {"type": "metadata", "key": "publication_month"},
b"LA": {"type": "metadata", "key": "language"},
b"AB": {"type": "metadata", "key": "abstract", "separator": " "},
b"WC": {"type": "metadata", "key": "fields"},
}
......@@ -7,7 +7,7 @@ import datetime
class PubmedFileParser(FileParser):
def parse(self, parentNode, tag=True):
def parse(self, parentNode=None, tag=True):
# open the file as XML
xml_parser = etree.XMLParser(resolve_entities=False, recover=True)
documents = []
......@@ -16,7 +16,6 @@ class PubmedFileParser(FileParser):
with zipfile.ZipFile(self._file) as zipFile:
for filename in zipFile.namelist():
file = zipFile.open(filename, "r")
# print(file.read())
xml = etree.parse(file, parser=xml_parser)
# parse all the articles, one by one
......@@ -24,19 +23,17 @@ class PubmedFileParser(FileParser):
xml_articles = xml.findall('PubmedArticle')
for xml_article in xml_articles:
# extract data from the document
date_year = int(xml_article.find('MedlineCitation/DateCreated/Year').text)
date_month = int(xml_article.find('MedlineCitation/DateCreated/Month').text)
date_day = int(xml_article.find('MedlineCitation/DateCreated/Day').text)
metadata = {
"date_pub": '%s-%s-%s' % (date_year, date_month, date_day),
}
metadata = {}
metadata_path = {
"journal" : 'MedlineCitation/Article/Journal/Title',
"title" : 'MedlineCitation/Article/ArticleTitle',
"language_iso3" : 'MedlineCitation/Article/Language',
"doi" : 'PubmedData/ArticleIdList/ArticleId[type=doi]',
"abstract" : 'MedlineCitation/Article/Abstract/AbstractText'
}
"journal" : 'MedlineCitation/Article/Journal/Title',
"title" : 'MedlineCitation/Article/ArticleTitle',
"language_iso3" : 'MedlineCitation/Article/Language',
"doi" : 'PubmedData/ArticleIdList/ArticleId[type=doi]',
"abstract" : 'MedlineCitation/Article/Abstract/AbstractText',
"publication_year" : 'MedlineCitation/DateCreated/Year',
"publication_month" : 'MedlineCitation/DateCreated/Month',
"publication_day" : 'MedlineCitation/DateCreated/Day',
}
for key, path in metadata_path.items():
try:
node = xml_article.find(path)
......
from django.db import transaction
from parsing.FileParsers.FileParser import FileParser
class RisFileParser(FileParser):
_parameters = {
}
def _parse(self, parentNode, file):
metadata = {}
last_key = None
last_values = []
with transaction.atomic():
for line in self._file:
if len(line) > 2:
parameter_key = line[:2]
if parameter_key != b' ' and parameter_key != last_key:
if last_key in self._parameters:
parameter = self._parameters[last_key]
if parameter["type"] == "metadata":
separator = parameter["separator"] if "separator" in parameter else ""
metadata[parameter["key"]] = separator.join(last_values)
elif parameter["type"] == "delimiter":
language = self._languages_fullname[metadata["language"].lower()]
self.create_document(
parentNode = parentNode,
title = metadata["title"],
contents = metadata.get("abstract", ""),
language = language,
metadata = metadata,
guid = metadata["doi"]
)
# print(self.format_metadata(metadata))
# print()
metadata = {}
last_key = parameter_key
last_values = []
last_values.append(line[3:-1].decode())
self._file.close()
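To make the tag handling above concrete, here is a small sketch (values invented) of how a multi-line field is flushed when the tag changes, using the same join logic as _parse and a _parameters entry like the "AU" one declared for IsiFileParser:
# continuation lines accumulated under the b"AU" (authors) tag
last_values = ["Doe, John", "Smith, Jane"]
parameter = {"type": "metadata", "key": "authors", "separator": ", "}
separator = parameter["separator"] if "separator" in parameter else ""
metadata = {}
metadata[parameter["key"]] = separator.join(last_values)
# metadata == {"authors": "Doe, John, Smith, Jane"}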
#from parsing.FileParsers import EuropressFileParser
from parsing.FileParsers import PubmedFileParser
from parsing.FileParsers.IsiFileParser import IsiFileParser
from parsing.FileParsers.PubmedFileParser import PubmedFileParser
from parsing.FileParsers.EuropressFileParser import EuropressFileParser
SELECT
177 as node_id, x.ngram_id as ngramX_id, y.ngram_id as ngramY_id, COUNT(*) AS score
FROM
node_node_ngram AS x
INNER JOIN
node_node_ngram AS y
ON x.node_id = y.node_id
WHERE
x.id NOT IN (SELECT id FROM node_node_ngram WHERE node_id = 174 )
AND
y.id NOT IN (SELECT id from node_node_ngram WHERE node_id = 174 )
AND
x.ngram_id <> y.ngram_id
GROUP BY
x.ngram_id, y.ngram_id
LIMIT 10
SELECT
100 as "NodeType Cooc", x.ngram_id, y.ngram_id, SQRT(SUM(x.occurences * y.occurences)) AS score
FROM
node_node_ngram AS x
INNER JOIN
node_node_ngram AS y
ON
x.node_id = y.node_id
AND
x.ngram_id <> y.ngram_id
GROUP BY
x.ngram_id, y.ngram_id
SELECT
id, 177 as node_id, x.ngram_id as ngramX_id, y.ngram_id as ngramY_id, COUNT(*) AS score
FROM
node_node_ngram AS x
INNER JOIN
node_node_ngram AS y
ON
x.node_id = y.node_id
AND
x.ngram_id <> y.ngram_id
GROUP BY
x.ngram_id, y.ngram_id
-- TODO Add count for synonyms
SELECT
177 as node_id, x.ngram_id as ngramX_id, y.ngram_id as ngramY_id, COUNT(*) AS score
FROM
node_node_ngram AS x
INNER JOIN
node_node_ngram AS y
ON x.node_id = y.node_id
WHERE
x.id IN (SELECT id FROM node_node_ngram WHERE node_id = 173 )
AND
y.id IN (SELECT id FROM node_node_ngram WHERE node_id = 173 )
AND
x.ngram_id <> y.ngram_id
GROUP BY
x.ngram_id, y.ngram_id
LIMIT 10
INSERT INTO node_nodengramngram (node_id, "ngramX_id", "ngramY_id", score)
SELECT
177 as node_id, x.ngram_id, y.ngram_id, COUNT(*) AS score
FROM
node_node_ngram AS x
INNER JOIN
node_node_ngram AS y
ON x.node_id = y.node_id
WHERE
x.id in (select id from node_node_ngram WHERE node_id = 173 )
AND
y.id in (select id from node_node_ngram WHERE node_id = 173 )
AND
x.ngram_id <> y.ngram_id
GROUP BY
x.ngram_id, y.ngram_id
LIMIT 1000
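The same INSERT could be driven from Django with a raw cursor. The sketch below is not part of the commit: the function name and its parameters are illustrative, the node ids stand in for the literal 177 and 173, and only the table and column names are taken from the query above.
from django.db import connection, transaction

def compute_cooccurrences(cooc_node_id, whitelist_node_id, limit=1000):
    # insert one node_nodengramngram row per cooccurring pair of whitelisted ngrams
    query = """
    INSERT INTO node_nodengramngram (node_id, "ngramX_id", "ngramY_id", score)
    SELECT %s AS node_id, x.ngram_id, y.ngram_id, COUNT(*) AS score
    FROM node_node_ngram AS x
    INNER JOIN node_node_ngram AS y ON x.node_id = y.node_id
    WHERE x.id IN (SELECT id FROM node_node_ngram WHERE node_id = %s)
      AND y.id IN (SELECT id FROM node_node_ngram WHERE node_id = %s)
      AND x.ngram_id <> y.ngram_id
    GROUP BY x.ngram_id, y.ngram_id
    LIMIT %s
    """
    with transaction.atomic():
        cursor = connection.cursor()
        cursor.execute(query, [cooc_node_id, whitelist_node_id, whitelist_node_id, limit])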
......@@ -17,8 +17,10 @@
<h1>Title</h1>
<form enctype="multipart/form-data" action="/add/corpus/" method="post">
{% csrf_token %}
{{ form.as_p }}
<input type="submit" value="Save" />
{{ form.non_field_errors }}
{{ form.as_p}}
<input type="submit" value="Save" />
</form>
</div>
</div>
......
{% extends "menu.html" %}
{% block css %}
{% load staticfiles %}
<link rel="stylesheet" href="{% static "css/bootstrap.css" %}">
<link rel="stylesheet" href="{% static "css/bootstrap-theme.min.css" %}">
{% endblock %}
{% block content %}
<div class="container theme-showcase" role="main">
<div class="jumbotron">
<h1>Title</h1>
<form enctype="multipart/form-data" action="/add/corpus/" method="post">
{% csrf_token %}
{{ form.non_field_errors }}
{{ form.as_p}}
<input type="submit" value="Save" />
</form>
</div>
</div>
{% endblock %}
{% extends "menu.html" %}
{% block css %}
{% load staticfiles %}
<link rel="stylesheet" href="{% static "css/bootstrap.css" %}">
<link rel="stylesheet" href="{% static "css/bootstrap-theme.min.css" %}">
{% endblock %}
{% block content %}
<div class="container theme-showcase" role="main">
<div class="jumbotron">
<h1>Title</h1>
<form enctype="multipart/form-data" action="/add/corpus/" method="post">
{% csrf_token %}
{{ form.non_field_errors }}
<div class="fieldWrapper">
<label for="id_date">Date :</label>
{{ form.date.errors }}
{{ form.date }}
</div>
<div class="fieldWrapper">
<label for="id_type">Type :</label>
{{ form.type.errors }}
{{ form.type }}
</div>
<div class="fieldWrapper">
<label for="id_user">User :</label>
{{ form.user.errors }}
{{ form.user }}
</div>
<div class="fieldWrapper">
<label for="id_name">Corpus name :</label>
{{ form.name.errors }}
{{ form.name }}
</div>
<div class="fieldWrapper">
<label for="id_parent">Parent :</label>
{{ form.parent.errors }}
<p>{{ form.parent }}</p>
</div>
<div class="fieldWrapper">
<label for="id_language">Language:</label>
{{ form.language.errors }}
<p>{{ form.language }}</p>
</div>
<div class="fieldWrapper">
<label for="id_metadata">Metadata:</label>
{{ form.metadata.errors }}
<p>{{ form.metadata }}</p>
</div>
<div class="fieldWrapper">
<label for="id_resource">Files :</label>
{{ form.resource.errors }}
<p>{{ form.resource }}</p>
</div>
<div class="fieldWrapper">
<label for="id_ngrams">{{ form.ngrams.label }}</label>
<p>
{{ form.ngrams.errors }}
{{ form.ngrams.help_text }}
{{ form.ngrams }}</p>
</div>
<input type="submit" value="Save" />
</form>
</div>
</div>
{% endblock %}
{% extends "menu.html" %}
{% block css %}
{% load staticfiles %}
<link rel="stylesheet" href="{% static "css/bootstrap.css" %}">
<link rel="stylesheet" href="{% static "css/bootstrap-theme.min.css" %}">
{% endblock %}
{% block content %}
<div class="container theme-showcase" role="main">
<div class="jumbotron">
<h1>Gargantext</h1>
<p>A web platform to explore text-mining</p>
</div>
</div>
<div class="container">
<div class="row">
<div class="col-md-4 content">
<h3>Presentation</h3>
<p>
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
</div>
<div class="col-md-4 content">
<h3>Historic</h3>
<p>
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
</div>
<div class="col-md-4 content">
<h3>Tutorials</h3>
<p>
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
</div>
</div>
</div>
{% endblock %}
......@@ -22,7 +22,7 @@
<div class="navbar-collapse collapse">
<ul class="nav navbar-nav">
<li><a href="/admin/">Admin</a></li>
<li><a href="/projects/">Projects</a></li>
<li><a href="/projects/">My Projects</a></li>
<li><a href="/contact/">Contact</a></li>
</ul>
......@@ -75,6 +75,16 @@ $(function() {
</script>
<script src="{% static "js/bootstrap.min.js" %}"></script>
<script>$(function () { $("[data-toggle='popover']").popover({
html:true,
title: function() {
return $("#popover-head").html();
},
content: function() {
return $("#popover-content").html();
}
});});</script>
</body>
</html>
......@@ -15,23 +15,56 @@
<div class="container theme-showcase" role="main">
<div class="jumbotron">
<div class="row">
<div class="col-md-3">
{% if project %}
<h1>{{ project.name }}</h1>
<h3> {{number}} corpora </h3>
<p>
<a class="btn btn-primary btn-lg" role="button" href="/add/corpus/">Add a corpus</a></p>
{% endif %}
</div>
<div class="col-md-4">
<button
type="button"
class="btn btn-primary btn-lg"
data-container="body"
data-toggle="popover"
data-placement="bottom"
>Add a corpus</button>
<div id="popover-content" class="hide">
<form enctype="multipart/form-data" action="/project/{{project.id}}/" method="post">
{% csrf_token %}
{{ form.non_field_errors }}
{{ form.as_p}}
{{ formResource.non_field_errors }}
{{ formResource.as_p}}
<input type="submit" class="btn" value="Add this corpus" />
</form>
</div>
</div>
</div>
</div>
</div>
</div>
<!-- Add jumbotron container for each type of coprus (presse, science etc.) --!>
<!-- Add jumbotron container for each type of corpus (presse, science etc.) --!>
<div class="container">
<div class="row">
{% if board %}
{% for corpus in board %}
<div class="col-md-4">
<h3><a href="/project/{{project.id}}/corpus/{{corpus.id}}">{{corpus.name}}</a></h3>
<h3><a href="/project/{{project.id}}/corpus/{{corpus.id}}">{{corpus.name}}</a>
<button type="button" class="btn btn-xs btn-default" data-container="body" data-toggle="popover" data-placement="bottom" data-content='<a href="/project/{{ project.id }}/corpus/{{ corpus.id}}/delete">Delete it!</a>'>Manage</button>
</h3>
<h4>{{ corpus.count }} Documents </h4>
<p>{{ corpus.language }} {{ corpus.database}}</p>
<h5>Activity:</h5>
......
......@@ -13,9 +13,26 @@
<div class="container theme-showcase" role="main">
<div class="jumbotron">
<div class="row">
<div class="col-md-3">
<h1>My {{number}} projects</h1>
<p>Template showing my working space</p>
<a class="btn btn-primary btn-lg" role="button" href="/admin/node/project/add/">Add a project</a>
</div>
<div class="col-md-4"></div>
<div class="col-md-4">
<button
type="button"
class="btn btn-primary btn-lg"
data-container="body"
data-toggle="popover"
data-placement="bottom"
>Add a project</button>
<div id="popover-content" class="hide">
<form enctype='multipart/form-data' action='/projects/' method='post'>{% csrf_token %}{{ form.non_field_errors }}{{ form.as_p}}<input type='submit' class="btn" value='Add this project !'/></form>
</div>
</div>
</div>
</div>
</div>
</div>
......@@ -25,7 +42,10 @@
{% for project in projects %}
<!--<div class="col-md-offset-7 col-md-4 content" style="background-color:grey">!-->
<div class="col-md-3 content">
<h3><a href="/project/{{ project.id }}">{{ project.name }}</a></h3>
<h3><a href="/project/{{ project.id }}">{{ project.name }}</a>
<button type="button" class="btn btn-xs btn-default" data-container="body" data-toggle="popover" data-placement="bottom" data-content='<a href="/project/{{ project.id }}/delete">Delete it!</a>'>Manage</button>
</h3>
<h4>{{ project.subtitle }}<h4>
<h5>Completed:</h5>
<div class="chart" barColor="#fffff" data-percent="75">75%</div>
......
{
"metadata": {
"name": "",
"signature": "sha256:6df2ce47b09a6203b244f7b4dc27f3346901261b85922dd46bc54d669d6469a6"
"signature": "sha256:471ecc2290c2a84d75008cf33cc7db2b8c74f4bea96be0f180e58bedfabceaa8"
},
"nbformat": 3,
"nbformat_minor": 0,
......@@ -833,6 +833,37 @@
],
"prompt_number": 26
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from autoslug import AutoSlugField"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 33
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"AutoSlugField()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 34,
"text": [
"<autoslug.fields.AutoSlugField>"
]
}
],
"prompt_number": 34
},
{
"cell_type": "code",
"collapsed": false,
......
{
"metadata": {
"name": "",
"signature": "sha256:3345ac991b0346b1dfd82386fdc2a59f39b2de9bf32d03ddfbeb565927cfe7ab"
"signature": "sha256:8c764ebc660400cc2f2dddafacfdb7082971d16cb2b75bac1470575d33428427"
},
"nbformat": 3,
"nbformat_minor": 0,
......@@ -31,7 +31,7 @@
"# try:\n",
"# Language(iso2=lang.alpha2, iso3=lang.terminology, fullname=lang.name, implemented=1).save()\n",
"# except:\n",
"# pass\n"
"# pass"
],
"language": "python",
"metadata": {},
......@@ -41,17 +41,32 @@
{
"cell_type": "code",
"collapsed": false,
"input": [],
"input": [
"corpus = Node.objects.get(name=\"OneMoreLife PubMed\")\n",
"print(corpus.resource.all())"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[<Resource: PubMed => corpora/alexandre/test_pkqLVdy.zip>]\n"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"node = Node.objects.get(name=\"PubMed corpus\")"
"from node.models import Project\n",
"from django.contrib.auth.models import User\n",
"user = User.objects.get(username=\"alexandre\")\n",
"project_type = NodeType.objects.get(name=\"Project\")\n",
"Project(user=user, type=project_type, name=\"Abeilles\").save()"
],
"language": "python",
"metadata": {},
......@@ -62,18 +77,80 @@
"cell_type": "code",
"collapsed": false,
"input": [
"fileparser = PubmedFileParser.PubmedFileParser(file='/var/www/gargantext/media/' + node.fichier.name)"
"node = Node.objects.filter(name=\"Abeilles\", user=user)[0]\n",
"print(node.pk)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"24\n"
]
}
],
"prompt_number": 28
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"r = node.children.get(name=\"Pubmed\")"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 40
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"r = r.resource.all()[0]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 41
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"r.file"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 42,
"text": [
"<FieldFile: corpora/alexandre/pubmed_BwIXSzN.zip>"
]
}
],
"prompt_number": 42
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"fileparser = PubmedFileParser.PubmedFileParser(file='/var/www/gargantext/media/' + str(r.file))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
"prompt_number": 43
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"fileparser.parse(node)\n"
"fileparser.parse(node)"
],
"language": "python",
"metadata": {},
......@@ -179,7 +256,7 @@
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 5,
"prompt_number": 44,
"text": [
"[<Node: Systemic spread and propagation of a plant-pathogenic virus in European honeybees, Apis mellifera.>,\n",
" <Node: A Causal Analysis of Observed Declines in Managed Honey Bees (Apis mellifera).>,\n",
......@@ -265,7 +342,7 @@
]
}
],
"prompt_number": 5
"prompt_number": 44
},
{
"cell_type": "code",
......@@ -394,10 +471,156 @@
{
"cell_type": "code",
"collapsed": false,
"input": [],
"input": [
"Project.objects.all().delete()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 18
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from node.models import Corpus, Project\n",
"Project.objects.all()\n",
"Corpus.objects.all()"
],
"language": "python",
"metadata": {},
"outputs": []
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 7,
"text": [
"[<Corpus: Abeilles>]"
]
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Node.objects.filter(user=user, type=project_type)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 5,
"text": [
"[<Node: Abeilles>]"
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"destruction = Node.objects.filter(id=1038).all()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 20
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"destruction.delete()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 21
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"destruction"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 22,
"text": [
"[]"
]
}
],
"prompt_number": 22
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"destruction.children.all().delete()\n",
"destruction.delete"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 14,
"text": [
"<bound method Node.delete of <Node: Encore un >>"
]
}
],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"help(destruction.delete)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Help on method delete in module cte_tree.models:\n",
"\n",
"delete(method=None, position=None, save=True) method of node.models.Node instance\n",
" Prepares the tree for deletion according to the deletion semantics\n",
" specified for the :class:`CTENode` Model, and then delegates to the\n",
" :class:`CTENode` superclass ``delete`` method.\n",
" \n",
" Default deletion `method` and `position` callable can be overridden\n",
" by being supplied as arguments to this method.\n",
" \n",
" :param method: optionally a particular deletion method, overriding\n",
" the default method specified for this model.\n",
" \n",
" :param position: optional callable to invoke prior to each move\n",
" operation, should the delete method require any moves.\n",
" \n",
" :param save: optional flag indicating whether this model's\n",
" :meth:`save` method should be invoked after each move operation,\n",
" should the delete method require any moves.\n",
"\n"
]
}
],
"prompt_number": 15
}
],
"metadata": {}
......