Commit 6c4a607c authored by Mathieu Rodic's avatar Mathieu Rodic

Merge branch 'mat' of ssh://delanoe.org:1979/gargantext

parents 75a84f95 2ca5116a
{
"metadata": {
"name": "",
"signature": "sha256:7c80ed9f4b088e13444efb451a1ee46e5727247be14aaf30ddf0236a49ac461b"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": []
}
\ No newline at end of file
{
"metadata": {
"name": "",
"signature": "sha256:490e1bc5ac44087c1b3f82ca74e40f42f49bd3910f79a088af19c708d73c63e0"
"signature": "sha256:a5146fbde2b6bf2e3ed4e2bdddfb62662f99272f26e82bf86110680ff3595332"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": []
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from node.models import Node, NodeType, Language\n",
"import parsing\n",
"from parsing.FileParsers import *"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"node = Node.objects.get(name=\"PubMed corpus\")"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"fileparser = PubmedFileParser.PubmedFileParser(file='/var/www/gargantext/media/' + node.fichier.name)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"fileparser.parse(node)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "Cannot assign \"24\": \"Node.user\" must be a \"User\" instance.",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-4-8c1443001599>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mfileparser\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mparse\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;32m/srv/gargantext/parsing/FileParsers/PubmedFileParser.py\u001b[0m in \u001b[0;36mparse\u001b[1;34m(self, parentNode, tag)\u001b[0m\n\u001b[0;32m 45\u001b[0m \u001b[0mlanguage\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_languages_iso3\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mmetadata\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"language_iso3\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlower\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 46\u001b[0m \u001b[0mmetadata\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmetadata\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 47\u001b[1;33m \u001b[0mguid\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmetadata\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"doi\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 48\u001b[0m )\n\u001b[0;32m 49\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mdocument\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/srv/gargantext/parsing/FileParsers/FileParser.py\u001b[0m in \u001b[0;36mcreate_document\u001b[1;34m(self, parentNode, title, contents, language, metadata, guid)\u001b[0m\n\u001b[0;32m 100\u001b[0m \u001b[0mmetadata\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmetadata\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 101\u001b[0m \u001b[1;31m#resource = resource,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 102\u001b[1;33m \u001b[0mparent\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mparentNode\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 103\u001b[0m )\n\u001b[0;32m 104\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/models/base.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 403\u001b[0m \u001b[1;31m# \"user_id\") so that the object gets properly cached (and type\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 404\u001b[0m \u001b[1;31m# checked) by the RelatedObjectDescriptor.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 405\u001b[1;33m \u001b[0msetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfield\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrel_obj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 406\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 407\u001b[0m \u001b[0msetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfield\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mattname\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mval\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/alexandre/projets/gargantext.py/env/lib/python3.4/site-packages/django/db/models/fields/related.py\u001b[0m in \u001b[0;36m__set__\u001b[1;34m(self, instance, value)\u001b[0m\n\u001b[0;32m 337\u001b[0m raise ValueError('Cannot assign \"%r\": \"%s.%s\" must be a \"%s\" instance.' %\n\u001b[0;32m 338\u001b[0m (value, instance._meta.object_name,\n\u001b[1;32m--> 339\u001b[1;33m self.field.name, self.field.rel.to._meta.object_name))\n\u001b[0m\u001b[0;32m 340\u001b[0m \u001b[1;32melif\u001b[0m \u001b[0mvalue\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 341\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0minstance\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_state\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdb\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mValueError\u001b[0m: Cannot assign \"24\": \"Node.user\" must be a \"User\" instance."
]
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"node.children.all()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
\ No newline at end of file
{
"metadata": {
"name": "",
"signature": "sha256:e0c3b2efe7c205a29dc4e028b10ffb7b9d0569f35c4b426febdf523069abffdb"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from pprint import pprint\n",
"from node.models import Node, NodeType, Language, Ngram\n",
"from django.contrib.auth.models import User\n",
"import parsing\n",
"from parsing.FileParsers import *"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Define user\n",
"try:\n",
" user = User.objects.get(username='Mat')\n",
"except:\n",
" user = User(username='Mat', password='0123', email='mathieu@rodic.fr')\n",
" user.save()\n",
"\n",
"# Define document types\n",
"nodetypes = {}\n",
"for name in ['Corpus', 'Document']:\n",
" try:\n",
" nodetypes[name] = NodeType.objects.get(name=name)\n",
" except:\n",
" nodetypes[name] = NodeType(name=name)\n",
" nodetypes[name].save()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Node.objects.all().delete()\n",
"corpus = Node(name='PubMed corpus', user=user, type=nodetypes['Corpus'])\n",
"corpus.save()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"fileparser = PubmedFileParser.PubmedFileParser(file='/home/mat/projects/gargantext/data_samples/pubmed.zip')"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"fileparser.parse(corpus)\n",
"print('Ok!')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Warning: parsing empty text\n",
"Warning: parsing empty text\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text\n",
"Warning: parsing empty text"
]
}
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for node_ngram in corpus.children.first().node_ngram_set.all():\n",
" print(node_ngram.ngram.terms)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
\ No newline at end of file
{
"metadata": {
"name": "",
"signature": "sha256:3345ac991b0346b1dfd82386fdc2a59f39b2de9bf32d03ddfbeb565927cfe7ab"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from node.models import Node, NodeType, Language\n",
"import parsing\n",
"from parsing.FileParsers import *"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#from node.models import Language\n",
"#import pycountry\n",
"#for lang in pycountry.languages:\n",
"# try:\n",
"# Language(iso2=lang.alpha2, iso3=lang.terminology, fullname=lang.name, implemented=1).save()\n",
"# except:\n",
"# pass\n"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"node = Node.objects.get(name=\"PubMed corpus\")"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"fileparser = PubmedFileParser.PubmedFileParser(file='/var/www/gargantext/media/' + node.fichier.name)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"fileparser.parse(node)\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Warning: parsing empty text\n",
"Warning: parsing empty text\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 5,
"text": [
"[<Node: Systemic spread and propagation of a plant-pathogenic virus in European honeybees, Apis mellifera.>,\n",
" <Node: A Causal Analysis of Observed Declines in Managed Honey Bees (Apis mellifera).>,\n",
" <Node: Non-specific dsRNA-mediated antiviral response in the honey bee.>,\n",
" <Node: In vitro infection of pupae with Israeli acute paralysis virus suggests disturbance of transcriptional homeostasis in honey bees (Apis mellifera).>,\n",
" <Node: Nosema ceranae has been present in Brazil for more than three decades infecting Africanized honey bees.>,\n",
" <Node: Return of the natives.>,\n",
" <Node: The road to pollinator health.>,\n",
" <Node: Do the honeybee pathogens Nosema ceranae and deformed wing virus act synergistically?>,\n",
" <Node: Essential oil from Eupatorium buniifolium leaves as potential varroacide.>,\n",
" <Node: Animal behaviour: brain food.>,\n",
" <Node: What's the buzz?>,\n",
" <Node: Nosema ceranae induced mortality in honey bees (Apis mellifera) depends on infection methods.>,\n",
" <Node: Rates of honeybee sting hypersensitivity in San Antonio during honeybee colony collapse disorder.>,\n",
" <Node: Healing power of honey.>,\n",
" <Node: Honey constituents up-regulate detoxification and immunity genes in the western honey bee Apis mellifera.>,\n",
" <Node: Nosema spp. infection and its negative effects on honey bees (Apis mellifera iberiensis) at the colony level.>,\n",
" <Node: Flight behavior and pheromone changes associated to Nosema ceranae infection of honey bee workers (Apis mellifera) in field conditions.>,\n",
" <Node: Clinical signs of deformed wing virus infection are predictive markers for honey bee colony losses.>,\n",
" <Node: The microsporidian parasites Nosema ceranae and Nosema apis are widespread in honeybee (Apis mellifera) colonies across Scotland.>,\n",
" <Node: A potential link among biogenic amines-based pesticides, learning and memory, and colony collapse disorder: a unique hypothesis.>,\n",
" <Node: Comment on \"A common pesticide decreases foraging success and survival in honey bees\".>,\n",
" <Node: Idiopathic brood disease syndrome and queen events as precursors of colony mortality in migratory beekeeping operations in the eastern United States.>,\n",
" <Node: Pathogen webs in collapsing honey bee colonies.>,\n",
" <Node: Asymptomatic presence of Nosema spp. in Spanish commercial apiaries.>,\n",
" <Node: Synergistic parasite-pathogen interactions mediated by host immunity can drive the collapse of honeybee colonies.>,\n",
" <Node: Global honey bee viral landscape altered by a parasitic mite.>,\n",
" <Node: Paratransgenesis: an approach to improve colony health and molecular insight in honey bees (Apis mellifera)?>,\n",
" <Node: Agriculture. Field research on bees raises concern about low-dose pesticides.>,\n",
" <Node: A common pesticide decreases foraging success and survival in honey bees.>,\n",
" <Node: Symbionts as major modulators of insect health: lactic acid bacteria and honeybees.>,\n",
" <Node: The habitat disruption induces immune-suppression and oxidative stress in honey bees.>,\n",
" <Node: Predictive markers of honey bee colony collapse.>,\n",
" <Node: Colony collapse disorder in Europe.>,\n",
" <Node: Pesticide exposure in honey bees results in increased levels of the gut pathogen Nosema.>,\n",
" <Node: Bromenshenk et al (PLoS One, 2011, 5(10):e13181) have claimed to have found peptides from an invertebrate iridovirus in bees.>,\n",
" <Node: A new threat to honey bees, the parasitic phorid fly Apocephalus borealis.>,\n",
" <Node: Detection of pesticides in active and depopulated beehives in Uruguay.>,\n",
" <Node: Bees brought to their knees: microbes affecting honey bee health.>,\n",
" <Node: From elephants to bees.>,\n",
" <Node: Evidence of a novel immune responsive protein in the Hymenoptera.>,\n",
" <Node: Lack of evidence for an association between Iridovirus and colony collapse disorder.>,\n",
" <Node: First report of Israeli acute paralysis virus in asymptomatic hives of Argentina.>,\n",
" <Node: Temporal analysis of the honey bee microbiome reveals four novel viruses and seasonal prevalence of known viruses, Nosema, and Crithidia.>,\n",
" <Node: A quantitative model of honey bee colony population dynamics.>,\n",
" <Node: Detection of honey bee (Apis mellifera) viruses with an oligonucleotide microarray.>,\n",
" <Node: Interpretation of data underlying the link between colony collapse disorder (CCD) and an invertebrate iridescent virus.>,\n",
" <Node: RNA viruses in hymenopteran pollinators: evidence of inter-Taxa virus transmission via pollen and potential impact on non-Apis hymenopteran species.>,\n",
" <Node: Large-scale field application of RNAi technology reducing Israeli acute paralysis virus disease in honey bees (Apis mellifera, Hymenoptera: Apidae).>,\n",
" <Node: Weighing risk factors associated with bee colony collapse disorder by classification and regression tree analysis.>,\n",
" <Node: Iridovirus and microsporidian linked to honey bee colony decline.>,\n",
" <Node: Varroa destructor is an effective vector of Israeli acute paralysis virus in the honeybee, Apis mellifera.>,\n",
" <Node: The plight of the bees.>,\n",
" <Node: Sudden deaths and colony population decline in Greek honey bee colonies.>,\n",
" <Node: Colony Collapse Disorder in context.>,\n",
" <Node: Vanishing honey bees: Is the dying of adult worker bees a consequence of short telomeres and premature aging?>,\n",
" <Node: Prevention of Chinese sacbrood virus infection in Apis cerana using RNA interference.>,\n",
" <Node: Refined methodology for the determination of neonicotinoid pesticides and their metabolites in honey bees and bee products by liquid chromatography-tandem mass spectrometry (LC-MS/MS).>,\n",
" <Node: Ecology. Clarity on honey bee collapse?>,\n",
" <Node: Medium for development of bee cell cultures (Apis mellifera: Hymenoptera: Apidae).>,\n",
" <Node: Bee mystery continues.>,\n",
" <Node: Deformed wing virus.>,\n",
" <Node: The Acute bee paralysis virus-Kashmir bee virus-Israeli acute paralysis virus complex.>,\n",
" <Node: Translocation of neonicotinoid insecticides from coated seeds to seedling guttation drops: a novel way of intoxication for bees.>,\n",
" <Node: Deformed wing virus implicated in overwintering honeybee colony losses.>,\n",
" <Node: Changes in transcript abundance relating to colony collapse disorder in honey bees (Apis mellifera).>,\n",
" <Node: Colony collapse disorder: a descriptive study.>,\n",
" <Node: A PCR method of detecting American Foulbrood (Paenibacillus larvae) in winter beehive wax debris.>,\n",
" <Node: Honeybee colony collapse due to Nosema ceranae in professional apiaries.>,\n",
" <Node: IAPV, a bee-affecting virus associated with Colony Collapse Disorder can be silenced by dsRNA ingestion.>,\n",
" <Node: Energetic stress in the honeybee Apis mellifera from Nosema ceranae infection.>,\n",
" <Node: A survey of honey bee colony losses in the U.S., fall 2007 to spring 2008.>,\n",
" <Node: A qualitative model of mortality in honey bee (Apis mellifera) colonies infested with tracheal mites (Acarapis woodi).>,\n",
" <Node: First detection of Israeli acute paralysis virus (IAPV) in France, a dicistrovirus affecting honeybees (Apis mellifera).>,\n",
" <Node: Does pathogen spillover from commercially reared bumble bees threaten wild pollinators?>,\n",
" <Node: How natural infection by Nosema ceranae causes honeybee colony collapse.>,\n",
" <Node: Genetic analysis of Israel acute paralysis virus: distinct clusters are circulating in the United States.>,\n",
" <Node: The latest buzz about colony collapse disorder.>,\n",
" <Node: Native bees provide insurance against ongoing honey bee losses.>,\n",
" <Node: A metagenomic survey of microbes in honey bee colony collapse disorder.>,\n",
" <Node: Vertical-transmission routes for deformed wing virus of honeybees (Apis mellifera).>,\n",
" <Node: RT-PCR analysis of Deformed wing virus in honeybees (Apis mellifera) and mites (Varroa destructor).>]"
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from node.models import Ngram"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" Ng"
]
},
{
"cell_type": "raw",
"metadata": {},
"source": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"n = node.children.first()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for node_ngram in n.node_ngram_set.all():\n",
" print(node_ngram.ngram.terms)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n"
]
}
],
"prompt_number": 26
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
\ No newline at end of file
......@@ -23,9 +23,9 @@ PROJECT_PATH = os.path.abspath(PROJECT_PATH)
SECRET_KEY = 'bt)3n9v&a02cu7^^=+u_t2tmn8ex5fvx8$x4r*j*pb1yawd+rz'
# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = False
DEBUG = True
TEMPLATE_DEBUG = False
TEMPLATE_DEBUG = True
TEMPLATE_DIRS = (
......
Install the requirements
------------------------
1) Install all the Debian packages listed in dependances.deb
(also: sudo apt-get install postgresql-contrib)
2) Create a virtual environment with pyvenv: apt-get install python-virtualenv
3) Type: source [your virtual environment directory]/bin/activate
4) Do your work!
5) Type: deactivate
Configure stuff
---------------
1) ln -s [the project folder] /srv/gargantext
2) ln -s [your folder for tree tagger] [the project folder]/parsing/Tagger/treetagger
Warning: for ln, path has to be absolute!
In PostgreSQL
-------------
1) Ensure postgres is started: sudo /etc/init.d/postgresql start
2) sudo su postgres
3) psql
4) CREATE USER alexandre WITH PASSWORD 'C8kdcUrAQy66U';
(see gargantext_web/settings.py, DATABASES = { ... })
5) CREATE DATABASE gargandb WITH OWNER alexandre;
6) Ctrl + D
7) psql gargandb
8) CREATE EXTENSION hstore;
9) Ctrl + D
Populate the database
---------------------
python manage.py syncdb
Start the Python Notebook server
--------------------------------
1) In Pyvenv: python manage.py shell_plus --notebook
2) Work from your browser!
Start the Django server
-----------------------
python manage.py runserver
\ No newline at end of file
......@@ -2,7 +2,6 @@
psql -d gargandb -f init.sql
sleep 2
./manage.py syncdb
......
import collections
from node.models import Node, NodeType, Language, Ngram, Node_Ngram
from parsing.NgramsExtractors import *
# This allows the fast retrieval of ngram ids
# from the cache instead of using the database for every call
class NgramCache:
"""
This allows the fast retrieval of ngram ids
from the cache instead of using the database for every call
"""
def __init__(self, language):
self._cache = dict()
......@@ -13,9 +16,9 @@ class NgramCache:
terms = terms.strip().lower()
if terms not in self._cache:
try:
ngram = NGram.get(terms=terms, language=self._language)
ngram = Ngram.get(terms=terms, language=self._language)
except:
ngram = NGram(terms=terms, n=len(terms), language=self._language)
ngram = Ngram(terms=terms, n=len(terms), language=self._language)
ngram.save()
self._cache[terms] = ngram
return self._cache[terms]
......@@ -43,12 +46,11 @@ class FileParser:
self._ngramcaches = NgramCaches()
# extractors
self._extractors = dict()
self._document_nodetype = NodeType.get(name='Document')
with Language.objects.all() as languages:
self._languages_iso2 = {language.iso2.lower(): language for language in Language}
self._languages_iso3 = {language.iso3.lower(): language for language in Language}
# ...and parse!
self.parse()
self._document_nodetype = NodeType.objects.get(name='Document')
languages = Language.objects.all()
self._languages_iso2 = {language.iso2.lower(): language for language in languages}
self._languages_iso3 = {language.iso3.lower(): language for language in languages}
#self.parse()
"""Extract the ngrams from a given text.
"""
......@@ -65,45 +67,54 @@ class FileParser:
extractor = self._extractors[language]
# Extract the ngrams
if extractor:
tokens = []
for ngram in extractor.extract_ngrams(text):
ngram_text = ' '.join([token for token, tag in ngram])
tokens.append(ngram_text)
return collections.Counter(
[token for token, tag in extractor.extract_ngrams(text)]
# [token for token, tag in extractor.extract_ngrams(text)]
tokens
)
else:
return dict()
#TODO
# * make it possible to tag and parse separately
# * only tags some data (only titles, titles & abstracts, some chapters...)
"""Add a document to the database.
"""
def create_document(self, parentNode, title, contents, language, metadata, guid=None):
# create or retrieve a resource for that document, based on its user id
if guid is None:
resource = Resource(guid=guid)
else:
try:
resource = Resource.get(guid=guid)
except:
resource = Resource(guid=guid)
# If the parent node already has a child with this resource, pass
# (is it a good thing?)
if parentNode.descendants().filter(resource=resource).exists():
return None
# if guid is None:
# resource = Resource(guid=guid)
# else:
# try:
# resource = Resource.get(guid=guid)
# except:
# resource = Resource(guid=guid)
# # If the parent node already has a child with this resource, pass
# # (is it a good thing?)
# if parentNode.descendants().filter(resource=resource).exists():
# return None
# create the document itself
childNode = Node(
user = parentNode.pk,
user = parentNode.user,
type = self._document_nodetype,
name = title,
language = language,
metadata = metadata,
resource = resource,
#resource = resource,
parent = parentNode
)
childNode.save()
# parse it!
ngrams = self.extract_ngrams(contents, language)
# we are already in a transaction, so no use doing another one (or is there?)
ngramcache = self._ngramcaches[language]
for terms, occurences in ngrams.items():
ngram_text = ' '.join([term[0] for term in terms])
ngram = ngramcache[ngram_text]
ngram = ngramcache[terms]
Node_Ngram(
node = childNode,
ngram = ngram,
......@@ -111,7 +122,7 @@ class FileParser:
).save()
# return the created document
return document
return childNode
"""Useful method to detect the document encoding.
Not sure it should be here actually.
......
from django.db import transaction
from lxml import etree
from parsing.FileParsers.FileParser import FileParser
from parsing.NgramsExtractors import *
import zipfile
import datetime
class PubmedFileParser(FileParser):
def parse(self, parentNode):
def parse(self, parentNode, tag=True):
# open the file as XML
xml_parser = etree.XMLParser(resolve_entities=False, recover=True)
xml = etree.parse(self._file, parser=xml_parser)
documents = []
with transaction.atomic():
with zipfile.ZipFile(self._file) as zipFile:
for filename in zipFile.namelist():
file = zipFile.open(filename, "r")
# print(file.read())
xml = etree.parse(file, parser=xml_parser)
# parse all the articles, one by one
# all database operations should be performed within one transaction
xml_articles = xml.findall('PubmedArticle')
documents = []
with transaction.atomic():
for xml_article in xml_articles:
# extract data from the document
date_year = int(xml_article.find('MedlineCitation/DateCreated/Year').text)
date_month = int(xml_article.find('MedlineCitation/DateCreated/Month').text)
date_day = int(xml_article.find('MedlineCitation/DateCreated/Day').text)
metadata = {
# other metadata should also be included:
# authors, submission date, etc.
"date_pub": datetime.date(year, month, day),
"journal": xml_article.find('MedlineCitation/Article/Journal/Title').text,
"title": xml_article.find('MedlineCitation/Article/ArticleTitle').text,
"language_iso3": xml_article.find('MedlineCitation/Article/Language').text,
"doi": xml_article.find('PubmedData/ArticleIdList/ArticleId[type=doi]').text
"date_pub": '%s-%s-%s' % (date_year, date_month, date_day),
}
contents = xml_article.find('MedlineCitation/Article/Abstract/AbstractText').text
metadata_path = {
"journal" : 'MedlineCitation/Article/Journal/Title',
"title" : 'MedlineCitation/Article/ArticleTitle',
"language_iso3" : 'MedlineCitation/Article/Language',
"doi" : 'PubmedData/ArticleIdList/ArticleId[type=doi]',
"abstract" : 'MedlineCitation/Article/Abstract/AbstractText'
}
for key, path in metadata_path.items():
try:
node = xml_article.find(path)
metadata[key] = node.text
except:
metadata[key] = ""
contents = metadata["abstract"]
# create the document in the database
yield self.create_document(
parentNode = parentNode,
document = self.create_document(
parentNode = parentNode,
title = metadata["title"],
contents = contents,
language = self._languages_iso3[metadata["language"].lower()],
language = self._languages_iso3[metadata["language_iso3"].lower()],
metadata = metadata,
guid = metadata["doi"],
#guid = metadata["doi"],
)
if document:
documents.append(document)
......
from NgramsExtractors.NgramsExtractor import NgramsExtractor
from Taggers import NltkTagger
from parsing.NgramsExtractors.NgramsExtractor import NgramsExtractor
from parsing.Taggers import NltkTagger
class EnglishNgramsExtractor(NgramsExtractor):
......
from NgramsExtractors.NgramsExtractor import NgramsExtractor
from Taggers import TreeTagger
from parsing.NgramsExtractors.NgramsExtractor import NgramsExtractor
from parsing.Taggers import TreeTagger
class FrenchNgramsExtractor(NgramsExtractor):
......
from Taggers import Tagger
from parsing.Taggers import Tagger
import nltk
......@@ -17,9 +17,8 @@ class NgramsExtractor:
def __del__(self):
self.stop()
def start(self):
self.tagger = Tagger
self.tagger = Tagger()
def stop(self):
pass
......@@ -40,7 +39,7 @@ class NgramsExtractor:
except:
print("Problem while parsing rule '%s'" % (self._rule, ))
pass
return iter(result)
return result
from NgramsExtractors.FrenchNgramsExtractor import FrenchNgramsExtractor
from NgramsExtractors.EnglishNgramsExtractor import EnglishNgramsExtractor
\ No newline at end of file
#from NgramsExtractors.FrenchNgramsExtractor import FrenchNgramsExtractor
#from NgramsExtractors.EnglishNgramsExtractor import EnglishNgramsExtractor
from parsing.NgramsExtractors.FrenchNgramsExtractor import FrenchNgramsExtractor
from parsing.NgramsExtractors.EnglishNgramsExtractor import EnglishNgramsExtractor
from Taggers.Tagger import Tagger
from parsing.Taggers.Tagger import Tagger
import nltk
......
from Taggers.Tagger import Tagger
from parsing.Taggers.Tagger import Tagger
import subprocess
import threading
......
from Taggers.NltkTagger import NltkTagger
from Taggers.TreeTagger import TreeTagger
from parsing.Taggers.NltkTagger import NltkTagger
from parsing.Taggers.TreeTagger import TreeTagger
......@@ -2,21 +2,21 @@ from NgramsExtractors import *
from Taggers import *
#texts = [
# "This is quite a simple test.",
# "Forman Brown (1901–1996) was one of the world's leaders in puppet theatre in his day, as well as an important early gay novelist. He was a member of the Yale Puppeteers and the driving force behind Turnabout Theatre. He was born in Otsego, Michigan, in 1901 and died in 1996, two days after his 95th birthday. Brown briefly taught at North Carolina State College, followed by an extensive tour of Europe.",
# "James Patrick (born c. 1940) is the pseudonym of a Scottish sociologist, which he used to publish a book A Glasgow Gang Observed. It attracted some attention in Scotland when it was published in 1973. It was based on research he had done in 1966, when he was aged 26. At that time he was working as a teacher in an Approved School, a Scottish reformatory. One gang member in the school, \"Tim Malloy\" (born 1950, also a pseudonym and a generic term for a Glasgow Catholic), agreed to infiltrate him into his gang in Maryhill in Glasgow. Patrick spent four months as a gang member, observing their behaviour.",
#]
#tagger = NltkTagger()
#extractor = EnglishNgramsExtractor()
#
texts = [
"This is quite a simple test.",
"Forman Brown (1901–1996) was one of the world's leaders in puppet theatre in his day, as well as an important early gay novelist. He was a member of the Yale Puppeteers and the driving force behind Turnabout Theatre. He was born in Otsego, Michigan, in 1901 and died in 1996, two days after his 95th birthday. Brown briefly taught at North Carolina State College, followed by an extensive tour of Europe.",
"James Patrick (born c. 1940) is the pseudonym of a Scottish sociologist, which he used to publish a book A Glasgow Gang Observed. It attracted some attention in Scotland when it was published in 1973. It was based on research he had done in 1966, when he was aged 26. At that time he was working as a teacher in an Approved School, a Scottish reformatory. One gang member in the school, \"Tim Malloy\" (born 1950, also a pseudonym and a generic term for a Glasgow Catholic), agreed to infiltrate him into his gang in Maryhill in Glasgow. Patrick spent four months as a gang member, observing their behaviour.",
"La saison 1921-1922 du Foot-Ball Club Juventus est la vingtième de l'histoire du club, créé vingt-cinq ans plus tôt en 1897. La société turinoise qui fête cette année son 25e anniversaire prend part à l'édition du championnat dissident d'Italie de la CCI (appelé alors la Première division), la dernière édition d'une compétition annuelle de football avant l'ère fasciste de Mussolini.",
"Le terme oblong désigne une forme qui est plus longue que large et dont les angles sont arrondis. En langage bibliographique, oblong signifie un format dont la largeur excède la hauteur. Ce qui correspond au format paysage en termes informatiques et \"à l'italienne\", pour l'imprimerie.",
"Les sanglots longs des violons de l'automne bercent mon coeur d'une langueur monotone.",
]
tagger = NltkTagger()
extractor = EnglishNgramsExtractor()
# texts = [
# "La saison 1921-1922 du Foot-Ball Club Juventus est la vingtième de l'histoire du club, créé vingt-cinq ans plus tôt en 1897. La société turinoise qui fête cette année son 25e anniversaire prend part à l'édition du championnat dissident d'Italie de la CCI (appelé alors la Première division), la dernière édition d'une compétition annuelle de football avant l'ère fasciste de Mussolini.",
# "Le terme oblong désigne une forme qui est plus longue que large et dont les angles sont arrondis. En langage bibliographique, oblong signifie un format dont la largeur excède la hauteur. Ce qui correspond au format paysage en termes informatiques et \"à l'italienne\", pour l'imprimerie.",
# "Les sanglots longs des violons de l'automne bercent mon coeur d'une langueur monotone.",
# ]
# tagger = TreeTagger()
# extractor = FrenchNgramsExtractor()
tagger = TreeTagger()
extractor = FrenchNgramsExtractor()
for text in texts:
......
......@@ -6,4 +6,4 @@ node = Node.objects.get(name="PubMed corpus")
parser = parsing.Parser()
parser.parse_node_fichier(node)
#parser.parse_node_fichier(node)
{
"metadata": {
"name": "",
"signature": "sha256:3345ac991b0346b1dfd82386fdc2a59f39b2de9bf32d03ddfbeb565927cfe7ab"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from node.models import Node, NodeType, Language\n",
"import parsing\n",
"from parsing.FileParsers import *"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#from node.models import Language\n",
"#import pycountry\n",
"#for lang in pycountry.languages:\n",
"# try:\n",
"# Language(iso2=lang.alpha2, iso3=lang.terminology, fullname=lang.name, implemented=1).save()\n",
"# except:\n",
"# pass\n"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"node = Node.objects.get(name=\"PubMed corpus\")"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"fileparser = PubmedFileParser.PubmedFileParser(file='/var/www/gargantext/media/' + node.fichier.name)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"fileparser.parse(node)\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Warning: parsing empty text\n",
"Warning: parsing empty text\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 5,
"text": [
"[<Node: Systemic spread and propagation of a plant-pathogenic virus in European honeybees, Apis mellifera.>,\n",
" <Node: A Causal Analysis of Observed Declines in Managed Honey Bees (Apis mellifera).>,\n",
" <Node: Non-specific dsRNA-mediated antiviral response in the honey bee.>,\n",
" <Node: In vitro infection of pupae with Israeli acute paralysis virus suggests disturbance of transcriptional homeostasis in honey bees (Apis mellifera).>,\n",
" <Node: Nosema ceranae has been present in Brazil for more than three decades infecting Africanized honey bees.>,\n",
" <Node: Return of the natives.>,\n",
" <Node: The road to pollinator health.>,\n",
" <Node: Do the honeybee pathogens Nosema ceranae and deformed wing virus act synergistically?>,\n",
" <Node: Essential oil from Eupatorium buniifolium leaves as potential varroacide.>,\n",
" <Node: Animal behaviour: brain food.>,\n",
" <Node: What's the buzz?>,\n",
" <Node: Nosema ceranae induced mortality in honey bees (Apis mellifera) depends on infection methods.>,\n",
" <Node: Rates of honeybee sting hypersensitivity in San Antonio during honeybee colony collapse disorder.>,\n",
" <Node: Healing power of honey.>,\n",
" <Node: Honey constituents up-regulate detoxification and immunity genes in the western honey bee Apis mellifera.>,\n",
" <Node: Nosema spp. infection and its negative effects on honey bees (Apis mellifera iberiensis) at the colony level.>,\n",
" <Node: Flight behavior and pheromone changes associated to Nosema ceranae infection of honey bee workers (Apis mellifera) in field conditions.>,\n",
" <Node: Clinical signs of deformed wing virus infection are predictive markers for honey bee colony losses.>,\n",
" <Node: The microsporidian parasites Nosema ceranae and Nosema apis are widespread in honeybee (Apis mellifera) colonies across Scotland.>,\n",
" <Node: A potential link among biogenic amines-based pesticides, learning and memory, and colony collapse disorder: a unique hypothesis.>,\n",
" <Node: Comment on \"A common pesticide decreases foraging success and survival in honey bees\".>,\n",
" <Node: Idiopathic brood disease syndrome and queen events as precursors of colony mortality in migratory beekeeping operations in the eastern United States.>,\n",
" <Node: Pathogen webs in collapsing honey bee colonies.>,\n",
" <Node: Asymptomatic presence of Nosema spp. in Spanish commercial apiaries.>,\n",
" <Node: Synergistic parasite-pathogen interactions mediated by host immunity can drive the collapse of honeybee colonies.>,\n",
" <Node: Global honey bee viral landscape altered by a parasitic mite.>,\n",
" <Node: Paratransgenesis: an approach to improve colony health and molecular insight in honey bees (Apis mellifera)?>,\n",
" <Node: Agriculture. Field research on bees raises concern about low-dose pesticides.>,\n",
" <Node: A common pesticide decreases foraging success and survival in honey bees.>,\n",
" <Node: Symbionts as major modulators of insect health: lactic acid bacteria and honeybees.>,\n",
" <Node: The habitat disruption induces immune-suppression and oxidative stress in honey bees.>,\n",
" <Node: Predictive markers of honey bee colony collapse.>,\n",
" <Node: Colony collapse disorder in Europe.>,\n",
" <Node: Pesticide exposure in honey bees results in increased levels of the gut pathogen Nosema.>,\n",
" <Node: Bromenshenk et al (PLoS One, 2011, 5(10):e13181) have claimed to have found peptides from an invertebrate iridovirus in bees.>,\n",
" <Node: A new threat to honey bees, the parasitic phorid fly Apocephalus borealis.>,\n",
" <Node: Detection of pesticides in active and depopulated beehives in Uruguay.>,\n",
" <Node: Bees brought to their knees: microbes affecting honey bee health.>,\n",
" <Node: From elephants to bees.>,\n",
" <Node: Evidence of a novel immune responsive protein in the Hymenoptera.>,\n",
" <Node: Lack of evidence for an association between Iridovirus and colony collapse disorder.>,\n",
" <Node: First report of Israeli acute paralysis virus in asymptomatic hives of Argentina.>,\n",
" <Node: Temporal analysis of the honey bee microbiome reveals four novel viruses and seasonal prevalence of known viruses, Nosema, and Crithidia.>,\n",
" <Node: A quantitative model of honey bee colony population dynamics.>,\n",
" <Node: Detection of honey bee (Apis mellifera) viruses with an oligonucleotide microarray.>,\n",
" <Node: Interpretation of data underlying the link between colony collapse disorder (CCD) and an invertebrate iridescent virus.>,\n",
" <Node: RNA viruses in hymenopteran pollinators: evidence of inter-Taxa virus transmission via pollen and potential impact on non-Apis hymenopteran species.>,\n",
" <Node: Large-scale field application of RNAi technology reducing Israeli acute paralysis virus disease in honey bees (Apis mellifera, Hymenoptera: Apidae).>,\n",
" <Node: Weighing risk factors associated with bee colony collapse disorder by classification and regression tree analysis.>,\n",
" <Node: Iridovirus and microsporidian linked to honey bee colony decline.>,\n",
" <Node: Varroa destructor is an effective vector of Israeli acute paralysis virus in the honeybee, Apis mellifera.>,\n",
" <Node: The plight of the bees.>,\n",
" <Node: Sudden deaths and colony population decline in Greek honey bee colonies.>,\n",
" <Node: Colony Collapse Disorder in context.>,\n",
" <Node: Vanishing honey bees: Is the dying of adult worker bees a consequence of short telomeres and premature aging?>,\n",
" <Node: Prevention of Chinese sacbrood virus infection in Apis cerana using RNA interference.>,\n",
" <Node: Refined methodology for the determination of neonicotinoid pesticides and their metabolites in honey bees and bee products by liquid chromatography-tandem mass spectrometry (LC-MS/MS).>,\n",
" <Node: Ecology. Clarity on honey bee collapse?>,\n",
" <Node: Medium for development of bee cell cultures (Apis mellifera: Hymenoptera: Apidae).>,\n",
" <Node: Bee mystery continues.>,\n",
" <Node: Deformed wing virus.>,\n",
" <Node: The Acute bee paralysis virus-Kashmir bee virus-Israeli acute paralysis virus complex.>,\n",
" <Node: Translocation of neonicotinoid insecticides from coated seeds to seedling guttation drops: a novel way of intoxication for bees.>,\n",
" <Node: Deformed wing virus implicated in overwintering honeybee colony losses.>,\n",
" <Node: Changes in transcript abundance relating to colony collapse disorder in honey bees (Apis mellifera).>,\n",
" <Node: Colony collapse disorder: a descriptive study.>,\n",
" <Node: A PCR method of detecting American Foulbrood (Paenibacillus larvae) in winter beehive wax debris.>,\n",
" <Node: Honeybee colony collapse due to Nosema ceranae in professional apiaries.>,\n",
" <Node: IAPV, a bee-affecting virus associated with Colony Collapse Disorder can be silenced by dsRNA ingestion.>,\n",
" <Node: Energetic stress in the honeybee Apis mellifera from Nosema ceranae infection.>,\n",
" <Node: A survey of honey bee colony losses in the U.S., fall 2007 to spring 2008.>,\n",
" <Node: A qualitative model of mortality in honey bee (Apis mellifera) colonies infested with tracheal mites (Acarapis woodi).>,\n",
" <Node: First detection of Israeli acute paralysis virus (IAPV) in France, a dicistrovirus affecting honeybees (Apis mellifera).>,\n",
" <Node: Does pathogen spillover from commercially reared bumble bees threaten wild pollinators?>,\n",
" <Node: How natural infection by Nosema ceranae causes honeybee colony collapse.>,\n",
" <Node: Genetic analysis of Israel acute paralysis virus: distinct clusters are circulating in the United States.>,\n",
" <Node: The latest buzz about colony collapse disorder.>,\n",
" <Node: Native bees provide insurance against ongoing honey bee losses.>,\n",
" <Node: A metagenomic survey of microbes in honey bee colony collapse disorder.>,\n",
" <Node: Vertical-transmission routes for deformed wing virus of honeybees (Apis mellifera).>,\n",
" <Node: RT-PCR analysis of Deformed wing virus in honeybees (Apis mellifera) and mites (Varroa destructor).>]"
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from node.models import Ngram"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" Ng"
]
},
{
"cell_type": "raw",
"metadata": {},
"source": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"n = node.children.first()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for node_ngram in n.node_ngram_set.all():\n",
" print(node_ngram.ngram.terms)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n",
"Ngram object\n"
]
}
],
"prompt_number": 26
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment