Commit 3abddbc1 authored by Mathieu Rodic's avatar Mathieu Rodic

Merge branch 'master' of ssh://delanoe.org:1979/gargantext

parents a0adfd88 21417615
__pycache__/
parsing/Taggers/treetagger/
{ {
"metadata": { "metadata": {
"name": "", "name": "",
"signature": "sha256:7c80ed9f4b088e13444efb451a1ee46e5727247be14aaf30ddf0236a49ac461b" "signature": "sha256:0383da299037d14e20f4be4cd7703cfddbdf0f947ee8f93f051f2ed6b7fe0cb5"
}, },
"nbformat": 3, "nbformat": 3,
"nbformat_minor": 0, "nbformat_minor": 0,
"worksheets": [] "worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pycountry\n",
"from node.models import Language"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pycountry\n",
"\n",
"for language in pycountry.languages:\n",
" try:\n",
" implemented = 1 if language.alpha2 in ['en', 'fr'] else 0\n",
" Language(iso2=language.alpha2, iso3=language.terminology, fullname=language.name, implemented=implemented).save()\n",
" except:\n",
" pass\n",
" "
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Language.objects.filter(implemented=1)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 3,
"text": [
"[<Language: English>, <Language: French>]"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for language in Language.objects.all():\n",
" print(language)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Afar\n",
"Abkhazian\n",
"Afrikaans\n",
"Akan\n",
"Albanian\n",
"Amharic\n",
"Arabic\n",
"Aragonese\n",
"Armenian\n",
"Assamese\n",
"Avaric\n",
"Avestan\n",
"Aymara\n",
"Azerbaijani\n",
"Bashkir\n",
"Bambara\n",
"Basque\n",
"Belarusian\n",
"Bengali\n",
"Bihari languages\n",
"Bislama\n",
"Bosnian\n",
"Breton\n",
"Bulgarian\n",
"Burmese\n",
"Catalan; Valencian\n",
"Chamorro\n",
"Chechen\n",
"Chinese\n",
"Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic\n",
"Chuvash\n",
"Cornish\n",
"Corsican\n",
"Cree\n",
"Czech\n",
"Danish\n",
"Divehi; Dhivehi; Maldivian\n",
"Dutch; Flemish\n",
"Dzongkha\n",
"English\n",
"Esperanto\n",
"Estonian\n",
"Ewe\n",
"Faroese\n",
"Fijian\n",
"Finnish\n",
"French\n",
"Western Frisian\n",
"Fulah\n",
"Georgian\n",
"German\n",
"Gaelic; Scottish Gaelic\n",
"Irish\n",
"Galician\n",
"Manx\n",
"Greek, Modern (1453-)\n",
"Guarani\n",
"Gujarati\n",
"Haitian; Haitian Creole\n",
"Hausa\n",
"Hebrew\n",
"Herero\n",
"Hindi\n",
"Hiri Motu\n",
"Croatian\n",
"Hungarian\n",
"Igbo\n",
"Icelandic\n",
"Ido\n",
"Sichuan Yi; Nuosu\n",
"Inuktitut\n",
"Interlingue; Occidental\n",
"Interlingua (International Auxiliary Language Association)\n",
"Indonesian\n",
"Inupiaq\n",
"Italian\n",
"Javanese\n",
"Japanese\n",
"Kalaallisut; Greenlandic\n",
"Kannada\n",
"Kashmiri\n",
"Kanuri\n",
"Kazakh\n",
"Central Khmer\n",
"Kikuyu; Gikuyu\n",
"Kinyarwanda\n",
"Kirghiz; Kyrgyz\n",
"Komi\n",
"Kongo\n",
"Korean\n",
"Kuanyama; Kwanyama\n",
"Kurdish\n",
"Lao\n",
"Latin\n",
"Latvian\n",
"Limburgan; Limburger; Limburgish\n",
"Lingala\n",
"Lithuanian\n",
"Luxembourgish; Letzeburgesch\n",
"Luba-Katanga\n",
"Ganda\n",
"Macedonian\n",
"Marshallese\n",
"Malayalam\n",
"Maori\n",
"Marathi\n",
"Malay\n",
"Malagasy\n",
"Maltese\n",
"Moldavian; Moldovan\n",
"Mongolian\n",
"Nauru\n",
"Navajo; Navaho\n",
"Ndebele, South; South Ndebele\n",
"Ndebele, North; North Ndebele\n",
"Ndonga\n",
"Nepali\n",
"Norwegian Nynorsk; Nynorsk, Norwegian\n",
"Bokm\u00e5l, Norwegian; Norwegian Bokm\u00e5l\n",
"Norwegian\n",
"Chichewa; Chewa; Nyanja\n",
"Occitan (post 1500)\n",
"Ojibwa\n",
"Oriya\n",
"Oromo\n",
"Ossetian; Ossetic\n",
"Panjabi; Punjabi\n",
"Persian\n",
"Pali\n",
"Polish\n",
"Portuguese\n",
"Pushto; Pashto\n",
"Quechua\n",
"Romansh\n",
"Romanian\n",
"Rundi\n",
"Russian\n",
"Sango\n",
"Sanskrit\n",
"Sinhala; Sinhalese\n",
"Slovak\n",
"Slovenian\n",
"Northern Sami\n",
"Samoan\n",
"Shona\n",
"Sindhi\n",
"Somali\n",
"Sotho, Southern\n",
"Spanish; Castilian\n",
"Sardinian\n",
"Serbian\n",
"Swati\n",
"Sundanese\n",
"Swahili\n",
"Swedish\n",
"Tahitian\n",
"Tamil\n",
"Tatar\n",
"Telugu\n",
"Tajik\n",
"Tagalog\n",
"Thai\n",
"Tibetan\n",
"Tigrinya\n",
"Tonga (Tonga Islands)\n",
"Tswana\n",
"Tsonga\n",
"Turkmen\n",
"Turkish\n",
"Twi\n",
"Uighur; Uyghur\n",
"Ukrainian\n",
"Urdu\n",
"Uzbek\n",
"Venda\n",
"Vietnamese\n",
"Volap\u00fck\n",
"Welsh\n",
"Walloon\n",
"Wolof\n",
"Xhosa\n",
"Yiddish\n",
"Yoruba\n",
"Zhuang; Chuang\n",
"Zulu\n"
]
}
],
"prompt_number": 11
}
],
"metadata": {}
}
]
} }
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
{ {
"metadata": { "metadata": {
"name": "", "name": "",
"signature": "sha256:e0c3b2efe7c205a29dc4e028b10ffb7b9d0569f35c4b426febdf523069abffdb" "signature": "sha256:d03d3f5dbf9a1dbfc43deb947718f31529d3d67b0901f8e743b23ce28a9f3205"
}, },
"nbformat": 3, "nbformat": 3,
"nbformat_minor": 0, "nbformat_minor": 0,
...@@ -12,11 +12,7 @@ ...@@ -12,11 +12,7 @@
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [ "input": [
"from pprint import pprint\n", "from parsing.NgramsExtractors import NgramsExtractorsCache"
"from node.models import Node, NodeType, Language, Ngram\n",
"from django.contrib.auth.models import User\n",
"import parsing\n",
"from parsing.FileParsers import *"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
...@@ -27,21 +23,7 @@ ...@@ -27,21 +23,7 @@
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [ "input": [
"# Define user\n", "c = NgramsExtractorsCache()"
"try:\n",
" user = User.objects.get(username='Mat')\n",
"except:\n",
" user = User(username='Mat', password='0123', email='mathieu@rodic.fr')\n",
" user.save()\n",
"\n",
"# Define document types\n",
"nodetypes = {}\n",
"for name in ['Corpus', 'Document']:\n",
" try:\n",
" nodetypes[name] = NodeType.objects.get(name=name)\n",
" except:\n",
" nodetypes[name] = NodeType(name=name)\n",
" nodetypes[name].save()"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
...@@ -52,70 +34,159 @@ ...@@ -52,70 +34,159 @@
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [ "input": [
"Node.objects.all().delete()\n", "c[\"en\"]"
"corpus = Node(name='PubMed corpus', user=user, type=nodetypes['Corpus'])\n",
"corpus.save()"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 3,
"text": [
"<parsing.NgramsExtractors.EnglishNgramsExtractor.EnglishNgramsExtractor at 0x7fc3aa431f98>"
]
}
],
"prompt_number": 3 "prompt_number": 3
}, },
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [ "input": [
"fileparser = PubmedFileParser.PubmedFileParser(file='/home/mat/projects/gargantext/data_samples/pubmed.zip')" "c[\"fre\"]"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [] "outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 5,
"text": [
"<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f0e1fb0c978>"
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"p = c[\"fr\"]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
}, },
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [ "input": [
"fileparser.parse(corpus)\n", "p.extract_ngrams(\"En voil\u00e0 un beau parseur !\")"
"print('Ok!')"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"output_type": "stream", "metadata": {},
"stream": "stdout", "output_type": "pyout",
"prompt_number": 8,
"text": [ "text": [
"Warning: parsing empty text\n", "[[('beau', 'NN'), ('parseur', 'NN')]]"
"Warning: parsing empty text\n",
"Warning: parsing empty text"
] ]
}, }
],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"fr\"]"
],
"language": "python",
"metadata": {},
"outputs": [
{ {
"output_type": "stream", "metadata": {},
"stream": "stdout", "output_type": "pyout",
"prompt_number": 9,
"text": [ "text": [
"\n", "<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f0e1fb0c978>"
"Warning: parsing empty text\n",
"Warning: parsing empty text"
] ]
} }
] ],
"prompt_number": 9
}, },
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [ "input": [
"for node_ngram in corpus.children.first().node_ngram_set.all():\n", "c[\"french\"]"
" print(node_ngram.ngram.terms)"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [] "outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 10,
"text": [
"<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f0e1fb0c978>"
]
}
],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"german\"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 11,
"text": [
"<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f0e1fb0c978>"
]
}
],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"dutch\"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 12,
"text": [
"<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f0e1fb0c978>"
]
}
],
"prompt_number": 12
}, },
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [], "input": [
"c[\"]"
],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [] "outputs": []
......
This diff is collapsed.
{
"metadata": {
"name": "",
"signature": "sha256:0383da299037d14e20f4be4cd7703cfddbdf0f947ee8f93f051f2ed6b7fe0cb5"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pycountry\n",
"from node.models import Language"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pycountry\n",
"\n",
"for language in pycountry.languages:\n",
" try:\n",
" implemented = 1 if language.alpha2 in ['en', 'fr'] else 0\n",
" Language(iso2=language.alpha2, iso3=language.terminology, fullname=language.name, implemented=implemented).save()\n",
" except:\n",
" pass\n",
" "
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Language.objects.filter(implemented=1)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 3,
"text": [
"[<Language: English>, <Language: French>]"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for language in Language.objects.all():\n",
" print(language)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Afar\n",
"Abkhazian\n",
"Afrikaans\n",
"Akan\n",
"Albanian\n",
"Amharic\n",
"Arabic\n",
"Aragonese\n",
"Armenian\n",
"Assamese\n",
"Avaric\n",
"Avestan\n",
"Aymara\n",
"Azerbaijani\n",
"Bashkir\n",
"Bambara\n",
"Basque\n",
"Belarusian\n",
"Bengali\n",
"Bihari languages\n",
"Bislama\n",
"Bosnian\n",
"Breton\n",
"Bulgarian\n",
"Burmese\n",
"Catalan; Valencian\n",
"Chamorro\n",
"Chechen\n",
"Chinese\n",
"Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic\n",
"Chuvash\n",
"Cornish\n",
"Corsican\n",
"Cree\n",
"Czech\n",
"Danish\n",
"Divehi; Dhivehi; Maldivian\n",
"Dutch; Flemish\n",
"Dzongkha\n",
"English\n",
"Esperanto\n",
"Estonian\n",
"Ewe\n",
"Faroese\n",
"Fijian\n",
"Finnish\n",
"French\n",
"Western Frisian\n",
"Fulah\n",
"Georgian\n",
"German\n",
"Gaelic; Scottish Gaelic\n",
"Irish\n",
"Galician\n",
"Manx\n",
"Greek, Modern (1453-)\n",
"Guarani\n",
"Gujarati\n",
"Haitian; Haitian Creole\n",
"Hausa\n",
"Hebrew\n",
"Herero\n",
"Hindi\n",
"Hiri Motu\n",
"Croatian\n",
"Hungarian\n",
"Igbo\n",
"Icelandic\n",
"Ido\n",
"Sichuan Yi; Nuosu\n",
"Inuktitut\n",
"Interlingue; Occidental\n",
"Interlingua (International Auxiliary Language Association)\n",
"Indonesian\n",
"Inupiaq\n",
"Italian\n",
"Javanese\n",
"Japanese\n",
"Kalaallisut; Greenlandic\n",
"Kannada\n",
"Kashmiri\n",
"Kanuri\n",
"Kazakh\n",
"Central Khmer\n",
"Kikuyu; Gikuyu\n",
"Kinyarwanda\n",
"Kirghiz; Kyrgyz\n",
"Komi\n",
"Kongo\n",
"Korean\n",
"Kuanyama; Kwanyama\n",
"Kurdish\n",
"Lao\n",
"Latin\n",
"Latvian\n",
"Limburgan; Limburger; Limburgish\n",
"Lingala\n",
"Lithuanian\n",
"Luxembourgish; Letzeburgesch\n",
"Luba-Katanga\n",
"Ganda\n",
"Macedonian\n",
"Marshallese\n",
"Malayalam\n",
"Maori\n",
"Marathi\n",
"Malay\n",
"Malagasy\n",
"Maltese\n",
"Moldavian; Moldovan\n",
"Mongolian\n",
"Nauru\n",
"Navajo; Navaho\n",
"Ndebele, South; South Ndebele\n",
"Ndebele, North; North Ndebele\n",
"Ndonga\n",
"Nepali\n",
"Norwegian Nynorsk; Nynorsk, Norwegian\n",
"Bokm\u00e5l, Norwegian; Norwegian Bokm\u00e5l\n",
"Norwegian\n",
"Chichewa; Chewa; Nyanja\n",
"Occitan (post 1500)\n",
"Ojibwa\n",
"Oriya\n",
"Oromo\n",
"Ossetian; Ossetic\n",
"Panjabi; Punjabi\n",
"Persian\n",
"Pali\n",
"Polish\n",
"Portuguese\n",
"Pushto; Pashto\n",
"Quechua\n",
"Romansh\n",
"Romanian\n",
"Rundi\n",
"Russian\n",
"Sango\n",
"Sanskrit\n",
"Sinhala; Sinhalese\n",
"Slovak\n",
"Slovenian\n",
"Northern Sami\n",
"Samoan\n",
"Shona\n",
"Sindhi\n",
"Somali\n",
"Sotho, Southern\n",
"Spanish; Castilian\n",
"Sardinian\n",
"Serbian\n",
"Swati\n",
"Sundanese\n",
"Swahili\n",
"Swedish\n",
"Tahitian\n",
"Tamil\n",
"Tatar\n",
"Telugu\n",
"Tajik\n",
"Tagalog\n",
"Thai\n",
"Tibetan\n",
"Tigrinya\n",
"Tonga (Tonga Islands)\n",
"Tswana\n",
"Tsonga\n",
"Turkmen\n",
"Turkish\n",
"Twi\n",
"Uighur; Uyghur\n",
"Ukrainian\n",
"Urdu\n",
"Uzbek\n",
"Venda\n",
"Vietnamese\n",
"Volap\u00fck\n",
"Welsh\n",
"Walloon\n",
"Wolof\n",
"Xhosa\n",
"Yiddish\n",
"Yoruba\n",
"Zhuang; Chuang\n",
"Zulu\n"
]
}
],
"prompt_number": 11
}
],
"metadata": {}
}
]
}
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
{
"metadata": {
"name": "",
"signature": "sha256:cb74945a57bed4d2ec124c7c05411b9346c7601e8339e613ddbc37fb950c4d86"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from parsing.NgramsExtractors import NgramsExtractorsCache"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c = NgramsExtractorsCache()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"en\"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 3,
"text": [
"<parsing.NgramsExtractors.EnglishNgramsExtractor.EnglishNgramsExtractor at 0x7f8d14947c88>"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"fre\"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 4,
"text": [
"<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f8d24a97d30>"
]
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"p = c[\"fr\"]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"p.extract_ngrams(\"En voil\u00e0 un beau parseur !\")"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 6,
"text": [
"[[('beau', 'NN'), ('parseur', 'NN')]]"
]
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"fr\"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 7,
"text": [
"<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f8d24a97d30>"
]
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"french\"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 8,
"text": [
"<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f8d24a97d30>"
]
}
],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"german\"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 9,
"text": [
"<parsing.NgramsExtractors.NgramsExtractor.NgramsExtractor at 0x7f8d24a979e8>"
]
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"dutch\"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 10,
"text": [
"<parsing.NgramsExtractors.NgramsExtractor.NgramsExtractor at 0x7f8d24a979e8>"
]
}
],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"italian\"].extract_ngrams(\"Est-ce un texte ?\")"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Warning: parsing empty text\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 14,
"text": [
"[]"
]
}
],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
\ No newline at end of file
...@@ -3,23 +3,27 @@ from django.conf.urls import patterns, include, url ...@@ -3,23 +3,27 @@ from django.conf.urls import patterns, include, url
from django.contrib import admin from django.contrib import admin
from gargantext_web.views import home, projects, project, corpus from gargantext_web.views import home, projects, project, corpus
from gargantext_web.views import add_corpus from gargantext_web.views import add_corpus, delete_project, delete_corpus
admin.autodiscover() admin.autodiscover()
urlpatterns = patterns('', urlpatterns = patterns('',
# Examples:
# url(r'^$', 'gargantext_web.views.home', name='home'), # url(r'^$', 'gargantext_web.views.home', name='home'),
# url(r'^blog/', include('blog.urls')), # url(r'^blog/', include('blog.urls')),
url(r'^admin/', include(admin.site.urls)), url(r'^admin/', include(admin.site.urls)),
url(r'^login/', include(admin.site.urls)), url(r'^login/', include(admin.site.urls)),
url(r'^grappelli/', include('grappelli.urls')), # grappelli URLS url(r'^grappelli/', include('grappelli.urls')),
url(r'^$', home), url(r'^$', home),
url(r'^add/corpus/$', add_corpus),
url(r'^projects/$', projects), url(r'^projects/$', projects),
url(r'^project/(\d+)/delete/$', delete_project),
url(r'^project/(\d+)/$', project), url(r'^project/(\d+)/$', project),
url(r'^project/(\d+)/add/$', add_corpus),
url(r'^project/(\d+)/corpus/(\d+)/$', corpus), url(r'^project/(\d+)/corpus/(\d+)/$', corpus),
# Corpus deletion. The optional trailing slash keeps the historical
# slash-less URL working while also accepting the slash-terminated form
# used by every other route in this table.
url(r'^project/(\d+)/corpus/(\d+)/delete/?$', delete_corpus),
) )
from django.conf import settings from django.conf import settings
......
...@@ -6,7 +6,10 @@ from django.template.loader import get_template ...@@ -6,7 +6,10 @@ from django.template.loader import get_template
from django.template import Context from django.template import Context
#from documents.models import Project, Corpus, Document #from documents.models import Project, Corpus, Document
from node.models import Node, NodeType
from node.models import Language, DatabaseType, Resource
from node.models import Node, NodeType, Project, Corpus
from node.admin import CorpusForm, ProjectForm, ResourceForm
from django.contrib.auth.models import User from django.contrib.auth.models import User
...@@ -18,6 +21,8 @@ from django import forms ...@@ -18,6 +21,8 @@ from django import forms
from collections import defaultdict from collections import defaultdict
from parsing.FileParsers import *
# SOME FUNCTIONS # SOME FUNCTIONS
def query_to_dicts(query_string, *query_args): def query_to_dicts(query_string, *query_args):
...@@ -82,18 +87,27 @@ def projects(request): ...@@ -82,18 +87,27 @@ def projects(request):
user = request.user user = request.user
date = datetime.datetime.now() date = datetime.datetime.now()
project = NodeType.objects.get(name='Project') project_type = NodeType.objects.get(name='Project')
projects = Node.objects.filter(user=user, type_id = project.id).order_by("-date") projects = Node.objects.filter(user=user, type_id = project_type.id).order_by("-date")
number = len(projects) number = len(projects)
form = ProjectForm()
if request.method == 'POST':
# form = ProjectForm(request.POST)
# TODO : protect from sql injection here
name = str(request.POST['name'])
if name != "" :
Project(name=name, type=project_type, user=user).save()
return HttpResponseRedirect('/projects/')
else:
form = ProjectForm()
html = t.render(Context({\ return render(request, 'projects.html', {
'user': user,\ 'date': date,
'date': date,\ 'form': form,
'projects': projects,\ 'number': number,
'number': number,\ 'projects': projects
})) })
return HttpResponse(html)
def project(request, project_id): def project(request, project_id):
if not request.user.is_authenticated(): if not request.user.is_authenticated():
...@@ -104,9 +118,7 @@ def project(request, project_id): ...@@ -104,9 +118,7 @@ def project(request, project_id):
except ValueError: except ValueError:
raise Http404() raise Http404()
t = get_template('project.html')
user = request.user user = request.user
date = datetime.datetime.now() date = datetime.datetime.now()
project = Node.objects.get(id=project_id) project = Node.objects.get(id=project_id)
...@@ -121,15 +133,68 @@ def project(request, project_id): ...@@ -121,15 +133,68 @@ def project(request, project_id):
dashboard['count'] = corpus.children.count() dashboard['count'] = corpus.children.count()
board.append(dashboard) board.append(dashboard)
html = t.render(Context({\
'user': user,\
'date': date,\
'project': project,\
'board' : board,\
'number': number,\
}))
return HttpResponse(html) if request.method == 'POST':
#form = CorpusForm(request.POST, request.FILES)
name = str(request.POST['name'])
try:
language = Language.objects.get(id=str(request.POST['language']))
except:
language = None
try:
bdd_type = DatabaseType.objects.get(id=str(request.POST['bdd_type']))
except:
bdd_type = None
try:
file = request.FILES['file']
except:
file = None
if language is not None and name != "" and bdd_type != None and file != None :
resource = Resource(user=request.user, guid=str(date), bdd_type=bdd_type, file=file)
resource.save()
node_type = NodeType.objects.get(name='Corpus')
parent = Node.objects.get(id=project_id)
node = Node(parent=parent, type=node_type, name=name, user=request.user, language=language)
node.save()
node.resource.add(resource)
# Parse every resource attached to the freshly created corpus node with the
# parser matching its database type. Parsing is best-effort: any failure is
# printed and the request still ends with the redirect that follows.
try:
    for resource in node.resource.all():
        bdd_name = resource.bdd_type.name
        print(bdd_name)
        # Uploaded files are looked up under the deployment's media directory.
        file_path = '/var/www/gargantext/media/' + str(resource.file)
        if bdd_name == "PubMed":
            fileparser = PubmedFileParser(file=file_path)
            fileparser.parse(node)
        elif bdd_name == "Web Of Science (WOS), ISI format":
            fileparser = IsiParser(file=file_path)
            fileparser.parse(node)
        elif bdd_name == "Europresse":
            # The database type lives on the resource, not on the node; the
            # previous `node.bdd_type` lookup raised AttributeError here.
            # No Europresse parser is wired in yet.
            pass
except Exception as error:
    print(error)
return HttpResponseRedirect('/project/' + str(project_id))
else:
form = CorpusForm(request=request)
formResource = ResourceForm()
else:
form = CorpusForm(request=request)
formResource = ResourceForm()
return render(request, 'project.html', {
'form': form,
'formResource': formResource,
'user': user,
'date': date,
'project': project,
'board' : board,
'number': number,
})
def corpus(request, project_id, corpus_id): def corpus(request, project_id, corpus_id):
if not request.user.is_authenticated(): if not request.user.is_authenticated():
...@@ -191,6 +256,7 @@ def corpus(request, project_id, corpus_id): ...@@ -191,6 +256,7 @@ def corpus(request, project_id, corpus_id):
try: try:
dates = dict() dates = dict()
# query_to_dicts('''select to_char(t1.date, '%s'), count(*) # query_to_dicts('''select to_char(t1.date, '%s'), count(*)
# from documents_document as t1 # from documents_document as t1
# INNER JOIN documents_document_corpus as t2 # INNER JOIN documents_document_corpus as t2
...@@ -227,42 +293,45 @@ def corpus(request, project_id, corpus_id): ...@@ -227,42 +293,45 @@ def corpus(request, project_id, corpus_id):
return HttpResponse(html) return HttpResponse(html)
from node.admin import CorpusForm
class NameForm(forms.Form):
your_name = forms.CharField(label='Your name', max_length=100)
sender = forms.EmailField()
message = forms.CharField(widget=forms.Textarea)
fichier = forms.FileField()
def add_corpus(request): def add_corpus(request):
# if this is a POST request we need to process the form data form = CorpusForm(request=request)
#print(request.method)
if request.method == 'POST': if request.method == 'POST':
# create a form instance and populate it with data from the request: #form = CorpusForm(request.POST, request.FILES)
form = CorpusForm(request.POST, request.FILES) name = str(request.POST['name'])
# check whether it's valid:
if form.is_valid(): try:
form.save() #language = Language.objects.get(name=str(request.POST['language']))
# process the data in form.cleaned_data as required language = Language.objects.get(name='French')
# corpus.user = request.user except Exception as e:
# print(form.cleaned_data['name']) print(e)
language = None
try:
print(type(form.cleaned_data['fichier'])) if name != "" :
print("here we parse" + str(form.cleaned_data['fichier'])) project_id = 1047
except Exception as error: node_type = NodeType.objects.get(name='Corpus')
print(error) parent = Node.objects.get(id=project_id)
Corpus(parent=parent, type=node_type, name=name, user=request.user, language=language).save()
# redirect to a new URL: # try:
return HttpResponseRedirect('/projects/') # for resource in node.resource.all():
# fileparser = PubmedFileParser.PubmedFileParser(file='/var/www/gargantext/media/' + str(resource.file))
# fileparser.parse(node)
#
# except Exception as error:
# print(error)
return HttpResponseRedirect('/project/' + str(project_id))
# if a GET (or any other method) we'll create a blank form
else: else:
form = CorpusForm(request=request) form = CorpusForm(request=request)
return render(request, 'add_corpus.html', {'form': form}) return render(request, 'add_corpus.html', {'form': form})
print("5")
def delete_project(request, node_id):
    """Delete a project node owned by the requester, then show the project list.

    Parameters:
        request: the incoming Django request.
        node_id: primary key of the node to delete, as captured from the URL.

    Returns:
        A redirect to ``/projects/`` in every case.
    """
    # Deletion is destructive: anonymous visitors delete nothing, and an
    # authenticated user can only remove nodes that belong to them.
    if request.user.is_authenticated():
        Node.objects.filter(id=node_id, user=request.user).delete()
    return HttpResponseRedirect('/projects/')
def delete_corpus(request, project_id, corpus_id):
    """Delete a corpus node owned by the requester, then show its project.

    Parameters:
        request: the incoming Django request.
        project_id: id of the enclosing project, used only to build the
            redirect target.
        corpus_id: primary key of the corpus node to delete.

    Returns:
        A redirect to ``/project/<project_id>`` in every case.
    """
    # Deletion is destructive: anonymous visitors delete nothing, and an
    # authenticated user can only remove nodes that belong to them.
    if request.user.is_authenticated():
        Node.objects.filter(id=corpus_id, user=request.user).delete()
    # str() keeps the concatenation safe if a caller passes an integer id.
    return HttpResponseRedirect('/project/' + str(project_id))
{ {
"metadata": { "metadata": {
"name": "", "name": "",
"signature": "sha256:7d01da7300982ebf3acd799b54b93beda7dec63ba8f164356465e08a34dc3311" "signature": "sha256:d0ac96b232bdca40d2b67ddfc85c941e41c3760733e29c981ec727196317e1a1"
}, },
"nbformat": 3, "nbformat": 3,
"nbformat_minor": 0, "nbformat_minor": 0,
...@@ -1602,7 +1602,6 @@ ...@@ -1602,7 +1602,6 @@
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [ "input": [
"\n",
"SELECT t1.terms_id, t2.terms_id, COUNT(*) AS c, t3.project_id\n", "SELECT t1.terms_id, t2.terms_id, COUNT(*) AS c, t3.project_id\n",
"FROM documents_ngramdocument AS t1\n", "FROM documents_ngramdocument AS t1\n",
"\n", "\n",
...@@ -1620,6 +1619,16 @@ ...@@ -1620,6 +1619,16 @@
"outputs": [], "outputs": [],
"prompt_number": 20 "prompt_number": 20
}, },
{
"cell_type": "code",
"collapsed": false,
"input": [
"select t1.terms_id , \n"
],
"language": "python",
"metadata": {},
"outputs": []
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
......
...@@ -55,7 +55,8 @@ python manage.py syncdb ...@@ -55,7 +55,8 @@ python manage.py syncdb
Start the Python Notebook server Start the Python Notebook server
-------------------------------- --------------------------------
1) In Pyvenv: python manage.py shell_plus --notebook 1) In Pyvenv:
python manage.py shell_plus --notebook
2) Work from your browser! 2) Work from your browser!
...@@ -63,4 +64,5 @@ Start the Python Notebook server ...@@ -63,4 +64,5 @@ Start the Python Notebook server
Start the Django server Start the Django server
----------------------- -----------------------
In Pyvenv:
python manage.py runserver python manage.py runserver
\ No newline at end of file
from django.contrib import admin from django.contrib import admin
from ngram.models import Ngram, NodeNgram, NodeNgramNgram #from ngram.models import Ngram, NodeNgram, NodeNgramNgram
admin.site.register(Ngram)
admin.site.register(NodeNgram)
admin.site.register(NodeNgramNgram)
#admin.site.register(Ngram)
#admin.site.register(NodeNgram)
#admin.site.register(NodeNgramNgram)
#
...@@ -3,30 +3,30 @@ from django.utils import timezone ...@@ -3,30 +3,30 @@ from django.utils import timezone
from django.contrib.auth.models import User from django.contrib.auth.models import User
from node.models import Node from node.models import Node, Language
#class Ngram(models.Model):
class Ngram(models.Model): # language = models.ForeignKey(Language, blank=True, null=True, on_delete=models.SET_NULL)
terms = models.TextField(unique=True) # n = models.IntegerField()
n = models.IntegerField() # terms = models.CharField(max_length=255)
def __str__(self): # def __str__(self):
return "[%d] %s" % (self.pk, self.terms) # return "[%d] %s" % (self.pk, self.terms)
#
class NodeNgram(models.Model): #class NodeNgram(models.Model):
node = models.ForeignKey(Node) # node = models.ForeignKey(Node)
ngram = models.ForeignKey(Ngram, related_name="nodengram") # ngram = models.ForeignKey(Ngram, related_name="nodengram")
def __str__(self): # def __str__(self):
return "%s: %s" % (self.node.name, self.ngram.terms) # return "%s: %s" % (self.node.name, self.ngram.terms)
#
class NodeNgramNgram(models.Model): #class NodeNgramNgram(models.Model):
node = models.ForeignKey(Node) # node = models.ForeignKey(Node)
#
ngramX = models.ForeignKey(Ngram, related_name="nodengramngramx") # ngramX = models.ForeignKey(Ngram, related_name="nodengramngramx")
ngramY = models.ForeignKey(Ngram, related_name="nodengramngramy") # ngramY = models.ForeignKey(Ngram, related_name="nodengramngramy")
#
score = models.FloatField(default=0) # score = models.FloatField(default=0)
#
def __str__(self): # def __str__(self):
return "%s: %s / %s" % (self.node.name, self.ngramX.terms, self.ngramY.terms) # return "%s: %s / %s" % (self.node.name, self.ngramX.terms, self.ngramY.terms)
#
#
...@@ -2,14 +2,14 @@ from django.contrib import admin ...@@ -2,14 +2,14 @@ from django.contrib import admin
from django.forms import ModelForm, ModelChoiceField from django.forms import ModelForm, ModelChoiceField
from nested_inlines.admin import NestedModelAdmin, NestedStackedInline, NestedTabularInline from nested_inlines.admin import NestedModelAdmin, NestedStackedInline, NestedTabularInline
from node.models import NodeType, Language, Node, Project, Corpus, Document, DatabaseType, Resource from node.models import NodeType, Language, Node, Project, Corpus, Document, DatabaseType, Resource, Node_Ngram
class ResourceInLine(admin.TabularInline): class ResourceInLine(admin.TabularInline):
model = Resource model = Resource
extra = 0 extra = 0
class NodeAdmin(admin.ModelAdmin): class NodeAdmin(admin.ModelAdmin):
exclude = ('user', 'path', 'depth', 'numchild') exclude = ('user', 'path', 'depth', 'numchild', 'ngrams')
list_display = ('name', 'date') list_display = ('name', 'date')
search_fields = ('name',) search_fields = ('name',)
# list_filter = ('type',) # list_filter = ('type',)
...@@ -75,21 +75,35 @@ class ProjectAdmin(NodeAdmin): ...@@ -75,21 +75,35 @@ class ProjectAdmin(NodeAdmin):
from django.db.models.query import EmptyQuerySet from django.db.models.query import EmptyQuerySet
class ProjectForm(ModelForm):
    """Model form for ``Project``.

    Every field named in ``Meta.exclude`` is left out of the generated form.
    """

    class Meta:
        model = Project
        exclude = [
            'ngrams',
            'metadata',
            'resource',
            'parent',
            'user',
            'type',
            'language',
            'date',
        ]
class ResourceForm(ModelForm):
    """Model form for ``Resource``; ``user`` and ``guid`` are not exposed."""

    class Meta:
        model = Resource
        exclude = [
            'user',
            'guid',
        ]
class CorpusForm(ModelForm): class CorpusForm(ModelForm):
#parent = ModelChoiceField(EmptyQuerySet) #parent = ModelChoiceField(EmptyQuerySet)
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
try: try:
self.request = kwargs.pop('request', None) self.request = kwargs.pop('request', None)
super(CorpusForm, self).__init__(*args, **kwargs) super(CorpusForm, self).__init__(*args, **kwargs)
parent_type = NodeType.objects.get(name="Project") parent_type = NodeType.objects.get(name="Project")
#parent_type = NodeType.objects.get(name=self._parent_nodetype_name) #parent_type = NodeType.objects.get(name=self._parent_nodetype_name)
self.fields['parent'].queryset = Node.objects.filter(user_id=self.request.user.id, type_id=parent_type.id) # self.fields['parent'].queryset = Node.objects.filter(
except: # user_id=self.request.user.id,
pass # type_id=parent_type.id
# )
self.fields['language'].queryset = Language.objects.filter(implemented=1)
except Exception as error:
print("Error with", error)
class Meta: class Meta:
model = Corpus model = Corpus
exclude = ['parent', 'user', 'type', 'ngrams', 'metadata', 'resource', 'date']
class CorpusAdmin(NodeAdmin): class CorpusAdmin(NodeAdmin):
_parent_nodetype_name = 'Project' _parent_nodetype_name = 'Project'
...@@ -123,4 +137,5 @@ admin.site.register(Project, ProjectAdmin) ...@@ -123,4 +137,5 @@ admin.site.register(Project, ProjectAdmin)
admin.site.register(Corpus, CorpusAdmin) admin.site.register(Corpus, CorpusAdmin)
admin.site.register(Document, DocumentAdmin) admin.site.register(Document, DocumentAdmin)
admin.site.register(Node_Ngram)
...@@ -33,22 +33,28 @@ class DatabaseType(models.Model): ...@@ -33,22 +33,28 @@ class DatabaseType(models.Model):
def __str__(self): def __str__(self):
return self.name return self.name
class Ngram(models.Model):
language = models.ForeignKey(Language, blank=True, null=True, on_delete=models.SET_NULL)
n = models.IntegerField()
terms = models.CharField(max_length=255)
class Resource(models.Model): class Resource(models.Model):
user = models.ForeignKey(User) user = models.ForeignKey(User)
guid = models.CharField(max_length=255) guid = models.CharField(max_length=255)
bdd_type = models.ForeignKey(DatabaseType, blank=True, null=True) bdd_type = models.ForeignKey(DatabaseType, blank=True, null=True)
file = models.FileField(upload_to=upload_to, blank=True) file = models.FileField(upload_to=upload_to, blank=True)
def __str__(self):
return "%s => %s" % (self.bdd_type, self.file)
class NodeType(models.Model): class NodeType(models.Model):
name = models.CharField(max_length=200) name = models.CharField(max_length=200)
def __str__(self): def __str__(self):
return self.name return self.name
class Ngram(models.Model):
language = models.ForeignKey(Language, blank=True, null=True, on_delete=models.SET_NULL)
n = models.IntegerField()
terms = models.CharField(max_length=255)
def __str__(self):
return "[%d] %s" % (self.pk, self.terms)
class Node(CTENode): class Node(CTENode):
objects = Manager() objects = Manager()
...@@ -62,7 +68,7 @@ class Node(CTENode): ...@@ -62,7 +68,7 @@ class Node(CTENode):
metadata = hstore.DictionaryField(blank=True) metadata = hstore.DictionaryField(blank=True)
resource = models.ManyToManyField(Resource, blank=True) resource = models.ManyToManyField(Resource, blank=True)
ngrams = models.ManyToManyField(Ngram, blank=True) ngrams = models.ManyToManyField(Ngram, blank=True, help_text="Hold down")
def __str__(self): def __str__(self):
...@@ -72,16 +78,18 @@ class Node(CTENode): ...@@ -72,16 +78,18 @@ class Node(CTENode):
for noeud in Node.objects.filter(user=user): for noeud in Node.objects.filter(user=user):
print(noeud.depth * " " + "[%d] %d" % (noeud.pk, noeud.name)) print(noeud.depth * " " + "[%d] %d" % (noeud.pk, noeud.name))
class Node_Ngram(models.Model):
node = models.ForeignKey(Node, on_delete=models.CASCADE)
ngram = models.ForeignKey(Ngram, on_delete=models.CASCADE)
occurences = models.IntegerField()
class Project(Node): class Project(Node):
class Meta: class Meta:
proxy=True proxy=True
class CorpusManager(models.Manager):
    """Manager whose base queryset only contains nodes typed as 'Corpus'."""

    def get_query_set(self):
        """Return the parent manager's queryset filtered on the 'Corpus' node type.

        The ``NodeType`` row named 'Corpus' is looked up on every call.
        """
        wanted_type = NodeType.objects.get(name='Corpus')
        base_queryset = super(CorpusManager, self).get_query_set()
        return base_queryset.filter(type=wanted_type)
class Corpus(Node): class Corpus(Node):
objects = CorpusManager()
class Meta: class Meta:
proxy=True proxy=True
verbose_name_plural = 'Corpora' verbose_name_plural = 'Corpora'
...@@ -90,4 +98,28 @@ class Document(Node): ...@@ -90,4 +98,28 @@ class Document(Node):
class Meta: class Meta:
proxy=True proxy=True
############################
# NGRAMS
############################
class Node_Ngram(models.Model):
    """Association row linking one ``Node`` to one ``Ngram`` with a count."""

    # Both links are removed together with the row they point to.
    node = models.ForeignKey(Node, on_delete=models.CASCADE)
    ngram = models.ForeignKey(Ngram, on_delete=models.CASCADE)
    # Spelling kept as-is: this is the name of the database column.
    occurences = models.IntegerField()

    def __str__(self):
        """Render as "<node name>: <ngram terms>"."""
        node_name = self.node.name
        ngram_terms = self.ngram.terms
        return "{0}: {1}".format(node_name, ngram_terms)
class NodeNgramNgram(models.Model):
    """Scored relation between two ngrams (``ngramX``, ``ngramY``) attached to a node."""

    # Explicit on_delete, consistent with the two ngram keys below and with
    # Node_Ngram. CASCADE is what older Django versions apply implicitly, and
    # newer versions require the argument.
    node = models.ForeignKey(Node, on_delete=models.CASCADE)
    ngramX = models.ForeignKey(Ngram, related_name="nodengramngramx", on_delete=models.CASCADE)
    ngramY = models.ForeignKey(Ngram, related_name="nodengramngramy", on_delete=models.CASCADE)
    score = models.FloatField(default=0)

    def __str__(self):
        """Render as "<node name>: <terms of ngramX> / <terms of ngramY>"."""
        return "%s: %s / %s" % (self.node.name, self.ngramX.terms, self.ngramY.terms)
from parsing.FileParsers.FileParser import FileParser
#import FileParser
#
#class EuropressFileParser(FileParser, contents): class EuropressFileParser(FileParser):
#
# def parse(): def parse():
# pass pass
#
import collections
from node.models import Node, NodeType, Language, Ngram, Node_Ngram from node.models import Node, NodeType, Language, Ngram, Node_Ngram
from parsing.NgramsExtractors import * from parsing.NgramsExtractors import *
import collections
import dateutil.parser
class NgramCache: class NgramCache:
""" """
This allows the fast retrieval of ngram ids This allows the fast retrieval of ngram ids
...@@ -18,7 +21,7 @@ class NgramCache: ...@@ -18,7 +21,7 @@ class NgramCache:
try: try:
ngram = Ngram.get(terms=terms, language=self._language) ngram = Ngram.get(terms=terms, language=self._language)
except: except:
ngram = Ngram(terms=terms, n=len(terms), language=self._language) ngram = Ngram(terms=terms, n=len(terms.split()), language=self._language)
ngram.save() ngram.save()
self._cache[terms] = ngram self._cache[terms] = ngram
return self._cache[terms] return self._cache[terms]
...@@ -48,6 +51,7 @@ class FileParser: ...@@ -48,6 +51,7 @@ class FileParser:
self._extractors = dict() self._extractors = dict()
self._document_nodetype = NodeType.objects.get(name='Document') self._document_nodetype = NodeType.objects.get(name='Document')
languages = Language.objects.all() languages = Language.objects.all()
self._languages_fullname = {language.fullname.lower(): language for language in languages}
self._languages_iso2 = {language.iso2.lower(): language for language in languages} self._languages_iso2 = {language.iso2.lower(): language for language in languages}
self._languages_iso3 = {language.iso3.lower(): language for language in languages} self._languages_iso3 = {language.iso3.lower(): language for language in languages}
#self.parse() #self.parse()
...@@ -85,6 +89,7 @@ class FileParser: ...@@ -85,6 +89,7 @@ class FileParser:
"""Add a document to the database. """Add a document to the database.
""" """
def create_document(self, parentNode, title, contents, language, metadata, guid=None): def create_document(self, parentNode, title, contents, language, metadata, guid=None):
metadata = self.format_metadata(metadata)
# create or retrieve a resource for that document, based on its user id # create or retrieve a resource for that document, based on its user id
# if guid is None: # if guid is None:
# resource = Resource(guid=guid) # resource = Resource(guid=guid)
...@@ -98,6 +103,10 @@ class FileParser: ...@@ -98,6 +103,10 @@ class FileParser:
# if parentNode.descendants().filter(resource=resource).exists(): # if parentNode.descendants().filter(resource=resource).exists():
# return None # return None
# create the document itself # create the document itself
if len(title) > 200:
title = title[:200]
childNode = Node( childNode = Node(
user = parentNode.user, user = parentNode.user,
type = self._document_nodetype, type = self._document_nodetype,
...@@ -137,3 +146,51 @@ class FileParser: ...@@ -137,3 +146,51 @@ class FileParser:
def parse(self): def parse(self):
return list() return list()
def format_metadata_dates(self, metadata):
    """Normalise the date fields found in ``metadata`` (modified in place).

    Two passes are made:

    1. For every ``<prefix>_year`` key, the available ``<prefix>_month``,
       ``_day``, ``_hour``, ``_minute`` and ``_second`` values are assembled
       into one string, parsed, and stored under ``<prefix>_date`` formatted
       as ``"%Y-%m-%d %H:%M:%S"``.
    2. For every ``<prefix>_date`` key, the value is parsed and split back
       into ``<prefix>_year`` ... ``<prefix>_second`` (zero-padded strings).

    Example: ``{"publication_date": "2014-10-23 09:57:42"}`` gains
    ``publication_year="2014"``, ``publication_month="10"``, ... ,
    ``publication_second="42"``.

    Components missing from a date default to January 1st, 00:00:00, not to
    the current day, so the result does not depend on when parsing runs.
    A value that cannot be parsed is skipped and its fields are left as-is.

    :param metadata: dictionary of metadata values; values are converted
        with ``str()`` before parsing.
    :returns: the same ``metadata`` dictionary.
    """
    # Local import: the file-level import block is not part of this block.
    import datetime

    # dateutil fills unspecified fields from `default`; without it, "today"
    # is used, which makes a year-only date change from one day to the next.
    default_date = datetime.datetime(1, 1, 1)

    # (key suffix, separator written before the value), in assembly order
    optional_parts = (
        ("_month", " "),
        ("_day", " "),
        ("_hour", " "),
        ("_minute", ":"),
        ("_second", ":"),
    )

    # First, check the split dates...
    prefixes = [key[:-5] for key in metadata.keys() if key[-5:] == "_year"]
    for prefix in prefixes:
        date_string = str(metadata[prefix + "_year"])
        for suffix, separator in optional_parts:
            key = prefix + suffix
            if key in metadata:
                date_string += separator + str(metadata[key])
        try:
            parsed = dateutil.parser.parse(date_string, default=default_date)
            metadata[prefix + "_date"] = parsed.strftime("%Y-%m-%d %H:%M:%S")
        except (ValueError, OverflowError):
            # Unparseable split date: keep the original fields untouched.
            pass

    # ...then parse all the "date" fields, to split them into separate elements
    prefixes = [key[:-5] for key in metadata.keys() if key[-5:] == "_date"]
    for prefix in prefixes:
        try:
            date = dateutil.parser.parse(str(metadata[prefix + "_date"]), default=default_date)
            parts = {
                prefix + "_year": date.strftime("%Y"),
                prefix + "_month": date.strftime("%m"),
                prefix + "_day": date.strftime("%d"),
                prefix + "_hour": date.strftime("%H"),
                prefix + "_minute": date.strftime("%M"),
                prefix + "_second": date.strftime("%S"),
            }
        except (ValueError, OverflowError):
            # One malformed date must not prevent the document from being stored.
            continue
        metadata.update(parts)

    # finally, return the result!
    return metadata
def format_metadata(self, metadata):
    """Return ``metadata`` after formatting; currently only the dates are normalised."""
    return self.format_metadata_dates(metadata)
from django.db import transaction from parsing.FileParsers.RisFileParser import RisFileParser
from FileParser import FileParser
class IsiFileParser(FileParser): class IsiFileParser(RisFileParser):
def parse(self, parentNode): _parameters = {
# read the file, line by line b"ER": {"type": "delimiter"},
for line in self.__file: b"TI": {"type": "metadata", "key": "title", "separator": " "},
b"AU": {"type": "metadata", "key": "authors", "separator": ", "},
b"DI": {"type": "metadata", "key": "doi"},
b"PY": {"type": "metadata", "key": "publication_year"},
# open the file as XML b"PD": {"type": "metadata", "key": "publication_month"},
xml_parser = etree.XMLParser(resolve_entities=False, recover=True) b"LA": {"type": "metadata", "key": "language"},
xml = etree.parse(self._file, parser=xml_parser) b"AB": {"type": "metadata", "key": "abstract", "separator": " "},
# parse all the articles, one by one b"WC": {"type": "metadata", "key": "fields"},
# all database operations should be performed within one transaction }
xml_articles = xml.findall('PubmedArticle')
with transaction.atomic():
for xml_article in xml_articles:
# extract data from the document
date_year = int(xml_article.find('MedlineCitation/DateCreated/Year').text)
date_month = int(xml_article.find('MedlineCitation/DateCreated/Month').text)
date_day = int(xml_article.find('MedlineCitation/DateCreated/Day').text)
metadata = {
# other metadata should also be included:
# authors, submission date, etc.
"date_pub": datetime.date(year, month, day),
"journal": xml_article.find('MedlineCitation/Article/Journal/Title').text
"title": xml_article.find('MedlineCitation/Article/ArticleTitle').text
"language_iso3": xml_article.find('MedlineCitation/Article/Language').text
"doi": xml_article.find('PubmedData/ArticleIdList/ArticleId[type=doi]').text
}
contents = xml_article.find('MedlineCitation/Article/Abstract/AbstractText').text
# create the document in the database
yield self.create_document(
parentNode = parentNode
title = metadata["title"],
contents = contents,
language = self._languages_iso3[metadata["language"].lower()]
metadata = metadata,
guid = metadata["doi"],
)
...@@ -7,7 +7,7 @@ import datetime ...@@ -7,7 +7,7 @@ import datetime
class PubmedFileParser(FileParser): class PubmedFileParser(FileParser):
def parse(self, parentNode, tag=True): def parse(self, parentNode=None, tag=True):
# open the file as XML # open the file as XML
xml_parser = etree.XMLParser(resolve_entities=False, recover=True) xml_parser = etree.XMLParser(resolve_entities=False, recover=True)
documents = [] documents = []
...@@ -16,7 +16,6 @@ class PubmedFileParser(FileParser): ...@@ -16,7 +16,6 @@ class PubmedFileParser(FileParser):
with zipfile.ZipFile(self._file) as zipFile: with zipfile.ZipFile(self._file) as zipFile:
for filename in zipFile.namelist(): for filename in zipFile.namelist():
file = zipFile.open(filename, "r") file = zipFile.open(filename, "r")
# print(file.read())
xml = etree.parse(file, parser=xml_parser) xml = etree.parse(file, parser=xml_parser)
# parse all the articles, one by one # parse all the articles, one by one
...@@ -24,19 +23,17 @@ class PubmedFileParser(FileParser): ...@@ -24,19 +23,17 @@ class PubmedFileParser(FileParser):
xml_articles = xml.findall('PubmedArticle') xml_articles = xml.findall('PubmedArticle')
for xml_article in xml_articles: for xml_article in xml_articles:
# extract data from the document # extract data from the document
date_year = int(xml_article.find('MedlineCitation/DateCreated/Year').text) metadata = {}
date_month = int(xml_article.find('MedlineCitation/DateCreated/Month').text)
date_day = int(xml_article.find('MedlineCitation/DateCreated/Day').text)
metadata = {
"date_pub": '%s-%s-%s' % (date_year, date_month, date_day),
}
metadata_path = { metadata_path = {
"journal" : 'MedlineCitation/Article/Journal/Title', "journal" : 'MedlineCitation/Article/Journal/Title',
"title" : 'MedlineCitation/Article/ArticleTitle', "title" : 'MedlineCitation/Article/ArticleTitle',
"language_iso3" : 'MedlineCitation/Article/Language', "language_iso3" : 'MedlineCitation/Article/Language',
"doi" : 'PubmedData/ArticleIdList/ArticleId[type=doi]', "doi" : 'PubmedData/ArticleIdList/ArticleId[type=doi]',
"abstract" : 'MedlineCitation/Article/Abstract/AbstractText' "abstract" : 'MedlineCitation/Article/Abstract/AbstractText',
} "publication_year" : 'MedlineCitation/DateCreated/Year',
"publication_month" : 'MedlineCitation/DateCreated/Month',
"publication_day" : 'MedlineCitation/DateCreated/Day',
}
for key, path in metadata_path.items(): for key, path in metadata_path.items():
try: try:
node = xml_article.find(path) node = xml_article.find(path)
......
from django.db import transaction
from parsing.FileParsers.FileParser import FileParser
class RisFileParser(FileParser):
    """Parser for tagged, line-oriented bibliographic files.

    Each line starts with a two-byte tag. Subclasses describe the tags they
    understand in ``_parameters``, mapping a tag to either
    ``{"type": "metadata", "key": ..., "separator": ...}`` (the value is
    stored in the metadata of the current record; ``separator`` is optional)
    or ``{"type": "delimiter"}`` (the current record is complete and is
    turned into a document).
    """

    _parameters = {
    }

    def _parse(self, parentNode, file):
        """Read ``self._file`` line by line and create one document per record.

        All documents are created inside a single database transaction, and
        the file is closed afterwards even when parsing fails.

        :param parentNode: node under which the documents are created.
        :param file: unused; the lines are read from ``self._file``.
        :raises KeyError: when a record has no title, no language, or a
            language missing from ``self._languages_fullname``.
        """
        metadata = {}
        last_key = None
        last_values = []
        try:
            with transaction.atomic():
                for line in self._file:
                    if len(line) > 2:
                        parameter_key = line[:2]
                        # A blank tag marks the continuation of the previous field.
                        # NOTE(review): SOURCE showed a one-byte literal here, which can
                        # never equal a two-byte slice; presumably whitespace was
                        # collapsed, so confirm the two-blank literal is the intent.
                        if parameter_key != b"  " and parameter_key != last_key:
                            # The tag changed: flush what was accumulated for the previous one.
                            if last_key in self._parameters:
                                parameter = self._parameters[last_key]
                                if parameter["type"] == "metadata":
                                    separator = parameter.get("separator", "")
                                    metadata[parameter["key"]] = separator.join(last_values)
                                elif parameter["type"] == "delimiter":
                                    language = self._languages_fullname[metadata["language"].lower()]
                                    # create_document() requires `contents` and `language`;
                                    # omitting them raised a TypeError.
                                    # NOTE(review): the abstract is assumed to be the
                                    # document contents; confirm against the other parsers.
                                    self.create_document(
                                        parentNode = parentNode,
                                        title = metadata["title"],
                                        contents = metadata.get("abstract", ""),
                                        language = language,
                                        metadata = metadata,
                                        # records without a DOI fall back to guid=None
                                        guid = metadata.get("doi"),
                                    )
                                    metadata = {}
                            last_key = parameter_key
                            last_values = []
                        # Drop the tag, its following character and the trailing newline.
                        last_values.append(line[3:-1].decode())
        finally:
            self._file.close()
#from parsing.FileParsers import EuropressFileParser from parsing.FileParsers.IsiFileParser import IsiFileParser
from parsing.FileParsers import PubmedFileParser from parsing.FileParsers.PubmedFileParser import PubmedFileParser
from parsing.FileParsers.EuropressFileParser import EuropressFileParser
-- Pairs of distinct ngrams attached to the same node, with the number of
-- joined rows per pair as score; every row is labelled with the constant
-- node_id 177. Rows belonging to node 174 are excluded on both sides.
-- Each pair is returned in both orders, since only x.ngram_id <> y.ngram_id
-- is required.
SELECT
177 as node_id, x.ngram_id as ngramX_id, y.ngram_id as ngramY_id, COUNT(*) AS score
FROM
node_node_ngram AS x
INNER JOIN
node_node_ngram AS y
ON x.node_id = y.node_id
WHERE
x.id NOT IN (SELECT id FROM node_node_ngram WHERE node_id = 174 )
AND
y.id NOT IN (SELECT id from node_node_ngram WHERE node_id = 174 )
AND
x.ngram_id <> y.ngram_id
GROUP BY
x.ngram_id, y.ngram_id
-- No ORDER BY: which 10 groups are returned is not defined.
LIMIT 10
-- Pairs of distinct ngrams attached to the same node, over the whole table.
-- The score is the square root of the summed products of the two occurrence
-- counts; the first column is the constant 100 under the alias "NodeType Cooc".
SELECT
100 as "NodeType Cooc", x.ngram_id, y.ngram_id, SQRT(SUM(x.occurences * y.occurences)) AS score
FROM
node_node_ngram AS x
INNER JOIN
node_node_ngram AS y
ON
x.node_id = y.node_id
AND
x.ngram_id <> y.ngram_id
GROUP BY
x.ngram_id, y.ngram_id
-- Pairs of distinct ngrams attached to the same node, over the whole table,
-- scored by the number of joined rows and labelled with the constant node_id 177.
-- NOTE(review): the bare `id` below is ambiguous (x and y are the same table,
-- so both expose an id column) and it is neither aggregated nor listed in
-- GROUP BY; the statement is expected to be rejected as written. Qualify and
-- aggregate it, or drop it.
SELECT
id, 177 as node_id, x.ngram_id as ngramX_id, y.ngram_id as ngramY_id, COUNT(*) AS score
FROM
node_node_ngram AS x
INNER JOIN
node_node_ngram AS y
ON
x.node_id = y.node_id
AND
x.ngram_id <> y.ngram_id
GROUP BY
x.ngram_id, y.ngram_id
-- TODO Add count for synonyms
-- Pairs of distinct ngrams attached to the same node, restricted on both
-- sides to the rows of node 173; the score is the number of joined rows and
-- every row is labelled with the constant node_id 177.
SELECT
177 as node_id, x.ngram_id as ngramX_id, y.ngram_id as ngramY_id, COUNT(*) AS score
FROM
node_node_ngram AS x
INNER JOIN
node_node_ngram AS y
ON x.node_id = y.node_id
WHERE
x.id IN (SELECT id FROM node_node_ngram WHERE node_id = 173 )
AND
y.id IN (SELECT id FROM node_node_ngram WHERE node_id = 173 )
AND
x.ngram_id <> y.ngram_id
GROUP BY
x.ngram_id, y.ngram_id
-- No ORDER BY: which 10 groups are returned is not defined.
LIMIT 10
-- Stores the pairs computed from the rows of node 173 into
-- node_nodengramngram, attached to node 177: one row per ordered pair of
-- distinct ngrams, scored by the number of joined rows.
INSERT INTO node_nodengramngram (node_id, "ngramX_id", "ngramY_id", score)
SELECT
177 as node_id, x.ngram_id, y.ngram_id, COUNT(*) AS score
FROM
node_node_ngram AS x
INNER JOIN
node_node_ngram AS y
ON x.node_id = y.node_id
WHERE
x.id in (select id from node_node_ngram WHERE node_id = 173 )
AND
y.id in (select id from node_node_ngram WHERE node_id = 173 )
AND
x.ngram_id <> y.ngram_id
GROUP BY
x.ngram_id, y.ngram_id
-- NOTE(review): LIMIT without ORDER BY inserts an arbitrary subset of at most
-- 1000 pairs - confirm this truncation is intended.
LIMIT 1000
...@@ -17,8 +17,10 @@ ...@@ -17,8 +17,10 @@
<h1>Title</h1> <h1>Title</h1>
<form enctype="multipart/form-data" action="/add/corpus/" method="post"> <form enctype="multipart/form-data" action="/add/corpus/" method="post">
{% csrf_token %} {% csrf_token %}
{{ form.as_p }} {{ form.non_field_errors }}
<input type="submit" value="Save" /> {{ form.as_p}}
<input type="submit" value="Save" />
</form> </form>
</div> </div>
</div> </div>
......
{% extends "menu.html" %}
{% block css %}
{% load staticfiles %}
<link rel="stylesheet" href="{% static "css/bootstrap.css" %}">
<link rel="stylesheet" href="{% static "css/bootstrap-theme.min.css" %}">
{% endblock %}
{% block content %}
<div class="container theme-showcase" role="main">
<div class="jumbotron">
<h1>Title</h1>
<form enctype="multipart/form-data" action="/add/corpus/" method="post">
{% csrf_token %}
{{ form.non_field_errors }}
{{ form.as_p}}
<input type="submit" value="Save" />
</form>
</div>
</div>
{% endblock %}
{% extends "menu.html" %}
{% block css %}
{% load staticfiles %}
<link rel="stylesheet" href="{% static "css/bootstrap.css" %}">
<link rel="stylesheet" href="{% static "css/bootstrap-theme.min.css" %}">
{% endblock %}
{% block content %}
<div class="container theme-showcase" role="main">
<div class="jumbotron">
<h1>Title</h1>
<form enctype="multipart/form-data" action="/add/corpus/" method="post">
{% csrf_token %}
{{ form.non_field_errors }}
<div class="fieldWrapper">
<label for="id_date">Date :</label>
{{ form.date.errors }}
{{ form.date }}
</div>
<div class="fieldWrapper">
<label for="id_type">Type :</label>
{{ form.type.errors }}
{{ form.type }}
</div>
<div class="fieldWrapper">
<label for="id_user">User :</label>
{{ form.user.errors }}
{{ form.user }}
</div>
<div class="fieldWrapper">
<label for="id_name">Corpus name :</label>
{{ form.name.errors }}
{{ form.name }}
</div>
<div class="fieldWrapper">
<label for="id_parent">Parent :</label>
{{ form.parent.errors }}
<p>{{ form.parent }}</p>
</div>
<div class="fieldWrapper">
<label for="id_language">Language:</label>
{{ form.language.errors }}
<p>{{ form.language }}</p>
</div>
<div class="fieldWrapper">
<label for="id_metadata">Metadata:</label>
{{ form.metadata.errors }}
<p>{{ form.metadata }}</p>
</div>
<div class="fieldWrapper">
<label for="id_resource">Files :</label>
{{ form.resource.errors }}
<p>{{ form.resource }}</p>
</div>
<div class="fieldWrapper">
<label for="id_ngrams">{{ form.ngrams.label }}</label>
<p>
{{ form.ngrams.errors }}
{{ form.ngrams.help_text }}
{{ form.ngrams }}</p>
</div>
<input type="submit" value="Save" />
</form>
</div>
</div>
{% endblock %}
{% extends "menu.html" %}
{% block css %}
{% load staticfiles %}
<link rel="stylesheet" href="{% static "css/bootstrap.css" %}">
<link rel="stylesheet" href="{% static "css/bootstrap-theme.min.css" %}">
{% endblock %}
{% block content %}
<div class="container theme-showcase" role="main">
<div class="jumbotron">
<h1>Gargantext</h1>
<p>A web platform to explore text-mining</p>
</div>
</div>
<div class="container">
<div class="row">
<div class="col-md-4 content">
<h3>Presentation</h3>
<p>
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
</div>
<div class="col-md-4 content">
<h3>History</h3>
<p>
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
</div>
<div class="col-md-4 content">
<h3>Tutorials</h3>
<p>
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
</div>
</div>
</div>
{% endblock %}
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
<div class="navbar-collapse collapse"> <div class="navbar-collapse collapse">
<ul class="nav navbar-nav"> <ul class="nav navbar-nav">
<li><a href="/admin/">Admin</a></li> <li><a href="/admin/">Admin</a></li>
<li><a href="/projects/">Projects</a></li> <li><a href="/projects/">My Projects</a></li>
<li><a href="/contact/">Contact</a></li> <li><a href="/contact/">Contact</a></li>
</ul> </ul>
...@@ -75,6 +75,16 @@ $(function() { ...@@ -75,6 +75,16 @@ $(function() {
</script> </script>
<script src="{% static "js/bootstrap.min.js" %}"></script> <script src="{% static "js/bootstrap.min.js" %}"></script>
<script>$(function () { $("[data-toggle='popover']").popover({
// Runs once the DOM is ready: turns every element carrying
// data-toggle='popover' into a popover.
// html:true lets the title and content below be inserted as markup.
html:true,
// The title is copied from the markup inside #popover-head.
// NOTE(review): no #popover-head element is visible in the templates of this
// change - confirm it exists, otherwise the title is empty.
title: function() {
return $("#popover-head").html();
},
// The content is copied from the markup inside #popover-content.
content: function() {
return $("#popover-content").html();
}
});});</script>
</body> </body>
</html> </html>
...@@ -15,23 +15,56 @@ ...@@ -15,23 +15,56 @@
<div class="container theme-showcase" role="main"> <div class="container theme-showcase" role="main">
<div class="jumbotron"> <div class="jumbotron">
<div class="row">
<div class="col-md-3">
{% if project %} {% if project %}
<h1>{{ project.name }}</h1> <h1>{{ project.name }}</h1>
<h3> {{number}} corpora </h3> <h3> {{number}} corpora </h3>
<p>
<a class="btn btn-primary btn-lg" role="button" href="/add/corpus/">Add a corpus</a></p>
{% endif %} {% endif %}
</div> </div>
<div class="col-md-4">
<button
type="button"
class="btn btn-primary btn-lg"
data-container="body"
data-toggle="popover"
data-placement="bottom"
>Add a corpus</button>
<div id="popover-content" class="hide">
<form enctype="multipart/form-data" action="/project/{{project.id}}/" method="post">
{% csrf_token %}
{{ form.non_field_errors }}
{{ form.as_p}}
{{ formResource.non_field_errors }}
{{ formResource.as_p}}
<input type="submit" class="btn" value="Add this corpus" />
</form>
</div>
</div>
</div>
</div>
</div>
</div> </div>
<!-- Add jumbotron container for each type of coprus (presse, science etc.) --!> <!-- Add jumbotron container for each type of corpus (presse, science etc.) --!>
<div class="container"> <div class="container">
<div class="row"> <div class="row">
{% if board %} {% if board %}
{% for corpus in board %} {% for corpus in board %}
<div class="col-md-4"> <div class="col-md-4">
<h3><a href="/project/{{project.id}}/corpus/{{corpus.id}}">{{corpus.name}}</a></h3> <h3><a href="/project/{{project.id}}/corpus/{{corpus.id}}">{{corpus.name}}</a>
<button type="button" class="btn btn-xs btn-default" data-container="body" data-toggle="popover" data-placement="bottom" data-content='<a href="/project/{{ project.id }}/corpus/{{ corpus.id}}/delete">Delete it!</a>'>Manage</button>
</h3>
<h4>{{ corpus.count }} Documents </h4> <h4>{{ corpus.count }} Documents </h4>
<p>{{ corpus.language }} {{ corpus.database}}</p> <p>{{ corpus.language }} {{ corpus.database}}</p>
<h5>Activity:</h5> <h5>Activity:</h5>
......
...@@ -13,9 +13,26 @@ ...@@ -13,9 +13,26 @@
<div class="container theme-showcase" role="main"> <div class="container theme-showcase" role="main">
<div class="jumbotron"> <div class="jumbotron">
<div class="row">
<div class="col-md-3">
<h1>My {{number}} projects</h1> <h1>My {{number}} projects</h1>
<p>Template showing my working space</p> <p>Template showing my working space</p>
<a class="btn btn-primary btn-lg" role="button" href="/admin/node/project/add/">Add a project</a> </div>
<div class="col-md-4"></div>
<div class="col-md-4">
<button
type="button"
class="btn btn-primary btn-lg"
data-container="body"
data-toggle="popover"
data-placement="bottom"
>Add a project</button>
<div id="popover-content" class="hide">
<form enctype='multipart/form-data' action='/projects/' method='post'>{% csrf_token %}{{ form.non_field_errors }}{{ form.as_p}}<input type='submit' class="btn" value='Add this project !'/></form>
</div>
</div>
</div>
</div>
</div> </div>
</div> </div>
...@@ -25,7 +42,10 @@ ...@@ -25,7 +42,10 @@
{% for project in projects %} {% for project in projects %}
<!--<div class="col-md-offset-7 col-md-4 content" style="background-color:grey">!--> <!--<div class="col-md-offset-7 col-md-4 content" style="background-color:grey">!-->
<div class="col-md-3 content"> <div class="col-md-3 content">
<h3><a href="/project/{{ project.id }}">{{ project.name }}</a></h3> <h3><a href="/project/{{ project.id }}">{{ project.name }}</a>
<button type="button" class="btn btn-xs btn-default" data-container="body" data-toggle="popover" data-placement="bottom" data-content='<a href="/project/{{ project.id }}/delete">Delete it!</a>'>Manage</button>
</h3>
<h4>{{ project.subtitle }}<h4> <h4>{{ project.subtitle }}<h4>
<h5>Completed:</h5> <h5>Completed:</h5>
<div class="chart" barColor="#fffff" data-percent="75">75%</div> <div class="chart" barColor="#fffff" data-percent="75">75%</div>
......
{ {
"metadata": { "metadata": {
"name": "", "name": "",
"signature": "sha256:6df2ce47b09a6203b244f7b4dc27f3346901261b85922dd46bc54d669d6469a6" "signature": "sha256:471ecc2290c2a84d75008cf33cc7db2b8c74f4bea96be0f180e58bedfabceaa8"
}, },
"nbformat": 3, "nbformat": 3,
"nbformat_minor": 0, "nbformat_minor": 0,
...@@ -833,6 +833,37 @@ ...@@ -833,6 +833,37 @@
], ],
"prompt_number": 26 "prompt_number": 26
}, },
{
"cell_type": "code",
"collapsed": false,
"input": [
"from autoslug import AutoSlugField"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 33
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"AutoSlugField()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 34,
"text": [
"<autoslug.fields.AutoSlugField>"
]
}
],
"prompt_number": 34
},
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
......
{ {
"metadata": { "metadata": {
"name": "", "name": "",
"signature": "sha256:3345ac991b0346b1dfd82386fdc2a59f39b2de9bf32d03ddfbeb565927cfe7ab" "signature": "sha256:8c764ebc660400cc2f2dddafacfdb7082971d16cb2b75bac1470575d33428427"
}, },
"nbformat": 3, "nbformat": 3,
"nbformat_minor": 0, "nbformat_minor": 0,
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
"# try:\n", "# try:\n",
"# Language(iso2=lang.alpha2, iso3=lang.terminology, fullname=lang.name, implemented=1).save()\n", "# Language(iso2=lang.alpha2, iso3=lang.terminology, fullname=lang.name, implemented=1).save()\n",
"# except:\n", "# except:\n",
"# pass\n" "# pass"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
...@@ -41,17 +41,32 @@ ...@@ -41,17 +41,32 @@
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [], "input": [
"corpus = Node.objects.get(name=\"OneMoreLife PubMed\")\n",
"print(corpus.resource.all())"
],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
"prompt_number": 2 {
"output_type": "stream",
"stream": "stdout",
"text": [
"[<Resource: PubMed => corpora/alexandre/test_pkqLVdy.zip>]\n"
]
}
],
"prompt_number": 3
}, },
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [ "input": [
"node = Node.objects.get(name=\"PubMed corpus\")" "from node.models import Project\n",
"from django.contrib.auth.models import User\n",
"user = User.objects.get(username=\"alexandre\")\n",
"project_type = NodeType.objects.get(name=\"Project\")\n",
"Project(user=user, type=project_type, name=\"Abeilles\").save()"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
...@@ -62,18 +77,80 @@ ...@@ -62,18 +77,80 @@
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [ "input": [
"fileparser = PubmedFileParser.PubmedFileParser(file='/var/www/gargantext/media/' + node.fichier.name)" "node = Node.objects.filter(name=\"Abeilles\", user=user)[0]\n",
"print(node.pk)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"24\n"
]
}
],
"prompt_number": 28
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"r = node.children.get(name=\"Pubmed\")"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 40
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"r = r.resource.all()[0]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 41
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"r.file"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 42,
"text": [
"<FieldFile: corpora/alexandre/pubmed_BwIXSzN.zip>"
]
}
],
"prompt_number": 42
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"fileparser = PubmedFileParser.PubmedFileParser(file='/var/www/gargantext/media/' + str(r.file))"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"prompt_number": 4 "prompt_number": 43
}, },
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [ "input": [
"fileparser.parse(node)\n" "fileparser.parse(node)"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
...@@ -179,7 +256,7 @@ ...@@ -179,7 +256,7 @@
{ {
"metadata": {}, "metadata": {},
"output_type": "pyout", "output_type": "pyout",
"prompt_number": 5, "prompt_number": 44,
"text": [ "text": [
"[<Node: Systemic spread and propagation of a plant-pathogenic virus in European honeybees, Apis mellifera.>,\n", "[<Node: Systemic spread and propagation of a plant-pathogenic virus in European honeybees, Apis mellifera.>,\n",
" <Node: A Causal Analysis of Observed Declines in Managed Honey Bees (Apis mellifera).>,\n", " <Node: A Causal Analysis of Observed Declines in Managed Honey Bees (Apis mellifera).>,\n",
...@@ -265,7 +342,7 @@ ...@@ -265,7 +342,7 @@
] ]
} }
], ],
"prompt_number": 5 "prompt_number": 44
}, },
{ {
"cell_type": "code", "cell_type": "code",
...@@ -394,10 +471,156 @@ ...@@ -394,10 +471,156 @@
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [], "input": [
"Project.objects.all().delete()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 18
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from node.models import Corpus, Project\n",
"Project.objects.all()\n",
"Corpus.objects.all()"
],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [] "outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 7,
"text": [
"[<Corpus: Abeilles>]"
]
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Node.objects.filter(user=user, type=project_type)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 5,
"text": [
"[<Node: Abeilles>]"
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"destruction = Node.objects.filter(id=1038).all()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 20
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"destruction.delete()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 21
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"destruction"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 22,
"text": [
"[]"
]
}
],
"prompt_number": 22
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"destruction.children.all().delete()\n",
"destruction.delete"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 14,
"text": [
"<bound method Node.delete of <Node: Encore un >>"
]
}
],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"help(destruction.delete)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Help on method delete in module cte_tree.models:\n",
"\n",
"delete(method=None, position=None, save=True) method of node.models.Node instance\n",
" Prepares the tree for deletion according to the deletion semantics\n",
" specified for the :class:`CTENode` Model, and then delegates to the\n",
" :class:`CTENode` superclass ``delete`` method.\n",
" \n",
" Default deletion `method` and `position` callable can be overridden\n",
" by being supplied as arguments to this method.\n",
" \n",
" :param method: optionally a particular deletion method, overriding\n",
" the default method specified for this model.\n",
" \n",
" :param position: optional callable to invoke prior to each move\n",
" operation, should the delete method require any moves.\n",
" \n",
" :param save: optional flag indicating whether this model's\n",
" :meth:`save` method should be invoked after each move operation,\n",
" should the delete method require any moves.\n",
"\n"
]
}
],
"prompt_number": 15
} }
], ],
"metadata": {} "metadata": {}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment