Commit 8b87cac2 authored by Administrator

ajout du parsing au chargement

parent 159efcff
This diff is collapsed.
...@@ -18,6 +18,8 @@ from django import forms ...@@ -18,6 +18,8 @@ from django import forms
from collections import defaultdict from collections import defaultdict
from parsing.FileParsers import *
# SOME FUNCTIONS # SOME FUNCTIONS
def query_to_dicts(query_string, *query_args): def query_to_dicts(query_string, *query_args):
...@@ -245,14 +247,14 @@ def add_corpus(request): ...@@ -245,14 +247,14 @@ def add_corpus(request):
form = CorpusForm(request.POST, request.FILES) form = CorpusForm(request.POST, request.FILES)
# check whether it's valid: # check whether it's valid:
if form.is_valid(): if form.is_valid():
form.save() node = form.save()
# process the data in form.cleaned_data as required
# corpus.user = request.user
# print(form.cleaned_data['name']) # print(form.cleaned_data['name'])
try: try:
print(type(form.cleaned_data['fichier'])) for resource in node.resource.all():
print("here we parse" + str(form.cleaned_data['fichier'])) fileparser = PubmedFileParser.PubmedFileParser(file='/var/www/gargantext/media/' + str(resource.file))
fileparser.parse(node)
except Exception as error: except Exception as error:
print(error) print(error)
......
{ {
"metadata": { "metadata": {
"name": "", "name": "",
"signature": "sha256:3345ac991b0346b1dfd82386fdc2a59f39b2de9bf32d03ddfbeb565927cfe7ab" "signature": "sha256:dd0951010ab6bd7a5909f5efbd5f9c0fc791ab47839acdbb492e7d8fbb41f318"
}, },
"nbformat": 3, "nbformat": 3,
"nbformat_minor": 0, "nbformat_minor": 0,
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"prompt_number": 1 "prompt_number": 10
}, },
{ {
"cell_type": "code", "cell_type": "code",
...@@ -41,39 +41,106 @@ ...@@ -41,39 +41,106 @@
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [], "input": [
"project_type = NodeType.objects.get(name=\"Project\")"
],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"prompt_number": 2 "prompt_number": 20
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from node.models import Project\n",
"from django.contrib.auth.models import User\n",
"user = User.objects.get(username=\"alexandre\")\n",
"Project(user=user, type=project_type, name=\"Abeilles\").save()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 46
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"node = Node.objects.filter(name=\"Abeilles\", user=user)[0]\n",
"print(node.pk)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"24\n"
]
}
],
"prompt_number": 28
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"r = node.children.get(name=\"Pubmed\")"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 40
}, },
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [ "input": [
"node = Node.objects.get(name=\"PubMed corpus\")" "r = r.resource.all()[0]"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"prompt_number": 3 "prompt_number": 41
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"r.file"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 42,
"text": [
"<FieldFile: corpora/alexandre/pubmed_BwIXSzN.zip>"
]
}
],
"prompt_number": 42
}, },
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [ "input": [
"fileparser = PubmedFileParser.PubmedFileParser(file='/var/www/gargantext/media/' + node.fichier.name)" "fileparser = PubmedFileParser.PubmedFileParser(file='/var/www/gargantext/media/' + str(r.file))"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"prompt_number": 4 "prompt_number": 43
}, },
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [ "input": [
"fileparser.parse(node)\n" "fileparser.parse(node)"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
...@@ -179,7 +246,7 @@ ...@@ -179,7 +246,7 @@
{ {
"metadata": {}, "metadata": {},
"output_type": "pyout", "output_type": "pyout",
"prompt_number": 5, "prompt_number": 44,
"text": [ "text": [
"[<Node: Systemic spread and propagation of a plant-pathogenic virus in European honeybees, Apis mellifera.>,\n", "[<Node: Systemic spread and propagation of a plant-pathogenic virus in European honeybees, Apis mellifera.>,\n",
" <Node: A Causal Analysis of Observed Declines in Managed Honey Bees (Apis mellifera).>,\n", " <Node: A Causal Analysis of Observed Declines in Managed Honey Bees (Apis mellifera).>,\n",
...@@ -265,7 +332,7 @@ ...@@ -265,7 +332,7 @@
] ]
} }
], ],
"prompt_number": 5 "prompt_number": 44
}, },
{ {
"cell_type": "code", "cell_type": "code",
...@@ -391,6 +458,28 @@ ...@@ -391,6 +458,28 @@
], ],
"prompt_number": 26 "prompt_number": 26
}, },
{
"cell_type": "code",
"collapsed": false,
"input": [
"Project.objects.all().delete()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 18
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Corpus.objects.all().delete()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 45
},
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment