Commit 50eec160 authored by c24b

Add corpus method into toolchain & check into projects

parent cd2b2d52
...@@ -17,6 +17,52 @@ from gargantext.models import Node ...@@ -17,6 +17,52 @@ from gargantext.models import Node
from datetime import datetime from datetime import datetime
from celery import shared_task from celery import shared_task
def add_corpus(request, corpus=None):
    '''Record the uploaded file as a new resource of a corpus and save it.

    Two prerequisites are checked before the resource is recorded:
    - the file extension must be accepted by ``check_format`` for the
      requested resource type (``check_format`` raises ``TypeError``
      otherwise);
    - the file must be storable by ``upload`` (``upload`` raises ``OSError``
      otherwise -- presumably when the file is larger than the upload
      limit; confirm against ``upload``).

    A failed check does not raise: the resource is still recorded, but with
    ``status=False`` and the error text(s) in ``status_msg``.

    Parameters:
        request: the POST request. ``request.POST['type']`` holds the
            resource type index and ``request.FILES['file']`` the upload.
        corpus: the corpus node that receives the resource. It defaults to
            ``None`` only so that the historical one-argument call still
            parses; a corpus must be supplied.

    Returns:
        The corpus row read back from the database after the commit
        (``None`` if the query finds no such row).

    Raises:
        ValueError: if no corpus is supplied.
    '''
    if corpus is None:
        # The previous body read a name `corpus` that was never defined in
        # this function, so every call ended in a NameError. Fail with an
        # explicit message instead.
        raise ValueError(
            'add_corpus() needs the corpus node to attach the resource to'
        )

    corpus_type = int(request.POST['type'])
    uploaded_file = request.FILES['file']

    # Bound up front so that a failed check cannot leave them undefined
    # when the resource is recorded below.
    corpus_format = None
    path = None
    errors = []

    try:
        # The resource type index is what has to be checked, not the
        # builtin `type`.
        corpus_format = check_format(corpus_type, str(uploaded_file))
    except TypeError as e:
        # Incorrect extension for this resource type.
        errors.append(str(e))

    try:
        path = upload(uploaded_file)
    except OSError as e:
        errors.append(str(e))

    # The resource type goes first positionally, as in
    # `corpus.add_resource(type, path)`; repeating it as `type=` would bind
    # the same argument twice.
    # NOTE(review): assumes add_resource accepts the extra keyword
    # arguments `format`, `status` and `status_msg` -- confirm on Node.
    resource_options = {'format': corpus_format}
    if errors:
        resource_options['status'] = False
        # Keep every failure message rather than only the last one.
        resource_options['status_msg'] = '; '.join(errors)
    corpus.add_resource(corpus_type, path, **resource_options)

    session.add(corpus)
    session.commit()
    return session.query(Node).filter(Node.id == corpus.id).first()
#@shared_task #@shared_task
def parse_extract(corpus): def parse_extract(corpus):
# retrieve corpus from database from id # retrieve corpus from database from id
......
...@@ -7,6 +7,7 @@ from gargantext.models import * ...@@ -7,6 +7,7 @@ from gargantext.models import *
from gargantext.constants import * from gargantext.constants import *
from gargantext.util.scheduling import scheduled from gargantext.util.scheduling import scheduled
from gargantext.util.toolchain import parse_extract_indexhyperdata from gargantext.util.toolchain import parse_extract_indexhyperdata
from gargantext.util.toolchain import add_corpus
from datetime import datetime from datetime import datetime
from collections import defaultdict from collections import defaultdict
...@@ -59,7 +60,7 @@ def overview(request): ...@@ -59,7 +60,7 @@ def overview(request):
class NewCorpusForm(forms.Form): class NewCorpusForm(forms.Form):
'''c24b: je dirai que je ne sais pas quand il sert ce truc''' '''OK: add corpus Form (NIY)'''
type = forms.ChoiceField( type = forms.ChoiceField(
choices = enumerate(resource_type['name'] for resource_type in RESOURCETYPES), choices = enumerate(resource_type['name'] for resource_type in RESOURCETYPES),
widget = forms.Select(attrs={ 'onchange' :'CustomForSelect( $("option:selected", this).text() );'}) widget = forms.Select(attrs={ 'onchange' :'CustomForSelect( $("option:selected", this).text() );'})
...@@ -91,86 +92,36 @@ def project(request, project_id): ...@@ -91,86 +92,36 @@ def project(request, project_id):
if not user.owns(project): if not user.owns(project):
raise HttpResponseForbidden() raise HttpResponseForbidden()
# add a new corpus # add a new corpus into Node Project > Node Corpus > Ressource
if request.method == 'POST': if request.method == 'POST':
corpus = project.add_child( corpus = project.add_child(
name = request.POST['name'], name = request.POST['name'],
typename = 'CORPUS', typename = 'CORPUS',
) )
#check type and name corpus = add_corpus(request)
print(request.POST)
type = int(request.POST['type']) if corpus.status:
try: # parse_extract: fileparsing -> ngram extraction -> lists
format = check_format(type, str(request.FILES['file'])) scheduled(parse_extract_indexhyperdata)(corpus.id)
except TypeError as e:
return render( return render(
template_name = 'pages/projects/overview.html', template_name = 'pages/projects/wait.html',
request = request, request = request,
context = { context = {
'debug': True, 'user' : request.user,
#'date': datetime.now(), 'project': project,
# projects owned by the user
#'number': user_projects.count(),
#'projects': user_projects,
# projects owned by the user's contacts
#'common_users': (contact for contact, projects in contacts_projects),
#'common_projects': sum((projects for contact, projects in contacts_projects), []),
'error_msg': str(e),
}, },
) )
try:
path = upload(request.FILES['file'])
except OSError:
return render(
template_name = 'pages/projects/overview.html',
request = request,
context = {
'debug': True,
'date': datetime.now(),
# projects owned by the user
'number': user_projects.count(),
'projects': user_projects,
# projects owned by the user's contacts
'common_users': (contact for contact, projects in contacts_projects),
'common_projects': sum((projects for contact, projects in contacts_projects), []),
'error_msg':"File uploaded is two heavy > 1G ",
},
)
corpus.add_resource(
type,
path
)
#except Exception as error:
session.add(corpus)
session.commit()
# parse_extract: fileparsing -> ngram extraction -> lists
scheduled(parse_extract_indexhyperdata)(corpus.id)
return render(
template_name = 'pages/projects/wait.html',
request = request,
context = {
'user' : request.user,
'project': project,
},
)
# list all the corpora within this project # list all the corpora within this project
corpora = project.children('CORPUS', order=True).all() corpora = project.children('CORPUS', order=True).all()
print(corpora) #print(corpora)
sourcename2corpora = defaultdict(list) sourcename2corpora = defaultdict(list)
for corpus in corpora: for corpus in corpora:
# we only consider the first resource of the corpus to determine its type # we only consider the first resource of the corpus to determine its type
resources = corpus.resources() resources = corpus.resources()
if len(resources): if len(resources):
resource = resources[0] resource = resources[0]
##here map from RESSOURCES_TYPES_ID and NOT NAME
resource_type_name = RESOURCETYPES[resource['type']]['name'] resource_type_name = RESOURCETYPES[resource['type']]['name']
resource_type_accepted_formats = RESOURCETYPES[resource['type']]['accepted_formats'] resource_type_accepted_formats = RESOURCETYPES[resource['type']]['accepted_formats']
else: else:
......
...@@ -8,6 +8,4 @@ su gargantua ...@@ -8,6 +8,4 @@ su gargantua
#activate the virtualenv #activate the virtualenv
source /srv/env_3-5/bin/activate source /srv/env_3-5/bin/activate
#go to gargantext srv #go to gargantext srv
cd /srv/gargantext/ cd /srv/gargantext/manage.py runserver 0.0.0.0:8000
#run the server
/manage.py runserver 0.0.0.0:8000
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment