Add corpus method into toolchain & check into projects

50eec160 · c24b · cd2b2d52 · 50eec160 · 50eec160 · 50eec160
Commit 50eec160 authored May 13, 2016 by c24b
Hide whitespace changes
Inline Side-by-side

Showing with 59 additions and 64 deletions

__init__.py gargantext/util/toolchain/__init__.py +46 -0

projects.py gargantext/views/pages/projects.py +12 -61

run_docker.md install/run_docker.md +1 -3

No files found.
--- a/gargantext/util/toolchain/__init__.py
+++ b/gargantext/util/toolchain/__init__.py
@@ -17,6 +17,52 @@ from gargantext.models    import Node
 from datetime             import datetime
 from celery               import shared_task
+def add_corpus(request, corpus=None):
+    '''Attach the file uploaded with `request` to `corpus` as a new resource.
+
+    Two prerequisites are verified before the resource is recorded:
+    - file extension must comply with ACCEPTED_EXTENSIONS in
+      RESOURCES_TYPE[corpus.type] (check_format raises TypeError otherwise)
+    - file size must not exceed UPLOAD_LIMIT set in constants
+      (upload raises OSError otherwise)
+    A failed check does not raise: the resource is still recorded, flagged
+    with status=False and the error text in status_msg.
+
+    request: the POST request; reads request.POST['type'] (resource type id)
+             and request.FILES['file'] (the uploaded file).
+    corpus:  the corpus node that receives the resource. It defaults to None
+             only to keep the one-argument call form valid; it must be given.
+
+    Returns the corpus Node re-read from the database after the commit.
+    Raises ValueError when no corpus is given.
+    '''
+    if corpus is None:
+        # fail before any upload work rather than midway through
+        raise ValueError('add_corpus() requires the corpus to attach the resource to')
+    corpus_status = True
+    corpus_status_msg = None
+    # both stay None when the corresponding step fails
+    corpus_format = None
+    path = None
+    # resource type id selected in the form
+    corpus_type = int(request.POST['type'])
+    uploaded_file = request.FILES['file']
+    try:
+        # str(uploaded_file) is the value check_format inspects for the extension
+        corpus_format = check_format(corpus_type, str(uploaded_file))
+    except TypeError as e:
+        # incorrect extension for this resource type
+        corpus_status = False
+        corpus_status_msg = str(e)
+    else:
+        # only store the file once its extension has been accepted
+        try:
+            path = upload(uploaded_file)
+        except OSError as e:
+            corpus_status = False
+            corpus_status_msg = str(e)
+    resource_options = {'format': corpus_format}
+    if not corpus_status:
+        resource_options['status'] = corpus_status
+        resource_options['status_msg'] = corpus_status_msg
+    # NOTE(review): confirm add_resource accepts format/status/status_msg keywords
+    corpus.add_resource(corpus_type, path, **resource_options)
+    session.add(corpus)
+    session.commit()
+    return session.query(Node).filter(Node.id == corpus.id).first()
 #@shared_task
 def parse_extract(corpus):
    # retrieve corpus from database from id

--- a/gargantext/views/pages/projects.py
+++ b/gargantext/views/pages/projects.py
@@ -7,6 +7,7 @@ from gargantext.models import *
 from gargantext.constants import *
 from gargantext.util.scheduling import scheduled
 from gargantext.util.toolchain import parse_extract_indexhyperdata
+from gargantext.util.toolchain import add_corpus
 from datetime import datetime
 from collections import defaultdict
@@ -59,7 +60,7 @@ def overview(request):
 class NewCorpusForm(forms.Form):
-    '''c24b: je dirai que je ne sais pas quand il sert ce truc'''
+    '''OK: add corpus Form (NIY)'''
    type = forms.ChoiceField(
        choices = enumerate(resource_type['name'] for resource_type in RESOURCETYPES),
        widget = forms.Select(attrs={ 'onchange' :'CustomForSelect( $("option:selected", this).text() );'})
@@ -91,86 +92,36 @@ def project(request, project_id):
    if not user.owns(project):
        raise HttpResponseForbidden()
-    # add a new corpus
+    # add a new corpus into Node Project > Node Corpus > Ressource
    if request.method == 'POST':
        corpus = project.add_child(
            name = request.POST['name'],
            typename = 'CORPUS',
        )
-        #check type and name
+        corpus = add_corpus(request)
-        print(request.POST)
-        type = int(request.POST['type'])
+        if corpus.status:
-        try:
+            # parse_extract: fileparsing -> ngram extraction -> lists
-            format = check_format(type, str(request.FILES['file']))
+            scheduled(parse_extract_indexhyperdata)(corpus.id)
-        except TypeError as e:
            return render(
-                template_name = 'pages/projects/overview.html',
+                template_name = 'pages/projects/wait.html',
                request = request,
                context = {
-                'debug': True,
+                'user'   : request.user,
-                #'date': datetime.now(),
+                'project': project,
-                # projects owned by the user
-                #'number': user_projects.count(),
-                #'projects': user_projects,
-                # projects owned by the user's contacts
-                #'common_users': (contact for contact, projects in contacts_projects),
-                #'common_projects': sum((projects for contact, projects in contacts_projects), []),
-                'error_msg': str(e),
                },
            )
-        try:
-            path = upload(request.FILES['file'])
-        except OSError:
-            return render(
-                template_name = 'pages/projects/overview.html',
-                request = request,
-                context = {
-                'debug': True,
-                'date': datetime.now(),
-                # projects owned by the user
-                'number': user_projects.count(),
-                'projects': user_projects,
-                # projects owned by the user's contacts
-                'common_users': (contact for contact, projects in contacts_projects),
-                'common_projects': sum((projects for contact, projects in contacts_projects), []),
-                'error_msg':"File uploaded is two heavy > 1G ",
-                },
-            )
-        corpus.add_resource(
-            type,
-            path
-        )
-        #except Exception as error:
-        session.add(corpus)
-        session.commit()
-        # parse_extract: fileparsing -> ngram extraction -> lists
-        scheduled(parse_extract_indexhyperdata)(corpus.id)
-        return render(
-            template_name = 'pages/projects/wait.html',
-            request = request,
-            context = {
-            'user'   : request.user,
-            'project': project,
-            },
-        )
    # list all the corpora within this project
    corpora = project.children('CORPUS', order=True).all()
-    print(corpora)
+    #print(corpora)
    sourcename2corpora = defaultdict(list)
    for corpus in corpora:
        # we only consider the first resource of the corpus to determine its type
        resources = corpus.resources()
        if len(resources):
            resource = resources[0]
+            ##here map from RESSOURCES_TYPES_ID and NOT NAME
            resource_type_name = RESOURCETYPES[resource['type']]['name']
            resource_type_accepted_formats = RESOURCETYPES[resource['type']]['accepted_formats']
        else:

--- a/install/run.sh
+++ b/install/run.sh
@@ -8,6 +8,4 @@ su gargantua
 #activate the virtualenv
 source /srv/env_3-5/bin/activate
 #go to gargantext srv
-cd /srv/gargantext/
+#go to the project root, then start the server with the virtualenv's python
+cd /srv/gargantext/
+python manage.py runserver 0.0.0.0:8000
-#run the server
-/manage.py runserver 0.0.0.0:8000