Prod/Dev version of Gargantext. TODO: asynchronous parser has a bug.

aedde7a7 · delanoe · 1bc49d68 · aedde7a7 · aedde7a7 · aedde7a7
Commit aedde7a7 authored Apr 19, 2016 by delanoe
9 changed files
--- a/gargantext.ini
+++ b/gargantext.ini
@@ -16,7 +16,7 @@ chdir           = /srv/gargantext
 #module          = wsgi
 wsgi-file       = /srv/gargantext/gargantext/wsgi.py
 # the virtualenv
-home            = /srv/gargantext_env_3.5
+home            = /srv/env_3-5

 lazy-apps = True

@@ -58,14 +58,3 @@ max-requests = 5000
 uid = 1000
 gid = 1000

-
-
-################### other gargantext constants ###################
-[scrappers]
-# default number of docs POSTed to scrappers.views.py
-#  (at page  project > add a corpus > scan/process sample)
-QUERY_SIZE_N_DEFAULT = 1000
-
-# checked just before scrap to prevent running impossible workflows
-# even if somebody would set "query size N" manually in POST data
-QUERY_SIZE_N_MAX = 20000
--- a/gargantext/util/toolchain/__init__.py
+++ b/gargantext/util/toolchain/__init__.py
@@ -17,7 +17,7 @@ from gargantext.models    import Node
 from datetime             import datetime
 from celery               import shared_task

-@shared_task
+#@shared_task
 def parse_extract(corpus):
    # retrieve corpus from database from id
    if isinstance(corpus, int):

--- a/install/debian/asGargantua.sh
+++ b/install/debian/asGargantua.sh
 #!/bin/bash

-#MAINTAINER ISCPIF <alexandre.delanoe@iscpif.fr>
-
-git clone ssh://gitolite@delanoe.org:1979/gargantext /srv/gargantext \
-	&& cd /srv/gargantext \
-	&& git fetch origin refactoring-alex \
-	&& git checkout refactoring-alex
-
-cd /srv/gargantext/install \
-   && /usr/bin/virtualenv --py=/usr/bin/python3.5 /srv/env_3-5 \
-   && /bin/bash -c 'source /srv/env_3-5/bin/activate' \
-   && /bin/bash -c '/srv/env_3-5/bin/pip install git+https://github.com/zzzeek/sqlalchemy.git@rel_1_1' \
-   && /bin/bash -c '/srv/env_3-5/bin/pip install -r /srv/gargantext/install/python/requirements.txt' \
-
+##MAINTAINER ISCPIF <alexandre.delanoe@iscpif.fr>
+#
+#git clone ssh://gitolite@delanoe.org:1979/gargantext /srv/gargantext \
+#	&& cd /srv/gargantext \
+#	&& git fetch origin refactoring-alex \
+#	&& git checkout refactoring-alex
+#
+#cd /srv/gargantext/install \
+#   && /usr/bin/virtualenv --py=/usr/bin/python3.5 /srv/env_3-5 \
+#   && /bin/bash -c 'source /srv/env_3-5/bin/activate' \
+#   && /bin/bash -c '/srv/env_3-5/bin/pip install git+https://github.com/zzzeek/sqlalchemy.git@rel_1_1' \
+#   && /bin/bash -c '/srv/env_3-5/bin/pip install -r /srv/gargantext/install/python/requirements.txt' \
+#

 ## INSTALL MAIN DEPENDENCIES

@@ -22,9 +22,9 @@ cd /tmp && wget http://dl.gargantext.org/gargantext_lib.tar.bz2 \

 ## End of configuration
 ## be sure that postgres is running
-cd /srv/gargantext && /bin/bash -c 'source /srv/bin/env_3-5/bin/activate' \
-    && /srv/gargantext/manage.py shell < /srv/gargantext/init.py
-
+#cd /srv/gargantext && /bin/bash -c 'source /srv/bin/env_3-5/bin/activate' \
+#    && /srv/gargantext/manage.py shell < /srv/gargantext/init.py
+#

 echo "Gargantua: END of the installation of Gargantext"

--- a/install/debian/asRoot.sh
+++ b/install/debian/asRoot.sh
@@ -17,8 +17,8 @@ echo "Europe/Paris" > /etc/timezone && \
   update-locale LANG=fr_FR.UTF-8


-## PROD VERSION OF GARGANTEXt
-apt-get install -y uwsgi nginx 
+## PROD VERSION OF GARGANTEXT
+# apt-get install -y uwsgi nginx  uwsgi-plugin-python rabbitmq-server


 ### CREATE USER and adding it to sudo

--- a/install/python/requirements.txt
+++ b/install/python/requirements.txt
@@ -13,7 +13,6 @@ djangorestframework==3.3.2
 html5lib==0.9999999
 jdatetime==1.7.2
 kombu==3.0.33                  # messaging
-lxml==3.5.0
 nltk==3.1
 numpy==1.10.4
 psycopg2==2.6.1
@@ -26,6 +25,7 @@ SQLAlchemy==1.1.0b1.dev0
 ujson==1.35
 umalqurra==0.2                 # arabic calendars (?? why use ??)
 wheel==0.29.0
-pandas==0.18.0
 networkx==1.11
+pandas==0.18.0
 six==1.10.0
+lxml==3.5.0
--- a/scrapers/istex.py
+++ b/scrapers/istex.py
@@ -12,7 +12,6 @@ from gargantext.constants       import RESOURCETYPES, QUERY_SIZE_N_MAX
 from gargantext.models.nodes    import Node
 from gargantext.util.db         import session
 from gargantext.util.http       import JsonHttpResponse
-from gargantext.util.tools      import ensure_dir
 from gargantext.util.scheduling import scheduled
 from gargantext.util.toolchain  import parse_extract_indexhyperdata

@@ -123,7 +122,6 @@ def save(request , project_id):
        corpus_id = corpus.id

        print("NEW CORPUS", corpus_id)
-        ensure_dir(request.user)
        tasks = Scraper()

        for i in range(8):
@@ -144,12 +142,14 @@ def save(request , project_id):
                                   )
                dwnldsOK+=1

+        session.commit()
+        
        if dwnldsOK == 0 :
            return JsonHttpResponse(["fail"])
        ###########################
        ###########################
        try:
-            scheduled(parse_extract_indexhyperdata(corpus_id,))
+            scheduled(parse_extract_indexhyperdata)(corpus_id)
        except Exception as error:
            print('WORKFLOW ERROR')
            print(error)

--- a/scrapers/pubmed.py
+++ b/scrapers/pubmed.py
@@ -21,8 +21,8 @@ from django.http import Http404, HttpResponseRedirect, HttpResponseForbidden
 from gargantext.constants       import RESOURCETYPES, QUERY_SIZE_N_MAX
 from gargantext.models.nodes    import Node
 from gargantext.util.db         import session
+from gargantext.util.db_cache   import cache
 from gargantext.util.http       import JsonHttpResponse
-from gargantext.util.tools      import ensure_dir
 from gargantext.util.scheduling import scheduled
 from gargantext.util.toolchain  import parse_extract_indexhyperdata

@@ -74,20 +74,16 @@ def save( request , project_id ) :
    except ValueError:
        raise Http404()
    # do we have a valid project?
-    project = (session.query( Node )
-                      .filter(Node.id == project_id)
-                      .filter(Node.typename == 'PROJECT')
-              ).first()
+    
+    project = session.query( Node ).filter(Node.id == project_id).first()

    if project is None:
        raise Http404()
+    

-    # do we have a valid user?
-    user = request.user
-    if not user.is_authenticated():
-        return redirect('/auth/?next=%s' % request.path)
-    if project.user_id != user.id:
-        return HttpResponseForbidden()
+    user = cache.User[request.user.id]
+    if not user.owns(project):
+        raise HttpResponseForbidden()


    if request.method == "POST":
@@ -111,15 +107,10 @@ def save( request , project_id ) :


        # corpus node instanciation as a Django model
-        corpus = Node(
-            name = name,
-            user_id = request.user.id,
-            parent_id = project_id,
-            typename = 'CORPUS',
-                        hyperdata    = { "action"        : "Scraping data"
-                                        , "language_id" : None
-                                        }
-        )
+        corpus = project.add_child( name=name
+                                  , typename = "CORPUS"
+                                  )
+
        session.add(corpus)
        session.commit()
        corpus_id = corpus.id
@@ -130,8 +121,6 @@ def save( request , project_id ) :
        #     eFetchResult.read()  # this will output the XML... normally you write this to a XML-file.
        # """

-
-        ensure_dir(request.user)
        tasks = Scraper()

        for i in range(8):
@@ -143,20 +132,21 @@ def save( request , project_id ) :
        tasks.q.join() # wait until everything is finished

        dwnldsOK = 0
+        
        for filename in tasks.firstResults :
            print(filename)
            if filename != False:
                # add the uploaded resource to the corpus
-                corpus.add_resource( type = 3
+                corpus.add_resource( 
+                                    type = 3
                                   , path = filename
                                   )
                dwnldsOK+=1
-
+        #session.commit()
        if dwnldsOK == 0 :
            return JsonHttpResponse(["fail"])
-
        try:
-            scheduled(parse_extract_indexhyperdata(corpus_id,))
+            scheduled(parse_extract_indexhyperdata)(corpus_id)
        except Exception as error:
            print('WORKFLOW ERROR')
            print(error)

--- a/start_celery
+++ b/start_celery
 #!/bin/bash

 FILE="/var/log/gargantext/celery/$(date +%Y%m%d-%H:%M:%S).log"
-source /srv/gargantext_env_3.5/bin/activate
+source /srv/env_3-5/bin/activate
 ./manage.py celery worker -f $FILE
--- a/templates/pages/projects/overview.html
+++ b/templates/pages/projects/overview.html
@@ -74,7 +74,7 @@
                ">Manage</button>
                </li>
                {% if common_users %}
-                <a style="cursor:pointer;"><img class="share_button" data-id="{{ project.id }}" title="Share it!" width="20px" src="{% static "img/share.png" %}"></img></a>
+                <!-- <a style="cursor:pointer;"><img class="share_button" data-id="{{ project.id }}" title="Share it!" width="20px" src="{% static "img/share.png" %}"></img></a> -->
                {% endif %}

            </h3>