Commit aedde7a7 authored by delanoe

Prod/Dev version of Gargantext. TODO: asynchronous parser has a bug.

parent 1bc49d68
......@@ -16,7 +16,7 @@ chdir = /srv/gargantext
#module = wsgi
wsgi-file = /srv/gargantext/gargantext/wsgi.py
# the virtualenv
home = /srv/gargantext_env_3.5
home = /srv/env_3-5
lazy-apps = True
......@@ -58,14 +58,3 @@ max-requests = 5000
uid = 1000
gid = 1000
################### other gargantext constants ###################
[scrappers]
# default number of docs POSTed to scrappers.views.py
# (at page project > add a corpus > scan/process sample)
QUERY_SIZE_N_DEFAULT = 1000
# checked just before scrap to prevent running impossible workflows
# even if somebody would set "query size N" manually in POST data
QUERY_SIZE_N_MAX = 20000
......@@ -17,7 +17,7 @@ from gargantext.models import Node
from datetime import datetime
from celery import shared_task
@shared_task
#@shared_task
def parse_extract(corpus):
# retrieve corpus from database from id
if isinstance(corpus, int):
......
#!/bin/bash
#MAINTAINER ISCPIF <alexandre.delanoe@iscpif.fr>
git clone ssh://gitolite@delanoe.org:1979/gargantext /srv/gargantext \
&& cd /srv/gargantext \
&& git fetch origin refactoring-alex \
&& git checkout refactoring-alex
cd /srv/gargantext/install \
&& /usr/bin/virtualenv --py=/usr/bin/python3.5 /srv/env_3-5 \
&& /bin/bash -c 'source /srv/env_3-5/bin/activate' \
&& /bin/bash -c '/srv/env_3-5/bin/pip install git+https://github.com/zzzeek/sqlalchemy.git@rel_1_1' \
&& /bin/bash -c '/srv/env_3-5/bin/pip install -r /srv/gargantext/install/python/requirements.txt' \
##MAINTAINER ISCPIF <alexandre.delanoe@iscpif.fr>
#
#git clone ssh://gitolite@delanoe.org:1979/gargantext /srv/gargantext \
# && cd /srv/gargantext \
# && git fetch origin refactoring-alex \
# && git checkout refactoring-alex
#
#cd /srv/gargantext/install \
# && /usr/bin/virtualenv --py=/usr/bin/python3.5 /srv/env_3-5 \
# && /bin/bash -c 'source /srv/env_3-5/bin/activate' \
# && /bin/bash -c '/srv/env_3-5/bin/pip install git+https://github.com/zzzeek/sqlalchemy.git@rel_1_1' \
# && /bin/bash -c '/srv/env_3-5/bin/pip install -r /srv/gargantext/install/python/requirements.txt' \
#
## INSTALL MAIN DEPENDENCIES
......@@ -22,9 +22,9 @@ cd /tmp && wget http://dl.gargantext.org/gargantext_lib.tar.bz2 \
## End of configuration
## be sure that postgres is running
cd /srv/gargantext && /bin/bash -c 'source /srv/bin/env_3-5/bin/activate' \
&& /srv/gargantext/manage.py shell < /srv/gargantext/init.py
#cd /srv/gargantext && /bin/bash -c 'source /srv/bin/env_3-5/bin/activate' \
# && /srv/gargantext/manage.py shell < /srv/gargantext/init.py
#
echo "Gargantua: END of the installation of Gargantext"
......@@ -17,8 +17,8 @@ echo "Europe/Paris" > /etc/timezone && \
update-locale LANG=fr_FR.UTF-8
## PROD VERSION OF GARGANTEXt
apt-get install -y uwsgi nginx
## PROD VERSION OF GARGANTEXT
# apt-get install -y uwsgi nginx uwsgi-plugin-python rabbitmq-server
### CREATE USER and adding it to sudo
......
......@@ -13,7 +13,6 @@ djangorestframework==3.3.2
html5lib==0.9999999
jdatetime==1.7.2
kombu==3.0.33 # messaging
lxml==3.5.0
nltk==3.1
numpy==1.10.4
psycopg2==2.6.1
......@@ -26,6 +25,7 @@ SQLAlchemy==1.1.0b1.dev0
ujson==1.35
umalqurra==0.2 # arabic calendars (?? why use ??)
wheel==0.29.0
pandas==0.18.0
networkx==1.11
pandas==0.18.0
six==1.10.0
lxml==3.5.0
......@@ -12,7 +12,6 @@ from gargantext.constants import RESOURCETYPES, QUERY_SIZE_N_MAX
from gargantext.models.nodes import Node
from gargantext.util.db import session
from gargantext.util.http import JsonHttpResponse
from gargantext.util.tools import ensure_dir
from gargantext.util.scheduling import scheduled
from gargantext.util.toolchain import parse_extract_indexhyperdata
......@@ -123,7 +122,6 @@ def save(request , project_id):
corpus_id = corpus.id
print("NEW CORPUS", corpus_id)
ensure_dir(request.user)
tasks = Scraper()
for i in range(8):
......@@ -144,12 +142,14 @@ def save(request , project_id):
)
dwnldsOK+=1
session.commit()
if dwnldsOK == 0 :
return JsonHttpResponse(["fail"])
###########################
###########################
try:
scheduled(parse_extract_indexhyperdata(corpus_id,))
scheduled(parse_extract_indexhyperdata)(corpus_id)
except Exception as error:
print('WORKFLOW ERROR')
print(error)
......
......@@ -21,8 +21,8 @@ from django.http import Http404, HttpResponseRedirect, HttpResponseForbidden
from gargantext.constants import RESOURCETYPES, QUERY_SIZE_N_MAX
from gargantext.models.nodes import Node
from gargantext.util.db import session
from gargantext.util.db_cache import cache
from gargantext.util.http import JsonHttpResponse
from gargantext.util.tools import ensure_dir
from gargantext.util.scheduling import scheduled
from gargantext.util.toolchain import parse_extract_indexhyperdata
......@@ -74,20 +74,16 @@ def save( request , project_id ) :
except ValueError:
raise Http404()
# do we have a valid project?
project = (session.query( Node )
.filter(Node.id == project_id)
.filter(Node.typename == 'PROJECT')
).first()
project = session.query( Node ).filter(Node.id == project_id).first()
if project is None:
raise Http404()
# do we have a valid user?
user = request.user
if not user.is_authenticated():
return redirect('/auth/?next=%s' % request.path)
if project.user_id != user.id:
return HttpResponseForbidden()
user = cache.User[request.user.id]
if not user.owns(project):
raise HttpResponseForbidden()
if request.method == "POST":
......@@ -111,15 +107,10 @@ def save( request , project_id ) :
# corpus node instanciation as a Django model
corpus = Node(
name = name,
user_id = request.user.id,
parent_id = project_id,
typename = 'CORPUS',
hyperdata = { "action" : "Scraping data"
, "language_id" : None
}
)
corpus = project.add_child( name=name
, typename = "CORPUS"
)
session.add(corpus)
session.commit()
corpus_id = corpus.id
......@@ -130,8 +121,6 @@ def save( request , project_id ) :
# eFetchResult.read() # this will output the XML... normally you write this to a XML-file.
# """
ensure_dir(request.user)
tasks = Scraper()
for i in range(8):
......@@ -143,20 +132,21 @@ def save( request , project_id ) :
tasks.q.join() # wait until everything is finished
dwnldsOK = 0
for filename in tasks.firstResults :
print(filename)
if filename != False:
# add the uploaded resource to the corpus
corpus.add_resource( type = 3
corpus.add_resource(
type = 3
, path = filename
)
dwnldsOK+=1
#session.commit()
if dwnldsOK == 0 :
return JsonHttpResponse(["fail"])
try:
scheduled(parse_extract_indexhyperdata(corpus_id,))
scheduled(parse_extract_indexhyperdata)(corpus_id)
except Exception as error:
print('WORKFLOW ERROR')
print(error)
......
#!/bin/bash
FILE="/var/log/gargantext/celery/$(date +%Y%m%d-%H:%M:%S).log"
source /srv/gargantext_env_3.5/bin/activate
source /srv/env_3-5/bin/activate
./manage.py celery worker -f $FILE
......@@ -74,7 +74,7 @@
">Manage</button>
</li>
{% if common_users %}
<a style="cursor:pointer;"><img class="share_button" data-id="{{ project.id }}" title="Share it!" width="20px" src="{% static "img/share.png" %}"></img></a>
<!-- <a style="cursor:pointer;"><img class="share_button" data-id="{{ project.id }}" title="Share it!" width="20px" src="{% static "img/share.png" %}"></img></a> -->
{% endif %}
</h3>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment