Commit f64176b7 authored by delanoe's avatar delanoe

En push: le kit d'installation en version prod de Gargantext.

Good news: grâce à la factorisation, le problème des tâches celery
lancées en parallèle n'est plus.

Bravo à toute l'équipe et en particulier à Mathieu Rodic et Romain
Loth pour leur travail coordonné.

:)

Nous entamons la dernière ligne droite pour la factorisation.

Modifications qui seront validées :
	nouveau fichier : gargantext.ini
	modifié :         gargantext/settings.py
	modifié :         gargantext/util/scheduling.py
	modifié :         gargantext/util/toolchain/__init__.py
	modifié :         gargantext/util/toolchain/ngrams_extraction.py
	modifié :         gargantext/views/pages/projects.py
	renommé :         README.md -> install/README.md
	renommé :         requirements.txt -> install/requirements.txt
	nouveau fichier : start_celery
	nouveau fichier : start_uwsgi
parent f5027062
# gargantext.ini file (uWSGI configuration + other Gargantext constants)
[uwsgi]
# uwsgi --vacuum --socket monsite/mysite.sock --wsgi-file monsite/wsgi.py --chmod-socket=666 --home=/srv/alexandre.delanoe/env --chdir=/var/www/www/alexandre/monsite --env
env = DJANGO_SETTINGS_MODULE=gargantext.settings
#module = django.core.handlers.wsgi:WSGIHandler()
plugins = python35
# the base directory
chdir = /srv/gargantext
# Django's wsgi file
#module = wsgi
wsgi-file = /srv/gargantext/gargantext/wsgi.py
# the virtualenv
home = /srv/gargantext_env_3.5
lazy-apps = True
# master
master = true
# maximum number of processes
processes = 10
# the socket (use the full path to be safe)
socket = /tmp/gargantext.sock
threads = 4
# with appropriate permissions - *may* be needed
chmod-socket = 666
# clear environment on exit
vacuum = true
pidfile = /tmp/gargantext.pid
# touch /tmp/gargantext.reload to reload configuration (after git pull for instance)
touch-reload = /tmp/gargantext.reload
# respawn processes taking more than 120 seconds
harakiri = 120
# limit the project to 128 MB
#limit-as = 128
# respawn processes after serving 5000 requests
max-requests = 5000
# background the process & log
#daemonize = /var/log/uwsgi/gargantext.log
uid = 1000
gid = 1000
################### other gargantext constants ###################
[scrappers]
# default number of docs POSTed to scrappers.views.py
# (at page project > add a corpus > scan/process sample)
QUERY_SIZE_N_DEFAULT = 1000
# checked just before scrap to prevent running impossible workflows
# even if somebody would set "query size N" manually in POST data
QUERY_SIZE_N_MAX = 20000
......@@ -37,7 +37,8 @@ def scheduled_celery(func):
"""Provides a decorator to schedule a task with Celery.
"""
def go(*args, **kwargs):
shared_task(func).apply_async(args=args, kwargs=kwargs)
func.apply_async(args=args, kwargs=kwargs)
#shared_task(func).apply_async(args=args, kwargs=kwargs)
return go
......
from .parsing import parse
from .ngrams_extraction import extract_ngrams
from .hyperdata_indexing import index_hyperdata
from .parsing import parse
from .ngrams_extraction import extract_ngrams
from .hyperdata_indexing import index_hyperdata
# in usual run order
from .list_stop import do_stoplist
......@@ -11,11 +11,13 @@ from .metric_specificity import compute_specificity
from .list_map import do_maplist # TEST
from .ngram_groups import compute_groups
from gargantext.util.db import session
from gargantext.models import Node
from gargantext.util.db import session
from gargantext.models import Node
from datetime import datetime
from datetime import datetime
from celery import shared_task
@shared_task
def parse_extract(corpus):
# retrieve corpus from database from id
if isinstance(corpus, int):
......@@ -37,6 +39,7 @@ def parse_extract(corpus):
extract_ngrams(corpus)
print('CORPUS #%d: extracted ngrams' % (corpus.id))
@shared_task
def parse_extract_indexhyperdata(corpus):
# retrieve corpus from database from id
if isinstance(corpus, int):
......
......@@ -5,6 +5,7 @@ from gargantext.util.ngramsextractors import ngramsextractors
from collections import defaultdict
from gargantext.util.scheduling import scheduled
def _integrate_associations(nodes_ngrams_count, ngrams_data, db, cursor):
print('INTEGRATE')
......
......@@ -97,7 +97,6 @@ def project(request, project_id):
# parse_extract: fileparsing -> ngram extraction -> lists
scheduled(parse_extract_indexhyperdata)(corpus.id)
#scheduled(parse_extract)(corpus.id)
# corpora within this project
corpora = project.children('CORPUS').all()
......
#!/bin/bash
# Start a Celery worker through Django's manage.py.
#
# The worker logs to a new timestamped file under /var/log/gargantext/celery/
# (one file per launch). NOTE: manage.py is referenced with a relative path,
# so this script has to be run from the directory that contains it.

# One log file per launch, named after the start date and time.
FILE="/var/log/gargantext/celery/$(date +%Y%m%d-%H:%M:%S).log"

# Activate the project's virtualenv so that manage.py runs with its packages.
source /srv/gargantext_env_3.5/bin/activate

# Quote the expansion so the path is always passed as a single argument.
./manage.py celery worker -f "$FILE"
#!/bin/bash
# Start uWSGI with the gargantext.ini configuration.
#
# uWSGI logs to a new timestamped file under /var/log/gargantext/uwsgi/
# (one file per launch). NOTE: gargantext.ini is referenced with a relative
# path, so this script has to be run from the directory that contains it.

# One log file per launch, named after the start date and time.
FILE="/var/log/gargantext/uwsgi/$(date +%Y%m%d-%H:%M:%S).log"
#touch /var/log/gargantext/uwsgi/$FILE && sudo

# Quote the expansion so the path is always passed as a single argument.
uwsgi gargantext.ini --logto "$FILE"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment