Commit aedde7a7 authored by delanoe

Prod/Dev version of Gargantext. TODO: asynchronous parser has a bug.

parent 1bc49d68
...@@ -16,7 +16,7 @@ chdir = /srv/gargantext ...@@ -16,7 +16,7 @@ chdir = /srv/gargantext
#module = wsgi #module = wsgi
wsgi-file = /srv/gargantext/gargantext/wsgi.py wsgi-file = /srv/gargantext/gargantext/wsgi.py
# the virtualenv # the virtualenv
home = /srv/gargantext_env_3.5 home = /srv/env_3-5
lazy-apps = True lazy-apps = True
...@@ -58,14 +58,3 @@ max-requests = 5000 ...@@ -58,14 +58,3 @@ max-requests = 5000
uid = 1000 uid = 1000
gid = 1000 gid = 1000
################### other gargantext constants ###################
[scrappers]
# default number of docs POSTed to scrappers.views.py
# (at page project > add a corpus > scan/process sample)
QUERY_SIZE_N_DEFAULT = 1000
# checked just before scrap to prevent running impossible workflows
# even if somebody would set "query size N" manually in POST data
QUERY_SIZE_N_MAX = 20000
...@@ -17,7 +17,7 @@ from gargantext.models import Node ...@@ -17,7 +17,7 @@ from gargantext.models import Node
from datetime import datetime from datetime import datetime
from celery import shared_task from celery import shared_task
@shared_task #@shared_task
def parse_extract(corpus): def parse_extract(corpus):
# retrieve corpus from database from id # retrieve corpus from database from id
if isinstance(corpus, int): if isinstance(corpus, int):
......
#!/bin/bash #!/bin/bash
#MAINTAINER ISCPIF <alexandre.delanoe@iscpif.fr> ##MAINTAINER ISCPIF <alexandre.delanoe@iscpif.fr>
#
git clone ssh://gitolite@delanoe.org:1979/gargantext /srv/gargantext \ #git clone ssh://gitolite@delanoe.org:1979/gargantext /srv/gargantext \
&& cd /srv/gargantext \ # && cd /srv/gargantext \
&& git fetch origin refactoring-alex \ # && git fetch origin refactoring-alex \
&& git checkout refactoring-alex # && git checkout refactoring-alex
#
cd /srv/gargantext/install \ #cd /srv/gargantext/install \
&& /usr/bin/virtualenv --py=/usr/bin/python3.5 /srv/env_3-5 \ # && /usr/bin/virtualenv --py=/usr/bin/python3.5 /srv/env_3-5 \
&& /bin/bash -c 'source /srv/env_3-5/bin/activate' \ # && /bin/bash -c 'source /srv/env_3-5/bin/activate' \
&& /bin/bash -c '/srv/env_3-5/bin/pip install git+https://github.com/zzzeek/sqlalchemy.git@rel_1_1' \ # && /bin/bash -c '/srv/env_3-5/bin/pip install git+https://github.com/zzzeek/sqlalchemy.git@rel_1_1' \
&& /bin/bash -c '/srv/env_3-5/bin/pip install -r /srv/gargantext/install/python/requirements.txt' \ # && /bin/bash -c '/srv/env_3-5/bin/pip install -r /srv/gargantext/install/python/requirements.txt' \
#
## INSTALL MAIN DEPENDENCIES ## INSTALL MAIN DEPENDENCIES
...@@ -22,9 +22,9 @@ cd /tmp && wget http://dl.gargantext.org/gargantext_lib.tar.bz2 \ ...@@ -22,9 +22,9 @@ cd /tmp && wget http://dl.gargantext.org/gargantext_lib.tar.bz2 \
## End of configuration ## End of configuration
## be sure that postgres is running ## be sure that postgres is running
cd /srv/gargantext && /bin/bash -c 'source /srv/bin/env_3-5/bin/activate' \ #cd /srv/gargantext && /bin/bash -c 'source /srv/bin/env_3-5/bin/activate' \
&& /srv/gargantext/manage.py shell < /srv/gargantext/init.py # && /srv/gargantext/manage.py shell < /srv/gargantext/init.py
#
echo "Gargantua: END of the installation of Gargantext" echo "Gargantua: END of the installation of Gargantext"
...@@ -17,8 +17,8 @@ echo "Europe/Paris" > /etc/timezone && \ ...@@ -17,8 +17,8 @@ echo "Europe/Paris" > /etc/timezone && \
update-locale LANG=fr_FR.UTF-8 update-locale LANG=fr_FR.UTF-8
## PROD VERSION OF GARGANTEXt ## PROD VERSION OF GARGANTEXT
apt-get install -y uwsgi nginx # apt-get install -y uwsgi nginx uwsgi-plugin-python rabbitmq-server
### CREATE USER and adding it to sudo ### CREATE USER and adding it to sudo
......
...@@ -13,7 +13,6 @@ djangorestframework==3.3.2 ...@@ -13,7 +13,6 @@ djangorestframework==3.3.2
html5lib==0.9999999 html5lib==0.9999999
jdatetime==1.7.2 jdatetime==1.7.2
kombu==3.0.33 # messaging kombu==3.0.33 # messaging
lxml==3.5.0
nltk==3.1 nltk==3.1
numpy==1.10.4 numpy==1.10.4
psycopg2==2.6.1 psycopg2==2.6.1
...@@ -26,6 +25,7 @@ SQLAlchemy==1.1.0b1.dev0 ...@@ -26,6 +25,7 @@ SQLAlchemy==1.1.0b1.dev0
ujson==1.35 ujson==1.35
umalqurra==0.2 # arabic calendars (?? why use ??) umalqurra==0.2 # arabic calendars (?? why use ??)
wheel==0.29.0 wheel==0.29.0
pandas==0.18.0
networkx==1.11 networkx==1.11
pandas==0.18.0
six==1.10.0 six==1.10.0
lxml==3.5.0
...@@ -12,7 +12,6 @@ from gargantext.constants import RESOURCETYPES, QUERY_SIZE_N_MAX ...@@ -12,7 +12,6 @@ from gargantext.constants import RESOURCETYPES, QUERY_SIZE_N_MAX
from gargantext.models.nodes import Node from gargantext.models.nodes import Node
from gargantext.util.db import session from gargantext.util.db import session
from gargantext.util.http import JsonHttpResponse from gargantext.util.http import JsonHttpResponse
from gargantext.util.tools import ensure_dir
from gargantext.util.scheduling import scheduled from gargantext.util.scheduling import scheduled
from gargantext.util.toolchain import parse_extract_indexhyperdata from gargantext.util.toolchain import parse_extract_indexhyperdata
...@@ -123,7 +122,6 @@ def save(request , project_id): ...@@ -123,7 +122,6 @@ def save(request , project_id):
corpus_id = corpus.id corpus_id = corpus.id
print("NEW CORPUS", corpus_id) print("NEW CORPUS", corpus_id)
ensure_dir(request.user)
tasks = Scraper() tasks = Scraper()
for i in range(8): for i in range(8):
...@@ -144,12 +142,14 @@ def save(request , project_id): ...@@ -144,12 +142,14 @@ def save(request , project_id):
) )
dwnldsOK+=1 dwnldsOK+=1
session.commit()
if dwnldsOK == 0 : if dwnldsOK == 0 :
return JsonHttpResponse(["fail"]) return JsonHttpResponse(["fail"])
########################### ###########################
########################### ###########################
try: try:
scheduled(parse_extract_indexhyperdata(corpus_id,)) scheduled(parse_extract_indexhyperdata)(corpus_id)
except Exception as error: except Exception as error:
print('WORKFLOW ERROR') print('WORKFLOW ERROR')
print(error) print(error)
......
...@@ -21,8 +21,8 @@ from django.http import Http404, HttpResponseRedirect, HttpResponseForbidden ...@@ -21,8 +21,8 @@ from django.http import Http404, HttpResponseRedirect, HttpResponseForbidden
from gargantext.constants import RESOURCETYPES, QUERY_SIZE_N_MAX from gargantext.constants import RESOURCETYPES, QUERY_SIZE_N_MAX
from gargantext.models.nodes import Node from gargantext.models.nodes import Node
from gargantext.util.db import session from gargantext.util.db import session
from gargantext.util.db_cache import cache
from gargantext.util.http import JsonHttpResponse from gargantext.util.http import JsonHttpResponse
from gargantext.util.tools import ensure_dir
from gargantext.util.scheduling import scheduled from gargantext.util.scheduling import scheduled
from gargantext.util.toolchain import parse_extract_indexhyperdata from gargantext.util.toolchain import parse_extract_indexhyperdata
...@@ -74,20 +74,16 @@ def save( request , project_id ) : ...@@ -74,20 +74,16 @@ def save( request , project_id ) :
except ValueError: except ValueError:
raise Http404() raise Http404()
# do we have a valid project? # do we have a valid project?
project = (session.query( Node )
.filter(Node.id == project_id) project = session.query( Node ).filter(Node.id == project_id).first()
.filter(Node.typename == 'PROJECT')
).first()
if project is None: if project is None:
raise Http404() raise Http404()
# do we have a valid user? user = cache.User[request.user.id]
user = request.user if not user.owns(project):
if not user.is_authenticated(): raise HttpResponseForbidden()
return redirect('/auth/?next=%s' % request.path)
if project.user_id != user.id:
return HttpResponseForbidden()
if request.method == "POST": if request.method == "POST":
...@@ -111,15 +107,10 @@ def save( request , project_id ) : ...@@ -111,15 +107,10 @@ def save( request , project_id ) :
# corpus node instanciation as a Django model # corpus node instanciation as a Django model
corpus = Node( corpus = project.add_child( name=name
name = name, , typename = "CORPUS"
user_id = request.user.id, )
parent_id = project_id,
typename = 'CORPUS',
hyperdata = { "action" : "Scraping data"
, "language_id" : None
}
)
session.add(corpus) session.add(corpus)
session.commit() session.commit()
corpus_id = corpus.id corpus_id = corpus.id
...@@ -130,8 +121,6 @@ def save( request , project_id ) : ...@@ -130,8 +121,6 @@ def save( request , project_id ) :
# eFetchResult.read() # this will output the XML... normally you write this to a XML-file. # eFetchResult.read() # this will output the XML... normally you write this to a XML-file.
# """ # """
ensure_dir(request.user)
tasks = Scraper() tasks = Scraper()
for i in range(8): for i in range(8):
...@@ -143,20 +132,21 @@ def save( request , project_id ) : ...@@ -143,20 +132,21 @@ def save( request , project_id ) :
tasks.q.join() # wait until everything is finished tasks.q.join() # wait until everything is finished
dwnldsOK = 0 dwnldsOK = 0
for filename in tasks.firstResults : for filename in tasks.firstResults :
print(filename) print(filename)
if filename != False: if filename != False:
# add the uploaded resource to the corpus # add the uploaded resource to the corpus
corpus.add_resource( type = 3 corpus.add_resource(
type = 3
, path = filename , path = filename
) )
dwnldsOK+=1 dwnldsOK+=1
#session.commit()
if dwnldsOK == 0 : if dwnldsOK == 0 :
return JsonHttpResponse(["fail"]) return JsonHttpResponse(["fail"])
try: try:
scheduled(parse_extract_indexhyperdata(corpus_id,)) scheduled(parse_extract_indexhyperdata)(corpus_id)
except Exception as error: except Exception as error:
print('WORKFLOW ERROR') print('WORKFLOW ERROR')
print(error) print(error)
......
#!/bin/bash #!/bin/bash
FILE="/var/log/gargantext/celery/$(date +%Y%m%d-%H:%M:%S).log" FILE="/var/log/gargantext/celery/$(date +%Y%m%d-%H:%M:%S).log"
source /srv/gargantext_env_3.5/bin/activate source /srv/env_3-5/bin/activate
./manage.py celery worker -f $FILE ./manage.py celery worker -f $FILE
...@@ -74,7 +74,7 @@ ...@@ -74,7 +74,7 @@
">Manage</button> ">Manage</button>
</li> </li>
{% if common_users %} {% if common_users %}
<a style="cursor:pointer;"><img class="share_button" data-id="{{ project.id }}" title="Share it!" width="20px" src="{% static "img/share.png" %}"></img></a> <!-- <a style="cursor:pointer;"><img class="share_button" data-id="{{ project.id }}" title="Share it!" width="20px" src="{% static "img/share.png" %}"></img></a> --!>
{% endif %} {% endif %}
</h3> </h3>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment