Commit 1a7b5bf3 authored by sim's avatar sim

Install scrapyd server to run scrapers

parent 4fc0ec1a
......@@ -5,3 +5,4 @@ __pycache__
gargantext.ini
postgrest.conf
*.log
scrapyd
# Init scripts driving the background services (start/stop/restart/status).
CELERY_INIT    := ./tools/init.d/gargantext-celery
POSTGREST_INIT := ./tools/init.d/gargantext-postgrest
SCRAPYD_INIT   := ./tools/init.d/gargantext-scrapyd
# Helper that uploads the spiders; the `scrapyd` target passes it the scrapyd
# init script as its only argument.
SCRAPYD_DEPLOY := ./tools/scrapyd-deploy.sh
ifeq ("$(ENVIR)", "prod")
PIPENV_ARGS=
......@@ -49,6 +52,16 @@ conf:
./tools/mkconf.sh $(ENVIR)
@echo
# Prepare the scrapyd working tree and deploy the project's spiders to it.
# The target must be phony: this recipe itself creates a `scrapyd/` directory,
# which would otherwise make the target look permanently up to date.
.PHONY: scrapyd
scrapyd:
	@echo "• Setup scrapyd..."
	@mkdir -p $@/logs
	@echo "[*] Deploy spiders to scrapyd..."
	@pipenv run $(SCRAPYD_DEPLOY) $(SCRAPYD_INIT)
	@echo "[*] Clean build files..."
	@rm -rf build gargantext_light.egg-info
	@echo
.PHONY: checkdebian
checkdebian:
@./tools/checkdebian.sh
......@@ -66,6 +79,7 @@ start: checkstartup
@$(BACKEND_INIT) start
@$(CELERY_INIT) start
@$(POSTGREST_INIT) start
@$(SCRAPYD_INIT) start
@echo
.PHONY: stop
......@@ -74,6 +88,7 @@ stop: checkstartup
@$(BACKEND_INIT) stop
@$(CELERY_INIT) stop
@$(POSTGREST_INIT) stop
@$(SCRAPYD_INIT) stop
@echo
.PHONY: restart
......@@ -82,6 +97,7 @@ restart: checkstartup
@$(BACKEND_INIT) restart
@$(CELERY_INIT) restart
@$(POSTGREST_INIT) restart
@$(SCRAPYD_INIT) restart
@echo
.PHONY: reload
......@@ -90,6 +106,7 @@ reload: checkstartup
@$(BACKEND_INIT) reload
@$(CELERY_INIT) force-reload
@$(POSTGREST_INIT) reload
@$(SCRAPYD_INIT) reload
@echo
.PHONY: check
......@@ -98,6 +115,7 @@ check: checkstartup
@$(BACKEND_INIT) status || true
@$(CELERY_INIT) status || true
@$(POSTGREST_INIT) status || true
@$(SCRAPYD_INIT) status || true
@echo
.PHONY: status
......
......@@ -19,7 +19,7 @@ django = "*"
dateutils = "*"
celery = "*"
sqlalchemy = "*"
psycopg2-binary = "*"
"psycopg2-binary" = "*"
sqlalchemy-utils = "*"
djangorestframework = "*"
djangorestframework-jwt = "*"
......@@ -29,6 +29,8 @@ alembic = "*"
scrapy = "*"
jmespath = "*"
risparser = "*"
scrapyd = "*"
scrapyd-client = "*"
[requires]
......
{
"_meta": {
"hash": {
"sha256": "d94567674a7b0441d3a9ba14b73201e335c3511ee2dd75306138b635dc1eedc7"
"sha256": "2d58c4f4ea845b5f4e8eb1ae9b5ffa8b26d82dee5bd324a6a1d0f01591bded19"
},
"pipfile-spec": 6,
"requires": {
......@@ -333,6 +333,12 @@
],
"version": "==0.2.1"
},
"pycparser": {
"hashes": [
"sha256:99a8ca03e29851d96616ad0404b4aad7d9ee16f25c9f9708a11faf2810f7b226"
],
"version": "==2.18"
},
"pydispatcher": {
"hashes": [
"sha256:5570069e1b1769af1fe481de6dd1d3a388492acddd2cdad7a3bde145615d5caf",
......@@ -408,6 +414,20 @@
],
"version": "==1.5.0"
},
"scrapyd": {
"hashes": [
"sha256:4983898bd6b6c53735cfa9e92e166c1d89d5c108a36ae2959c5cae914dc61887",
"sha256:c7189100759e60ee5ae7fec1f040a6be88e20fbbd353ac07db6a78d729bada7f"
],
"version": "==1.2.0"
},
"scrapyd-client": {
"hashes": [
"sha256:caa0f5369c2e1efa7b79c309afb9819b2518870c5f4f2caf84d3e474cd6a9890",
"sha256:e547475c5c8dbd811e2cc4141a0e7b4ba47600e9980c59df4f831bb60b94e4cb"
],
"version": "==1.1.0"
},
"service-identity": {
"hashes": [
"sha256:0e76f3c042cc0f5c7e6da002cf646f59dc4023962d1d1166343ce53bdad39e17",
......@@ -500,13 +520,6 @@
],
"version": "==0.3.9"
},
"django": {
"hashes": [
"sha256:3d9916515599f757043c690ae2b5ea28666afa09779636351da505396cbb2f19",
"sha256:769f212ffd5762f72c764fa648fca3b7f7dd4ec27407198b68e7c4abf4609fd0"
],
"version": "==2.0.3"
},
"isort": {
"hashes": [
"sha256:1153601da39a25b14ddc54955dbbacbb6b2d19135386699e2ad58517953b34af",
......@@ -571,10 +584,10 @@
},
"pylint-django": {
"hashes": [
"sha256:0ccb38ac08df8f380e2a7d86b40b46ba4d68c64993c4b8c88a6ba6cd1a644ecc",
"sha256:994715c3f0ff37d86def2224bf15b46b482f3b75096f9d9cc9f4cb1e8d58b0ac"
"sha256:681f5105c98c9a96ed10895ad346d132659a56c313181a9e2642f6fb5029f5f2",
"sha256:d014c0a64996914f748cd7d803cce5e41496ca5898f3a69c54d4b600aa72f7de"
],
"version": "==0.9.3"
"version": "==0.9.4"
},
"pylint-plugin-utils": {
"hashes": [
......@@ -582,20 +595,6 @@
],
"version": "==0.2.6"
},
"pytz": {
"hashes": [
"sha256:07edfc3d4d2705a20a6e99d97f0c4b61c800b8232dc1c04d87e8554f130148dd",
"sha256:3a47ff71597f821cd84a162e71593004286e5be07a340fd462f0d33a760782b5",
"sha256:410bcd1d6409026fbaa65d9ed33bf6dd8b1e94a499e32168acfc7b332e4095c0",
"sha256:5bd55c744e6feaa4d599a6cbd8228b4f8f9ba96de2c38d56f08e534b3c9edf0d",
"sha256:61242a9abc626379574a166dc0e96a66cd7c3b27fc10868003fa210be4bff1c9",
"sha256:887ab5e5b32e4d0c86efddd3d055c1f363cbaa583beb8da5e22d2fa2f64d51ef",
"sha256:ba18e6a243b3625513d85239b3e49055a2f0318466e0b8a92b8fb8ca7ccdf55f",
"sha256:ed6509d9af298b7995d69a440e2822288f2eca1681b8cce37673dbb10091e5fe",
"sha256:f93ddcdd6342f94cea379c73cddb5724e0d6d0a1c91c9bdef364dc0368ba4fda"
],
"version": "==2018.3"
},
"six": {
"hashes": [
"sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9",
......
gargantext.ini
\ No newline at end of file
# Automatically created by: scrapyd-deploy
#
# Packaging metadata for the project. The `scrapy` entry point group names
# the module that holds the project settings (gargantext.settings).
from setuptools import find_packages, setup

setup(
    name='gargantext-light',
    version='0.1',
    packages=find_packages(),
    entry_points={
        'scrapy': ['settings = gargantext.settings'],
    },
)
......@@ -27,6 +27,25 @@ CELERYD_PID_FILE = /tmp/celery.pid
CELERYD_LOG_FILE = /var/log/gargantext/backend/celery.log
CELERYD_LOG_LEVEL = {LOG_LEVEL}
# Deployment target: presumably read by scrapyd-deploy, which is invoked with
# "-p gargantext" by tools/scrapyd-deploy.sh -- TODO confirm.
[deploy]
url = http://localhost:6800
project = gargantext
# Scrapyd server settings. The directories are relative paths under scrapyd/,
# the same tree whose logs/ sub-directory is created by `make scrapyd`.
[scrapyd]
eggs_dir = scrapyd/eggs
logs_dir = scrapyd/logs
jobs_to_keep = 5
dbs_dir = scrapyd/dbs
max_proc = 0
max_proc_per_cpu = 4
finished_to_keep = 100
poll_interval = 5.0
# Address and port should stay in sync with the url of the [deploy] section.
bind_address = 127.0.0.1
http_port = 6800
debug = {DEBUG}
[uwsgi]
# See: http://uwsgi-docs.readthedocs.io/en/latest/ThingsToKnow.html
......
#!/bin/sh
### BEGIN INIT INFO
# Provides: gargantext-scrapyd
# Required-Start: $local_fs $remote_fs $network
# Required-Stop: $local_fs $remote_fs
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Short-Description: starts gargantext scrapyd server
# Description: starts gargantext scrapyd server using start-stop-daemon
### END INIT INFO
# PATH should only include /usr/* if it runs after the mountnfs.sh script
PATH=$PATH:/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
# Daemon executable: $SCRAPYD when set, otherwise the scrapyd found on PATH
DAEMON=${SCRAPYD:-$(which scrapyd)}
NAME=gargantext-scrapyd
DESC=gargantext-scrapyd
# Relative path: resolved against the directory the script is invoked from,
# since start() and stop() pass --chdir $PWD to start-stop-daemon
LOGDIR=scrapyd/logs
PIDFILE="/tmp/$NAME.pid"
# Options forwarded to the daemon: log file and pid file locations
DAEMON_ARGS="-l $LOGDIR/scrapyd.log --pidfile=$PIDFILE"
SCRIPTNAME="$0"
# Exit if the package is not installed
test -x "$DAEMON" || exit 0
# Debian init helpers; expected to provide the log_daemon_msg, log_end_msg
# and status_of_proc functions used by the action dispatch
. /lib/init/vars.sh
. /lib/lsb/init-functions
# Start the daemon in the background, with the current directory as its
# working directory.
# Return
#   0 if the daemon has been started
#   1 if the daemon was already running
#   2 if the daemon could not be started
start()
{
    # Dry run: with --test nothing is launched, start-stop-daemon only tells
    # whether a start would take place; failure means "already running".
    start-stop-daemon --start --quiet --pidfile "$PIDFILE" --chdir "$PWD" \
        --startas "$DAEMON" --test \
        || return 1
    # Paths are quoted so a project directory containing whitespace works.
    # $DAEMON_ARGS stays unquoted on purpose: it holds several options that
    # must be split into separate words.
    start-stop-daemon --start --pidfile "$PIDFILE" --chdir "$PWD" \
        --background --startas "$DAEMON" -- $DAEMON_ARGS \
        || return 2
}
# Stop the daemon identified by the pid file: send TERM, wait up to 30s,
# then KILL and wait up to 5s.
# Return
#   0 if the daemon has been stopped
#   1 if the daemon was already stopped
#   2 if the daemon could not be stopped
stop()
{
    start-stop-daemon --stop --quiet --retry=TERM/30/KILL/5 --chdir "$PWD" \
        --pidfile "$PIDFILE"
    RETVAL="$?"
    # Keep the pid file when the process is still alive, so that a later
    # stop can find it again.
    [ "$RETVAL" = 2 ] && return 2
    rm -f "$PIDFILE"
    return "$RETVAL"
}
# Dispatch on the requested action.
case "$1" in
    start)
        log_daemon_msg "Starting $DESC " "$NAME"
        start
        case "$?" in
            # "Already running" (1) is reported as a success
            0|1) log_end_msg 0 ;;
            2) log_end_msg 1 ;;
        esac
        ;;
    stop)
        log_daemon_msg "Stopping $DESC" "$NAME"
        stop
        case "$?" in
            # "Already stopped" (1) is reported as a success
            0|1) log_end_msg 0 ;;
            2) log_end_msg 1 ;;
        esac
        ;;
    status)
        # Look the process up through the pid file used by start and stop,
        # so the three actions agree on which process is "the" daemon.
        status_of_proc -p "$PIDFILE" "$DAEMON" "$NAME" && exit 0 || exit $?
        ;;
    restart|reload|force-reload)
        # There is no dedicated reload implementation: "reload" is accepted
        # as an alias of "restart" because the Makefile `reload` target calls
        # this script with that action.
        log_daemon_msg "Restarting $DESC" "$NAME"
        stop
        case "$?" in
            0|1)
                start
                case "$?" in
                    0) log_end_msg 0 ;;
                    1) log_end_msg 1 ;; # Old process is still running
                    *) log_end_msg 1 ;; # Failed to start
                esac
                ;;
            *)
                # Failed to stop
                log_end_msg 1
                ;;
        esac
        ;;
    *)
        echo "Usage: $SCRIPTNAME {start|stop|status|restart|reload|force-reload}" >&2
        exit 3
        ;;
esac
#!/bin/sh
# Deploy the project's spiders to scrapyd.
#
# Usage: scrapyd-deploy.sh SCRAPYD_INIT
#   SCRAPYD_INIT  path to the scrapyd init script (start|stop|status)
#
# scrapyd is started for the duration of the deployment when it is not
# already running, and stopped again afterwards. The exit status is the one
# of the deploy command, so callers can detect a failed deployment.
PROJECT=gargantext
SCRAPYD_DEPLOY=scrapyd-deploy
SCRAPYD_INIT="$1"

if [ -z "$SCRAPYD_INIT" ]; then
    echo "Usage: $0 SCRAPYD_INIT" >&2
    exit 2
fi

# Is scrapyd already running?
# Redirections apply left to right: stdout must be sent to /dev/null first
# for "2>&1" to silence stderr as well.
"$SCRAPYD_INIT" status >/dev/null 2>&1
SCRAPYD_RUNNING="$?"

# Start scrapyd if it is not running
[ "$SCRAPYD_RUNNING" = "0" ] || "$SCRAPYD_INIT" start >/dev/null 2>&1

# Deploy spiders
"$SCRAPYD_DEPLOY" -a -p "$PROJECT"
DEPLOY_STATUS="$?"

# Stop scrapyd if it was not running
[ "$SCRAPYD_RUNNING" = "0" ] || "$SCRAPYD_INIT" stop >/dev/null 2>&1

# Report the deployment result rather than the status of the line above
exit "$DEPLOY_STATUS"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment