Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
1a7b5bf3
Commit
1a7b5bf3
authored
Mar 13, 2018
by
sim
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Install scrapyd server to run scrapers
parent
4fc0ec1a
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
189 additions
and
26 deletions
+189
-26
.gitignore
.gitignore
+1
-0
Makefile
Makefile
+18
-0
Pipfile
Pipfile
+3
-1
Pipfile.lock
Pipfile.lock
+24
-25
scrapy.cfg
scrapy.cfg
+1
-0
setup.py
setup.py
+10
-0
gargantext.template.ini
tools/conf/gargantext.template.ini
+19
-0
gargantext-scrapyd
tools/init.d/gargantext-scrapyd
+95
-0
scrapyd-deploy.sh
tools/scrapyd-deploy.sh
+18
-0
No files found.
.gitignore
View file @
1a7b5bf3
...
...
@@ -5,3 +5,4 @@ __pycache__
gargantext.ini
postgrest.conf
*.log
scrapyd
Makefile
View file @
1a7b5bf3
# Service control scripts and the spider deployment helper.
# All paths are relative to the directory make is run from.
CELERY_INIT    := ./tools/init.d/gargantext-celery
POSTGREST_INIT := ./tools/init.d/gargantext-postgrest
SCRAPYD_INIT   := ./tools/init.d/gargantext-scrapyd
SCRAPYD_DEPLOY := ./tools/scrapyd-deploy.sh
ifeq
(
"$(ENVIR)"
,
"prod"
)
PIPENV_ARGS
=
...
...
@@ -49,6 +52,16 @@ conf:
./tools/mkconf.sh
$(ENVIR)
@
echo
# Set up the local scrapyd working tree and deploy the spiders to it.
# Declared phony because `scrapyd` is also the directory created below.
.PHONY: scrapyd
scrapyd:
	@echo "• Setup scrapyd..."
	@mkdir -p scrapyd/logs
	@echo "[*] Deploy spiders to scrapyd..."
	@# The deploy helper receives the init script so it can start/stop scrapyd itself.
	@pipenv run $(SCRAPYD_DEPLOY) $(SCRAPYD_INIT)
	@echo "[*] Clean build files..."
	@# Presumably left behind when scrapyd-deploy packages the project -- TODO confirm.
	@rm -rf build gargantext_light.egg-info
	@echo
.PHONY
:
checkdebian
checkdebian
:
@
./tools/checkdebian.sh
...
...
@@ -66,6 +79,7 @@ start: checkstartup
@
$(BACKEND_INIT)
start
@
$(CELERY_INIT)
start
@
$(POSTGREST_INIT)
start
@
$(SCRAPYD_INIT)
start
@
echo
.PHONY
:
stop
...
...
@@ -74,6 +88,7 @@ stop: checkstartup
@
$(BACKEND_INIT)
stop
@
$(CELERY_INIT)
stop
@
$(POSTGREST_INIT)
stop
@
$(SCRAPYD_INIT)
stop
@
echo
.PHONY
:
restart
...
...
@@ -82,6 +97,7 @@ restart: checkstartup
@
$(BACKEND_INIT)
restart
@
$(CELERY_INIT)
restart
@
$(POSTGREST_INIT)
restart
@
$(SCRAPYD_INIT)
restart
@
echo
.PHONY
:
reload
...
...
@@ -90,6 +106,7 @@ reload: checkstartup
@
$(BACKEND_INIT)
reload
@
$(CELERY_INIT)
force-reload
@
$(POSTGREST_INIT)
reload
@
$(SCRAPYD_INIT)
reload
@
echo
.PHONY
:
check
...
...
@@ -98,6 +115,7 @@ check: checkstartup
@
$(BACKEND_INIT)
status
||
true
@
$(CELERY_INIT)
status
||
true
@
$(POSTGREST_INIT)
status
||
true
@
$(SCRAPYD_INIT)
status
||
true
@
echo
.PHONY
:
status
...
...
Pipfile
View file @
1a7b5bf3
...
...
@@ -19,7 +19,7 @@ django = "*"
dateutils = "*"
celery = "*"
sqlalchemy = "*"
psycopg2-binary
= "*"
"psycopg2-binary"
= "*"
sqlalchemy-utils = "*"
djangorestframework = "*"
djangorestframework-jwt = "*"
...
...
@@ -29,6 +29,8 @@ alembic = "*"
scrapy = "*"
jmespath = "*"
risparser = "*"
scrapyd = "*"
scrapyd-client = "*"
[requires]
...
...
Pipfile.lock
View file @
1a7b5bf3
{
"_meta": {
"hash": {
"sha256": "
d94567674a7b0441d3a9ba14b73201e335c3511ee2dd75306138b635dc1eedc7
"
"sha256": "
2d58c4f4ea845b5f4e8eb1ae9b5ffa8b26d82dee5bd324a6a1d0f01591bded19
"
},
"pipfile-spec": 6,
"requires": {
...
...
@@ -333,6 +333,12 @@
],
"version": "==0.2.1"
},
"pycparser": {
"hashes": [
"sha256:99a8ca03e29851d96616ad0404b4aad7d9ee16f25c9f9708a11faf2810f7b226"
],
"version": "==2.18"
},
"pydispatcher": {
"hashes": [
"sha256:5570069e1b1769af1fe481de6dd1d3a388492acddd2cdad7a3bde145615d5caf",
...
...
@@ -408,6 +414,20 @@
],
"version": "==1.5.0"
},
"scrapyd": {
"hashes": [
"sha256:4983898bd6b6c53735cfa9e92e166c1d89d5c108a36ae2959c5cae914dc61887",
"sha256:c7189100759e60ee5ae7fec1f040a6be88e20fbbd353ac07db6a78d729bada7f"
],
"version": "==1.2.0"
},
"scrapyd-client": {
"hashes": [
"sha256:caa0f5369c2e1efa7b79c309afb9819b2518870c5f4f2caf84d3e474cd6a9890",
"sha256:e547475c5c8dbd811e2cc4141a0e7b4ba47600e9980c59df4f831bb60b94e4cb"
],
"version": "==1.1.0"
},
"service-identity": {
"hashes": [
"sha256:0e76f3c042cc0f5c7e6da002cf646f59dc4023962d1d1166343ce53bdad39e17",
...
...
@@ -500,13 +520,6 @@
],
"version": "==0.3.9"
},
"django": {
"hashes": [
"sha256:3d9916515599f757043c690ae2b5ea28666afa09779636351da505396cbb2f19",
"sha256:769f212ffd5762f72c764fa648fca3b7f7dd4ec27407198b68e7c4abf4609fd0"
],
"version": "==2.0.3"
},
"isort": {
"hashes": [
"sha256:1153601da39a25b14ddc54955dbbacbb6b2d19135386699e2ad58517953b34af",
...
...
@@ -571,10 +584,10 @@
},
"pylint-django": {
"hashes": [
"sha256:
0ccb38ac08df8f380e2a7d86b40b46ba4d68c64993c4b8c88a6ba6cd1a644ecc
",
"sha256:
994715c3f0ff37d86def2224bf15b46b482f3b75096f9d9cc9f4cb1e8d58b0ac
"
"sha256:
681f5105c98c9a96ed10895ad346d132659a56c313181a9e2642f6fb5029f5f2
",
"sha256:
d014c0a64996914f748cd7d803cce5e41496ca5898f3a69c54d4b600aa72f7de
"
],
"version": "==0.9.
3
"
"version": "==0.9.
4
"
},
"pylint-plugin-utils": {
"hashes": [
...
...
@@ -582,20 +595,6 @@
],
"version": "==0.2.6"
},
"pytz": {
"hashes": [
"sha256:07edfc3d4d2705a20a6e99d97f0c4b61c800b8232dc1c04d87e8554f130148dd",
"sha256:3a47ff71597f821cd84a162e71593004286e5be07a340fd462f0d33a760782b5",
"sha256:410bcd1d6409026fbaa65d9ed33bf6dd8b1e94a499e32168acfc7b332e4095c0",
"sha256:5bd55c744e6feaa4d599a6cbd8228b4f8f9ba96de2c38d56f08e534b3c9edf0d",
"sha256:61242a9abc626379574a166dc0e96a66cd7c3b27fc10868003fa210be4bff1c9",
"sha256:887ab5e5b32e4d0c86efddd3d055c1f363cbaa583beb8da5e22d2fa2f64d51ef",
"sha256:ba18e6a243b3625513d85239b3e49055a2f0318466e0b8a92b8fb8ca7ccdf55f",
"sha256:ed6509d9af298b7995d69a440e2822288f2eca1681b8cce37673dbb10091e5fe",
"sha256:f93ddcdd6342f94cea379c73cddb5724e0d6d0a1c91c9bdef364dc0368ba4fda"
],
"version": "==2018.3"
},
"six": {
"hashes": [
"sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9",
...
...
scrapy.cfg
0 → 120000
View file @
1a7b5bf3
gargantext.ini
\ No newline at end of file
setup.py
0 → 100644
View file @
1a7b5bf3
# Automatically created by: scrapyd-deploy

from setuptools import find_packages, setup

# Package description; the 'scrapy' entry point names the settings module
# (gargantext.settings) for this project.
setup(
    name='gargantext-light',
    version='0.1',
    packages=find_packages(),
    entry_points={
        'scrapy': ['settings = gargantext.settings'],
    },
)
tools/conf/gargantext.template.ini
View file @
1a7b5bf3
...
...
@@ -27,6 +27,25 @@ CELERYD_PID_FILE = /tmp/celery.pid
CELERYD_LOG_FILE
=
/var/log/gargantext/backend/celery.log
CELERYD_LOG_LEVEL
=
{LOG_LEVEL}
[deploy]
url
=
http://localhost:6800
project
=
gargantext
[scrapyd]
eggs_dir
=
scrapyd/eggs
logs_dir
=
scrapyd/logs
jobs_to_keep
=
5
dbs_dir
=
scrapyd/dbs
max_proc
=
0
max_proc_per_cpu
=
4
finished_to_keep
=
100
poll_interval
=
5.0
bind_address
=
127.0.0.1
http_port
=
6800
debug
=
{DEBUG}
[uwsgi]
# See: http://uwsgi-docs.readthedocs.io/en/latest/ThingsToKnow.html
...
...
tools/init.d/gargantext-scrapyd
0 → 100755
View file @
1a7b5bf3
#!/bin/sh
### BEGIN INIT INFO
# Provides: gargantext-scrapyd
# Required-Start: $local_fs $remote_fs $network
# Required-Stop: $local_fs $remote_fs
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Short-Description: starts gargantext scrapyd server
# Description: starts gargantext scrapyd server using start-stop-daemon
### END INIT INFO

# PATH should only include /usr/* if it runs after the mountnfs.sh script
PATH="$PATH:/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin"

# Service name and description shown in log messages; NAME also names the PID file.
NAME=gargantext-scrapyd
DESC=gargantext-scrapyd

# scrapyd executable: $SCRAPYD when set and non-empty, otherwise whatever
# `which` finds on the PATH defined above.
DAEMON="${SCRAPYD:-$(which scrapyd)}"

# LOGDIR is a relative path; start/stop run start-stop-daemon with --chdir $PWD.
LOGDIR=scrapyd/logs
PIDFILE="/tmp/$NAME.pid"
DAEMON_ARGS="-l $LOGDIR/scrapyd.log --pidfile=$PIDFILE"
SCRIPTNAME="$0"

# Exit if the package is not installed
[ -x "$DAEMON" ] || exit 0

# Shared init-script variables and the LSB logging/status helpers.
. /lib/init/vars.sh
. /lib/lsb/init-functions
#
# Start scrapyd in the background, from the current working directory.
#
# Returns:
#   0 if the daemon has been started
#   1 if the --test probe fails (callers treat this as "already running")
#   2 if the daemon could not be started
#
start() {
    # Probe only: with --test, start-stop-daemon reports what it would do
    # without doing it. Paths are quoted so that a working directory or
    # executable path containing whitespace is passed as a single argument.
    start-stop-daemon --start --quiet --pidfile "$PIDFILE" --chdir "$PWD" \
        --startas "$DAEMON" --test \
        || return 1

    # $DAEMON_ARGS is deliberately left unquoted: it holds several
    # whitespace-separated arguments that must be split by the shell.
    start-stop-daemon --start --pidfile "$PIDFILE" --chdir "$PWD" \
        --background --startas "$DAEMON" -- $DAEMON_ARGS \
        || return 2
}
#
# Stop the process recorded in $PIDFILE: send TERM, wait up to 30 seconds,
# then send KILL and wait up to 5 more seconds.
#
# Returns the exit status of start-stop-daemon. Status 2 is returned
# immediately and leaves the PID file in place; for any other status the
# PID file is removed first.
# NOTE(review): per start-stop-daemon(8), 0 = stopped, 1 = nothing to stop,
# 2 = still running after the retry schedule -- confirm against the
# installed version.
#
stop() {
    # Paths are quoted so whitespace in $PWD or $PIDFILE cannot split them.
    start-stop-daemon --stop --quiet --retry=TERM/30/KILL/5 --chdir "$PWD" \
        --pidfile "$PIDFILE"
    RETVAL="$?"
    [ "$RETVAL" = 2 ] && return 2

    rm -f "$PIDFILE"
    return "$RETVAL"
}
# Dispatch on the requested action (first command-line argument).
case "$1" in
    start)
        log_daemon_msg "Starting $DESC" "$NAME"
        start
        case "$?" in
            0|1) log_end_msg 0 ;;   # started, or start() returned 1 (already running)
            2)   log_end_msg 1 ;;   # failed to start
        esac
        ;;
    stop)
        log_daemon_msg "Stopping $DESC" "$NAME"
        stop
        case "$?" in
            0|1) log_end_msg 0 ;;   # stop() returned 0 or 1
            2)   log_end_msg 1 ;;   # stop() returned 2: failed to stop
        esac
        ;;
    status)
        # Check the process recorded in the PID file that start/stop manage,
        # rather than looking the daemon up by executable path only.
        status_of_proc -p "$PIDFILE" "$DAEMON" "$NAME" && exit 0 || exit $?
        ;;
    restart|reload|force-reload)
        #
        # No in-place reload is implemented here, so 'reload' and
        # 'force-reload' are both served by a full restart. 'reload' is
        # accepted because the Makefile's reload target invokes this script
        # with it; rejecting it would make that target fail with exit 3.
        #
        log_daemon_msg "Restarting $DESC" "$NAME"
        stop
        case "$?" in
            0|1)
                start
                case "$?" in
                    0) log_end_msg 0 ;;
                    1) log_end_msg 1 ;; # Old process is still running
                    *) log_end_msg 1 ;; # Failed to start
                esac
                ;;
            *)
                # Failed to stop
                log_end_msg 1
                ;;
        esac
        ;;
    *)
        echo "Usage: $SCRIPTNAME {start|stop|status|restart|reload|force-reload}" >&2
        exit 3
        ;;
esac
tools/scrapyd-deploy.sh
0 → 100755
View file @
1a7b5bf3
#!/bin/sh
#
# Deploy the project's spiders to scrapyd.
#
# Usage: scrapyd-deploy.sh SCRAPYD_INIT
#   SCRAPYD_INIT  path to the scrapyd init script; it is invoked with the
#                 actions status, start and stop.
#
# If scrapyd is not running it is started for the deployment and stopped
# again afterwards; if it is already running it is left untouched.
#
# Exit status: that of scrapyd-deploy, or 2 when SCRAPYD_INIT is missing.

PROJECT=gargantext
SCRAPYD_DEPLOY=scrapyd-deploy
SCRAPYD_INIT="$1"

# Without an init script the commands below would run as bare
# `status` / `start` / `stop`, which is never what the caller wants.
if [ -z "$SCRAPYD_INIT" ]; then
    echo "Usage: $0 SCRAPYD_INIT" >&2
    exit 2
fi

# Is scrapyd already running?
# Redirection order matters: stdout goes to /dev/null first, then stderr
# follows it, so both streams are silenced.
"$SCRAPYD_INIT" status >/dev/null 2>&1
SCRAPYD_RUNNING="$?"

# Start scrapyd if it is not running
[ "$SCRAPYD_RUNNING" = "0" ] || "$SCRAPYD_INIT" start >/dev/null 2>&1

# Deploy spiders; remember the result so a failure is reported to the caller
"$SCRAPYD_DEPLOY" -a -p "$PROJECT"
DEPLOY_STATUS="$?"

# Stop scrapyd if it was not running
[ "$SCRAPYD_RUNNING" = "0" ] || "$SCRAPYD_INIT" stop >/dev/null 2>&1

exit "$DEPLOY_STATUS"
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment