Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
aedde7a7
Commit
aedde7a7
authored
8 years ago
by
delanoe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Prod/Dev version of Gargantext. TODO: asynchronous parser has a bug.
parent
1bc49d68
Changes
9
Show whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
43 additions
and
64 deletions
+43
-64
gargantext.ini
gargantext.ini
+1
-12
__init__.py
gargantext/util/toolchain/__init__.py
+1
-1
asGargantua.sh
install/debian/asGargantua.sh
+16
-16
asRoot.sh
install/debian/asRoot.sh
+2
-2
requirements.txt
install/python/requirements.txt
+2
-2
istex.py
scrapers/istex.py
+3
-3
pubmed.py
scrapers/pubmed.py
+16
-26
start_celery
start_celery
+1
-1
overview.html
templates/pages/projects/overview.html
+1
-1
No files found.
gargantext.ini
View file @
aedde7a7
...
...
@@ -16,7 +16,7 @@ chdir = /srv/gargantext
#module = wsgi
wsgi-file
=
/srv/gargantext/gargantext/wsgi.py
# the virtualenv
home
=
/srv/
gargantext_env_3.
5
home
=
/srv/
env_3-
5
lazy-apps
=
True
...
...
@@ -58,14 +58,3 @@ max-requests = 5000
uid
=
1000
gid
=
1000
################### other gargantext constants ###################
[scrappers]
# default number of docs POSTed to scrappers.views.py
# (at page project > add a corpus > scan/process sample)
QUERY_SIZE_N_DEFAULT
=
1000
# checked just before scraping to prevent running impossible workflows
# even if somebody sets "query size N" manually in the POST data
QUERY_SIZE_N_MAX
=
20000
This diff is collapsed.
Click to expand it.
gargantext/util/toolchain/__init__.py
View file @
aedde7a7
...
...
@@ -17,7 +17,7 @@ from gargantext.models import Node
from
datetime
import
datetime
from
celery
import
shared_task
@
shared_task
#
@shared_task
def
parse_extract
(
corpus
):
# retrieve the corpus from the database by its id
if
isinstance
(
corpus
,
int
):
...
...
This diff is collapsed.
Click to expand it.
install/debian/asGargantua.sh
View file @
aedde7a7
#!/bin/bash
#MAINTAINER ISCPIF <alexandre.delanoe@iscpif.fr>
git clone ssh://gitolite@delanoe.org:1979/gargantext /srv/gargantext
\
&&
cd
/srv/gargantext
\
&&
git fetch origin refactoring-alex
\
&&
git checkout refactoring-alex
cd
/srv/gargantext/install
\
&&
/usr/bin/virtualenv
--py
=
/usr/bin/python3.5 /srv/env_3-5
\
&&
/bin/bash
-c
'source /srv/env_3-5/bin/activate'
\
&&
/bin/bash
-c
'/srv/env_3-5/bin/pip install git+https://github.com/zzzeek/sqlalchemy.git@rel_1_1'
\
&&
/bin/bash
-c
'/srv/env_3-5/bin/pip install -r /srv/gargantext/install/python/requirements.txt'
\
#
#
MAINTAINER ISCPIF <alexandre.delanoe@iscpif.fr>
#
#
git clone ssh://gitolite@delanoe.org:1979/gargantext /srv/gargantext \
#
&& cd /srv/gargantext \
#
&& git fetch origin refactoring-alex \
#
&& git checkout refactoring-alex
#
#
cd /srv/gargantext/install \
#
&& /usr/bin/virtualenv --py=/usr/bin/python3.5 /srv/env_3-5 \
#
&& /bin/bash -c 'source /srv/env_3-5/bin/activate' \
#
&& /bin/bash -c '/srv/env_3-5/bin/pip install git+https://github.com/zzzeek/sqlalchemy.git@rel_1_1' \
#
&& /bin/bash -c '/srv/env_3-5/bin/pip install -r /srv/gargantext/install/python/requirements.txt' \
#
## INSTALL MAIN DEPENDENCIES
...
...
@@ -22,9 +22,9 @@ cd /tmp && wget http://dl.gargantext.org/gargantext_lib.tar.bz2 \
## End of configuration
## be sure that postgres is running
cd
/srv/gargantext
&&
/bin/bash
-c
'source /srv/bin/env_3-5/bin/activate'
\
&&
/srv/gargantext/manage.py shell < /srv/gargantext/init.py
#
cd /srv/gargantext && /bin/bash -c 'source /srv/bin/env_3-5/bin/activate' \
#
&& /srv/gargantext/manage.py shell < /srv/gargantext/init.py
#
echo
"Gargantua: END of the installation of Gargantext"
This diff is collapsed.
Click to expand it.
install/debian/asRoot.sh
View file @
aedde7a7
...
...
@@ -17,8 +17,8 @@ echo "Europe/Paris" > /etc/timezone && \
update-locale
LANG
=
fr_FR.UTF-8
## PROD VERSION OF GARGANTEX
t
apt-get
install
-y
uwsgi nginx
## PROD VERSION OF GARGANTEX
T
# apt-get install -y uwsgi nginx uwsgi-plugin-python rabbitmq-server
### CREATE USER and adding it to sudo
...
...
This diff is collapsed.
Click to expand it.
install/python/requirements.txt
View file @
aedde7a7
...
...
@@ -13,7 +13,6 @@ djangorestframework==3.3.2
html5lib==0.9999999
jdatetime==1.7.2
kombu==3.0.33 # messaging
lxml==3.5.0
nltk==3.1
numpy==1.10.4
psycopg2==2.6.1
...
...
@@ -26,6 +25,7 @@ SQLAlchemy==1.1.0b1.dev0
ujson==1.35
umalqurra==0.2 # arabic calendars (?? why use ??)
wheel==0.29.0
pandas==0.18.0
networkx==1.11
pandas==0.18.0
six==1.10.0
lxml==3.5.0
This diff is collapsed.
Click to expand it.
scrapers/istex.py
View file @
aedde7a7
...
...
@@ -12,7 +12,6 @@ from gargantext.constants import RESOURCETYPES, QUERY_SIZE_N_MAX
from
gargantext.models.nodes
import
Node
from
gargantext.util.db
import
session
from
gargantext.util.http
import
JsonHttpResponse
from
gargantext.util.tools
import
ensure_dir
from
gargantext.util.scheduling
import
scheduled
from
gargantext.util.toolchain
import
parse_extract_indexhyperdata
...
...
@@ -123,7 +122,6 @@ def save(request , project_id):
corpus_id
=
corpus
.
id
print
(
"NEW CORPUS"
,
corpus_id
)
ensure_dir
(
request
.
user
)
tasks
=
Scraper
()
for
i
in
range
(
8
):
...
...
@@ -144,12 +142,14 @@ def save(request , project_id):
)
dwnldsOK
+=
1
session
.
commit
()
if
dwnldsOK
==
0
:
return
JsonHttpResponse
([
"fail"
])
###########################
###########################
try
:
scheduled
(
parse_extract_indexhyperdata
(
corpus_id
,)
)
scheduled
(
parse_extract_indexhyperdata
)(
corpus_id
)
except
Exception
as
error
:
print
(
'WORKFLOW ERROR'
)
print
(
error
)
...
...
This diff is collapsed.
Click to expand it.
scrapers/pubmed.py
View file @
aedde7a7
...
...
@@ -21,8 +21,8 @@ from django.http import Http404, HttpResponseRedirect, HttpResponseForbidden
from
gargantext.constants
import
RESOURCETYPES
,
QUERY_SIZE_N_MAX
from
gargantext.models.nodes
import
Node
from
gargantext.util.db
import
session
from
gargantext.util.db_cache
import
cache
from
gargantext.util.http
import
JsonHttpResponse
from
gargantext.util.tools
import
ensure_dir
from
gargantext.util.scheduling
import
scheduled
from
gargantext.util.toolchain
import
parse_extract_indexhyperdata
...
...
@@ -74,20 +74,16 @@ def save( request , project_id ) :
except
ValueError
:
raise
Http404
()
# do we have a valid project?
project
=
(
session
.
query
(
Node
)
.
filter
(
Node
.
id
==
project_id
)
.
filter
(
Node
.
typename
==
'PROJECT'
)
)
.
first
()
project
=
session
.
query
(
Node
)
.
filter
(
Node
.
id
==
project_id
)
.
first
()
if
project
is
None
:
raise
Http404
()
# do we have a valid user?
user
=
request
.
user
if
not
user
.
is_authenticated
():
return
redirect
(
'/auth/?next=
%
s'
%
request
.
path
)
if
project
.
user_id
!=
user
.
id
:
return
HttpResponseForbidden
()
user
=
cache
.
User
[
request
.
user
.
id
]
if
not
user
.
owns
(
project
):
raise
HttpResponseForbidden
()
if
request
.
method
==
"POST"
:
...
...
@@ -111,15 +107,10 @@ def save( request , project_id ) :
# corpus node instantiation as a Django model
corpus
=
Node
(
name
=
name
,
user_id
=
request
.
user
.
id
,
parent_id
=
project_id
,
typename
=
'CORPUS'
,
hyperdata
=
{
"action"
:
"Scraping data"
,
"language_id"
:
None
}
corpus
=
project
.
add_child
(
name
=
name
,
typename
=
"CORPUS"
)
session
.
add
(
corpus
)
session
.
commit
()
corpus_id
=
corpus
.
id
...
...
@@ -130,8 +121,6 @@ def save( request , project_id ) :
# eFetchResult.read() # this will output the XML... normally you write this to a XML-file.
# """
ensure_dir
(
request
.
user
)
tasks
=
Scraper
()
for
i
in
range
(
8
):
...
...
@@ -143,20 +132,21 @@ def save( request , project_id ) :
tasks
.
q
.
join
()
# wait until everything is finished
dwnldsOK
=
0
for
filename
in
tasks
.
firstResults
:
print
(
filename
)
if
filename
!=
False
:
# add the uploaded resource to the corpus
corpus
.
add_resource
(
type
=
3
corpus
.
add_resource
(
type
=
3
,
path
=
filename
)
dwnldsOK
+=
1
#session.commit()
if
dwnldsOK
==
0
:
return
JsonHttpResponse
([
"fail"
])
try
:
scheduled
(
parse_extract_indexhyperdata
(
corpus_id
,)
)
scheduled
(
parse_extract_indexhyperdata
)(
corpus_id
)
except
Exception
as
error
:
print
(
'WORKFLOW ERROR'
)
print
(
error
)
...
...
This diff is collapsed.
Click to expand it.
start_celery
View file @
aedde7a7
#!/bin/bash
FILE
=
"/var/log/gargantext/celery/
$(
date
+%Y%m%d-%H:%M:%S
)
.log"
source
/srv/
gargantext_env_3.
5/bin/activate
source
/srv/
env_3-
5/bin/activate
./manage.py celery worker
-f
$FILE
This diff is collapsed.
Click to expand it.
templates/pages/projects/overview.html
View file @
aedde7a7
...
...
@@ -74,7 +74,7 @@
"
>
Manage
</button>
</li>
{% if common_users %}
<
a
style=
"cursor:pointer;"
><img
class=
"share_button"
data-id=
"{{ project.id }}"
title=
"Share it!"
width=
"20px"
src=
"{% static "
img
/
share
.
png
"
%}"
></img></a
>
<
!-- <a style="cursor:pointer;"><img class="share_button" data-id="{{ project.id }}" title="Share it!" width="20px" src="{% static "img/share.png" %}"></img></a> --!
>
{% endif %}
</h3>
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment