Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
aedde7a7
Commit
aedde7a7
authored
Apr 19, 2016
by
delanoe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Prod/Dev version of Gargantext. TODO: asynchronous parser has a bug.
parent
1bc49d68
Changes
9
Show whitespace changes
Inline
Side-by-side
Showing 9 changed files with 43 additions and 64 deletions
+43
-64
gargantext.ini
gargantext.ini
+1
-12
__init__.py
gargantext/util/toolchain/__init__.py
+1
-1
asGargantua.sh
install/debian/asGargantua.sh
+16
-16
asRoot.sh
install/debian/asRoot.sh
+2
-2
requirements.txt
install/python/requirements.txt
+2
-2
istex.py
scrapers/istex.py
+3
-3
pubmed.py
scrapers/pubmed.py
+16
-26
start_celery
start_celery
+1
-1
overview.html
templates/pages/projects/overview.html
+1
-1
No files found.
gargantext.ini
View file @
aedde7a7
...
@@ -16,7 +16,7 @@ chdir = /srv/gargantext
...
@@ -16,7 +16,7 @@ chdir = /srv/gargantext
#module = wsgi
#module = wsgi
wsgi-file
=
/srv/gargantext/gargantext/wsgi.py
wsgi-file
=
/srv/gargantext/gargantext/wsgi.py
# the virtualenv
# the virtualenv
home = /srv/gargantext_env_3.5
home = /srv/env_3-5
lazy-apps
=
True
lazy-apps
=
True
...
@@ -58,14 +58,3 @@ max-requests = 5000
...
@@ -58,14 +58,3 @@ max-requests = 5000
uid
=
1000
uid
=
1000
gid
=
1000
gid
=
1000
################### other gargantext constants ###################
[scrappers]
# default number of docs POSTed to scrappers.views.py
# (at page project > add a corpus > scan/process sample)
QUERY_SIZE_N_DEFAULT
=
1000
# checked just before scrap to prevent running impossible workflows
# even if somebody would set "query size N" manually in POST data
QUERY_SIZE_N_MAX
=
20000
gargantext/util/toolchain/__init__.py
View file @
aedde7a7
...
@@ -17,7 +17,7 @@ from gargantext.models import Node
...
@@ -17,7 +17,7 @@ from gargantext.models import Node
from
datetime
import
datetime
from
datetime
import
datetime
from
celery
import
shared_task
from
celery
import
shared_task
@
shared_task
#
@shared_task
def
parse_extract
(
corpus
):
def
parse_extract
(
corpus
):
# retrieve corpus from database from id
# retrieve corpus from database from id
if
isinstance
(
corpus
,
int
):
if
isinstance
(
corpus
,
int
):
...
...
install/debian/asGargantua.sh
View file @
aedde7a7
#!/bin/bash
#!/bin/bash
#MAINTAINER ISCPIF <alexandre.delanoe@iscpif.fr>
#
#
MAINTAINER ISCPIF <alexandre.delanoe@iscpif.fr>
#
git clone ssh://gitolite@delanoe.org:1979/gargantext /srv/gargantext
\
#
git clone ssh://gitolite@delanoe.org:1979/gargantext /srv/gargantext \
&&
cd
/srv/gargantext
\
#
&& cd /srv/gargantext \
&&
git fetch origin refactoring-alex
\
#
&& git fetch origin refactoring-alex \
&&
git checkout refactoring-alex
#
&& git checkout refactoring-alex
#
cd
/srv/gargantext/install
\
#
cd /srv/gargantext/install \
&&
/usr/bin/virtualenv
--py
=
/usr/bin/python3.5 /srv/env_3-5
\
#
&& /usr/bin/virtualenv --py=/usr/bin/python3.5 /srv/env_3-5 \
&&
/bin/bash
-c
'source /srv/env_3-5/bin/activate'
\
#
&& /bin/bash -c 'source /srv/env_3-5/bin/activate' \
&&
/bin/bash
-c
'/srv/env_3-5/bin/pip install git+https://github.com/zzzeek/sqlalchemy.git@rel_1_1'
\
#
&& /bin/bash -c '/srv/env_3-5/bin/pip install git+https://github.com/zzzeek/sqlalchemy.git@rel_1_1' \
&&
/bin/bash
-c
'/srv/env_3-5/bin/pip install -r /srv/gargantext/install/python/requirements.txt'
\
#
&& /bin/bash -c '/srv/env_3-5/bin/pip install -r /srv/gargantext/install/python/requirements.txt' \
#
## INSTALL MAIN DEPENDENCIES
## INSTALL MAIN DEPENDENCIES
...
@@ -22,9 +22,9 @@ cd /tmp && wget http://dl.gargantext.org/gargantext_lib.tar.bz2 \
...
@@ -22,9 +22,9 @@ cd /tmp && wget http://dl.gargantext.org/gargantext_lib.tar.bz2 \
## End of configuration
## End of configuration
## be sure that postgres is running
## be sure that postgres is running
cd
/srv/gargantext
&&
/bin/bash
-c
'source /srv/bin/env_3-5/bin/activate'
\
#
cd /srv/gargantext && /bin/bash -c 'source /srv/bin/env_3-5/bin/activate' \
&&
/srv/gargantext/manage.py shell < /srv/gargantext/init.py
#
&& /srv/gargantext/manage.py shell < /srv/gargantext/init.py
#
echo
"Gargantua: END of the installation of Gargantext"
echo
"Gargantua: END of the installation of Gargantext"
install/debian/asRoot.sh
View file @
aedde7a7
...
@@ -17,8 +17,8 @@ echo "Europe/Paris" > /etc/timezone && \
...
@@ -17,8 +17,8 @@ echo "Europe/Paris" > /etc/timezone && \
update-locale
LANG
=
fr_FR.UTF-8
update-locale
LANG
=
fr_FR.UTF-8
## PROD VERSION OF GARGANTEXt
## PROD VERSION OF GARGANTEXT
apt-get
install
-y
uwsgi nginx
# apt-get install -y uwsgi nginx uwsgi-plugin-python rabbitmq-server
### CREATE USER and adding it to sudo
### CREATE USER and adding it to sudo
...
...
install/python/requirements.txt
View file @
aedde7a7
...
@@ -13,7 +13,6 @@ djangorestframework==3.3.2
...
@@ -13,7 +13,6 @@ djangorestframework==3.3.2
html5lib==0.9999999
html5lib==0.9999999
jdatetime==1.7.2
jdatetime==1.7.2
kombu==3.0.33 # messaging
kombu==3.0.33 # messaging
lxml==3.5.0
nltk==3.1
nltk==3.1
numpy==1.10.4
numpy==1.10.4
psycopg2==2.6.1
psycopg2==2.6.1
...
@@ -26,6 +25,7 @@ SQLAlchemy==1.1.0b1.dev0
...
@@ -26,6 +25,7 @@ SQLAlchemy==1.1.0b1.dev0
ujson==1.35
ujson==1.35
umalqurra==0.2 # arabic calendars (?? why use ??)
umalqurra==0.2 # arabic calendars (?? why use ??)
wheel==0.29.0
wheel==0.29.0
pandas==0.18.0
networkx==1.11
networkx==1.11
pandas==0.18.0
six==1.10.0
six==1.10.0
lxml==3.5.0
scrapers/istex.py
View file @
aedde7a7
...
@@ -12,7 +12,6 @@ from gargantext.constants import RESOURCETYPES, QUERY_SIZE_N_MAX
...
@@ -12,7 +12,6 @@ from gargantext.constants import RESOURCETYPES, QUERY_SIZE_N_MAX
from
gargantext.models.nodes
import
Node
from
gargantext.models.nodes
import
Node
from
gargantext.util.db
import
session
from
gargantext.util.db
import
session
from
gargantext.util.http
import
JsonHttpResponse
from
gargantext.util.http
import
JsonHttpResponse
from
gargantext.util.tools
import
ensure_dir
from
gargantext.util.scheduling
import
scheduled
from
gargantext.util.scheduling
import
scheduled
from
gargantext.util.toolchain
import
parse_extract_indexhyperdata
from
gargantext.util.toolchain
import
parse_extract_indexhyperdata
...
@@ -123,7 +122,6 @@ def save(request , project_id):
...
@@ -123,7 +122,6 @@ def save(request , project_id):
corpus_id
=
corpus
.
id
corpus_id
=
corpus
.
id
print
(
"NEW CORPUS"
,
corpus_id
)
print
(
"NEW CORPUS"
,
corpus_id
)
ensure_dir
(
request
.
user
)
tasks
=
Scraper
()
tasks
=
Scraper
()
for
i
in
range
(
8
):
for
i
in
range
(
8
):
...
@@ -144,12 +142,14 @@ def save(request , project_id):
...
@@ -144,12 +142,14 @@ def save(request , project_id):
)
)
dwnldsOK
+=
1
dwnldsOK
+=
1
session
.
commit
()
if
dwnldsOK
==
0
:
if
dwnldsOK
==
0
:
return
JsonHttpResponse
([
"fail"
])
return
JsonHttpResponse
([
"fail"
])
###########################
###########################
###########################
###########################
try
:
try
:
scheduled
(
parse_extract_indexhyperdata
(
corpus_id
,)
)
scheduled
(
parse_extract_indexhyperdata
)(
corpus_id
)
except
Exception
as
error
:
except
Exception
as
error
:
print
(
'WORKFLOW ERROR'
)
print
(
'WORKFLOW ERROR'
)
print
(
error
)
print
(
error
)
...
...
scrapers/pubmed.py
View file @
aedde7a7
...
@@ -21,8 +21,8 @@ from django.http import Http404, HttpResponseRedirect, HttpResponseForbidden
...
@@ -21,8 +21,8 @@ from django.http import Http404, HttpResponseRedirect, HttpResponseForbidden
from
gargantext.constants
import
RESOURCETYPES
,
QUERY_SIZE_N_MAX
from
gargantext.constants
import
RESOURCETYPES
,
QUERY_SIZE_N_MAX
from
gargantext.models.nodes
import
Node
from
gargantext.models.nodes
import
Node
from
gargantext.util.db
import
session
from
gargantext.util.db
import
session
from
gargantext.util.db_cache
import
cache
from
gargantext.util.http
import
JsonHttpResponse
from
gargantext.util.http
import
JsonHttpResponse
from
gargantext.util.tools
import
ensure_dir
from
gargantext.util.scheduling
import
scheduled
from
gargantext.util.scheduling
import
scheduled
from
gargantext.util.toolchain
import
parse_extract_indexhyperdata
from
gargantext.util.toolchain
import
parse_extract_indexhyperdata
...
@@ -74,20 +74,16 @@ def save( request , project_id ) :
...
@@ -74,20 +74,16 @@ def save( request , project_id ) :
except
ValueError
:
except
ValueError
:
raise
Http404
()
raise
Http404
()
# do we have a valid project?
# do we have a valid project?
project
=
(
session
.
query
(
Node
)
.
filter
(
Node
.
id
==
project_id
)
project
=
session
.
query
(
Node
)
.
filter
(
Node
.
id
==
project_id
)
.
first
()
.
filter
(
Node
.
typename
==
'PROJECT'
)
)
.
first
()
if
project
is
None
:
if
project
is
None
:
raise
Http404
()
raise
Http404
()
# do we have a valid user?
user
=
request
.
user
user
=
cache
.
User
[
request
.
user
.
id
]
if
not
user
.
is_authenticated
():
if
not
user
.
owns
(
project
):
return
redirect
(
'/auth/?next=
%
s'
%
request
.
path
)
raise
HttpResponseForbidden
()
if
project
.
user_id
!=
user
.
id
:
return
HttpResponseForbidden
()
if
request
.
method
==
"POST"
:
if
request
.
method
==
"POST"
:
...
@@ -111,15 +107,10 @@ def save( request , project_id ) :
...
@@ -111,15 +107,10 @@ def save( request , project_id ) :
# corpus node instanciation as a Django model
# corpus node instanciation as a Django model
corpus
=
Node
(
corpus
=
project
.
add_child
(
name
=
name
name
=
name
,
,
typename
=
"CORPUS"
user_id
=
request
.
user
.
id
,
parent_id
=
project_id
,
typename
=
'CORPUS'
,
hyperdata
=
{
"action"
:
"Scraping data"
,
"language_id"
:
None
}
)
)
session
.
add
(
corpus
)
session
.
add
(
corpus
)
session
.
commit
()
session
.
commit
()
corpus_id
=
corpus
.
id
corpus_id
=
corpus
.
id
...
@@ -130,8 +121,6 @@ def save( request , project_id ) :
...
@@ -130,8 +121,6 @@ def save( request , project_id ) :
# eFetchResult.read() # this will output the XML... normally you write this to a XML-file.
# eFetchResult.read() # this will output the XML... normally you write this to a XML-file.
# """
# """
ensure_dir
(
request
.
user
)
tasks
=
Scraper
()
tasks
=
Scraper
()
for
i
in
range
(
8
):
for
i
in
range
(
8
):
...
@@ -143,20 +132,21 @@ def save( request , project_id ) :
...
@@ -143,20 +132,21 @@ def save( request , project_id ) :
tasks
.
q
.
join
()
# wait until everything is finished
tasks
.
q
.
join
()
# wait until everything is finished
dwnldsOK
=
0
dwnldsOK
=
0
for
filename
in
tasks
.
firstResults
:
for
filename
in
tasks
.
firstResults
:
print
(
filename
)
print
(
filename
)
if
filename
!=
False
:
if
filename
!=
False
:
# add the uploaded resource to the corpus
# add the uploaded resource to the corpus
corpus
.
add_resource
(
type
=
3
corpus
.
add_resource
(
type
=
3
,
path
=
filename
,
path
=
filename
)
)
dwnldsOK
+=
1
dwnldsOK
+=
1
#session.commit()
if
dwnldsOK
==
0
:
if
dwnldsOK
==
0
:
return
JsonHttpResponse
([
"fail"
])
return
JsonHttpResponse
([
"fail"
])
try
:
try
:
scheduled
(
parse_extract_indexhyperdata
(
corpus_id
,)
)
scheduled
(
parse_extract_indexhyperdata
)(
corpus_id
)
except
Exception
as
error
:
except
Exception
as
error
:
print
(
'WORKFLOW ERROR'
)
print
(
'WORKFLOW ERROR'
)
print
(
error
)
print
(
error
)
...
...
start_celery
View file @
aedde7a7
#!/bin/bash
#!/bin/bash
FILE="/var/log/gargantext/celery/$(date +%Y%m%d-%H:%M:%S).log"
FILE="/var/log/gargantext/celery/$(date +%Y%m%d-%H:%M:%S).log"
source /srv/gargantext_env_3.5/bin/activate
source /srv/env_3-5/bin/activate
./manage.py celery worker -f $FILE
./manage.py celery worker -f $FILE
templates/pages/projects/overview.html
View file @
aedde7a7
...
@@ -74,7 +74,7 @@
...
@@ -74,7 +74,7 @@
"
>
Manage
</button>
"
>
Manage
</button>
</li>
</li>
{% if common_users %}
{% if common_users %}
<a style="cursor:pointer;"><img class="share_button" data-id="{{ project.id }}" title="Share it!" width="20px" src="{% static "img/share.png" %}"></img></a>
<!-- <a style="cursor:pointer;"><img class="share_button" data-id="{{ project.id }}" title="Share it!" width="20px" src="{% static "img/share.png" %}"></img></a> --!>
{% endif %}
{% endif %}
</h3>
</h3>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment