Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
dfde20b4
Commit
dfde20b4
authored
Oct 12, 2017
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'stable' into stable-imt
parents
2791e98e
3ef753ca
Changes
11
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
2052 additions
and
169 deletions
+2052
-169
devel_infos.md
annotations/devel_infos.md
+0
-0
gargantext.ini
gargantext.ini
+16
-7
metric_tfidf.py
gargantext/util/toolchain/metric_tfidf.py
+17
-7
ngram_coocs.py
gargantext/util/toolchain/ngram_coocs.py
+12
-4
cooccurrences.py
graph/cooccurrences.py
+2
-1
Dockerfile
install/notebook/Dockerfile
+12
-12
gargantext_notebook.py
install/notebook/gargantext_notebook.py
+5
-2
AdvancedTutorial-checkpoint.ipynb
...ooks/.ipynb_checkpoints/AdvancedTutorial-checkpoint.ipynb
+1433
-64
AdvancedTutorial.ipynb
notebooks/AdvancedTutorial.ipynb
+551
-62
start_uwsgi
start_uwsgi
+3
-9
menu.html
templates/pages/menu.html
+1
-1
No files found.
annotations/
romain
_infos.md
→
annotations/
devel
_infos.md
View file @
dfde20b4
File moved
gargantext.ini
View file @
dfde20b4
...
...
@@ -2,8 +2,6 @@
[uwsgi]
# uwsgi --vacuum --socket monsite/mysite.sock --wsgi-file monsite/wsgi.py --chmod-socket=666 --home=/srv/alexandre.delanoe/env --chdir=/var/www/www/alexandre/monsite --env
env
=
DJANGO_SETTINGS_MODULE=gargantext.settings
#module = django.core.handlers.wsgi:WSGIHandler()
...
...
@@ -44,7 +42,7 @@ touch-reload = /tmp/gargantext.reload
# respawn processes taking more than 20 seconds
harakiri
=
120
harakiri
=
120
0
post-buffering
=
8192
# limit the project to 128 MB
...
...
@@ -55,7 +53,18 @@ max-requests = 5000
# background the process & log
#daemonize = /var/log/uwsgi/gargantext.log
uid
=
1000
gid
=
1000
daemonize
=
/var/log/gargantext/uwsgi/@(exec://date +%%Y-%%m-%%d_%%H%%M).log
log-reopen
=
true
#uid = 1000
#gid = 1000
#
how-config
=
true
disable-logging
=
false
logfile-chmod
=
644
#logfile-chown=false
log-maxsize
=
500000000
##logto=%(chdir)logs/uwsgi_access.log
#logger = longquery file:%(chdir)logs/uwsgi_long.log
#log-route = longquery msec
#
gargantext/util/toolchain/metric_tfidf.py
View file @
dfde20b4
...
...
@@ -23,7 +23,7 @@ from datetime import datetime
def
t
():
return
datetime
.
now
()
.
strftime
(
"
%
Y-
%
m-
%
d_
%
H:
%
M:
%
S"
)
def
compute_occs
(
corpus
,
overwrite_id
=
None
,
groupings_id
=
None
,):
def
compute_occs
(
corpus
,
overwrite_id
=
None
,
groupings_id
=
None
,
year
=
None
,
start
=
None
,
end
=
None
,
interactiv
=
False
):
"""
Calculates sum of occs per ngram (or per mainform if groups) within corpus
(used as info in the ngrams table view)
...
...
@@ -61,6 +61,8 @@ def compute_occs(corpus, overwrite_id = None, groupings_id = None,):
.
group_by
(
NodeNgram
.
ngram_id
)
)
if
year
is
not
None
:
occs_q
=
occs_q
.
filter
(
Node
.
hyperdata
[
"publication_year"
]
.
astext
==
str
(
year
))
# difficult case: with groups
# ------------
...
...
@@ -108,6 +110,10 @@ def compute_occs(corpus, overwrite_id = None, groupings_id = None,):
# for the sum
.
group_by
(
"counted_form"
)
)
if
year
is
not
None
:
occs_q
=
occs_q
.
filter
(
Node
.
hyperdata
[
"publication_year"
]
.
astext
==
str
(
year
))
#print(str(occs_q.all()))
occ_sums
=
occs_q
.
all
()
...
...
@@ -134,13 +140,17 @@ def compute_occs(corpus, overwrite_id = None, groupings_id = None,):
# £TODO make it NodeNgram instead NodeNodeNgram ! and rebase :/
# (idem ti_ranking)
bulk_insert
(
NodeNodeNgram
,
(
'node1_id'
,
'node2_id'
,
'ngram_id'
,
'score'
),
((
the_id
,
corpus
.
id
,
res
[
0
],
res
[
1
])
for
res
in
occ_sums
)
)
return
the_id
if
interactiv
is
False
:
bulk_insert
(
NodeNodeNgram
,
(
'node1_id'
,
'node2_id'
,
'ngram_id'
,
'score'
),
((
the_id
,
corpus
.
id
,
res
[
0
],
res
[
1
])
for
res
in
occ_sums
)
)
return
the_id
else
:
return
[(
res
[
0
],
res
[
1
])
for
res
in
occ_sums
]
def
compute_ti_ranking
(
corpus
,
...
...
gargantext/util/toolchain/ngram_coocs.py
View file @
dfde20b4
...
...
@@ -20,6 +20,7 @@ def compute_coocs( corpus,
stoplist_id
=
None
,
start
=
None
,
end
=
None
,
year
=
None
,
symmetry_filter
=
False
,
diagonal_filter
=
True
):
"""
...
...
@@ -97,14 +98,21 @@ def compute_coocs( corpus,
WHERE
n.typename = {nodetype_id}
AND n.parent_id = {corpus_id}
"""
.
format
(
nodetype_id
=
NODETYPES
.
index
(
'DOCUMENT'
)
,
corpus_id
=
corpus
.
id
)
if
year
:
cooc_filter_sql
+=
"""
AND n.hyperdata -> 'publication_year' = '{year}'
"""
.
format
(
year
=
str
(
year
))
cooc_filter_sql
+=
"""
GROUP BY 1,2
-- ==
-- GROUP BY ngA, ngB
)
"""
.
format
(
nodetype_id
=
NODETYPES
.
index
(
'DOCUMENT'
)
,
corpus_id
=
corpus
.
id
)
"""
# 3) taking the cooccurrences of ngram x2
ngram_filter_A_sql
+=
"""
-- STEP 1: X axis of the matrix
...
...
graph/cooccurrences.py
View file @
dfde20b4
...
...
@@ -230,6 +230,7 @@ def countCooccurrences( corpus_id=None , cooc_id=None
session
.
commit
()
#data = cooc2graph(coocNode.id, cooc, distance=distance, bridgeness=bridgeness)
#return data
else
:
return
cooc
return
(
coocNode
.
id
,
cooc
)
install/notebook/Dockerfile
View file @
dfde20b4
...
...
@@ -106,17 +106,17 @@ RUN apt-get update && apt-get install -y \
libblas-dev
\
liblapack-dev
USER
notebooks
RUN
cd
/home/notebooks
\
&&
curl
-sSL
https://get.haskellstack.org/ | sh
\
&&
stack setup
\
&&
git clone https://github.com/gibiansky/IHaskell
\
&&
.
/env_3-5/bin/activate
\
&&
cd
IHaskell
\
&&
stack
install
gtk2hs-buildtools
\
&&
stack
install
--fast
\
&&
/root/.local/bin/ihaskell
install
--stack
#
USER notebooks
#
#
RUN cd /home/notebooks \
#
&& curl -sSL https://get.haskellstack.org/ | sh \
#
&& stack setup \
#
&& git clone https://github.com/gibiansky/IHaskell \
#
&& . /env_3-5/bin/activate \
#
&& cd IHaskell \
#
&& stack install gtk2hs-buildtools \
#
&& stack install --fast \
#
&& /root/.local/bin/ihaskell install --stack
#
install/notebook/gargantext_notebook.py
View file @
dfde20b4
...
...
@@ -25,7 +25,7 @@ from django.http import Http404
# Import those to be available by notebook user
from
langdetect
import
detect
as
detect_lang
from
gargantext.models
import
UserNode
,
User
import
functools
class
NotebookError
(
Exception
):
pass
...
...
@@ -40,8 +40,11 @@ def documents(corpus_id):
#import seaborn as sns
import
pandas
as
pd
def
countByField
(
docs
,
field
):
return
list
(
Counter
([
doc
.
hyperdata
[
field
]
for
doc
in
docs
])
.
items
())
def
chart
(
docs
,
field
):
year_publis
=
list
(
Counter
([
doc
.
hyperdata
[
field
]
for
doc
in
docs
])
.
items
()
)
year_publis
=
countByField
(
docs
,
field
)
frame0
=
pd
.
DataFrame
(
year_publis
,
columns
=
[
'Date'
,
'DateValue'
])
frame1
=
pd
.
DataFrame
(
year_publis
,
columns
=
[
'Date'
,
'DateValue'
],
index
=
frame0
.
Date
)
return
frame1
...
...
notebooks/.ipynb_checkpoints/AdvancedTutorial-checkpoint.ipynb
View file @
dfde20b4
This diff is collapsed.
Click to expand it.
notebooks/AdvancedTutorial.ipynb
View file @
dfde20b4
This diff is collapsed.
Click to expand it.
start_uwsgi
View file @
dfde20b4
#!/bin/bash
# Script to start uwsgi
# TODO: should this run as sudo or as a regular user?
# Create the log file, named after the current time
FILE
=
"/var/log/gargantext/uwsgi/
$(
date
+%Y%m%d-%H:%M:%S
)
.log"
touch
${
FILE
}
# Script to start uwsgi
uwsgi /srv/gargantext/gargantext.ini
# add a group here
chmod
o+r
${
FILE
}
echo
"To reload UWSGI: touch /tmp/gargantext.reload"
# finally start the server with that log file
uwsgi gargantext.ini
--logto
${
FILE
}
templates/pages/menu.html
View file @
dfde20b4
...
...
@@ -368,7 +368,7 @@
<p>
Gargantext
<span
class=
"glyphicon glyphicon-registration-mark"
aria-hidden=
"true"
></span>
, version 3.0.
7
,
, version 3.0.
8
,
<a
href=
"http://www.cnrs.fr"
target=
"blank"
title=
"Institution that enables this project."
>
Copyrights
<span
class=
"glyphicon glyphicon-copyright-mark"
aria-hidden=
"true"
></span>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment