Commit 3bc37be6 authored by Administrator's avatar Administrator

Merge branch 'master' into alex

Merge last changes
parents 03a0580b 7d77ae22
......@@ -243,6 +243,7 @@ def project(request, project_id):
# async
corpus.children.filter(type_id=type_document.pk).extract_ngrams(keys=['title',])
#corpus.children.filter(type_id=type_document.pk).extract_ngrams(keys=['title',])
except Exception as error:
print(error)
......@@ -486,24 +487,31 @@ def node_link(request, corpus_id):
matrix = defaultdict(lambda : defaultdict(float))
labels = dict()
weight = dict()
corpus = Node.objects.get(id=corpus_id)
type_cooc = NodeType.objects.get(name="Cooccurrence")
if Node.objects.filter(type=type_cooc, parent=corpus).first() is None:
print("Coocurrences do not exist yet, create it.")
whitelist = create_whitelist(request.user, corpus)
cooc = create_cooc(user=request.user, corpus=corpus, whitelist=whitelist)
print(cooc.id, "Cooc created")
cooccurrence_node = create_cooc(user=request.user, corpus=corpus, whitelist=whitelist)
print(cooccurrence_matrix.id, "Cooc created")
else:
cooc = Node.objects.filter(type=type_cooc, parent=corpus).first()
cooccurrence_node = Node.objects.filter(type=type_cooc, parent=corpus).first()
for cooccurrence in NodeNgramNgram.objects.filter(node=cooc):
for cooccurrence in NodeNgramNgram.objects.filter(node=cooccurrence_node):
labels[cooccurrence.ngramx.id] = cooccurrence.ngramx.terms
labels[cooccurrence.ngramy.id] = cooccurrence.ngramy.terms
matrix[cooccurrence.ngramx.id][cooccurrence.ngramy.id] = cooccurrence.score
matrix[cooccurrence.ngramy.id][cooccurrence.ngramx.id] = cooccurrence.score
weight[cooccurrence.ngramy.terms] = weight.get(cooccurrence.ngramy.terms, 0) + cooccurrence.score
weight[cooccurrence.ngramx.terms] = weight.get(cooccurrence.ngramx.terms, 0) + cooccurrence.score
df = pd.DataFrame(matrix).T.fillna(0)
x = copy(df.values)
x = x / x.sum(axis=1)
......@@ -514,7 +522,7 @@ def node_link(request, corpus_id):
#matrix_filtered = np.where(x > threshold, x, 0)
G = nx.from_numpy_matrix(matrix_filtered)
G = nx.relabel_nodes(G, dict(enumerate([ labels[x] for x in list(df.columns)])))
G = nx.relabel_nodes(G, dict(enumerate([ labels[label] for label in list(df.columns)])))
#G = nx.relabel_nodes(G, dict(enumerate(df.columns)))
# Removing too connected nodes (find automatic way to do it)
......@@ -528,6 +536,7 @@ def node_link(request, corpus_id):
try:
#node,type(labels[node])
G.node[node]['label'] = node
G.node[node]['weight'] = weight[node]
# G.node[node]['color'] = '19,180,300'
except Exception as error:
print(error)
......
......@@ -4,23 +4,30 @@ Install the requirements
1) Install all the Debian packages listed in dependances.deb
(also: sudo apt-get install postgresql-contrib)
2) Create a virtual environment with pyvenv: apt-get install python-virtualenv
2) Create a Python virtual environment
3) Type: source [your virtual environment directory]/bin/activate
4) Do your work!
On Debian:
---------
sudo apt-get install python3.4-venv
pyvenv3 /srv/gargantext_env
5) Type: deactivate
On Ubuntu:
---------
sudo apt-get install python-pip
sudo pip install -U pip
sudo pip install -U virtualenv
Then you can create your virtualenv in the working directory or at a
location of your choice:
Configure stuff
---------------
sudo virtualenv -p python3 /srv/gargantext_env
1) ln -s [the project folder] /srv/gargantext
3) Type: source [your virtual environment directory]/bin/activate
2) ln -s [your folder for tree tagger] [the project folder]/parsing/Tagger/treetagger
4) sudo chown -R user:user /srv/gargantext_env
pip install -r /srv/gargantext/init/requirements.txt
Warning: for ln, path has to be absolute!
5) Type: deactivate
In PostgreSQL
......@@ -52,6 +59,36 @@ Populate the database
python manage.py syncdb
Last steps of configuration:
----------------------------
1) If your project is not in /srv/gargantext:
ln -s [the project folder] /srv/gargantext
2) build gargantext_lib
wget http://docs.delanoe.org/gargantext_lib.tar.bz2
cd /srv/
sudo tar xvjf gargantext_lib.tar.bz2
sudo chown user:user /srv/gargantext_lib
3) Explorer:
cd /srv/gargantext_lib/js
git clone git@github.com:PkSM3/garg.git
4) Adapt all symlinks:
ln -s [your folder for tree tagger] [the project folder]/parsing/Tagger/treetagger
Warning: for ln, path has to be absolute!
5) patch CTE
patch /srv/gargantext_env/lib/python3.4/site-packages/cte_tree/models.py /srv/gargantext/init/cte_tree.models.diff
6) init nodetypes and main variables
/srv/gargantext/manage.py shell < /srv/gargantext/init/init.py
Extras:
======
Start the Python Notebook server
--------------------------------
......@@ -65,4 +102,4 @@ Start the Django server
-----------------------
In Pyvenv:
python manage.py runserver
\ No newline at end of file
python manage.py runserver
sudo apt-get install postgresql
sudo apt-get install postgresql-contrib
sudo apt-get install python-virtualenv
sudo apt-cache search libpng
sudo apt-get install libpng12-dev
sudo apt-get install libpng-dev
apt-cache search freetype
apt-cache search freetype | grep dev
sudo apt-cache search freetype
sudo apt-get install libfreetype6-dev
sudo apt-cache search python-dev
sudo apt-get install python-dev
sudo apt-get install libpq-dev
sudo apt-get install postgresql-contrib
sudo apt-get install libpq-dev
postgresql-contrib
libpq-dev
# rajouter david
#
#
# To get all the dependencies of matplotlib (this is hacky; TODO: find a cleaner way)
sudo apt-get build-dep python-matplotlib
#Paquets Debian a installer
# easy_install -U distribute (matplotlib)
#lxml
libffi-dev
libxml2-dev
libxslt1-dev
sudo apt-get install libffi-dev
sudo apt-get install libxml2-dev
sudo apt-get install libxslt1-dev
# ipython readline
libncurses5-dev
pandoc
sudo apt-get install libncurses5-dev
sudo apt-get install pandoc
# scipy:
gfortran
libopenblas-dev
liblapack-dev
sudo apt-get install gfortran
sudo apt-get install libopenblas-dev
sudo apt-get install liblapack-dev
......@@ -82,6 +82,13 @@ except Exception as error:
typeDoc = NodeType(name='Synonyme')
typeDoc.save()
try:
typeDoc = NodeType.objects.get(name='Cooccurrence')
except Exception as error:
print(error)
typeDoc = NodeType(name='Cooccurrence')
typeDoc.save()
# In[33]:
......
......@@ -4,7 +4,9 @@ Jinja2==2.7.3
MarkupSafe==0.23
Pillow==2.5.3
Pygments==1.6
SQLAlchemy==0.9.8
South==1.0
aldjemy==0.3.51
amqp==1.4.6
anyjson==0.3.3
billiard==3.3.0.18
......
......@@ -168,6 +168,7 @@ class Node(CTENode):
# mark the resources as parsed for this node
self.node_resource.update(parsed=True)
@current_app.task(filter=task_method)
def extract_ngrams(self, keys, ngramsextractorscache=None, ngramscaches=None):
# if there is no cache...
if ngramsextractorscache is None:
......@@ -223,6 +224,18 @@ class Corpus(Node):
proxy=True
verbose_name_plural = 'Corpora'
#class WhiteList(Node):
# class Meta:
# proxy=True
#
#class BlackList(Node):
# class Meta:
# proxy=True
#
#class Synonyms(Node):
# class Meta:
# proxy=True
class Document(Node):
class Meta:
proxy=True
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment