Commit 832351f8 authored by Administrator's avatar Administrator

Merge branch 'mat' of ssh://delanoe.org:1979/gargantext

parents 4387de88 2ca5116a
{
"metadata": {
"name": "",
"signature": "sha256:7c80ed9f4b088e13444efb451a1ee46e5727247be14aaf30ddf0236a49ac461b"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": []
}
\ No newline at end of file
{
"metadata": {
"name": "",
"signature": "sha256:e0c3b2efe7c205a29dc4e028b10ffb7b9d0569f35c4b426febdf523069abffdb"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from pprint import pprint\n",
"from node.models import Node, NodeType, Language, Ngram\n",
"from django.contrib.auth.models import User\n",
"import parsing\n",
"from parsing.FileParsers import *"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Define user\n",
"try:\n",
" user = User.objects.get(username='Mat')\n",
"except:\n",
" user = User(username='Mat', password='0123', email='mathieu@rodic.fr')\n",
" user.save()\n",
"\n",
"# Define document types\n",
"nodetypes = {}\n",
"for name in ['Corpus', 'Document']:\n",
" try:\n",
" nodetypes[name] = NodeType.objects.get(name=name)\n",
" except:\n",
" nodetypes[name] = NodeType(name=name)\n",
" nodetypes[name].save()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Node.objects.all().delete()\n",
"corpus = Node(name='PubMed corpus', user=user, type=nodetypes['Corpus'])\n",
"corpus.save()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"fileparser = PubmedFileParser.PubmedFileParser(file='/home/mat/projects/gargantext/data_samples/pubmed.zip')"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"fileparser.parse(corpus)\n",
"print('Ok!')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Warning: parsing empty text\n",
"Warning: parsing empty text\n",
"Warning: parsing empty text"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Warning: parsing empty text\n",
"Warning: parsing empty text"
]
}
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for node_ngram in corpus.children.first().node_ngram_set.all():\n",
" print(node_ngram.ngram.terms)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
\ No newline at end of file
Install the requirements
------------------------
1) Install all the Debian packages listed in dependances.deb
(also: sudo apt-get install postgresql-contrib)
2) Create a virtual environment with pyvenv: apt-get install python-virtualenv
3) Type: source [your virtual environment directory]/bin/activate
4) Do your work!
5) Type: deactivate
Configure stuff
---------------
1) ln -s [the project folder] /srv/gargantext
2) ln -s [your folder for tree tagger] [the project folder]/parsing/Tagger/treetagger
Warning: for ln, path has to be absolute!
In PostgreSQL
-------------
1) Ensure postgres is started: sudo /etc/init.d/postgresql start
2) sudo su postgres
3) psql
4) CREATE USER alexandre WITH PASSWORD 'C8kdcUrAQy66U';
(see gargantext_web/settings.py, DATABASES = { ... })
5) CREATE DATABASE gargandb WITH OWNER alexandre;
6) Ctrl + D
7) psql gargandb
8) CREATE EXTENSION hstore;
9) Ctrl + D
Populate the database
---------------------
python manage.py syncdb
Start the Python Notebook server
--------------------------------
1) In Pyvenv: python manage.py shell_plus --notebook
2) Work from your browser!
Start the Django server
-----------------------
python manage.py runserver
\ No newline at end of file
...@@ -114,8 +114,7 @@ class FileParser: ...@@ -114,8 +114,7 @@ class FileParser:
# we are already in a transaction, so no use doing another one (or is there?) # we are already in a transaction, so no use doing another one (or is there?)
ngramcache = self._ngramcaches[language] ngramcache = self._ngramcaches[language]
for terms, occurences in ngrams.items(): for terms, occurences in ngrams.items():
ngram_text = ' '.join([term[0] for term in terms]) ngram = ngramcache[terms]
ngram = ngramcache[ngram_text]
Node_Ngram( Node_Ngram(
node = childNode, node = childNode,
ngram = ngram, ngram = ngram,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment