Commit b4b83c05 authored by sim's avatar sim

Major changes in database handling

parent 60e4bce9
from calendar import timegm
from django.conf import settings
from rest_framework_jwt.settings import api_settings
from calendar import timegm
from gargantext.utils.dates import datetime
from gargantext.util.dates import datetime
def jwt_payload_handler(user):
username = user.username
......
from django.core.management.base import BaseCommand
from gargantext.util.show_nodes import tree_show, nodes
from gargantext.utils.show_nodes import tree_show, nodes
import colorama
......
......@@ -35,9 +35,10 @@ contents:
import os
import re
import importlib
from gargantext.util.lists import *
from gargantext.util import datetime, convert_to_datetime
from django.conf import settings
from gargantext.utils.lists import *
from gargantext.utils.dates import datetime, convert_to_datetime
# types & models (nodes, lists, hyperdata, resource) ---------------------------------------------
LISTTYPES = {
......
from django.conf import settings
from gargantext.util.json import json_dumps
import logging
import psycopg2
########################################################################
# get engine, session, etc.
########################################################################
from sqlalchemy import event
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, scoped_session
from sqlalchemy import delete
def get_engine():
    """Build the SQLAlchemy engine for the ``default`` database.

    The connection URL is read from
    ``settings.DATABASES['default']['SECRET_URL']``.  The engine is created
    with native hstore support, ``json_dumps`` as its JSON serializer, and a
    pool size of 20 with ``max_overflow`` set to 0.
    """
    from sqlalchemy import create_engine

    database_url = settings.DATABASES['default']['SECRET_URL']
    engine_options = {
        'use_native_hstore': True,
        'json_serializer': json_dumps,
        'pool_size': 20,
        'max_overflow': 0,
    }
    return create_engine(database_url, **engine_options)
engine = get_engine()
session = scoped_session(sessionmaker(bind=engine))
########################################################################
# useful for queries
########################################################################
from sqlalchemy.orm import aliased
from sqlalchemy import func, desc
from sqlalchemy.sql.expression import case
########################################################################
# bulk insertions
########################################################################
import psycopg2
from sqlalchemy.orm.session import Session
from django.conf import settings
from gargantext.utils.json import json_dumps
__all__ = ['Session', 'session']
logger = logging.getLogger(__name__)
class TrackRollbackMixin(object):
    """Track whether or not the session has been rolled back.

    Meant to be mixed into a session class: it relies on the ``info``
    dictionary and the ``close()`` method of the class it is combined with.
    The flag is stored under ``info['is_rollbacked']``; it is set by an
    ``after_rollback`` listener and cleared when the session is closed.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # named=True: the event arguments are passed as keywords, which is
        # why the handler only declares **kw.
        event.listen(self, 'after_rollback', self._after_rollback, named=True)

    def close(self):
        """Clear the rollback flag, then close the session."""
        if self.info.get('is_rollbacked'):
            self.info['is_rollbacked'] = False
        super().close()

    @property
    def is_rollbacked(self):
        """Return ``True`` if a rollback occurred since the last ``close()``.

        Always a real ``bool``: ``False`` is returned both before any
        rollback happened (flag absent) and after the flag was cleared.
        """
        return bool(self.info.get('is_rollbacked'))

    def _after_rollback(self, **kw):
        """``after_rollback`` event handler: raise the rollback flag."""
        self.info['is_rollbacked'] = True
class ProtectedSession(TrackRollbackMixin, Session):
    """Imitate the PostgREST authentication mechanism at each transaction.

    After ``login(user)``, an ``after_begin`` listener switches the
    PostgreSQL role and sets the ``request.jwt.claim.*`` settings for the
    logged-in user whenever a transaction begins.  ``close()`` (also exposed
    as ``logout()``) removes that listener before closing the session.
    """

    def close(self):
        """Unplug the authenticator (if any) and close the session."""
        self._logout()
        super().close()

    logout = close

    @staticmethod
    def _quote_literal(value):
        """Return ``value`` rendered as a single-quoted SQL string literal.

        Embedded single quotes are doubled.
        NOTE(review): this assumes the server runs with
        ``standard_conforming_strings = on`` (backslashes are then plain
        characters) -- confirm for the target database.
        """
        return "'%s'" % str(value).replace("'", "''")

    def login(self, user):
        """Remember ``user`` and authenticate as them on each transaction.

        ``user`` is expected to expose ``id``, ``username``, ``is_superuser``
        and ``is_staff``.  ``None`` is accepted and results in the ``'anon'``
        role.  When ``settings.DEBUG`` is on, a username string may be passed
        instead and is resolved to a ``User`` row.

        Raises ``Exception`` if, in debug mode, no user has that username.
        """
        if settings.DEBUG and isinstance(user, str):
            # For debugging purposes
            from gargantext.models import User
            from sqlalchemy.orm.exc import NoResultFound

            username = user
            try:
                user = session.query(User).filter_by(username=username).one()
            except NoResultFound:
                raise Exception("User %s not found!" % username)

        self.user_id = user and user.id
        self.user_name = user and user.username

        # getattr() with defaults keeps user=None consistent with the two
        # guarded assignments above: it falls through to 'anon' instead of
        # raising AttributeError.
        if getattr(user, 'is_superuser', False):
            self.role = settings.ROLE_SUPERUSER
        elif getattr(user, 'is_staff', False):
            self.role = settings.ROLE_STAFF
        elif getattr(user, 'id', None):
            self.role = settings.ROLE_USER
        else:
            self.role = 'anon'

        logger.debug("Plug authenticator for: %s (%s, %s)",
                     self.role, self.user_name, self.user_id)

        event.listen(self, 'after_begin', self._postgres_auth)

    def _logout(self):
        """Remove the authentication listener and forget the current user."""
        if event.contains(self, 'after_begin', self._postgres_auth):
            logger.debug("Unplug authenticator for: %s (%s, %s)",
                         self.role, self.user_name, self.user_id)
            event.remove(self, 'after_begin', self._postgres_auth)
            self.connection().info['authenticated'] = False

        self.user_id = self.user_name = self.role = None

    def _postgres_auth(self, *args, **kwargs):
        """``after_begin`` handler: set role and JWT claims in PostgreSQL.

        Statements whose value is empty/None are skipped, as before.
        """
        logger.debug("Authenticate in postgres as %s (%s, %s)",
                     self.role, self.user_name, self.user_id)

        quote = self._quote_literal
        statements = []
        if self.role:
            # The role name is an identifier and cannot be a quoted literal;
            # it only ever comes from settings or the 'anon' constant.
            statements.append('set role %s' % self.role)
            statements.append(
                'set local "request.jwt.claim.role" = %s' % quote(self.role))
        if self.user_name:
            # Quoted as an SQL literal rather than with Python's repr(),
            # which is not valid SQL for names containing quotes/backslashes.
            statements.append(
                'set local "request.jwt.claim.sub" = %s'
                % quote(self.user_name))
        if self.user_id:
            statements.append(
                'set local "request.jwt.claim.user_id" = %d' % self.user_id)
        sql = ';'.join(statements) + ';'

        conn = self.connection()
        conn.info['authenticated'] = True
        conn.execute(sql)
# FIXME Kept for backward compatibility but should be removed
# Legacy engine and scoped session, bound to the 'default' database URL.
engine = create_engine(settings.DATABASES['default']['SECRET_URL'])
session = scoped_session(sessionmaker(
bind=engine.execution_options(isolation_level='READ COMMITTED'),
autoflush=False))
# Engine built from the 'protected' database URL (see settings.DATABASES).
protected_engine = create_engine(
settings.DATABASES['protected']['SECRET_URL'],
use_native_hstore=True,
json_serializer=json_dumps,
pool_size=20,
max_overflow=0)
# Session factory: produces ProtectedSession objects bound to protected_engine.
Session = sessionmaker(
class_=ProtectedSession,
# This is the default postgresql isolation level, we state it here
# to be explicit
bind=protected_engine.execution_options(isolation_level='READ COMMITTED'),
# Disable autoflush to have more control over transactions
autoflush=False,)
# FIXME Should rewrite bulk queries with SQLAlchemy Core: beware, those
# functions are bypassing row level security!
# See: http://docs.sqlalchemy.org/en/latest/faq/performance.html#i-m-inserting-400-000-rows-with-the-orm-and-it-s-really-slow
def get_cursor():
db_settings = settings.DATABASES['default']
db = psycopg2.connect(**{
......@@ -43,6 +132,7 @@ def get_cursor():
})
return db, db.cursor()
class bulk_insert:
def __init__(self, table, fields, data, cursor=None):
# prepare the iterator
......@@ -74,6 +164,7 @@ class bulk_insert:
readline = read
def bulk_insert_ifnotexists(model, uniquekey, fields, data, cursor=None, do_stats=False):
"""
Inserts bulk data with an intermediate check on a uniquekey
......@@ -157,5 +248,3 @@ def bulk_insert_ifnotexists(model, uniquekey, fields, data, cursor=None, do_stat
cursor.execute('COMMIT WORK;')
cursor.close()
from gargantext.util.db import session
from gargantext.core.db import session
from gargantext.constants import NODETYPES, LISTTYPES
from datetime import datetime
......@@ -39,7 +39,7 @@ class Node(ValidatorMixin, Base):
<Node(id=None, typename=None, user_id=None, parent_id=None, name='without-type', date=None)>
>>> Node(typename='CORPUS')
<CorpusNode(id=None, typename='CORPUS', user_id=None, parent_id=None, name=None, date=None)>
>>> from gargantext.util.db import session
>>> from gargantext.core.db import session
>>> session.query(Node).filter_by(typename='USER').first() # doctest: +ELLIPSIS
<UserNode(...)>
......
from sqlalchemy.orm import aliased
from django.contrib.auth import models
from gargantext.util.db import session, aliased
from gargantext.core.db import session
from datetime import datetime
......@@ -8,6 +9,7 @@ from .base import DjangoBase, Base, Column, ForeignKey, UniqueConstraint, \
__all__ = ['User', 'Contact']
class User(DjangoBase):
# The properties below are a reflection of Django's auth module's models.
__tablename__ = models.User._meta.db_table
......
......@@ -11,8 +11,8 @@ https://docs.djangoproject.com/en/1.11/ref/settings/
"""
import os
from gargantext.util.config import config
import datetime
from gargantext.utils.config import config
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
......@@ -118,23 +118,25 @@ LOGGING = {
# Database
# https://docs.djangoproject.com/en/1.11/ref/settings/#databases
DEFAULT_DATABASE = {
'ENGINE': 'django.db.backends.postgresql_psycopg2',
'NAME': config('DB_NAME', default='gargandb'),
'USER': config('DB_USER', default='gargantua'),
'PASSWORD': config('DB_PASS'),
'HOST': config('DB_HOST', default='127.0.0.1'),
'PORT': config('DB_PORT', default='5432'),
}
DATABASES = {
'default': {
'ENGINE': 'django.db.backends.postgresql_psycopg2',
'NAME': config('DB_NAME', default='gargandb'),
'USER': config('DB_USER', default='gargantua'),
'PASSWORD': config('DB_PASS'),
'HOST': config('DB_HOST', default='127.0.0.1'),
'PORT': config('DB_PORT', default='5432'),
'TEST': {
'NAME': 'test_gargandb',
},
}
'default': DEFAULT_DATABASE,
'protected': dict(DEFAULT_DATABASE,
USER=config('DB_PROTECTED_USER', default='authenticator'),
PASSWORD=config('DB_PROTECTED_PASS'))
}
DATABASES['default']['SECRET_URL'] = \
'postgresql+psycopg2://{USER}:{PASSWORD}@{HOST}:{PORT}/{NAME}'.format(
**DATABASES['default']
)
# Build the SQLAlchemy connection URL of every configured database.
from urllib.parse import quote as _urlquote

for db in DATABASES:
    # Percent-encode the credentials: characters such as '@', ':' or '/' in a
    # user name or password would otherwise be read as URL delimiters.
    # safe='' also encodes '/', and spaces become %20 (not '+').
    DATABASES[db]['SECRET_URL'] = \
        'postgresql+psycopg2://{USER}:{PASSWORD}@{HOST}:{PORT}/{NAME}'.format(
            **dict(
                DATABASES[db],
                USER=_urlquote(str(DATABASES[db]['USER']), safe=''),
                PASSWORD=_urlquote(str(DATABASES[db]['PASSWORD']), safe=''),
            )
        )
# Password validation
......
from .dates import datetime, convert_to_datetime, MINYEAR
import json
import types
import datetime
import traceback
import inspect
__all__ = ['json_encoder', 'json_dumps']
......@@ -25,10 +23,11 @@ class JSONEncoder(json.JSONEncoder):
elif hasattr(obj, '__iter__') and not isinstance(obj, dict):
return list(obj)
else:
return super(self.__class__, self).default(obj)
return super().default(obj)
json_encoder = JSONEncoder()
# json_encoder = JSONEncoder(indent=4)
json_encoder = JSONEncoder() # compact json
def json_dumps(obj):
    """Serialize ``obj`` to a JSON string using this module's ``JSONEncoder``."""
    serialized = json.dumps(obj, cls=JSONEncoder)
    return serialized
......@@ -2,15 +2,13 @@
"""
__all__ = ['Translations', 'WeightedMatrix', 'UnweightedList', 'WeightedList', 'WeightedIndex']
from gargantext.util.db import session, bulk_insert
from collections import defaultdict
from math import sqrt
from gargantext.core.db import session, bulk_insert
__all__ = ['Translations', 'WeightedMatrix', 'UnweightedList', 'WeightedList', 'WeightedIndex']
class _BaseClass:
......@@ -303,6 +301,7 @@ class WeightedMatrix(_BaseClass):
result.items[key1, key2] = value / sqrt(other.items[key1] * other.items[key2])
return result
# ?TODO rename Wordlist
class UnweightedList(_BaseClass):
......
......@@ -13,8 +13,10 @@ import itertools
import colorama
from colorama import Fore
from sqlalchemy.sql.expression import literal_column
from sqlalchemy.orm import aliased
from sqlalchemy import func
from gargantext.util.db import session, func, aliased
from gargantext.core.db import session
from gargantext.models import Node
......
......@@ -14,6 +14,8 @@ DB_PORT = {DB_PORT}
DB_NAME = {DB_NAME}
DB_USER = {DB_USER}
DB_PASS = {DB_PASS}
DB_PROTECTED_USER = {DB_PROTECTED_USER}
DB_PROTECTED_PASS = {DB_PROTECTED_PASS}
# Logs
LOG_FILE = /var/log/gargantext/backend/django.log
LOG_LEVEL = {LOG_LEVEL}
......
......@@ -165,6 +165,8 @@ sed -E -e "s/[{]DEBUG[}]/$DEBUG/g" \
-e "s/[{]DB_NAME[}]/$DB_NAME/g" \
-e "s/[{]DB_USER[}]/$DB_USER/g" \
-e "s/[{]DB_PASS[}]/$DB_PASS/g" \
-e "s/[{]DB_PROTECTED_USER[}]/$PGREST_USER/g" \
-e "s/[{]DB_PROTECTED_PASS[}]/$PGREST_PASS/g" \
-e "s/[{]LOG_LEVEL[}]/$LOG_LEVEL/g" \
-e "s/[{]VENV[}]/$VENV/g" \
"$GARGANTEXT_TEMPLATE" > "$GARGANTEXT_CONF" \
......@@ -183,6 +185,6 @@ if ! $DB_ACCESS; then
fi
if ! $PGREST_AUTO; then
echo "WARNING: Didn't configure PostgREST user $PGREST_USER," \
"you may need to edit $POSTGREST_CONF manually."
echo "WARNING: You didn't configure protected user $PGREST_USER," \
"you may need to edit $POSTGREST_CONF and $GARGANTEXT_CONF manually."
fi
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment