Commit b4b83c05 authored by sim

Major changes in database handling

parent 60e4bce9
from calendar import timegm
from django.conf import settings from django.conf import settings
from rest_framework_jwt.settings import api_settings from rest_framework_jwt.settings import api_settings
from calendar import timegm from gargantext.utils.dates import datetime
from gargantext.util.dates import datetime
def jwt_payload_handler(user): def jwt_payload_handler(user):
username = user.username username = user.username
......
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from gargantext.util.show_nodes import tree_show, nodes from gargantext.utils.show_nodes import tree_show, nodes
import colorama import colorama
......
...@@ -35,9 +35,10 @@ contents: ...@@ -35,9 +35,10 @@ contents:
import os import os
import re import re
import importlib import importlib
from gargantext.util.lists import *
from gargantext.util import datetime, convert_to_datetime
from django.conf import settings from django.conf import settings
from gargantext.utils.lists import *
from gargantext.utils.dates import datetime, convert_to_datetime
# types & models (nodes, lists, hyperdata, resource) --------------------------------------------- # types & models (nodes, lists, hyperdata, resource) ---------------------------------------------
LISTTYPES = { LISTTYPES = {
......
from django.conf import settings import logging
from gargantext.util.json import json_dumps import psycopg2
######################################################################## from sqlalchemy import event
# get engine, session, etc. from sqlalchemy import create_engine
########################################################################
from sqlalchemy.orm import sessionmaker, scoped_session from sqlalchemy.orm import sessionmaker, scoped_session
from sqlalchemy import delete from sqlalchemy.orm.session import Session
def get_engine(): from django.conf import settings
from sqlalchemy import create_engine
return create_engine( settings.DATABASES['default']['SECRET_URL'] from gargantext.utils.json import json_dumps
, use_native_hstore = True
, json_serializer = json_dumps
, pool_size=20, max_overflow=0 __all__ = ['Session', 'session']
)
engine = get_engine() logger = logging.getLogger(__name__)
session = scoped_session(sessionmaker(bind=engine))
class TrackRollbackMixin(object):
######################################################################## """Track whether or not session has been rollbacked"""
# useful for queries
######################################################################## def __init__(self, *args, **kwargs):
from sqlalchemy.orm import aliased super().__init__(*args, **kwargs)
from sqlalchemy import func, desc event.listen(self, 'after_rollback', self._after_rollback, named=True)
from sqlalchemy.sql.expression import case
def close(self):
######################################################################## if self.info.get('is_rollbacked'):
# bulk insertions self.info['is_rollbacked'] = False
######################################################################## super().close()
import psycopg2
@property
def is_rollbacked(self):
return self.info.get('is_rollbacked')
def _after_rollback(self, **kw):
self.info['is_rollbacked'] = True
class ProtectedSession(TrackRollbackMixin, Session):
    """Imitate PostgREST authentication mechanism at each transaction.

    ``login(user)`` records the user's id, name and role on the session and
    registers an ``after_begin`` listener which issues ``set role`` and the
    ``request.jwt.claim.*`` settings at the start of each transaction.
    ``close()`` (alias ``logout()``) removes that listener and clears the
    recorded identity before closing the session.
    """

    def close(self):
        """Unplug the authenticator, then close the session."""
        self._logout()
        super().close()

    logout = close

    def login(self, user):
        """Authenticate the following transactions of this session as *user*.

        :param user: object exposing ``id``, ``username``, ``is_superuser``
            and ``is_staff``. A falsy value (e.g. ``None``) is given the
            'anon' role. When ``settings.DEBUG`` is on, a username string is
            also accepted and resolved to a ``User`` row.
        :raises Exception: in debug mode, when no user has that username.
        """
        if settings.DEBUG and isinstance(user, str):
            # For debugging purposes
            from gargantext.models import User
            from sqlalchemy.orm.exc import NoResultFound
            username = user
            try:
                # Looked up through the module-level `session`, not `self`.
                user = session.query(User).filter_by(username=username).one()
            except NoResultFound:
                raise Exception("User %s not found!" % username)

        self.user_id = user and user.id
        self.user_name = user and user.username

        # A falsy user must not be dereferenced: it falls back to 'anon',
        # like a user object without an id.
        if not user:
            self.role = 'anon'
        elif user.is_superuser:
            self.role = settings.ROLE_SUPERUSER
        elif user.is_staff:
            self.role = settings.ROLE_STAFF
        elif user.id:
            self.role = settings.ROLE_USER
        else:
            self.role = 'anon'

        logger.debug("Plug authenticator for: %s (%s, %s)",
                     self.role, self.user_name, self.user_id)

        # Avoid registering the listener a second time when login() is
        # called again on a session which is already logged in.
        if not event.contains(self, 'after_begin', self._postgres_auth):
            event.listen(self, 'after_begin', self._postgres_auth)

    def _logout(self):
        """Remove the authentication listener and forget the identity."""
        if event.contains(self, 'after_begin', self._postgres_auth):
            logger.debug("Unplug authenticator for: %s (%s, %s)",
                         self.role, self.user_name, self.user_id)
            event.remove(self, 'after_begin', self._postgres_auth)
            self.connection().info['authenticated'] = False

        self.user_id = self.user_name = self.role = None

    @staticmethod
    def _quote_literal(value):
        """Return *value* as a single-quoted SQL string literal.

        Embedded single quotes are doubled, as in standard SQL.
        """
        # NOTE(review): assumes the server runs with
        # standard_conforming_strings=on (so backslashes are not escapes)
        # -- confirm against the deployed PostgreSQL configuration.
        return "'%s'" % str(value).replace("'", "''")

    def _postgres_auth(self, *args, **kwargs):
        """``after_begin`` listener: set role and JWT claims in PostgreSQL.

        Statements whose value is falsy (e.g. no user name or id for an
        anonymous login) are skipped.
        """
        logger.debug("Authenticate in postgres as %s (%s, %s)",
                     self.role, self.user_name, self.user_id)

        quote = self._quote_literal
        # (statement template, value, formatter). The role is interpolated
        # as a bare identifier; the claims are SQL-quoted rather than
        # formatted with Python's repr(), which is not valid SQL quoting
        # for names containing quotes or backslashes.
        ops = [
            ('set role %s', self.role, str),
            ('set local "request.jwt.claim.role" = %s', self.role, quote),
            ('set local "request.jwt.claim.sub" = %s', self.user_name, quote),
            ('set local "request.jwt.claim.user_id" = %d', self.user_id, int),
        ]
        sql = ';'.join(tpl % fmt(val) for tpl, val, fmt in ops if val) + ';'

        conn = self.connection()
        conn.info['authenticated'] = True
        conn.execute(sql)
# Legacy, unprotected access: engine and thread-scoped session built from the
# 'default' database URL.
# FIXME Kept for backward compatibility but should be removed
engine = create_engine(settings.DATABASES['default']['SECRET_URL'])
session = scoped_session(sessionmaker(
bind=engine.execution_options(isolation_level='READ COMMITTED'),
autoflush=False))
# Engine built from the 'protected' database URL; it backs the
# ProtectedSession factory below.
protected_engine = create_engine(
settings.DATABASES['protected']['SECRET_URL'],
use_native_hstore=True,
json_serializer=json_dumps,
pool_size=20,
max_overflow=0)
# Session factory producing ProtectedSession instances bound to the
# protected engine.
Session = sessionmaker(
class_=ProtectedSession,
# This is the default postgresql isolation level, we state it here
# to be explicit
bind=protected_engine.execution_options(isolation_level='READ COMMITTED'),
# Disable autoflush to have more control over transactions
autoflush=False,)
# FIXME Should rewrite bulk queries with SQLAlchemy Core: beware, those
# functions are bypassing row level security!
# See: http://docs.sqlalchemy.org/en/latest/faq/performance.html#i-m-inserting-400-000-rows-with-the-orm-and-it-s-really-slow
def get_cursor(): def get_cursor():
db_settings = settings.DATABASES['default'] db_settings = settings.DATABASES['default']
db = psycopg2.connect(**{ db = psycopg2.connect(**{
...@@ -43,6 +132,7 @@ def get_cursor(): ...@@ -43,6 +132,7 @@ def get_cursor():
}) })
return db, db.cursor() return db, db.cursor()
class bulk_insert: class bulk_insert:
def __init__(self, table, fields, data, cursor=None): def __init__(self, table, fields, data, cursor=None):
# prepare the iterator # prepare the iterator
...@@ -74,6 +164,7 @@ class bulk_insert: ...@@ -74,6 +164,7 @@ class bulk_insert:
readline = read readline = read
def bulk_insert_ifnotexists(model, uniquekey, fields, data, cursor=None, do_stats=False): def bulk_insert_ifnotexists(model, uniquekey, fields, data, cursor=None, do_stats=False):
""" """
Inserts bulk data with an intermediate check on a uniquekey Inserts bulk data with an intermediate check on a uniquekey
...@@ -157,5 +248,3 @@ def bulk_insert_ifnotexists(model, uniquekey, fields, data, cursor=None, do_stat ...@@ -157,5 +248,3 @@ def bulk_insert_ifnotexists(model, uniquekey, fields, data, cursor=None, do_stat
cursor.execute('COMMIT WORK;') cursor.execute('COMMIT WORK;')
cursor.close() cursor.close()
from gargantext.util.db import session from gargantext.core.db import session
from gargantext.constants import NODETYPES, LISTTYPES from gargantext.constants import NODETYPES, LISTTYPES
from datetime import datetime from datetime import datetime
...@@ -39,7 +39,7 @@ class Node(ValidatorMixin, Base): ...@@ -39,7 +39,7 @@ class Node(ValidatorMixin, Base):
<Node(id=None, typename=None, user_id=None, parent_id=None, name='without-type', date=None)> <Node(id=None, typename=None, user_id=None, parent_id=None, name='without-type', date=None)>
>>> Node(typename='CORPUS') >>> Node(typename='CORPUS')
<CorpusNode(id=None, typename='CORPUS', user_id=None, parent_id=None, name=None, date=None)> <CorpusNode(id=None, typename='CORPUS', user_id=None, parent_id=None, name=None, date=None)>
>>> from gargantext.util.db import session >>> from gargantext.core.db import session
>>> session.query(Node).filter_by(typename='USER').first() # doctest: +ELLIPSIS >>> session.query(Node).filter_by(typename='USER').first() # doctest: +ELLIPSIS
<UserNode(...)> <UserNode(...)>
......
from sqlalchemy.orm import aliased
from django.contrib.auth import models from django.contrib.auth import models
from gargantext.util.db import session, aliased from gargantext.core.db import session
from datetime import datetime from datetime import datetime
...@@ -8,6 +9,7 @@ from .base import DjangoBase, Base, Column, ForeignKey, UniqueConstraint, \ ...@@ -8,6 +9,7 @@ from .base import DjangoBase, Base, Column, ForeignKey, UniqueConstraint, \
__all__ = ['User', 'Contact'] __all__ = ['User', 'Contact']
class User(DjangoBase): class User(DjangoBase):
# The properties below are a reflection of Django's auth module's models. # The properties below are a reflection of Django's auth module's models.
__tablename__ = models.User._meta.db_table __tablename__ = models.User._meta.db_table
......
...@@ -11,8 +11,8 @@ https://docs.djangoproject.com/en/1.11/ref/settings/ ...@@ -11,8 +11,8 @@ https://docs.djangoproject.com/en/1.11/ref/settings/
""" """
import os import os
from gargantext.util.config import config
import datetime import datetime
from gargantext.utils.config import config
# Build paths inside the project like this: os.path.join(BASE_DIR, ...) # Build paths inside the project like this: os.path.join(BASE_DIR, ...)
...@@ -118,23 +118,25 @@ LOGGING = { ...@@ -118,23 +118,25 @@ LOGGING = {
# Database # Database
# https://docs.djangoproject.com/en/1.11/ref/settings/#databases # https://docs.djangoproject.com/en/1.11/ref/settings/#databases
DEFAULT_DATABASE = {
'ENGINE': 'django.db.backends.postgresql_psycopg2',
'NAME': config('DB_NAME', default='gargandb'),
'USER': config('DB_USER', default='gargantua'),
'PASSWORD': config('DB_PASS'),
'HOST': config('DB_HOST', default='127.0.0.1'),
'PORT': config('DB_PORT', default='5432'),
}
DATABASES = { DATABASES = {
'default': { 'default': DEFAULT_DATABASE,
'ENGINE': 'django.db.backends.postgresql_psycopg2', 'protected': dict(DEFAULT_DATABASE,
'NAME': config('DB_NAME', default='gargandb'), USER=config('DB_PROTECTED_USER', default='authenticator'),
'USER': config('DB_USER', default='gargantua'), PASSWORD=config('DB_PROTECTED_PASS'))
'PASSWORD': config('DB_PASS'),
'HOST': config('DB_HOST', default='127.0.0.1'),
'PORT': config('DB_PORT', default='5432'),
'TEST': {
'NAME': 'test_gargandb',
},
}
} }
DATABASES['default']['SECRET_URL'] = \ for db in DATABASES:
'postgresql+psycopg2://{USER}:{PASSWORD}@{HOST}:{PORT}/{NAME}'.format( DATABASES[db]['SECRET_URL'] = \
**DATABASES['default'] 'postgresql+psycopg2://{USER}:{PASSWORD}@{HOST}:{PORT}/{NAME}'.format(
) **DATABASES[db]
)
# Password validation # Password validation
......
from .dates import datetime, convert_to_datetime, MINYEAR
import json import json
import types
import datetime import datetime
import traceback import traceback
import inspect
__all__ = ['json_encoder', 'json_dumps'] __all__ = ['json_encoder', 'json_dumps']
...@@ -25,10 +23,11 @@ class JSONEncoder(json.JSONEncoder): ...@@ -25,10 +23,11 @@ class JSONEncoder(json.JSONEncoder):
elif hasattr(obj, '__iter__') and not isinstance(obj, dict): elif hasattr(obj, '__iter__') and not isinstance(obj, dict):
return list(obj) return list(obj)
else: else:
return super(self.__class__, self).default(obj) return super().default(obj)
json_encoder = JSONEncoder()
# json_encoder = JSONEncoder(indent=4)
json_encoder = JSONEncoder() # compact json
def json_dumps(obj): def json_dumps(obj):
return json.dumps(obj, cls=JSONEncoder) return json.dumps(obj, cls=JSONEncoder)
...@@ -2,15 +2,13 @@ ...@@ -2,15 +2,13 @@
""" """
__all__ = ['Translations', 'WeightedMatrix', 'UnweightedList', 'WeightedList', 'WeightedIndex']
from gargantext.util.db import session, bulk_insert
from collections import defaultdict from collections import defaultdict
from math import sqrt from math import sqrt
from gargantext.core.db import session, bulk_insert
__all__ = ['Translations', 'WeightedMatrix', 'UnweightedList', 'WeightedList', 'WeightedIndex']
class _BaseClass: class _BaseClass:
...@@ -303,6 +301,7 @@ class WeightedMatrix(_BaseClass): ...@@ -303,6 +301,7 @@ class WeightedMatrix(_BaseClass):
result.items[key1, key2] = value / sqrt(other.items[key1] * other.items[key2]) result.items[key1, key2] = value / sqrt(other.items[key1] * other.items[key2])
return result return result
# ?TODO rename Wordlist # ?TODO rename Wordlist
class UnweightedList(_BaseClass): class UnweightedList(_BaseClass):
......
...@@ -13,8 +13,10 @@ import itertools ...@@ -13,8 +13,10 @@ import itertools
import colorama import colorama
from colorama import Fore from colorama import Fore
from sqlalchemy.sql.expression import literal_column from sqlalchemy.sql.expression import literal_column
from sqlalchemy.orm import aliased
from sqlalchemy import func
from gargantext.util.db import session, func, aliased from gargantext.core.db import session
from gargantext.models import Node from gargantext.models import Node
......
...@@ -14,6 +14,8 @@ DB_PORT = {DB_PORT} ...@@ -14,6 +14,8 @@ DB_PORT = {DB_PORT}
DB_NAME = {DB_NAME} DB_NAME = {DB_NAME}
DB_USER = {DB_USER} DB_USER = {DB_USER}
DB_PASS = {DB_PASS} DB_PASS = {DB_PASS}
DB_PROTECTED_USER = {DB_PROTECTED_USER}
DB_PROTECTED_PASS = {DB_PROTECTED_PASS}
# Logs # Logs
LOG_FILE = /var/log/gargantext/backend/django.log LOG_FILE = /var/log/gargantext/backend/django.log
LOG_LEVEL = {LOG_LEVEL} LOG_LEVEL = {LOG_LEVEL}
......
...@@ -165,6 +165,8 @@ sed -E -e "s/[{]DEBUG[}]/$DEBUG/g" \ ...@@ -165,6 +165,8 @@ sed -E -e "s/[{]DEBUG[}]/$DEBUG/g" \
-e "s/[{]DB_NAME[}]/$DB_NAME/g" \ -e "s/[{]DB_NAME[}]/$DB_NAME/g" \
-e "s/[{]DB_USER[}]/$DB_USER/g" \ -e "s/[{]DB_USER[}]/$DB_USER/g" \
-e "s/[{]DB_PASS[}]/$DB_PASS/g" \ -e "s/[{]DB_PASS[}]/$DB_PASS/g" \
-e "s/[{]DB_PROTECTED_USER[}]/$PGREST_USER/g" \
-e "s/[{]DB_PROTECTED_PASS[}]/$PGREST_PASS/g" \
-e "s/[{]LOG_LEVEL[}]/$LOG_LEVEL/g" \ -e "s/[{]LOG_LEVEL[}]/$LOG_LEVEL/g" \
-e "s/[{]VENV[}]/$VENV/g" \ -e "s/[{]VENV[}]/$VENV/g" \
"$GARGANTEXT_TEMPLATE" > "$GARGANTEXT_CONF" \ "$GARGANTEXT_TEMPLATE" > "$GARGANTEXT_CONF" \
...@@ -183,6 +185,6 @@ if ! $DB_ACCESS; then ...@@ -183,6 +185,6 @@ if ! $DB_ACCESS; then
fi fi
if ! $PGREST_AUTO; then if ! $PGREST_AUTO; then
echo "WARNING: Didn't configure PostgREST user $PGREST_USER," \ echo "WARNING: You didn't configure protected user $PGREST_USER," \
"you may need to edit $POSTGREST_CONF manually." "you may need to edit $POSTGREST_CONF and $GARGANTEXT_CONF manually."
fi fi
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment