Commit c2672e4a authored by sim's avatar sim

Major changes in database handling

parent 60e4bce9
from calendar import timegm
from django.conf import settings from django.conf import settings
from rest_framework_jwt.settings import api_settings from rest_framework_jwt.settings import api_settings
from calendar import timegm from gargantext.utils.dates import datetime
from gargantext.util.dates import datetime
def jwt_payload_handler(user): def jwt_payload_handler(user):
username = user.username username = user.username
......
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from gargantext.util.show_nodes import tree_show, nodes from gargantext.utils.show_nodes import tree_show, nodes
import colorama import colorama
......
...@@ -35,9 +35,10 @@ contents: ...@@ -35,9 +35,10 @@ contents:
import os import os
import re import re
import importlib import importlib
from gargantext.util.lists import *
from gargantext.util import datetime, convert_to_datetime
from django.conf import settings from django.conf import settings
from gargantext.utils.lists import *
from gargantext.utils.dates import datetime, convert_to_datetime
# types & models (nodes, lists, hyperdata, resource) --------------------------------------------- # types & models (nodes, lists, hyperdata, resource) ---------------------------------------------
LISTTYPES = { LISTTYPES = {
......
from django.conf import settings import logging
from gargantext.util.json import json_dumps import psycopg2
######################################################################## from sqlalchemy import event
# get engine, session, etc. from sqlalchemy import create_engine
########################################################################
from sqlalchemy.orm import sessionmaker, scoped_session from sqlalchemy.orm import sessionmaker, scoped_session
from sqlalchemy import delete from sqlalchemy.orm.session import Session
def get_engine(): from django.conf import settings
from sqlalchemy import create_engine
return create_engine( settings.DATABASES['default']['SECRET_URL'] from gargantext.utils.json import json_dumps
, use_native_hstore = True
, json_serializer = json_dumps
, pool_size=20, max_overflow=0 __all__ = ['Session', 'session']
)
engine = get_engine() logger = logging.getLogger(__name__)
session = scoped_session(sessionmaker(bind=engine))
class TrackRollbackMixin(object):
######################################################################## """Track whether or not session has been rollbacked"""
# useful for queries
######################################################################## def __init__(self, *args, **kwargs):
from sqlalchemy.orm import aliased super().__init__(*args, **kwargs)
from sqlalchemy import func, desc event.listen(self, 'after_rollback', self._after_rollback, named=True)
from sqlalchemy.sql.expression import case
def close(self):
######################################################################## if self.info.get('is_rollbacked'):
# bulk insertions self.info['is_rollbacked'] = False
######################################################################## super().close()
import psycopg2
@property
def is_rollbacked(self):
return self.info.get('is_rollbacked')
def _after_rollback(self, **kw):
self.info['is_rollbacked'] = True
class ProtectedSession(TrackRollbackMixin, Session):
    """Imitate PostgREST authentication mechanism at each transaction.

    Once :meth:`login` has been called, an ``after_begin`` listener issues
    ``set role`` and ``set local "request.jwt.claim.*"`` statements at the
    start of every transaction opened by this session.  :meth:`close`
    (also available as :meth:`logout`) unplugs that listener and forgets
    the current user.
    """

    def close(self):
        """Unplug the authenticator, then close the session."""
        self._logout()
        super().close()

    # Logging out and closing the session are the same operation.
    logout = close

    def login(self, user):
        """Register ``user`` as the identity for upcoming transactions.

        ``user`` must expose ``id``, ``username``, ``is_superuser`` and
        ``is_staff``.  A falsy ``user`` (e.g. ``None``) is treated as
        anonymous.  When ``settings.DEBUG`` is enabled, a username string
        is also accepted and resolved through the module-level ``session``.

        Raises:
            Exception: in debug mode, when no user matches the username.
        """
        if settings.DEBUG and isinstance(user, str):
            # For debugging purposes
            from gargantext.models import User
            from sqlalchemy.orm.exc import NoResultFound

            username = user
            try:
                user = session.query(User).filter_by(username=username).one()
            except NoResultFound:
                raise Exception("User %s not found!" % username)

        self.user_id = user and user.id
        self.user_name = user and user.username

        # The two assignments above tolerate a falsy user, so the role
        # lookup must too: there are no attributes to inspect on None.
        if not user:
            self.role = 'anon'
        elif user.is_superuser:
            self.role = settings.ROLE_SUPERUSER
        elif user.is_staff:
            self.role = settings.ROLE_STAFF
        elif user.id:
            self.role = settings.ROLE_USER
        else:
            self.role = 'anon'

        logger.debug("Plug authenticator for: %s (%s, %s)",
                     self.role, self.user_name, self.user_id)

        event.listen(self, 'after_begin', self._postgres_auth)

    def _logout(self):
        """Remove the ``after_begin`` listener and reset user attributes."""
        if event.contains(self, 'after_begin', self._postgres_auth):
            logger.debug("Unplug authenticator for: %s (%s, %s)",
                         self.role, self.user_name, self.user_id)
            event.remove(self, 'after_begin', self._postgres_auth)
            # NOTE(review): indentation was lost in the reviewed source; this
            # reset is assumed to belong to the "was logged in" branch so that
            # closing a never-authenticated session does not open a
            # connection -- confirm against the repository.
            self.connection().info['authenticated'] = False

        self.user_id = self.user_name = self.role = None

    def _postgres_auth(self, *args, **kwargs):
        """``after_begin`` listener: send role and JWT-like claims to postgres.

        Positional and keyword arguments passed by the event system are
        ignored; everything needed is read from the session itself.
        """
        logger.debug("Authenticate in postgres as %s (%s, %s)",
                     self.role, self.user_name, self.user_id)

        # NOTE(review): these statements are assembled with string
        # interpolation (``%r`` for the username); a username containing
        # quotes could break or alter the statement.  Consider switching to
        # a parameterized form if usernames are not strictly validated.
        ops = [('set role %s', self.role),
               ('set local "request.jwt.claim.role" = \'%s\'', self.role),
               ('set local "request.jwt.claim.sub" = %r', self.user_name),
               ('set local "request.jwt.claim.user_id" = %d', self.user_id)]

        # Statements whose parameter is falsy (e.g. no user id) are skipped.
        sql = ';'.join(s % p for s, p in ops if p) + ';'

        conn = self.connection()
        conn.info['authenticated'] = True
        conn.execute(sql)
# Unprotected engine and scoped session, built from the 'default' database
# settings.
# FIXME Kept for backward compatibility but should be removed
engine = create_engine(settings.DATABASES['default']['SECRET_URL'])
session = scoped_session(sessionmaker(
bind=engine.execution_options(isolation_level='READ COMMITTED'),
autoflush=False))
# Engine built from the 'protected' database settings; it backs the
# ProtectedSession factory defined right after.
protected_engine = create_engine(
settings.DATABASES['protected']['SECRET_URL'],
use_native_hstore=True,
json_serializer=json_dumps,
# Pool of 20 connections with no overflow allowed
pool_size=20,
max_overflow=0)
# Session factory producing ProtectedSession instances bound to the
# protected engine.
Session = sessionmaker(
class_=ProtectedSession,
# This is the default postgresql isolation level, we state it here
# to be explicit
bind=protected_engine.execution_options(isolation_level='READ COMMITTED'),
# Disable autoflush to have more control over transactions
autoflush=False,)
# FIXME Should rewrite bulk queries with SQLAlchemy Core: beware, those
# functions are bypassing row level security!
# See: http://docs.sqlalchemy.org/en/latest/faq/performance.html#i-m-inserting-400-000-rows-with-the-orm-and-it-s-really-slow
def get_cursor(): def get_cursor():
db_settings = settings.DATABASES['default'] db_settings = settings.DATABASES['default']
db = psycopg2.connect(**{ db = psycopg2.connect(**{
...@@ -43,6 +132,7 @@ def get_cursor(): ...@@ -43,6 +132,7 @@ def get_cursor():
}) })
return db, db.cursor() return db, db.cursor()
class bulk_insert: class bulk_insert:
def __init__(self, table, fields, data, cursor=None): def __init__(self, table, fields, data, cursor=None):
# prepare the iterator # prepare the iterator
...@@ -74,6 +164,7 @@ class bulk_insert: ...@@ -74,6 +164,7 @@ class bulk_insert:
readline = read readline = read
def bulk_insert_ifnotexists(model, uniquekey, fields, data, cursor=None, do_stats=False): def bulk_insert_ifnotexists(model, uniquekey, fields, data, cursor=None, do_stats=False):
""" """
Inserts bulk data with an intermediate check on a uniquekey Inserts bulk data with an intermediate check on a uniquekey
...@@ -157,5 +248,3 @@ def bulk_insert_ifnotexists(model, uniquekey, fields, data, cursor=None, do_stat ...@@ -157,5 +248,3 @@ def bulk_insert_ifnotexists(model, uniquekey, fields, data, cursor=None, do_stat
cursor.execute('COMMIT WORK;') cursor.execute('COMMIT WORK;')
cursor.close() cursor.close()
from gargantext.util.db import session from gargantext.core.db import session
from gargantext.constants import NODETYPES, LISTTYPES from gargantext.constants import NODETYPES, LISTTYPES
from datetime import datetime from datetime import datetime
...@@ -39,7 +39,7 @@ class Node(ValidatorMixin, Base): ...@@ -39,7 +39,7 @@ class Node(ValidatorMixin, Base):
<Node(id=None, typename=None, user_id=None, parent_id=None, name='without-type', date=None)> <Node(id=None, typename=None, user_id=None, parent_id=None, name='without-type', date=None)>
>>> Node(typename='CORPUS') >>> Node(typename='CORPUS')
<CorpusNode(id=None, typename='CORPUS', user_id=None, parent_id=None, name=None, date=None)> <CorpusNode(id=None, typename='CORPUS', user_id=None, parent_id=None, name=None, date=None)>
>>> from gargantext.util.db import session >>> from gargantext.core.db import session
>>> session.query(Node).filter_by(typename='USER').first() # doctest: +ELLIPSIS >>> session.query(Node).filter_by(typename='USER').first() # doctest: +ELLIPSIS
<UserNode(...)> <UserNode(...)>
......
from sqlalchemy.orm import aliased
from django.contrib.auth import models from django.contrib.auth import models
from gargantext.util.db import session, aliased from gargantext.core.db import session
from datetime import datetime from datetime import datetime
...@@ -8,6 +9,7 @@ from .base import DjangoBase, Base, Column, ForeignKey, UniqueConstraint, \ ...@@ -8,6 +9,7 @@ from .base import DjangoBase, Base, Column, ForeignKey, UniqueConstraint, \
__all__ = ['User', 'Contact'] __all__ = ['User', 'Contact']
class User(DjangoBase): class User(DjangoBase):
# The properties below are a reflection of Django's auth module's models. # The properties below are a reflection of Django's auth module's models.
__tablename__ = models.User._meta.db_table __tablename__ = models.User._meta.db_table
......
...@@ -11,8 +11,8 @@ https://docs.djangoproject.com/en/1.11/ref/settings/ ...@@ -11,8 +11,8 @@ https://docs.djangoproject.com/en/1.11/ref/settings/
""" """
import os import os
from gargantext.util.config import config
import datetime import datetime
from gargantext.utils.config import config
# Build paths inside the project like this: os.path.join(BASE_DIR, ...) # Build paths inside the project like this: os.path.join(BASE_DIR, ...)
...@@ -118,23 +118,25 @@ LOGGING = { ...@@ -118,23 +118,25 @@ LOGGING = {
# Database # Database
# https://docs.djangoproject.com/en/1.11/ref/settings/#databases # https://docs.djangoproject.com/en/1.11/ref/settings/#databases
DEFAULT_DATABASE = {
'ENGINE': 'django.db.backends.postgresql_psycopg2',
'NAME': config('DB_NAME', default='gargandb'),
'USER': config('DB_USER', default='gargantua'),
'PASSWORD': config('DB_PASS'),
'HOST': config('DB_HOST', default='127.0.0.1'),
'PORT': config('DB_PORT', default='5432'),
}
DATABASES = { DATABASES = {
'default': { 'default': DEFAULT_DATABASE,
'ENGINE': 'django.db.backends.postgresql_psycopg2', 'protected': dict(DEFAULT_DATABASE,
'NAME': config('DB_NAME', default='gargandb'), USER=config('DB_PROTECTED_USER', default='authenticator'),
'USER': config('DB_USER', default='gargantua'), PASSWORD=config('DB_PROTECTED_PASS'))
'PASSWORD': config('DB_PASS'),
'HOST': config('DB_HOST', default='127.0.0.1'),
'PORT': config('DB_PORT', default='5432'),
'TEST': {
'NAME': 'test_gargandb',
},
}
} }
DATABASES['default']['SECRET_URL'] = \ for db in DATABASES:
'postgresql+psycopg2://{USER}:{PASSWORD}@{HOST}:{PORT}/{NAME}'.format( DATABASES[db]['SECRET_URL'] = \
**DATABASES['default'] 'postgresql+psycopg2://{USER}:{PASSWORD}@{HOST}:{PORT}/{NAME}'.format(
) **DATABASES[db]
)
# Password validation # Password validation
......
from .dates import datetime, convert_to_datetime, MINYEAR
import json import json
import types
import datetime import datetime
import traceback import traceback
import inspect
__all__ = ['json_encoder', 'json_dumps'] __all__ = ['json_encoder', 'json_dumps']
...@@ -25,10 +23,11 @@ class JSONEncoder(json.JSONEncoder): ...@@ -25,10 +23,11 @@ class JSONEncoder(json.JSONEncoder):
elif hasattr(obj, '__iter__') and not isinstance(obj, dict): elif hasattr(obj, '__iter__') and not isinstance(obj, dict):
return list(obj) return list(obj)
else: else:
return super(self.__class__, self).default(obj) return super().default(obj)
json_encoder = JSONEncoder()
# json_encoder = JSONEncoder(indent=4)
json_encoder = JSONEncoder() # compact json
def json_dumps(obj): def json_dumps(obj):
return json.dumps(obj, cls=JSONEncoder) return json.dumps(obj, cls=JSONEncoder)
...@@ -2,15 +2,13 @@ ...@@ -2,15 +2,13 @@
""" """
__all__ = ['Translations', 'WeightedMatrix', 'UnweightedList', 'WeightedList', 'WeightedIndex']
from gargantext.util.db import session, bulk_insert
from collections import defaultdict from collections import defaultdict
from math import sqrt from math import sqrt
from gargantext.core.db import session, bulk_insert
__all__ = ['Translations', 'WeightedMatrix', 'UnweightedList', 'WeightedList', 'WeightedIndex']
class _BaseClass: class _BaseClass:
...@@ -303,6 +301,7 @@ class WeightedMatrix(_BaseClass): ...@@ -303,6 +301,7 @@ class WeightedMatrix(_BaseClass):
result.items[key1, key2] = value / sqrt(other.items[key1] * other.items[key2]) result.items[key1, key2] = value / sqrt(other.items[key1] * other.items[key2])
return result return result
# ?TODO rename Wordlist # ?TODO rename Wordlist
class UnweightedList(_BaseClass): class UnweightedList(_BaseClass):
......
...@@ -13,8 +13,10 @@ import itertools ...@@ -13,8 +13,10 @@ import itertools
import colorama import colorama
from colorama import Fore from colorama import Fore
from sqlalchemy.sql.expression import literal_column from sqlalchemy.sql.expression import literal_column
from sqlalchemy.orm import aliased
from sqlalchemy import func
from gargantext.util.db import session, func, aliased from gargantext.core.db import session
from gargantext.models import Node from gargantext.models import Node
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment