Commit 78e9b062 authored by delanoe's avatar delanoe

Merge branch 'simon-unstable' into unstable

parents 76f06436 39666398
# A generic, single database configuration.
[alembic]
# path to migration scripts
script_location = alembic
# template used to generate migration files
# file_template = %%(rev)s_%%(slug)s
# timezone to use when rendering the date
# within the migration file as well as the filename.
# string value is passed to dateutil.tz.gettz()
# leave blank for localtime
# timezone =
# max length of characters to apply to the
# "slug" field
#truncate_slug_length = 40
# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false
# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
# sourceless = false
# version location specification; this defaults
# to alembic/versions. When using multiple version
# directories, initial revisions must be specified with --version-path
# version_locations = %(here)s/bar %(here)s/bat alembic/versions
# the output encoding used when revision files
# are written from script.py.mako
# output_encoding = utf-8
# XXX For database access configuration, see alembic/env.py
#sqlalchemy.url = driver://user:pass@localhost/dbname
[alembic:exclude]
tables = django_* celery_* djcelery_* auth_*
# Logging configuration
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARN
handlers = console
qualname =
[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers =
qualname = alembic
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
Alembic must be installed in the virtualenv in order to use the right Python
paths, so it's installed with pip. Commands described in this little
documentation must be executed from the gargantext root directory, i.e. /srv/gargantext.
Keep in mind that Alembic only handles SQLAlchemy models: tables created from
Django ORM must be put out of Alembic sight. See [alembic:exclude] section in
alembic.ini.
TELL ALEMBIC TO NOT START FROM SCRATCH
# To upgrade a database populated before Alembic usage in Gargantext,
# don't forget to tell Alembic your current version before running the
# "upgrade head" command. If you don't want to do this, you can of course
# drop your database and really start from scratch.
alembic stamp 601e9d9baa4c
UPGRADE TO LATEST DATABASE VERSION
alembic upgrade head
DOWNGRADE TO INITIAL DATABASE STATE
# /!\ RUNNING THIS COMMAND WILL CAUSE ALL DATA TO BE LOST WITHOUT ASKING !!
alembic downgrade base
GENERATE A NEW REVISION
alembic revision -m "Message for this migration"
# A migration script is then created in alembic/versions directory. For
# example alembic/versions/3adcc9a56557_message_for_this_migration.py
# where 3adcc9a56557 is the revision id generated by Alembic.
#
# This script must be edited to write the migration itself, mainly
# in `upgrade` and `downgrade` functions. See Alembic documentation for
# further details.
GENERATE A REVISION FROM CURRENT STATE
alembic revision --autogenerate -m "Message for this migration"
# Alembic should generate a script reflecting changes already made in
# database. However it is always a good idea to check it and edit it
# manually, Alembic is not always accurate and can't see all alterations.
# It should work with basic changes such as model or column creation. See
# http://alembic.zzzcomputing.com/en/latest/autogenerate.html#what-does-autogenerate-detect-and-what-does-it-not-detect
from __future__ import with_statement
from alembic import context
from sqlalchemy import engine_from_config, pool
from logging.config import fileConfig
import re
# Add projet root directory in path and setup Django...
import os
import django
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'gargantext.settings')
django.setup()
# ...to be able to import gargantext.
from gargantext import settings, models
# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config
config.set_main_option("sqlalchemy.url", settings.DATABASES['default']['URL'])
# Interpret the config file for Python logging.
# This line sets up loggers basically.
fileConfig(config.config_file_name)
# add your model's MetaData object here
# for 'autogenerate' support
# from myapp import mymodel
# target_metadata = mymodel.Base.metadata
target_metadata = models.Base.metadata
# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.
# Inspired from https://gist.github.com/utek/6163250
def exclude_tables_from_config(config):
    """Build a compiled regex matching table names Alembic must ignore.

    `config` is the [alembic:exclude] ini section (dict-like, supports
    .get): its "tables" key holds space-separated glob-like patterns in
    which "*" stands for any suffix, e.g. "django_* celery_*".

    Returns a compiled pattern; use .match(name) to test a table name.
    """
    # Translate shell-style "*" wildcards into the regex equivalent.
    tokens = config.get("tables", '').replace('*', '.*').split(' ')
    # Drop empty tokens: with the original code an empty "tables" value (or
    # a double space) produced an empty regex alternative, which matches
    # EVERY table name and would exclude all tables from migrations.
    tokens = [t for t in tokens if t]
    # With nothing to exclude, fall back to a pattern that can never match.
    pattern = '|'.join(tokens) if tokens else r'(?!x)x'
    return re.compile(pattern)
exclude_tables = exclude_tables_from_config(config.get_section('alembic:exclude'))
def include_object(obj, name, typ, reflected, compare_to):
    """Alembic hook deciding whether an object takes part in migrations.

    Tables whose name matches the [alembic:exclude] patterns are skipped;
    every other object kind is always included.
    """
    return not (typ == "table" and exclude_tables.match(name))
def run_migrations_offline():
    """Run migrations in 'offline' mode.

    The Alembic context is configured with just a database URL, not an
    Engine, so no DBAPI needs to be available: context.execute() calls
    emit the SQL to the script output instead of a live connection.
    """
    context.configure(
        url=config.get_main_option("sqlalchemy.url"),
        target_metadata=target_metadata,
        literal_binds=True,
        include_object=include_object,
    )
    with context.begin_transaction():
        context.run_migrations()
def run_migrations_online():
    """Run migrations in 'online' mode.

    Builds an Engine from the ini-file configuration and binds a live
    connection to the Alembic context before running the migrations.
    """
    engine = engine_from_config(
        config.get_section(config.config_ini_section),
        prefix='sqlalchemy.',
        poolclass=pool.NullPool,
    )
    with engine.connect() as connection:
        context.configure(
            connection=connection,
            target_metadata=target_metadata,
            include_object=include_object,
        )
        with context.begin_transaction():
            context.run_migrations()
# Entry point: Alembic selects offline mode when invoked with --sql,
# online mode otherwise.
if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from alembic import op
import sqlalchemy as sa
import gargantext
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision = ${repr(up_revision)}
down_revision = ${repr(down_revision)}
branch_labels = ${repr(branch_labels)}
depends_on = ${repr(depends_on)}
def upgrade():
${upgrades if upgrades else "pass"}
def downgrade():
${downgrades if downgrades else "pass"}
"""Put a timezone on Node.date
Revision ID: 08230100f262
Revises: 601e9d9baa4c
Create Date: 2017-07-06 13:47:10.788569
"""
from alembic import op
import sqlalchemy as sa
import gargantext
# revision identifiers, used by Alembic.
revision = '08230100f262'
down_revision = '601e9d9baa4c'
branch_labels = None
depends_on = None
def upgrade():
    # Store Node.date as a timezone-aware timestamp (timestamptz in PostgreSQL).
    op.alter_column('nodes', 'date', type_=sa.DateTime(timezone=True))
def downgrade():
    # Revert Node.date to a naive (timezone-less) timestamp.
    op.alter_column('nodes', 'date', type_=sa.DateTime(timezone=False))
"""Add OCC_HIST & OCC_HIST_PART functions
Revision ID: 601e9d9baa4c
Revises: 932dbf3e8c43
Create Date: 2017-07-06 10:52:16.161118
"""
from alembic import op
import sqlalchemy as sa
from gargantext.tools.alembic import ReplaceableObject
# revision identifiers, used by Alembic.
revision = '601e9d9baa4c'
down_revision = '932dbf3e8c43'
branch_labels = None
depends_on = None
# -- OCC_HIST_PART :: Corpus.id -> GroupList.id -> Start -> End
# Sums occurrence weights per ngram over the documents (typename = 4) of
# corpus $1 whose indexed timestamp lies in ]$3, $4], mapping grouped
# ngrams to their main form (ngram1) through group list $2.
# NOTE(review): the `INNER JOIN nodes as n1 ON n1.id = n.id` self-join
# always yields the same row as n and looks redundant -- confirm intent.
occ_hist_part = ReplaceableObject(
"OCC_HIST_PART(int, int, timestamp, timestamp)",
"""
RETURNS TABLE (ng_id int, score float8)
AS $$
-- EXPLAIN ANALYZE
SELECT
COALESCE(gr.ngram1_id, ng1.ngram_id) as ng_id,
SUM(ng1.weight) as score
from nodes n
-- BEFORE
INNER JOIN nodes as n1 ON n1.id = n.id
INNER JOIN nodes_ngrams ng1 ON ng1.node_id = n1.id
-- Limit with timestamps: ]start, end]
INNER JOIN nodes_hyperdata nh1 ON nh1.node_id = n1.id
AND nh1.value_utc > $3
AND nh1.value_utc <= $4
-- Group List
LEFT JOIN nodes_ngrams_ngrams gr ON ng1.ngram_id = gr.ngram2_id
AND gr.node_id = $2
WHERE
n.typename = 4
AND n.parent_id = $1
GROUP BY 1
$$
LANGUAGE SQL;
"""
)
# -- OCC_HIST :: Corpus.id -> GroupList.id -> MapList.id -> Start -> EndFirst -> EndLast
# -- EXAMPLE USAGE
# -- SELECT * FROM OCC_HIST(182856, 183859, 183866, '1800-03-15 17:00:00+01', '2000-03-15 17:00:00+01', '2017-03-15 17:00:00+01')
# For each ngram of map list $3, computes the growth rate (in %) of its
# occurrence score between the two periods ]$4, $5] and ]$5, $6],
# reusing OCC_HIST_PART for each period.
occ_hist = ReplaceableObject(
"OCC_HIST(int, int, int, timestamp, timestamp, timestamp)",
"""
RETURNS TABLE (ng_id int, score numeric)
AS $$
WITH OCC1 as (SELECT * from OCC_HIST_PART($1, $2, $4, $5))
, OCC2 as (SELECT * from OCC_HIST_PART($1, $2, $5, $6))
, GROWTH as (SELECT ml.ngram_id as ngram_id
, COALESCE(OCC1.score, null) as score1
, COALESCE(OCC2.score, null) as score2
FROM nodes_ngrams ml
LEFT JOIN OCC1 ON OCC1.ng_id = ml.ngram_id
LEFT JOIN OCC2 ON OCC2.ng_id = ml.ngram_id
WHERE ml.node_id = $3
ORDER by score2 DESC)
SELECT ngram_id, COALESCE(ROUND(CAST((100 * (score2 - score1) / COALESCE((score2 + score1), 1)) as numeric), 2), 0) from GROWTH
$$
LANGUAGE SQL;
"""
)
# -- BEHAVIORAL TEST (should be equal to occ in terms table)
# -- WITH OCC as (SELECT * from OCC_HIST(182856, 183859, '1800-03-15 17:00:00+01', '2300-03-15 17:00:00+01'))
# -- SELECT ng_id, score from OCC
# -- INNER JOIN nodes_ngrams ml on ml.ngram_id = ng_id
# -- AND ml.node_id = 183866
# -- ORDER BY score DESC;
def upgrade():
    # create_sp is the custom operation registered in gargantext.tools.alembic.
    op.create_sp(occ_hist_part)
    op.create_sp(occ_hist)
def downgrade():
    # Drop in reverse dependency order: OCC_HIST calls OCC_HIST_PART.
    op.drop_sp(occ_hist)
    op.drop_sp(occ_hist_part)
"""Initial migration
Revision ID: 932dbf3e8c43
Revises:
Create Date: 2017-07-05 16:41:23.951422
"""
from alembic import op
import sqlalchemy as sa
import gargantext
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = '932dbf3e8c43'
down_revision = None
branch_labels = None
depends_on = None
def upgrade():
    """Create the initial Gargantext schema (SQLAlchemy-managed tables only).

    Auto-generated by Alembic autogenerate. Django-managed tables such as
    auth_user are referenced through foreign keys but are NOT created
    here -- see the [alembic:exclude] section of alembic.ini.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table('contacts',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('user1_id', sa.Integer(), nullable=True),
        sa.Column('user2_id', sa.Integer(), nullable=True),
        sa.Column('is_blocked', sa.Boolean(), nullable=True),
        sa.Column('date_creation', sa.DateTime(), nullable=True),
        sa.ForeignKeyConstraint(['user1_id'], ['auth_user.id'], ondelete='CASCADE'),
        sa.ForeignKeyConstraint(['user2_id'], ['auth_user.id'], ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('id'),
        sa.UniqueConstraint('user1_id', 'user2_id')
    )
    op.create_table('ngrams',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('terms', sa.String(length=255), nullable=True),
        sa.Column('n', sa.Integer(), nullable=True),
        sa.PrimaryKeyConstraint('id'),
        sa.UniqueConstraint('terms')
    )
    op.create_index('ngrams_id_n_idx', 'ngrams', ['id', 'n'], unique=False)
    op.create_index('ngrams_n_idx', 'ngrams', ['n'], unique=False)
    op.create_table('nodes',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('typename', gargantext.models.nodes.NodeType(), nullable=True),
        sa.Column('user_id', sa.Integer(), nullable=True),
        sa.Column('parent_id', sa.Integer(), nullable=True),
        sa.Column('name', sa.String(length=255), nullable=True),
        sa.Column('date', sa.DateTime(), nullable=True),
        sa.Column('hyperdata', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
        sa.ForeignKeyConstraint(['parent_id'], ['nodes.id'], ondelete='CASCADE'),
        sa.ForeignKeyConstraint(['user_id'], ['auth_user.id'], ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('id')
    )
    op.create_index(op.f('ix_nodes_typename'), 'nodes', ['typename'], unique=False)
    op.create_index('nodes_hyperdata_idx', 'nodes', ['hyperdata'], unique=False)
    op.create_index('nodes_user_id_typename_parent_id_idx', 'nodes', ['user_id', 'typename', 'parent_id'], unique=False)
    op.create_table('nodes_hyperdata',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('node_id', sa.Integer(), nullable=True),
        sa.Column('key', gargantext.models.hyperdata.HyperdataKey(), nullable=True),
        sa.Column('value_int', sa.Integer(), nullable=True),
        sa.Column('value_flt', postgresql.DOUBLE_PRECISION(), nullable=True),
        sa.Column('value_utc', sa.DateTime(timezone=True), nullable=True),
        sa.Column('value_str', sa.String(length=255), nullable=True),
        sa.Column('value_txt', sa.Text(), nullable=True),
        sa.ForeignKeyConstraint(['node_id'], ['nodes.id'], ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('id')
    )
    op.create_index(op.f('ix_nodes_hyperdata_value_flt'), 'nodes_hyperdata', ['value_flt'], unique=False)
    op.create_index(op.f('ix_nodes_hyperdata_value_int'), 'nodes_hyperdata', ['value_int'], unique=False)
    op.create_index(op.f('ix_nodes_hyperdata_value_str'), 'nodes_hyperdata', ['value_str'], unique=False)
    op.create_index(op.f('ix_nodes_hyperdata_value_utc'), 'nodes_hyperdata', ['value_utc'], unique=False)
    op.create_index('nodes_hyperdata_node_id_key_idx', 'nodes_hyperdata', ['node_id', 'key'], unique=False)
    op.create_index('nodes_hyperdata_node_id_key_value_flt_idx', 'nodes_hyperdata', ['node_id', 'key', 'value_flt'], unique=False)
    op.create_index('nodes_hyperdata_node_id_key_value_int_idx', 'nodes_hyperdata', ['node_id', 'key', 'value_int'], unique=False)
    op.create_index('nodes_hyperdata_node_id_key_value_str_idx', 'nodes_hyperdata', ['node_id', 'key', 'value_str'], unique=False)
    op.create_index('nodes_hyperdata_node_id_key_value_utc_idx', 'nodes_hyperdata', ['node_id', 'key', 'value_utc'], unique=False)
    op.create_index('nodes_hyperdata_node_id_value_utc_idx', 'nodes_hyperdata', ['node_id', 'value_utc'], unique=False)
    op.create_table('nodes_ngrams',
        sa.Column('node_id', sa.Integer(), nullable=False),
        sa.Column('ngram_id', sa.Integer(), nullable=False),
        sa.Column('weight', sa.Float(), nullable=True),
        sa.ForeignKeyConstraint(['ngram_id'], ['ngrams.id'], ondelete='CASCADE'),
        sa.ForeignKeyConstraint(['node_id'], ['nodes.id'], ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('node_id', 'ngram_id')
    )
    op.create_index('nodes_ngrams_ngram_id_idx', 'nodes_ngrams', ['ngram_id'], unique=False)
    op.create_index('nodes_ngrams_node_id_idx', 'nodes_ngrams', ['node_id'], unique=False)
    op.create_index('nodes_ngrams_node_id_ngram_id_idx', 'nodes_ngrams', ['node_id', 'ngram_id'], unique=False)
    op.create_table('nodes_ngrams_ngrams',
        sa.Column('node_id', sa.Integer(), nullable=False),
        sa.Column('ngram1_id', sa.Integer(), nullable=False),
        sa.Column('ngram2_id', sa.Integer(), nullable=False),
        sa.Column('weight', sa.Float(precision=24), nullable=True),
        sa.ForeignKeyConstraint(['ngram1_id'], ['ngrams.id'], ondelete='CASCADE'),
        sa.ForeignKeyConstraint(['ngram2_id'], ['ngrams.id'], ondelete='CASCADE'),
        sa.ForeignKeyConstraint(['node_id'], ['nodes.id'], ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('node_id', 'ngram1_id', 'ngram2_id')
    )
    op.create_index('nodes_ngrams_ngrams_ngram1_id_idx', 'nodes_ngrams_ngrams', ['ngram1_id'], unique=False)
    op.create_index('nodes_ngrams_ngrams_ngram2_id_idx', 'nodes_ngrams_ngrams', ['ngram2_id'], unique=False)
    op.create_index('nodes_ngrams_ngrams_node_id_idx', 'nodes_ngrams_ngrams', ['node_id'], unique=False)
    op.create_index('nodes_ngrams_ngrams_node_id_ngram1_id_ngram2_id_idx', 'nodes_ngrams_ngrams', ['node_id', 'ngram1_id', 'ngram2_id'], unique=False)
    op.create_table('nodes_nodes',
        sa.Column('node1_id', sa.Integer(), nullable=False),
        sa.Column('node2_id', sa.Integer(), nullable=False),
        sa.Column('score', sa.Float(precision=24), nullable=True),
        sa.ForeignKeyConstraint(['node1_id'], ['nodes.id'], ondelete='CASCADE'),
        sa.ForeignKeyConstraint(['node2_id'], ['nodes.id'], ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('node1_id', 'node2_id')
    )
    op.create_index('nodes_nodes_node1_id_node2_id_idx', 'nodes_nodes', ['node1_id', 'node2_id'], unique=False)
    op.create_table('nodes_nodes_ngrams',
        sa.Column('node1_id', sa.Integer(), nullable=False),
        sa.Column('node2_id', sa.Integer(), nullable=False),
        sa.Column('ngram_id', sa.Integer(), nullable=False),
        sa.Column('score', sa.Float(precision=24), nullable=True),
        sa.ForeignKeyConstraint(['ngram_id'], ['ngrams.id'], ondelete='CASCADE'),
        sa.ForeignKeyConstraint(['node1_id'], ['nodes.id'], ondelete='CASCADE'),
        sa.ForeignKeyConstraint(['node2_id'], ['nodes.id'], ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('node1_id', 'node2_id', 'ngram_id')
    )
    op.create_index('nodes_nodes_ngrams_node1_id_idx', 'nodes_nodes_ngrams', ['node1_id'], unique=False)
    op.create_index('nodes_nodes_ngrams_node2_id_idx', 'nodes_nodes_ngrams', ['node2_id'], unique=False)
    # ### end Alembic commands ###
def downgrade():
    """Drop the whole initial schema, in reverse creation order."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_index('nodes_nodes_ngrams_node2_id_idx', table_name='nodes_nodes_ngrams')
    op.drop_index('nodes_nodes_ngrams_node1_id_idx', table_name='nodes_nodes_ngrams')
    op.drop_table('nodes_nodes_ngrams')
    op.drop_index('nodes_nodes_node1_id_node2_id_idx', table_name='nodes_nodes')
    op.drop_table('nodes_nodes')
    op.drop_index('nodes_ngrams_ngrams_node_id_ngram1_id_ngram2_id_idx', table_name='nodes_ngrams_ngrams')
    op.drop_index('nodes_ngrams_ngrams_node_id_idx', table_name='nodes_ngrams_ngrams')
    op.drop_index('nodes_ngrams_ngrams_ngram2_id_idx', table_name='nodes_ngrams_ngrams')
    op.drop_index('nodes_ngrams_ngrams_ngram1_id_idx', table_name='nodes_ngrams_ngrams')
    op.drop_table('nodes_ngrams_ngrams')
    op.drop_index('nodes_ngrams_node_id_ngram_id_idx', table_name='nodes_ngrams')
    op.drop_index('nodes_ngrams_node_id_idx', table_name='nodes_ngrams')
    op.drop_index('nodes_ngrams_ngram_id_idx', table_name='nodes_ngrams')
    op.drop_table('nodes_ngrams')
    op.drop_index('nodes_hyperdata_node_id_value_utc_idx', table_name='nodes_hyperdata')
    op.drop_index('nodes_hyperdata_node_id_key_value_utc_idx', table_name='nodes_hyperdata')
    op.drop_index('nodes_hyperdata_node_id_key_value_str_idx', table_name='nodes_hyperdata')
    op.drop_index('nodes_hyperdata_node_id_key_value_int_idx', table_name='nodes_hyperdata')
    op.drop_index('nodes_hyperdata_node_id_key_value_flt_idx', table_name='nodes_hyperdata')
    op.drop_index('nodes_hyperdata_node_id_key_idx', table_name='nodes_hyperdata')
    op.drop_index(op.f('ix_nodes_hyperdata_value_utc'), table_name='nodes_hyperdata')
    op.drop_index(op.f('ix_nodes_hyperdata_value_str'), table_name='nodes_hyperdata')
    op.drop_index(op.f('ix_nodes_hyperdata_value_int'), table_name='nodes_hyperdata')
    op.drop_index(op.f('ix_nodes_hyperdata_value_flt'), table_name='nodes_hyperdata')
    op.drop_table('nodes_hyperdata')
    op.drop_index('nodes_user_id_typename_parent_id_idx', table_name='nodes')
    op.drop_index('nodes_hyperdata_idx', table_name='nodes')
    op.drop_index(op.f('ix_nodes_typename'), table_name='nodes')
    op.drop_table('nodes')
    op.drop_index('ngrams_n_idx', table_name='ngrams')
    op.drop_index('ngrams_id_n_idx', table_name='ngrams')
    op.drop_table('ngrams')
    op.drop_table('contacts')
    # ### end Alembic commands ###
#!/usr/bin/env python
import sys
import os
if __name__ == "__main__":
    # Django settings
    # NOTE(review): `dirname` is computed but never used below -- confirm
    # whether it can be dropped.
    dirname = os.path.dirname(os.path.realpath(__file__))
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "gargantext.settings")
    # initialize Django application
    from django.core.wsgi import get_wsgi_application
    application = get_wsgi_application()
    # retrieve Django models
    import django.apps
    django_models = django.apps.apps.get_models()
    django_models_names = set(model._meta.db_table for model in django_models)
    # migrate SQLAlchemy models: create every table registered on
    # Base.metadata that Django does not already manage
    from gargantext import models
    from gargantext.util.db import Base, engine
    sqla_models_names = (
        model for model in Base.metadata.tables.keys()
        if model not in django_models_names
    )
    sqla_models = (
        Base.metadata.tables[model_name]
        for model_name in sqla_models_names
    )
    print()
    for model in sqla_models:
        try:
            model.create(engine)
            print('created model: `%s`' % model)
        except Exception as e:
            # best-effort: report failures (e.g. table already exists) and go on
            print('could not create model: `%s`, %s' % (model, e))
    print()
from django.core.management.base import BaseCommand, CommandError
from gargantext.tools.show_nodes import tree_show, nodes
import colorama
class Command(BaseCommand):
    """Management command printing the node hierarchy as a colored tree."""
    help = 'Nodes'

    def add_arguments(self, parser):
        # Single positional argument; only the 'show' action is handled.
        parser.add_argument(dest='action', default='show')

    def handle(self, *args, **options):
        action = options.get('action')
        if action == 'show':
            # strip=False keeps ANSI colors even when output is piped
            colorama.init(strip=False)
            for root in nodes():
                tree_show(root)
from django.core.management.base import BaseCommand, CommandError
from gargantext.models import Node
class Command(BaseCommand):
    """Placeholder management command; only prints a success message."""
    help = 'Something'

    def handle(self, *args, **options):
        self.stdout.write(self.style.SUCCESS('Oh yeah!'))
from .base import Base
from .nodes import *
from .hyperdata import *
from .users import *
......
from sqlalchemy.schema import Column, ForeignKey, UniqueConstraint, Index
from sqlalchemy.orm import relationship
from sqlalchemy.types import TypeDecorator, \
Integer, Float, Boolean, DateTime, String, Text
from sqlalchemy.dialects.postgresql import JSONB, DOUBLE_PRECISION as Double
from sqlalchemy.ext.mutable import MutableDict, MutableList
from sqlalchemy.ext.declarative import declarative_base
__all__ = ["Column", "ForeignKey", "UniqueConstraint", "relationship",
"Integer", "Float", "Boolean", "DateTime", "String", "Text",
"TypeDecorator",
"JSONB", "Double",
"MutableDict", "MutableList",
"Base", "DjangoBase"]
# All the models should derive from this base class, so Base.metadata keeps
# all tables handled by Alembic migration scripts.
Base = declarative_base()
# To be used by tables already handled by Django ORM, such as User model. We
# separate them in order to keep those out of Alembic sight.
DjangoBase = declarative_base()
from gargantext.util.db import *
from gargantext.constants import INDEXED_HYPERDATA
from .base import Base, Column, ForeignKey, TypeDecorator, Index, \
Integer, Double, DateTime, String, Text
from .nodes import Node
import datetime
......@@ -64,6 +65,14 @@ class NodeHyperdata(Base):
)
"""
__tablename__ = 'nodes_hyperdata'
__table_args__ = (
Index('nodes_hyperdata_node_id_value_utc_idx', 'node_id', 'value_utc'),
Index('nodes_hyperdata_node_id_key_value_utc_idx', 'node_id', 'key', 'value_utc'),
Index('nodes_hyperdata_node_id_key_value_str_idx', 'node_id', 'key', 'value_str'),
Index('nodes_hyperdata_node_id_key_value_int_idx', 'node_id', 'key', 'value_int'),
Index('nodes_hyperdata_node_id_key_value_flt_idx', 'node_id', 'key', 'value_flt'),
Index('nodes_hyperdata_node_id_key_idx', 'node_id', 'key'))
id = Column( Integer, primary_key=True )
node_id = Column( Integer, ForeignKey(Node.id, ondelete='CASCADE'))
key = Column( HyperdataKey )
......
from gargantext.util.db import *
from .base import Base, Column, ForeignKey, relationship, Index, \
Integer, Float, String
from .nodes import Node
__all__ = ['Ngram', 'NodeNgram', 'NodeNodeNgram', 'NodeNgramNgram']
......@@ -7,6 +7,9 @@ __all__ = ['Ngram', 'NodeNgram', 'NodeNodeNgram', 'NodeNgramNgram']
class Ngram(Base):
__tablename__ = 'ngrams'
__table_args__ = (
Index('ngrams_id_n_idx', 'id', 'n'),
Index('ngrams_n_idx', 'n'))
id = Column(Integer, primary_key=True)
terms = Column(String(255), unique=True)
......@@ -21,6 +24,10 @@ class Ngram(Base):
class NodeNgram(Base):
__tablename__ = 'nodes_ngrams'
__table_args__ = (
Index('nodes_ngrams_node_id_ngram_id_idx', 'node_id', 'ngram_id'),
Index('nodes_ngrams_node_id_idx', 'node_id'),
Index('nodes_ngrams_ngram_id_idx', 'ngram_id'))
node_id = Column(Integer, ForeignKey(Node.id, ondelete='CASCADE'), primary_key=True)
ngram_id = Column(Integer, ForeignKey(Ngram.id, ondelete='CASCADE'), primary_key=True)
......@@ -43,6 +50,9 @@ class NodeNodeNgram(Base):
)
"""
__tablename__ = 'nodes_nodes_ngrams'
__table_args__ = (
Index('nodes_nodes_ngrams_node2_id_idx', 'node2_id'),
Index('nodes_nodes_ngrams_node1_id_idx', 'node1_id'))
node1_id = Column(Integer, ForeignKey(Node.id, ondelete='CASCADE'), primary_key=True)
node2_id = Column(Integer, ForeignKey(Node.id, ondelete='CASCADE'), primary_key=True)
......@@ -70,6 +80,11 @@ class NodeNgramNgram(Base):
)
"""
__tablename__ = 'nodes_ngrams_ngrams'
__table_args__ = (
Index('nodes_ngrams_ngrams_node_id_ngram1_id_ngram2_id_idx', 'node_id', 'ngram1_id', 'ngram2_id'),
Index('nodes_ngrams_ngrams_node_id_idx', 'node_id'),
Index('nodes_ngrams_ngrams_ngram1_id_idx', 'ngram1_id'),
Index('nodes_ngrams_ngrams_ngram2_id_idx', 'ngram2_id'))
node_id = Column(Integer, ForeignKey(Node.id, ondelete='CASCADE'), primary_key=True)
ngram1_id = Column(Integer, ForeignKey(Ngram.id, ondelete='CASCADE'), primary_key=True)
......
from gargantext.util.db import *
from gargantext.util.db import session
from gargantext.util.files import upload
from gargantext.constants import *
from datetime import datetime
from .base import Base, Column, ForeignKey, relationship, TypeDecorator, Index, \
Integer, Float, String, DateTime, JSONB, \
MutableList, MutableDict
from .users import User
__all__ = ['Node', 'NodeNode', 'CorpusNode']
......@@ -22,7 +25,7 @@ class NodeType(TypeDecorator):
class Node(Base):
"""This model can fit many purposes:
myFirstCorpus = session.query(CorpusNode).first()
It intends to provide a generic model, allowing hierarchical structure
......@@ -50,6 +53,9 @@ class Node(Base):
# Right: only user nodes are deleted.
"""
__tablename__ = 'nodes'
__table_args__ = (
Index('nodes_user_id_typename_parent_id_idx', 'user_id', 'typename', 'parent_id'),
Index('nodes_hyperdata_idx', 'hyperdata'))
id = Column(Integer, primary_key=True)
typename = Column(NodeType, index=True)
......@@ -58,7 +64,7 @@ class Node(Base):
parent_id = Column(Integer, ForeignKey('nodes.id', ondelete='CASCADE'))
# main data
name = Column(String(255))
date = Column(DateTime(), default=datetime.now)
date = Column(DateTime(timezone=True), default=datetime.now)
# metadata (see https://bashelton.com/2014/03/updating-postgresql-json-fields-via-sqlalchemy/)
hyperdata = Column(JSONB, default=dict)
......@@ -237,6 +243,8 @@ class CorpusNode(Node):
class NodeNode(Base):
__tablename__ = 'nodes_nodes'
__table_args__ = (
Index('nodes_nodes_node1_id_node2_id_idx', 'node1_id', 'node2_id'),)
node1_id = Column(Integer, ForeignKey(Node.id, ondelete='CASCADE'), primary_key=True)
node2_id = Column(Integer, ForeignKey(Node.id, ondelete='CASCADE'), primary_key=True)
......@@ -271,6 +279,8 @@ for nodetype in NODETYPES:
"polymorphic_identity": nodetype
}
})
# Add class to exports
__all__.append(class_name)
# ------ End of hack ------
......
from django.contrib.auth import models
from gargantext.util.db import *
from gargantext.util.db import session, aliased
from datetime import datetime
from .base import DjangoBase, Base, Column, ForeignKey, UniqueConstraint, \
Integer, Boolean, DateTime, String
__all__ = ['User', 'Contact']
class User(Base):
class User(DjangoBase):
# The properties below are a reflection of Django's auth module's models.
__tablename__ = models.User._meta.db_table
id = Column(Integer, primary_key=True)
......@@ -60,7 +63,7 @@ class User(Base):
"""check if a given node is owned by the user"""
return (node.user_id == self.id) or \
node.id in (contact.id for contact in self.contacts())
def get_params(self, username=None):
print(self.__dict__.items())
return self.hyperdata
......
"""Define ReplaceableObject and related operations
Implements operations to create/drop SQL objects such as views, stored
procedures and triggers that can't be "altered" but can be replaced -- hence
the name of "ReplaceableObject" class.
This recipe is directly borrowed from Alembic documentation, see
http://alembic.zzzcomputing.com/en/latest/cookbook.html#replaceable-objects
"""
from alembic.operations import Operations, MigrateOperation
__all__ = ['ReplaceableObject']
class ReplaceableObject(object):
    """A SQL object (view, stored procedure, trigger...) identified by its
    name/signature plus the SQL text that defines its body."""

    def __init__(self, name, sqltext):
        self.name = name
        self.sqltext = sqltext
class ReversibleOp(MigrateOperation):
    """Base class for migration operations that know their own inverse.

    Subclasses implement reverse() to return the opposite operation, which
    lets replace() emit the drop/create pair for both upgrade and downgrade.
    """

    def __init__(self, target):
        # target: the ReplaceableObject this operation creates or drops
        self.target = target

    @classmethod
    def invoke_for_target(cls, operations, target):
        op = cls(target)
        return operations.invoke(op)

    def reverse(self):
        raise NotImplementedError()

    @classmethod
    def _get_object_from_version(cls, operations, ident):
        # ident is "<revision>.<module-level name>", e.g. "abc123.my_view"
        version, objname = ident.split(".")
        module = operations.get_context().script.get_revision(version).module
        obj = getattr(module, objname)
        return obj

    @classmethod
    def replace(cls, operations, target, replaces=None, replace_with=None):
        """Drop one version of an object and create another.

        Pass replaces="<rev>.<name>" on upgrade (old object to drop) or
        replace_with="<rev>.<name>" on downgrade (object to restore).
        """
        if replaces:
            old_obj = cls._get_object_from_version(operations, replaces)
            drop_old = cls(old_obj).reverse()
            create_new = cls(target)
        elif replace_with:
            old_obj = cls._get_object_from_version(operations, replace_with)
            drop_old = cls(target).reverse()
            create_new = cls(old_obj)
        else:
            raise TypeError("replaces or replace_with is required")
        operations.invoke(drop_old)
        operations.invoke(create_new)
@Operations.register_operation("create_view", "invoke_for_target")
@Operations.register_operation("replace_view", "replace")
class CreateViewOp(ReversibleOp):
    """Reversible CREATE VIEW; exposed as op.create_view / op.replace_view."""

    def reverse(self):
        return DropViewOp(self.target)
@Operations.register_operation("drop_view", "invoke_for_target")
class DropViewOp(ReversibleOp):
    """Reversible DROP VIEW; exposed as op.drop_view."""

    def reverse(self):
        # BUG FIX: ReversibleOp.__init__ stores the object as `self.target`;
        # the original `self.view` attribute never exists and raised
        # AttributeError whenever a drop_view had to be reversed
        # (mirrors DropSPOp.reverse below).
        return CreateViewOp(self.target)
@Operations.register_operation("create_sp", "invoke_for_target")
@Operations.register_operation("replace_sp", "replace")
class CreateSPOp(ReversibleOp):
    """Reversible CREATE FUNCTION (stored procedure);
    exposed as op.create_sp / op.replace_sp."""

    def reverse(self):
        return DropSPOp(self.target)
@Operations.register_operation("drop_sp", "invoke_for_target")
class DropSPOp(ReversibleOp):
    """Reversible DROP FUNCTION; exposed as op.drop_sp."""

    def reverse(self):
        return CreateSPOp(self.target)
@Operations.implementation_for(CreateViewOp)
def create_view(operations, operation):
    """Emit the CREATE VIEW statement for a CreateViewOp."""
    operations.execute("CREATE VIEW %s AS %s" % (
        operation.target.name,
        operation.target.sqltext
    ))
@Operations.implementation_for(DropViewOp)
def drop_view(operations, operation):
    """Emit the DROP VIEW statement for a DropViewOp."""
    operations.execute("DROP VIEW %s" % operation.target.name)
@Operations.implementation_for(CreateSPOp)
def create_sp(operations, operation):
    """Emit the CREATE FUNCTION statement for a CreateSPOp.

    target.name is the signature (e.g. "F(int)"), target.sqltext the body.
    """
    operations.execute(
        "CREATE FUNCTION %s %s" % (
            operation.target.name, operation.target.sqltext
        )
    )
@Operations.implementation_for(DropSPOp)
def drop_sp(operations, operation):
    """Emit the DROP FUNCTION statement for a DropSPOp."""
    operations.execute("DROP FUNCTION %s" % operation.target.name)
# Make this a standalone script...
# Can be called this way: python3 gargantext/tools/show_nodes.py
import os
import django
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'gargantext.settings')
django.setup()
# ...End of jiberish.
import itertools
import colorama
from colorama import Fore
from sqlalchemy.sql.expression import literal_column
from gargantext.util.db import session, func, aliased
from gargantext.models import Node
# Glyphs used to draw the node tree.
NODE_BULLET = '‣'
# https://en.wikipedia.org/wiki/Box-drawing_character
TREE_ROOT = '╾'
TREE_VERT = '│'
TREE_HORI = '─'
TREE_FORK = '├'
TREE_CORN = '└'
# Sibling-position bitmask flags consumed by tree_show().
FIRST = 0x01
LAST = 0x02
def nodes(parent=None, group_by='typename', order_by='typename', has_child='check'):
    """Query direct children of `parent` (root nodes when parent is None).

    Rows carry (id, name, typename, cnt[, children]):
    - with group_by set, one row per group with cnt = number of grouped
      nodes (id/name/typename are the min() within each group);
    - with has_child not None, a `children` count is added through an
      outer self-join on parent_id.
    """
    if group_by or has_child is not None:
        select = [func.min(Node.id).label('id'),
                  func.min(Node.name).label('name'),
                  func.min(Node.typename).label('typename'),
                  func.count(Node.id).label('cnt')]
    else:
        select = [Node.id.label('id'),
                  Node.name.label('name'),
                  Node.typename.label('typename'),
                  literal_column('1').label('cnt')]
    if has_child is not None:
        N = aliased(Node)
        select.append(func.count(N.id).label('children'))
    else:
        select.append(literal_column('NULL').label('children'))
    # `parent` may be a Node instance or a bare id
    parent_id = getattr(parent, 'id', parent)
    q = session.query(*select).filter_by(parent_id=parent_id) \
               .group_by(getattr(Node, group_by if group_by else 'id'))
    if has_child is not None:
        q = q.outerjoin(N, N.parent_id == Node.id).group_by(N.parent_id)
    return q.order_by(order_by)
def node_show(node, prefix='', maxlen=60):
    """Print one tree row: green typename, then either the node name (cyan)
    or, for a collapsed group with no children, the group count (magenta)."""
    if node.children > 0 or node.cnt == 1:
        # single node (or a node with children): show its name, truncated
        name = node.name[:maxlen] + '..' if len(node.name) > maxlen else node.name
        label = Fore.CYAN + name + Fore.RESET
    else:
        # grouped row: show how many same-type siblings were collapsed
        label = Fore.MAGENTA + str(node.cnt) + Fore.RESET
    print(prefix, '%s%s %s' % (Fore.GREEN, node.typename, label), sep='')
def tree_show(node, pos=FIRST|LAST, level=0, prefix='', maxlen=60, compact=True):
    """Recursively pretty-print `node` and its descendants as a tree.

    pos is a FIRST/LAST bitmask giving the node's position among its
    siblings; it selects the box-drawing branch character. With compact
    True, same-type siblings are grouped into a single counted row.
    """
    #print('%02d %x' % (level, pos), end='')
    branch = TREE_ROOT if pos&FIRST and level == 0 else TREE_FORK if not pos&LAST else TREE_CORN
    node_prefix = prefix + branch + 2*TREE_HORI + ' '
    node_show(node, node_prefix, maxlen)
    # `compact and 'typename'` is 'typename' or False (no grouping)
    childs = iter(nodes(parent=node, group_by=compact and 'typename'))
    try:
        node = next(childs)
    except StopIteration:
        return
    prefix = prefix + (' ' if pos&LAST else TREE_VERT) + ' '
    # one-element lookahead so each child knows whether it is the last one
    for i, next_node in enumerate(itertools.chain(childs, [None])):
        pos = (FIRST if i == 0 else 0) | (LAST if next_node is None else 0)
        tree_show(node, pos, level + 1, prefix, maxlen, compact)
        node = next_node
if __name__ == "__main__":
    import sys

    # -a/--all disables the compact (grouped-by-typename) display.
    args = sys.argv[1:]
    if not args:
        compact = True
    elif args in (['-a'], ['--all']):
        compact = False
    else:
        print("Usage: %s [-a|--all]" % sys.argv[0], file=sys.stderr)
        sys.exit(1)
    # strip=False keeps ANSI colors even when output is not a terminal.
    colorama.init(strip=False)
    for root in nodes():
        tree_show(root, compact=compact)
......@@ -6,15 +6,11 @@ from gargantext.util.json import json_dumps
# get engine, session, etc.
########################################################################
from sqlalchemy.orm import sessionmaker, scoped_session
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import delete
def get_engine():
from sqlalchemy import create_engine
url = 'postgresql+psycopg2://{USER}:{PASSWORD}@{HOST}:{PORT}/{NAME}'.format(
**settings.DATABASES['default']
)
return create_engine( url
return create_engine( settings.DATABASES['default']['URL']
, use_native_hstore = True
, json_serializer = json_dumps
, pool_size=20, max_overflow=0
......@@ -22,24 +18,13 @@ def get_engine():
engine = get_engine()
Base = declarative_base()
session = scoped_session(sessionmaker(bind=engine))
########################################################################
# tools to build models
########################################################################
from sqlalchemy.types import *
from sqlalchemy.schema import Column, ForeignKey, UniqueConstraint
from sqlalchemy.dialects.postgresql import JSONB, DOUBLE_PRECISION
from sqlalchemy.ext.mutable import MutableDict, MutableList
Double = DOUBLE_PRECISION
########################################################################
# useful for queries
########################################################################
from sqlalchemy.orm import aliased, relationship
from sqlalchemy.orm import aliased
from sqlalchemy import func, desc
########################################################################
......
......@@ -10,7 +10,7 @@ __all__ = ['json_encoder', 'json_dumps']
class JSONEncoder(json.JSONEncoder):
def default(self, obj):
from gargantext.util.db import Base
from gargantext.models import Base
if isinstance(obj, Base):
return {
key: value
......
......@@ -54,6 +54,7 @@ update-locale LC_ALL=fr_FR.UTF-8
cd /srv/
pip3 install virtualenv
virtualenv /srv/env_3-5 -p /usr/bin/python3.5
echo '/srv/gargantext' > /srv/env_3-5/lib/python3.5/site-packages/gargantext.pth
echo 'alias venv="source /srv/env_3-5/bin/activate"' >> ~/.bashrc
# CONFIG FILES
......
-- ____
-- / ___|
-- | | _
-- | |_| |
-- \____|arganTexT
----------------------------------------------------------------------
-- Gargantext optimization of Database --
----------------------------------------------------------------------
--> Manual optimization with indexes according to usages
-- Weaknesses and strengths of indexes:
--> they can slow down insertion(s)
--> they can speed up selection(s)
--> Conventions for this document:
--> indexes that are commented out have already been created
--> indexes not commented out have not been created yet
----------------------------------------------------------------------
-- Retrieve Nodes
----------------------------------------------------------------------
create INDEX on nodes (user_id, typename, parent_id) ;
create INDEX on nodes_hyperdata (node_id, key);
create INDEX on ngrams (id, n) ;
create INDEX on ngrams (n) ;
create INDEX on nodes_ngrams (node_id, ngram_id) ;
create INDEX on nodes_ngrams (node_id) ;
create INDEX on nodes_ngrams (ngram_id) ;
create INDEX on nodes_ngrams_ngrams (node_id, ngram1_id, ngram2_id) ;
create INDEX on nodes_ngrams_ngrams (node_id) ;
create INDEX on nodes_ngrams_ngrams (ngram1_id) ;
create INDEX on nodes_ngrams_ngrams (ngram2_id) ;
----------------------------------------------------------------------
-- DELETE optimization of Nodes -- todo on dev
create INDEX on nodes_nodes_ngrams (node1_id);
create INDEX on nodes_nodes_ngrams (node2_id);
create INDEX on nodes_nodes (node1_id, node2_id);
-- Maybe needed soon:
-- create INDEX on nodes_nodes_ngrams (node1_id, node2_id);
----------------------------------------------------------------------
-- Analytics: per-key indexes on each typed value column of hyperdata
create INDEX on nodes_hyperdata (node_id,value_utc); -- remove ?
create INDEX on nodes_hyperdata (node_id,key,value_utc);
create INDEX on nodes_hyperdata (node_id,key,value_int);
create INDEX on nodes_hyperdata (node_id,key,value_flt);
create INDEX on nodes_hyperdata (node_id,key,value_str);
----------------------------------------------------------------------
----------------------------------------------------------------------
-- GIN index for containment queries on the JSONB hyperdata column
create index on nodes using GIN (hyperdata);
----------------------------------------------------------------------
......@@ -33,3 +33,4 @@ lxml==3.5.0
requests-futures==0.9.7
bs4==0.0.1
requests==2.10.0
alembic>=0.9.2
......@@ -26,8 +26,7 @@ environ.setdefault("DJANGO_SETTINGS_MODULE", "gargantext.settings")
DATABASES['default']['NAME'] = DATABASES['default']['TEST']['NAME']
setup() # models can now be imported
from gargantext import models # Base is now filled
from gargantext.util.db import Base # contains metadata.tables
from gargantext.models import Base # contains metadata.tables
# ------------------------------------------------------------------------------
# thanks to our hack, util.db.engine and util.db.session already use the test DB
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment