Commit cc7461f0 authored by sim

[FEAT] Add Node.title_abstract to search in doc title and abstract

parent 1f5457df
"""Add english fulltext index on Nodes.hyperdata for abstract and title
Revision ID: 1fb4405b59e1
Revises: bedce47c9e34
Create Date: 2017-09-13 16:31:36.926692
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy_utils.types import TSVectorType
from gargantext.util.alembic import ReplaceableObject
# Revision identifiers, used by Alembic.
# `revision` is this migration's own id.
revision = '1fb4405b59e1'
# `down_revision` is the migration this one revises (see "Revises:" in the
# module docstring).
down_revision = 'bedce47c9e34'
# No branch label and no dependency on another revision are declared.
branch_labels = None
depends_on = None
# PL/pgSQL trigger function that recomputes nodes.title_abstract from the
# 'title' and 'abstract' keys of nodes.hyperdata, using the 'english' text
# search configuration.
#
# Each field is wrapped in coalesce(): in PostgreSQL `text || NULL` is NULL, so
# without it a node lacking either key would end up with a NULL tsvector and
# could never be matched, even on the field that is present.
#
# NOTE(review): databases that already applied this revision keep the old
# function body; they need a follow-up migration to pick up this change.
title_abstract_update_trigger = ReplaceableObject(
    'title_abstract_update_trigger()',
    """
RETURNS trigger AS $$
begin
  new.title_abstract := to_tsvector('english', coalesce(new.hyperdata ->> 'title', '') || ' ' || coalesce(new.hyperdata ->> 'abstract', ''));
  return new;
end
$$ LANGUAGE plpgsql;
    """
)
# Trigger that calls title_abstract_update_trigger() for each row of `nodes`
# before an INSERT or UPDATE.
# NOTE(review): the positional arguments appear to be (trigger name,
# timing/events, table, action) -- confirm against
# gargantext.util.alembic.ReplaceableObject.
title_abstract_update = ReplaceableObject(
'title_abstract_update',
'BEFORE INSERT OR UPDATE',
'nodes',
'FOR EACH ROW EXECUTE PROCEDURE title_abstract_update_trigger()'
)
def upgrade():
    """Add the ``nodes.title_abstract`` column and start maintaining it.

    The steps run in a fixed order: the column is added, the trigger function
    and the trigger are created, and finally every row is rewritten in place
    so that the freshly installed trigger fills the column for existing data.
    """
    tsvector_column = sa.Column('title_abstract', TSVectorType)
    op.add_column('nodes', tsvector_column)

    op.create_sp(title_abstract_update_trigger)
    op.create_trigger(title_abstract_update)

    # Self-assignment UPDATE: its only purpose is to fire the trigger on rows
    # that existed before this migration.
    backfill_sql = 'UPDATE nodes SET hyperdata = hyperdata'
    op.execute(backfill_sql)
def downgrade():
    """Remove the trigger, then its function, then the ``title_abstract`` column."""
    trigger = title_abstract_update
    trigger_function = title_abstract_update_trigger

    # The trigger references the function, so it is dropped first.
    op.drop_trigger(trigger)
    op.drop_sp(trigger_function)

    op.drop_column('nodes', 'title_abstract')
...@@ -2,6 +2,7 @@ from sqlalchemy.schema import Column, ForeignKey, UniqueConstraint, Index ...@@ -2,6 +2,7 @@ from sqlalchemy.schema import Column, ForeignKey, UniqueConstraint, Index
from sqlalchemy.orm import relationship, validates from sqlalchemy.orm import relationship, validates
from sqlalchemy.types import TypeDecorator, \ from sqlalchemy.types import TypeDecorator, \
Integer, Float, Boolean, DateTime, String, Text Integer, Float, Boolean, DateTime, String, Text
from sqlalchemy_utils.types import TSVectorType
from sqlalchemy.dialects.postgresql import JSONB, DOUBLE_PRECISION as Double from sqlalchemy.dialects.postgresql import JSONB, DOUBLE_PRECISION as Double
from sqlalchemy.ext.mutable import MutableDict, MutableList from sqlalchemy.ext.mutable import MutableDict, MutableList
from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.ext.declarative import declarative_base
...@@ -9,6 +10,7 @@ from sqlalchemy.ext.declarative import declarative_base ...@@ -9,6 +10,7 @@ from sqlalchemy.ext.declarative import declarative_base
__all__ = ["Column", "ForeignKey", "UniqueConstraint", "Index", "relationship", __all__ = ["Column", "ForeignKey", "UniqueConstraint", "Index", "relationship",
"validates", "ValidatorMixin", "validates", "ValidatorMixin",
"Integer", "Float", "Boolean", "DateTime", "String", "Text", "Integer", "Float", "Boolean", "DateTime", "String", "Text",
"TSVectorType",
"TypeDecorator", "TypeDecorator",
"JSONB", "Double", "JSONB", "Double",
"MutableDict", "MutableList", "MutableDict", "MutableList",
......
...@@ -2,13 +2,10 @@ from gargantext.util.db import session ...@@ -2,13 +2,10 @@ from gargantext.util.db import session
from gargantext.util.files import upload from gargantext.util.files import upload
from gargantext.constants import * from gargantext.constants import *
# Uncomment to make column full text searchable
#from sqlalchemy_utils.types import TSVectorType
from datetime import datetime from datetime import datetime
from .base import Base, Column, ForeignKey, relationship, TypeDecorator, Index, \ from .base import Base, Column, ForeignKey, relationship, TypeDecorator, Index, \
Integer, Float, String, DateTime, JSONB, \ Integer, Float, String, DateTime, JSONB, TSVectorType, \
MutableList, MutableDict, validates, ValidatorMixin MutableList, MutableDict, validates, ValidatorMixin
from .users import User from .users import User
...@@ -60,9 +57,6 @@ class Node(ValidatorMixin, Base): ...@@ -60,9 +57,6 @@ class Node(ValidatorMixin, Base):
Index('nodes_user_id_typename_parent_id_idx', 'user_id', 'typename', 'parent_id'), Index('nodes_user_id_typename_parent_id_idx', 'user_id', 'typename', 'parent_id'),
Index('nodes_hyperdata_idx', 'hyperdata', postgresql_using='gin')) Index('nodes_hyperdata_idx', 'hyperdata', postgresql_using='gin'))
# TODO
# create INDEX full_text_idx on nodes using gin(to_tsvector('english', hyperdata ->> 'abstract' || 'title'));
id = Column(Integer, primary_key=True) id = Column(Integer, primary_key=True)
typename = Column(NodeType, index=True) typename = Column(NodeType, index=True)
...@@ -78,10 +72,15 @@ class Node(ValidatorMixin, Base): ...@@ -78,10 +72,15 @@ class Node(ValidatorMixin, Base):
name = Column(String(255)) name = Column(String(255))
date = Column(DateTime(timezone=True), default=datetime.now) date = Column(DateTime(timezone=True), default=datetime.now)
hyperdata = Column(JSONB, default=dict) hyperdata = Column(JSONB, default=dict)
# metadata (see https://bashelton.com/2014/03/updating-postgresql-json-fields-via-sqlalchemy/)
# To make search possible uncomment the line below # Create a TSVECTOR column to use fulltext search feature of PostgreSQL.
#search_vector = Column(TSVectorType('hyperdata')) # We need to create a trigger to update this column on update and insert,
# it's created in alembic/version/1fb4405b59e1_add_english_fulltext_index_on_nodes_.py
#
# To use this column: session.query(DocumentNode) \
# .filter(Node.title_abstract.match('keyword'))
title_abstract = Column(TSVectorType(regconfig='english'))
def __new__(cls, *args, **kwargs): def __new__(cls, *args, **kwargs):
if cls is Node and kwargs.get('typename'): if cls is Node and kwargs.get('typename'):
......
...@@ -35,4 +35,6 @@ requests-futures==0.9.7 ...@@ -35,4 +35,6 @@ requests-futures==0.9.7
bs4==0.0.1 bs4==0.0.1
requests==2.10.0 requests==2.10.0
alembic>=0.9.2 alembic>=0.9.2
# SQLAlchemy-Searchable==0.10.4 SQLAlchemy==1.1.14
SQLAlchemy-Searchable==0.10.4
SQLAlchemy-Utils==0.32.16
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment