Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
cc7461f0
Commit
cc7461f0
authored
Sep 14, 2017
by
sim
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FEAT] Add Node.title_abstract to search in doc title and abstract
parent
1f5457df
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
69 additions
and
12 deletions
+69
-12
1fb4405b59e1_add_english_fulltext_index_on_nodes_.py
...ions/1fb4405b59e1_add_english_fulltext_index_on_nodes_.py
+54
-0
base.py
gargantext/models/base.py
+2
-0
nodes.py
gargantext/models/nodes.py
+10
-11
requirements.txt
install/gargamelle/requirements.txt
+3
-1
No files found.
alembic/versions/1fb4405b59e1_add_english_fulltext_index_on_nodes_.py
0 → 100644
View file @
cc7461f0
"""Add english fulltext index on Nodes.hyperdata for abstract and title
Revision ID: 1fb4405b59e1
Revises: bedce47c9e34
Create Date: 2017-09-13 16:31:36.926692
"""
from
alembic
import
op
import
sqlalchemy
as
sa
from
sqlalchemy_utils.types
import
TSVectorType
from
gargantext.util.alembic
import
ReplaceableObject
# revision identifiers, used by Alembic.
# `revision` is this migration's id and `down_revision` the migration it
# follows; both match the "Revision ID" / "Revises" lines of the module
# docstring.
revision = '1fb4405b59e1'
down_revision = 'bedce47c9e34'
# No branch label and no cross-branch dependency for this revision.
branch_labels = None
depends_on = None
# PL/pgSQL trigger function that recomputes `nodes.title_abstract` from the
# 'title' and 'abstract' keys of the row's `hyperdata` JSON document, using
# the 'english' text search configuration.
#
# Each field is wrapped in coalesce(): in PostgreSQL `||` yields NULL as soon
# as one operand is NULL, so without it a node having a title but no abstract
# (or the reverse) would end up with a NULL tsvector and never match a search.
title_abstract_update_trigger = ReplaceableObject(
    'title_abstract_update_trigger()',
    """
RETURNS trigger AS $$
begin
  new.title_abstract := to_tsvector('english', coalesce(new.hyperdata ->> 'title', '') || ' ' || coalesce(new.hyperdata ->> 'abstract', ''));
  return new;
end
$$ LANGUAGE plpgsql;
"""
)
# Description of the trigger that keeps `nodes.title_abstract` up to date:
# before every row INSERT or UPDATE on `nodes`, PostgreSQL runs
# title_abstract_update_trigger().
# NOTE(review): ReplaceableObject comes from gargantext.util.alembic; the
# argument order (name, timing/events, table, action) is inferred from the
# literals below -- confirm against that helper.
title_abstract_update = ReplaceableObject(
    'title_abstract_update',
    'BEFORE INSERT OR UPDATE',
    'nodes',
    'FOR EACH ROW EXECUTE PROCEDURE title_abstract_update_trigger()'
)
def upgrade():
    """Add the ``nodes.title_abstract`` tsvector column and its trigger.

    Steps, in order: add the column, create the trigger function, create
    the trigger that calls it, then touch every row so the column is
    filled for data that already exists.
    """
    fulltext_column = sa.Column('title_abstract', TSVectorType)
    op.add_column('nodes', fulltext_column)

    # The function has to exist before the trigger that executes it.
    # NOTE(review): create_sp / create_trigger are not core Alembic
    # operations; presumably registered by gargantext.util.alembic.
    op.create_sp(title_abstract_update_trigger)
    op.create_trigger(title_abstract_update)

    # Initialize index with already existing data: this no-op UPDATE has no
    # WHERE clause, so the row-level trigger runs for every existing node.
    op.execute('UPDATE nodes SET hyperdata = hyperdata')
def downgrade():
    """Revert ``upgrade``: drop the trigger, its function, then the column."""
    # Reverse order of creation: the trigger refers to the function, so the
    # trigger goes first.
    op.drop_trigger(title_abstract_update)
    op.drop_sp(title_abstract_update_trigger)

    op.drop_column(table_name='nodes', column_name='title_abstract')
gargantext/models/base.py
View file @
cc7461f0
...
...
@@ -2,6 +2,7 @@ from sqlalchemy.schema import Column, ForeignKey, UniqueConstraint, Index
from
sqlalchemy.orm
import
relationship
,
validates
from
sqlalchemy.types
import
TypeDecorator
,
\
Integer
,
Float
,
Boolean
,
DateTime
,
String
,
Text
from
sqlalchemy_utils.types
import
TSVectorType
from
sqlalchemy.dialects.postgresql
import
JSONB
,
DOUBLE_PRECISION
as
Double
from
sqlalchemy.ext.mutable
import
MutableDict
,
MutableList
from
sqlalchemy.ext.declarative
import
declarative_base
...
...
@@ -9,6 +10,7 @@ from sqlalchemy.ext.declarative import declarative_base
__all__
=
[
"Column"
,
"ForeignKey"
,
"UniqueConstraint"
,
"Index"
,
"relationship"
,
"validates"
,
"ValidatorMixin"
,
"Integer"
,
"Float"
,
"Boolean"
,
"DateTime"
,
"String"
,
"Text"
,
"TSVectorType"
,
"TypeDecorator"
,
"JSONB"
,
"Double"
,
"MutableDict"
,
"MutableList"
,
...
...
gargantext/models/nodes.py
View file @
cc7461f0
...
...
@@ -2,13 +2,10 @@ from gargantext.util.db import session
from
gargantext.util.files
import
upload
from
gargantext.constants
import
*
# Uncomment to make column full text searchable
#from sqlalchemy_utils.types import TSVectorType
from
datetime
import
datetime
from
.base
import
Base
,
Column
,
ForeignKey
,
relationship
,
TypeDecorator
,
Index
,
\
Integer
,
Float
,
String
,
DateTime
,
JSONB
,
\
Integer
,
Float
,
String
,
DateTime
,
JSONB
,
TSVectorType
,
\
MutableList
,
MutableDict
,
validates
,
ValidatorMixin
from
.users
import
User
...
...
@@ -60,9 +57,6 @@ class Node(ValidatorMixin, Base):
Index
(
'nodes_user_id_typename_parent_id_idx'
,
'user_id'
,
'typename'
,
'parent_id'
),
Index
(
'nodes_hyperdata_idx'
,
'hyperdata'
,
postgresql_using
=
'gin'
))
# TODO
# create INDEX full_text_idx on nodes using gin(to_tsvector('english', hyperdata ->> 'abstract' || 'title'));
id
=
Column
(
Integer
,
primary_key
=
True
)
typename
=
Column
(
NodeType
,
index
=
True
)
...
...
@@ -78,10 +72,15 @@ class Node(ValidatorMixin, Base):
name
=
Column
(
String
(
255
))
date
=
Column
(
DateTime
(
timezone
=
True
),
default
=
datetime
.
now
)
hyperdata
=
Column
(
JSONB
,
default
=
dict
)
# metadata (see https://bashelton.com/2014/03/updating-postgresql-json-fields-via-sqlalchemy/)
# To make search possible uncomment the line below
#search_vector = Column(TSVectorType('hyperdata'))
hyperdata
=
Column
(
JSONB
,
default
=
dict
)
# Create a TSVECTOR column to use fulltext search feature of PostgreSQL.
# We need to create a trigger to update this column on update and insert,
# it's created in alembic/versions/1fb4405b59e1_add_english_fulltext_index_on_nodes_.py
#
# To use this column: session.query(DocumentNode) \
# .filter(Node.title_abstract.match('keyword'))
title_abstract
=
Column
(
TSVectorType
(
regconfig
=
'english'
))
def
__new__
(
cls
,
*
args
,
**
kwargs
):
if
cls
is
Node
and
kwargs
.
get
(
'typename'
):
...
...
install/gargamelle/requirements.txt
View file @
cc7461f0
...
...
@@ -35,4 +35,6 @@ requests-futures==0.9.7
bs4==0.0.1
requests==2.10.0
alembic>=0.9.2
# SQLAlchemy-Searchable==0.10.4
SQLAlchemy==1.1.14
SQLAlchemy-Searchable==0.10.4
SQLAlchemy-Utils==0.32.16
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment