Commit e6a6f379 authored by sim

Model: rename typename, date, hyperdata of Node to type, created and data

parent 7110ae5b
......@@ -89,12 +89,12 @@ is represented as a JSON object with these fields:
| parent_id | int | Yes | Node id of this node's parent |
| name | string | Default = empty string | Label of this node, can be used for different purposes depending on its type |
| created | datetime as a string | Default = now | Creation date of this node |
| data | object | Default = {} | [Hyperdata](#hyperdata) of this node |
| data | object | Default = {} | [Data](#data) of this node |
| title_abstract | string | Automatic* | Only meaningful for DOCUMENT nodes: full-text index on title + abstract |
\* Please don't provide any value; the database will generate one automatically.
#### Hyperdata
#### Data
Type-specific data of each node is stored in the `data` field, as a JSON
object. For example, a document's abstract and title are stored here. There is no
......@@ -40,7 +40,7 @@ from gargantext.utils.lists import *
from gargantext.utils.dates import datetime, to_datetime
# types & models (nodes, lists, hyperdata, resource) ---------------------------------------------
# types & models (nodes, lists, data, resource) ---------------------------------------------
'DOCUMENT' : WeightedList,
'GROUPLIST' : Translations, # todo remove "LIST" from name
......@@ -69,7 +69,7 @@ class DatabasePipeline(object):
self.corpus = self.db.query(CorpusNode).filter_by(id=scraper.corpus).one_or_none()
def close_spider(self, scraper):
resources = self.corpus.hyperdata.get('resources', [])
resources = self.corpus.data.get('resources', [])
# TODO Raw files storage and listing in paths
......@@ -81,14 +81,15 @@ class DatabasePipeline(object):
self.corpus['resources'] = resources
def process_item(self, item, scraper):
doc = DocumentNode(name=item.get('title')[:DocumentNode.NAME_MAXLEN],
title = item.get('title', '')[:DocumentNode.NAME_MAXLEN]
doc = DocumentNode(name=title,,
......@@ -19,8 +19,8 @@ class NodeType(TypeDecorator):
Values are detailed in `gargantext.constants.NODETYPES`.
impl = Integer
def process_bind_param(self, typename, dialect):
return NODETYPES.index(typename)
def process_bind_param(self, type, dialect):
return NODETYPES.index(type)
def process_result_value(self, typeindex, dialect):
return NODETYPES[typeindex]
......@@ -35,36 +35,36 @@ class Node(ValidatorMixin, Base):
The possible types are defined in `gargantext.constants.NODETYPES`.
Thanks to __new__ overriding and SQLAlchemy's polymorphism, every Node
instance is automagically casted to its sub-class, assuming a typename
instance is automagically casted to its sub-class, assuming a type
is specified.
>>> Node(name='without-type')
<Node(id=None, typename=None, user_id=None, parent_id=None, name='without-type', date=None)>
>>> Node(typename='CORPUS')
<CorpusNode(id=None, typename='CORPUS', user_id=None, parent_id=None, name=None, date=None)>
<Node(id=None, type=None, user_id=None, parent_id=None, name='without-type')>
>>> Node(type='CORPUS')
<CorpusNode(id=None, type='CORPUS', user_id=None, parent_id=None, name=None)>
>>> from gargantext.core.db import session
>>> session.query(Node).filter_by(typename='USER').first() # doctest: +ELLIPSIS
>>> session.query(Node).filter_by(type='USER').first() # doctest: +ELLIPSIS
But beware, there are some pitfalls with bulk queries. In this case typename
But beware, there are some pitfalls with bulk queries. In this case type
MUST be specified manually.
>>> session.query(UserNode).delete() # doctest: +SKIP
# Wrong: all nodes are deleted!
>>> session.query(UserNode).filter_by(typename='USER').delete() # doctest: +SKIP
>>> session.query(UserNode).filter_by(type='USER').delete() # doctest: +SKIP
# Right: only user nodes are deleted.
__tablename__ = 'nodes'
__table_args__ = (
Index('nodes_user_id_typename_parent_id_idx', 'user_id', 'typename', 'parent_id'),
Index('nodes_hyperdata_idx', 'hyperdata', postgresql_using='gin'))
Index('nodes_user_id_type_parent_id_idx', 'user_id', 'type', 'parent_id'),
Index('nodes_data_idx', 'data', postgresql_using='gin'))
id = Column(Integer, primary_key=True)
typename = Column(NodeType, index=True, nullable=False)
__mapper_args__ = { 'polymorphic_on': typename }
type = Column(NodeType, index=True, nullable=False)
__mapper_args__ = { 'polymorphic_on': type }
# foreign keys
user_id = Column(Integer, ForeignKey(User.id, ondelete='CASCADE'),
......@@ -76,11 +76,11 @@ class Node(ValidatorMixin, Base):
parent = relationship('Node', remote_side=[id])
name = Column(String(NAME_MAXLEN), nullable=False, server_default='')
date = Column(DateTime(timezone=True), nullable=False,
created = Column(DateTime(timezone=True), nullable=False,
hyperdata = Column(JSONB, default=dict, nullable=False,
data = Column(JSONB, default=dict, nullable=False,
# Create a TSVECTOR column to use fulltext search feature of PostgreSQL.
# We need to create a trigger to update this column on update and insert,
......@@ -96,51 +96,51 @@ class Node(ValidatorMixin, Base):
def __new__(cls, *args, **kwargs):
if cls is Node and kwargs.get('typename'):
typename = kwargs.pop('typename')
return _NODE_MODELS[typename](*args, **kwargs)
if cls is Node and kwargs.get('type'):
type = kwargs.pop('type')
return _NODE_MODELS[type](*args, **kwargs)
return super(Node, cls).__new__(cls)
def __init__(self, **kwargs):
"""Node's constructor.
Initialize the `hyperdata` as a dictionary if no value was given.
Initialize the `data` as a dictionary if no value was given.
if 'hyperdata' not in kwargs:
kwargs['hyperdata'] = kwargs.get('hyperdata', MutableDict())
if 'data' not in kwargs:
kwargs['data'] = kwargs.get('data', MutableDict())
Base.__init__(self, **kwargs)
def __getitem__(self, key):
"""Allow direct access to hyperdata via the bracket operator."""
return self.hyperdata[key]
"""Allow direct access to data via the bracket operator."""
return self.data[key]
def __setitem__(self, key, value):
"""Allow direct access to hyperdata via the bracket operator."""
old_value = self.hyperdata.get(key, _UNDEFINED)
"""Allow direct access to data via the bracket operator."""
old_value = self.data.get(key, _UNDEFINED)
if old_value != value:
self.hyperdata[key] = value
flag_modified(self, 'hyperdata')
self.data[key] = value
flag_modified(self, 'data')
def __delitem(self, key):
"""Allow direct access to hyperdata via the bracket operator."""
if key in self.hyperdata:
del self.hyperdata[key]
flag_modified(self, 'hyperdata')
"""Allow direct access to data via the bracket operator."""
if key in self.data:
del self.data[key]
flag_modified(self, 'data')
def __repr__(self):
return '<{0.__class__.__name__}(id={0.id}, typename={0.typename!r}, ' \
return '<{0.__class__.__name__}(id={0.id}, type={0.type!r}, ' \
'user_id={0.user_id}, parent_id={0.parent_id}, ' \
'name={0.name!r}, date={0.date})>'.format(self)
'name={0.name!r}, created={0.created})>'.format(self)
def validate_name(self, key, value):
return self.enforce_length(key, value)
def save_hyperdata(self):
def save_data(self):
"""This is a necessary, yet ugly trick.
Indeed, PostgreSQL does not yet manage incremental updates (see
flag_modified(self, 'hyperdata')
flag_modified(self, 'data')
class DocumentNode(Node):
......@@ -163,7 +163,7 @@ class CorpusNode(Node):
'type': 1,
'url': None}]
if 'resources' not in self.hyperdata:
if 'resources' not in self.data:
self['resources'] = MutableList()
return self['resources']
......@@ -206,7 +206,7 @@ class NodeNode(Base):
# We could manually write a class for every NodeType, or find a way to
# tell SQLAlchemy that it should stick to instantiate a Node when a
# class is not defined for the wanted typename.
# class is not defined for the wanted type.
set(cls.__mapper_args__.get('polymorphic_identity') for cls in Node.__subclasses__())
