Commit e6a6f379 authored by sim

Model: rename typename, date, hyperdata of Node to type, created and data

parent 7110ae5b
@@ -89,12 +89,12 @@ is represented as a JSON object with these fields:
| parent_id | int | Yes | Node id of this node's parent |
| name | string | Default = empty string | Label of this node, can be used for different purposes depending on its type |
| created | datetime as a string | Default = now | Creation date of this node |
| data | object | Default = {} | [Hyperdata](#hyperdata) of this node |
| data | object | Default = {} | [Data](#data) of this node |
| title_abstract | string | Automatic* | Only meaningful for DOCUMENT nodes: full-text index on title + abstract |
\* Please don't provide any value; the database will generate one automatically.
#### Hyperdata
#### Data
The type-specific data of each node is stored in the `data` field, as a JSON
object. For example, a document's abstract and title are stored here. There is no
......
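For illustration, a DOCUMENT node serialized with the renamed fields might look like the sketch below. The values are made up, and fields not visible in the excerpt above (`id`, `type`, `user_id`) are assumed from the model definition further down.

```python
# Hypothetical serialized DOCUMENT node under the renamed schema.
node = {
    "id": 42,                                 # assigned by the database
    "type": "DOCUMENT",                       # formerly "typename"
    "user_id": 1,
    "parent_id": 7,                           # id of the parent CORPUS node
    "name": "A short label",
    "created": "2016-06-01T12:00:00+00:00",   # formerly "date"
    "data": {                                 # formerly "hyperdata"
        "title": "A short label",
        "abstract": "Type-specific payload goes here.",
    },
    # "title_abstract" is generated by the database; don't send it.
}
```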
@@ -40,7 +40,7 @@ from gargantext.utils.lists import *
from gargantext.utils.dates import datetime, to_datetime
# types & models (nodes, lists, hyperdata, resource) ---------------------------------------------
# types & models (nodes, lists, data, resource) ---------------------------------------------
LISTTYPES = {
'DOCUMENT' : WeightedList,
'GROUPLIST' : Translations, # todo remove "LIST" from name
......
@@ -69,7 +69,7 @@ class DatabasePipeline(object):
self.corpus = self.db.query(CorpusNode).filter_by(id=scraper.corpus).one_or_none()
def close_spider(self, scraper):
resources = self.corpus.hyperdata.get('resources', [])
resources = self.corpus.data.get('resources', [])
resources.append({
"date": datetime.now(),
# TODO Raw files storage and listing in paths
@@ -81,14 +81,15 @@ class DatabasePipeline(object):
})
self.corpus['resources'] = resources
self.corpus.save_hyperdata()
self.corpus.save_data()
self.db.commit()
def process_item(self, item, scraper):
doc = DocumentNode(name=item.get('title')[:DocumentNode.NAME_MAXLEN],
title = item.get('title', '')[:DocumentNode.NAME_MAXLEN]
doc = DocumentNode(name=title,
parent_id=self.corpus.id,
hyperdata=dict(item))
data=dict(item))
self.corpus.related.append(doc)
......
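A minimal sketch of how a scraped item flows through this pipeline under the renamed API. The `item` contents, and the `corpus` and `db` names standing in for the pipeline's corpus node and database session, are assumptions for illustration; imports are omitted.

```python
# Hypothetical item produced by a scraper spider.
item = {"title": "An example title", "abstract": "An example abstract."}

# process_item(): the title becomes the node name (truncated to NAME_MAXLEN)
# and the whole item is stored in the renamed `data` field.
title = item.get("title", "")[:DocumentNode.NAME_MAXLEN]
doc = DocumentNode(name=title, parent_id=corpus.id, data=dict(item))
corpus.related.append(doc)

# close_spider(): resource metadata now lives in `corpus.data` and is
# persisted with save_data() instead of save_hyperdata().
resources = corpus.data.get("resources", [])
resources.append({"date": datetime.now()})
corpus["resources"] = resources
corpus.save_data()
db.commit()
```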
@@ -19,8 +19,8 @@ class NodeType(TypeDecorator):
Values are detailed in `gargantext.constants.NODETYPES`.
"""
impl = Integer
def process_bind_param(self, typename, dialect):
return NODETYPES.index(typename)
def process_bind_param(self, type, dialect):
return NODETYPES.index(type)
def process_result_value(self, typeindex, dialect):
return NODETYPES[typeindex]
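In effect the decorator stores a node's type as its position in `NODETYPES`, so only a small integer reaches the database. A minimal round-trip sketch with a shortened, made-up `NODETYPES` (the real list lives in `gargantext.constants`):

```python
NODETYPES = ['USER', 'FOLDER', 'CORPUS', 'DOCUMENT']   # order is illustrative only

stored = NODETYPES.index('CORPUS')   # what process_bind_param() writes: 2
loaded = NODETYPES[stored]           # what process_result_value() reads back
assert loaded == 'CORPUS'
```

Because rows keep the index rather than the name, the order of `NODETYPES` has to stay stable once data exists.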
@@ -35,36 +35,36 @@ class Node(ValidatorMixin, Base):
The possible types are defined in `gargantext.constants.NODETYPES`.
Thanks to __new__ overriding and SQLAlchemy's polymorphism, every Node
instance is automagically cast to its subclass, assuming a typename
instance is automagically cast to its subclass, assuming a type
is specified.
>>> Node(name='without-type')
<Node(id=None, typename=None, user_id=None, parent_id=None, name='without-type', date=None)>
>>> Node(typename='CORPUS')
<CorpusNode(id=None, typename='CORPUS', user_id=None, parent_id=None, name=None, date=None)>
<Node(id=None, type=None, user_id=None, parent_id=None, name='without-type')>
>>> Node(type='CORPUS')
<CorpusNode(id=None, type='CORPUS', user_id=None, parent_id=None, name=None)>
>>> from gargantext.core.db import session
>>> session.query(Node).filter_by(typename='USER').first() # doctest: +ELLIPSIS
>>> session.query(Node).filter_by(type='USER').first() # doctest: +ELLIPSIS
<UserNode(...)>
But beware, there are some pitfalls with bulk queries. In this case typename
But beware, there are some pitfalls with bulk queries. In this case type
MUST be specified manually.
>>> session.query(UserNode).delete() # doctest: +SKIP
# Wrong: all nodes are deleted!
>>> session.query(UserNode).filter_by(typename='USER').delete() # doctest: +SKIP
>>> session.query(UserNode).filter_by(type='USER').delete() # doctest: +SKIP
# Right: only user nodes are deleted.
"""
NAME_MAXLEN = 255
__tablename__ = 'nodes'
__table_args__ = (
Index('nodes_user_id_typename_parent_id_idx', 'user_id', 'typename', 'parent_id'),
Index('nodes_hyperdata_idx', 'hyperdata', postgresql_using='gin'))
Index('nodes_user_id_type_parent_id_idx', 'user_id', 'type', 'parent_id'),
Index('nodes_data_idx', 'data', postgresql_using='gin'))
id = Column(Integer, primary_key=True)
typename = Column(NodeType, index=True, nullable=False)
__mapper_args__ = { 'polymorphic_on': typename }
type = Column(NodeType, index=True, nullable=False)
__mapper_args__ = { 'polymorphic_on': type }
# foreign keys
user_id = Column(Integer, ForeignKey(User.id, ondelete='CASCADE'),
@@ -76,11 +76,11 @@ class Node(ValidatorMixin, Base):
parent = relationship('Node', remote_side=[id])
name = Column(String(NAME_MAXLEN), nullable=False, server_default='')
date = Column(DateTime(timezone=True), nullable=False,
server_default=text('CURRENT_TIMESTAMP'))
created = Column(DateTime(timezone=True), nullable=False,
server_default=text('CURRENT_TIMESTAMP'))
hyperdata = Column(JSONB, default=dict, nullable=False,
server_default=text("'{}'::jsonb"))
data = Column(JSONB, default=dict, nullable=False,
server_default=text("'{}'::jsonb"))
# Create a TSVECTOR column to use fulltext search feature of PostgreSQL.
# We need to create a trigger to update this column on update and insert,
@@ -96,51 +96,51 @@ class Node(ValidatorMixin, Base):
secondaryjoin='NodeNode.node2_id==Node.id')
def __new__(cls, *args, **kwargs):
if cls is Node and kwargs.get('typename'):
typename = kwargs.pop('typename')
return _NODE_MODELS[typename](*args, **kwargs)
if cls is Node and kwargs.get('type'):
type = kwargs.pop('type')
return _NODE_MODELS[type](*args, **kwargs)
return super(Node, cls).__new__(cls)
def __init__(self, **kwargs):
"""Node's constructor.
Initialize the `hyperdata` as a dictionary if no value was given.
Initialize the `data` as a dictionary if no value was given.
"""
if 'hyperdata' not in kwargs:
kwargs['hyperdata'] = kwargs.get('hyperdata', MutableDict())
if 'data' not in kwargs:
kwargs['data'] = kwargs.get('data', MutableDict())
Base.__init__(self, **kwargs)
def __getitem__(self, key):
"""Allow direct access to hyperdata via the bracket operator."""
return self.hyperdata[key]
"""Allow direct access to data via the bracket operator."""
return self.data[key]
def __setitem__(self, key, value):
"""Allow direct access to hyperdata via the bracket operator."""
old_value = self.hyperdata.get(key, _UNDEFINED)
"""Allow direct access to data via the bracket operator."""
old_value = self.data.get(key, _UNDEFINED)
if old_value != value:
self.hyperdata[key] = value
flag_modified(self, 'hyperdata')
self.data[key] = value
flag_modified(self, 'data')
def __delitem__(self, key):
"""Allow direct access to hyperdata via the bracket operator."""
if key in self.hyperdata:
del self.hyperdata[key]
flag_modified(self, 'hyperdata')
"""Allow direct access to data via the bracket operator."""
if key in self.data:
del self.data[key]
flag_modified(self, 'data')
def __repr__(self):
return '<{0.__class__.__name__}(id={0.id}, typename={0.typename!r}, ' \
return '<{0.__class__.__name__}(id={0.id}, type={0.type!r}, ' \
'user_id={0.user_id}, parent_id={0.parent_id}, ' \
'name={0.name!r}, date={0.date})>'.format(self)
'name={0.name!r}, created={0.created})>'.format(self)
@validates('name')
def validate_name(self, key, value):
return self.enforce_length(key, value)
def save_hyperdata(self):
def save_data(self):
"""This is a necessary, yet ugly trick.
Indeed, PostgreSQL does not yet manage incremental updates (see
https://bashelton.com/2014/03/updating-postgresql-json-fields-via-sqlalchemy/)
"""
flag_modified(self, 'hyperdata')
flag_modified(self, 'data')
class DocumentNode(Node):
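These dunder methods make the `data` dictionary readable and writable through the node itself, and `flag_modified()` tells SQLAlchemy that the JSONB column changed, since PostgreSQL rewrites the whole value rather than patching it. A short usage sketch, assuming an open `session` and an existing `node`:

```python
# Bracket access reads and writes node.data directly.
node['title'] = 'Updated title'   # __setitem__ also calls flag_modified()
print(node['title'])              # __getitem__ returns node.data['title']

# Mutations that bypass __setitem__ are invisible to SQLAlchemy, so mark
# the column dirty before committing; save_data() is just that flag.
node.data['authors'] = ['Someone']
node.save_data()
session.commit()
```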
@@ -163,7 +163,7 @@ class CorpusNode(Node):
'type': 1,
'url': None}]
"""
if 'resources' not in self.hyperdata:
if 'resources' not in self.data:
self['resources'] = MutableList()
return self['resources']
@@ -206,7 +206,7 @@ class NodeNode(Base):
#
# We could manually write a class for every NodeType, or find a way to
# tell SQLAlchemy that it should stick to instantiating a Node when a
# class is not defined for the wanted typename.
# class is not defined for the wanted type.
_ALREADY_IMPLEMENTED_NODE_TYPES = \
set(cls.__mapper_args__.get('polymorphic_identity') for cls in Node.__subclasses__())
......
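The excerpt stops before showing how the node classes that are not written by hand get created. One plausible mechanism, sketched here purely as an assumption, is to generate a bare subclass per remaining type so that `_NODE_MODELS` (used in `__new__` above) covers every entry of `NODETYPES`:

```python
# Sketch only: map every type name to a model class. Hand-written classes
# (DocumentNode, CorpusNode, UserNode, ...) are collected first, then a
# minimal subclass is generated for each type that has none.
_NODE_MODELS = {
    cls.__mapper_args__.get('polymorphic_identity'): cls
    for cls in Node.__subclasses__()
    if cls.__mapper_args__.get('polymorphic_identity')
}

for _type in NODETYPES:
    if _type is None or _type in _ALREADY_IMPLEMENTED_NODE_TYPES:
        continue
    _NODE_MODELS[_type] = type(
        _type.title() + 'Node',      # class name derived from the type name
        (Node,),
        {'__mapper_args__': {'polymorphic_identity': _type}},
    )
```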