Commit e6a6f379 authored by sim

Model: rename typename, date, hyperdata of Node to type, created and data

parent 7110ae5b
@@ -89,12 +89,12 @@ is represented as a JSON object with these fields:
 | parent_id      | int                  | Yes                    | Node id of this node's parent |
 | name           | string               | Default = empty string | Label of this node, can be used for different purposes depending on its type |
 | created        | datetime as a string | Default = now          | Creation date of this node |
-| data           | object               | Default = {}           | [Hyperdata](#hyperdata) of this node |
+| data           | object               | Default = {}           | [Data](#data) of this node |
 | title_abstract | string               | Automatic*             | Only meaningful for DOCUMENT nodes: full-text index on title + abstract |
 
 \* Please don't provide any value, database will generate one automatically.
 
-#### Hyperdata
+#### Data
 
 Type specific data of each node is stored in the `data` field, as a JSON
 object. For example documents abstract and title are stored here. There is no
...
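For orientation, a node following the table above might be posted as an object like the one below; the values are purely illustrative and `title_abstract` is intentionally omitted since the database generates it.

```python
# Illustrative node payload matching the documented fields (made-up values).
node = {
    "parent_id": 42,                         # id of this node's parent
    "name": "My corpus",                     # free-form label
    "created": "2017-06-01T12:00:00+00:00",  # defaults to now if omitted
    "data": {},                              # type-specific data, defaults to {}
}
```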
@@ -40,7 +40,7 @@ from gargantext.utils.lists import *
 from gargantext.utils.dates import datetime, to_datetime
 
-# types & models (nodes, lists, hyperdata, resource) ---------------------------------------------
+# types & models (nodes, lists, data, resource) ---------------------------------------------
 
 LISTTYPES = {
     'DOCUMENT'  : WeightedList,
     'GROUPLIST' : Translations,  # todo remove "LIST" from name
...
@@ -69,7 +69,7 @@ class DatabasePipeline(object):
         self.corpus = self.db.query(CorpusNode).filter_by(id=scraper.corpus).one_or_none()
 
     def close_spider(self, scraper):
-        resources = self.corpus.hyperdata.get('resources', [])
+        resources = self.corpus.data.get('resources', [])
         resources.append({
             "date": datetime.now(),
             # TODO Raw files storage and listing in paths
@@ -81,14 +81,15 @@ class DatabasePipeline(object):
         })
 
         self.corpus['resources'] = resources
-        self.corpus.save_hyperdata()
+        self.corpus.save_data()
         self.db.commit()
 
     def process_item(self, item, scraper):
-        doc = DocumentNode(name=item.get('title')[:DocumentNode.NAME_MAXLEN],
+        title = item.get('title', '')[:DocumentNode.NAME_MAXLEN]
+        doc = DocumentNode(name=title,
                            parent_id=self.corpus.id,
-                           hyperdata=dict(item))
+                           data=dict(item))
         self.corpus.related.append(doc)
...
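Beyond the rename, splitting the title line also means a scraped item without a title no longer crashes the pipeline. A minimal illustration, with 255 standing in for `DocumentNode.NAME_MAXLEN`:

```python
# Hypothetical items, just to show why the new title handling matters.
item_without_title = {}
item_with_title = {"title": "A very long title " * 50}

# Old form: item_without_title.get('title')[:255] raises
# TypeError: 'NoneType' object is not subscriptable.

# New form falls back to an empty string and truncates safely:
title = item_without_title.get("title", "")[:255]   # -> ''
title = item_with_title.get("title", "")[:255]       # -> truncated to 255 chars
```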
@@ -19,8 +19,8 @@ class NodeType(TypeDecorator):
     Values are detailed in `gargantext.constants.NODETYPES`.
     """
     impl = Integer
 
-    def process_bind_param(self, typename, dialect):
-        return NODETYPES.index(typename)
+    def process_bind_param(self, type, dialect):
+        return NODETYPES.index(type)
 
     def process_result_value(self, typeindex, dialect):
         return NODETYPES[typeindex]
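The renamed parameter does not change what the decorator does: it still stores a type name as its position in `NODETYPES` and restores the name when loading. A rough sketch of that round trip, using a made-up `NODETYPES` list (the real one lives in `gargantext.constants` and its order defines the stored integers):

```python
# Hypothetical NODETYPES for illustration only.
NODETYPES = [None, 'USER', 'PROJECT', 'CORPUS', 'DOCUMENT']

# What process_bind_param does when writing to the database:
stored = NODETYPES.index('CORPUS')   # -> 3, the integer actually persisted

# What process_result_value does when reading back:
restored = NODETYPES[stored]         # -> 'CORPUS'
```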
@@ -35,36 +35,36 @@ class Node(ValidatorMixin, Base):
     The possible types are defined in `gargantext.constants.NODETYPES`.
 
     Thanks to __new__ overriding and SQLAlchemy's polymorphism, every Node
-    instance is automagically casted to its sub-class, assuming a typename
+    instance is automagically casted to its sub-class, assuming a type
     is specified.
 
     >>> Node(name='without-type')
-    <Node(id=None, typename=None, user_id=None, parent_id=None, name='without-type', date=None)>
-    >>> Node(typename='CORPUS')
-    <CorpusNode(id=None, typename='CORPUS', user_id=None, parent_id=None, name=None, date=None)>
+    <Node(id=None, type=None, user_id=None, parent_id=None, name='without-type')>
+    >>> Node(type='CORPUS')
+    <CorpusNode(id=None, type='CORPUS', user_id=None, parent_id=None, name=None)>
     >>> from gargantext.core.db import session
-    >>> session.query(Node).filter_by(typename='USER').first()  # doctest: +ELLIPSIS
+    >>> session.query(Node).filter_by(type='USER').first()  # doctest: +ELLIPSIS
     <UserNode(...)>
 
-    But beware, there are some pitfalls with bulk queries. In this case typename
+    But beware, there are some pitfalls with bulk queries. In this case type
     MUST be specified manually.
 
     >>> session.query(UserNode).delete()  # doctest: +SKIP
     # Wrong: all nodes are deleted!
-    >>> session.query(UserNode).filter_by(typename='USER').delete()  # doctest: +SKIP
+    >>> session.query(UserNode).filter_by(type='USER').delete()  # doctest: +SKIP
     # Right: only user nodes are deleted.
     """
     NAME_MAXLEN = 255
 
     __tablename__ = 'nodes'
     __table_args__ = (
-        Index('nodes_user_id_typename_parent_id_idx', 'user_id', 'typename', 'parent_id'),
-        Index('nodes_hyperdata_idx', 'hyperdata', postgresql_using='gin'))
+        Index('nodes_user_id_type_parent_id_idx', 'user_id', 'type', 'parent_id'),
+        Index('nodes_data_idx', 'data', postgresql_using='gin'))
 
     id = Column(Integer, primary_key=True)
-    typename = Column(NodeType, index=True, nullable=False)
-    __mapper_args__ = { 'polymorphic_on': typename }
+    type = Column(NodeType, index=True, nullable=False)
+    __mapper_args__ = { 'polymorphic_on': type }
 
     # foreign keys
     user_id = Column(Integer, ForeignKey(User.id, ondelete='CASCADE'),
@@ -76,11 +76,11 @@ class Node(ValidatorMixin, Base):
     parent = relationship('Node', remote_side=[id])
 
     name = Column(String(NAME_MAXLEN), nullable=False, server_default='')
-    date = Column(DateTime(timezone=True), nullable=False,
+    created = Column(DateTime(timezone=True), nullable=False,
                    server_default=text('CURRENT_TIMESTAMP'))
-    hyperdata = Column(JSONB, default=dict, nullable=False,
+    data = Column(JSONB, default=dict, nullable=False,
                        server_default=text("'{}'::jsonb"))
 
     # Create a TSVECTOR column to use fulltext search feature of PostgreSQL.
     # We need to create a trigger to update this column on update and insert,
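With the columns renamed, queries filter on `type`, order by `created` and probe `data`. A hedged sketch of what call sites might look like after this change; the session setup and the 'source' key are assumptions, not part of this diff:

```python
# Sketch only: assumes a configured SQLAlchemy session and that documents keep
# a 'source' key inside their JSONB data.
recent_docs = (session.query(Node)
               .filter(Node.type == 'DOCUMENT')   # NodeType column, was typename
               .order_by(Node.created.desc())     # was date
               .limit(10))

# The GIN index declared above (nodes_data_idx) can serve JSONB containment:
pubmed_docs = (session.query(Node)
               .filter(Node.data.contains({'source': 'pubmed'})))
```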
@@ -96,51 +96,51 @@ class Node(ValidatorMixin, Base):
                            secondaryjoin='NodeNode.node2_id==Node.id')
 
     def __new__(cls, *args, **kwargs):
-        if cls is Node and kwargs.get('typename'):
-            typename = kwargs.pop('typename')
-            return _NODE_MODELS[typename](*args, **kwargs)
+        if cls is Node and kwargs.get('type'):
+            type = kwargs.pop('type')
+            return _NODE_MODELS[type](*args, **kwargs)
         return super(Node, cls).__new__(cls)
 
     def __init__(self, **kwargs):
         """Node's constructor.
-        Initialize the `hyperdata` as a dictionary if no value was given.
+        Initialize the `data` as a dictionary if no value was given.
         """
-        if 'hyperdata' not in kwargs:
-            kwargs['hyperdata'] = kwargs.get('hyperdata', MutableDict())
+        if 'data' not in kwargs:
+            kwargs['data'] = kwargs.get('data', MutableDict())
         Base.__init__(self, **kwargs)
 
     def __getitem__(self, key):
-        """Allow direct access to hyperdata via the bracket operator."""
-        return self.hyperdata[key]
+        """Allow direct access to data via the bracket operator."""
+        return self.data[key]
 
     def __setitem__(self, key, value):
-        """Allow direct access to hyperdata via the bracket operator."""
-        old_value = self.hyperdata.get(key, _UNDEFINED)
+        """Allow direct access to data via the bracket operator."""
+        old_value = self.data.get(key, _UNDEFINED)
         if old_value != value:
-            self.hyperdata[key] = value
-            flag_modified(self, 'hyperdata')
+            self.data[key] = value
+            flag_modified(self, 'data')
 
     def __delitem(self, key):
-        """Allow direct access to hyperdata via the bracket operator."""
-        if key in self.hyperdata:
-            del self.hyperdata[key]
-            flag_modified(self, 'hyperdata')
+        """Allow direct access to data via the bracket operator."""
+        if key in self.data:
+            del self.data[key]
+            flag_modified(self, 'data')
 
     def __repr__(self):
-        return '<{0.__class__.__name__}(id={0.id}, typename={0.typename!r}, ' \
+        return '<{0.__class__.__name__}(id={0.id}, type={0.type!r}, ' \
                'user_id={0.user_id}, parent_id={0.parent_id}, ' \
-               'name={0.name!r}, date={0.date})>'.format(self)
+               'name={0.name!r}, created={0.created})>'.format(self)
 
     @validates('name')
     def validate_name(self, key, value):
         return self.enforce_length(key, value)
 
-    def save_hyperdata(self):
+    def save_data(self):
         """This is a necessary, yet ugly trick.
         Indeed, PostgreSQL does not yet manage incremental updates (see
         https://bashelton.com/2014/03/updating-postgresql-json-fields-via-sqlalchemy/)
         """
-        flag_modified(self, 'hyperdata')
+        flag_modified(self, 'data')
 
 
 class DocumentNode(Node):
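In practice the rename mostly shows up at call sites like the pipeline above: bracket assignment flags the JSONB column by itself, while mutating `data` in place still needs the explicit `save_data()` call. A small usage sketch, with the node and keys invented for illustration:

```python
corpus = session.query(CorpusNode).first()   # assumes an existing corpus node

corpus['language'] = 'en'        # __setitem__ calls flag_modified(self, 'data') itself

corpus.data['tags'] = ['demo']   # in-place mutation of the dict is not tracked...
corpus.save_data()               # ...so mark the column dirty by hand
session.commit()                 # both changes are now written to PostgreSQL
```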
@@ -163,7 +163,7 @@ class CorpusNode(Node):
             'type': 1,
             'url': None}]
         """
-        if 'resources' not in self.hyperdata:
+        if 'resources' not in self.data:
             self['resources'] = MutableList()
         return self['resources']
@@ -206,7 +206,7 @@ class NodeNode(Base):
 #
 # We could manually write a class for every NodeType, or find a way to
 # tell SQLAlchemy that it should stick to instantiate a Node when a
-# class is not defined for the wanted typename.
+# class is not defined for the wanted type.
 _ALREADY_IMPLEMENTED_NODE_TYPES = \
     set(cls.__mapper_args__.get('polymorphic_identity') for cls in Node.__subclasses__())
...
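The comment above alludes to generating the missing per-type classes, but the diff does not show how `_NODE_MODELS` is filled. A plausible sketch of the idea, not necessarily the module's actual code:

```python
# Assumption: illustrative only.  For every entry of NODETYPES without a
# hand-written subclass, build one on the fly so polymorphic loading always
# has a concrete model (and __new__ has something to look up).
_NODE_MODELS = {}
for nodetype in NODETYPES:
    if nodetype is None:
        continue
    cls = next((c for c in Node.__subclasses__()
                if c.__mapper_args__.get('polymorphic_identity') == nodetype), None)
    if cls is None:
        cls = type(nodetype.title() + 'Node', (Node,),
                   {'__mapper_args__': {'polymorphic_identity': nodetype}})
    _NODE_MODELS[nodetype] = cls
```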