Commit e6a6f379 authored by sim

Model: rename typename, date, hyperdata of Node to type, created and data

parent 7110ae5b
@@ -89,12 +89,12 @@ is represented as a JSON object with these fields:
| parent_id | int | Yes | Node id of this node's parent |
| name | string | Default = empty string | Label of this node, can be used for different purposes depending on its type |
| created | datetime as a string | Default = now | Creation date of this node |
| data | object | Default = {} | [Hyperdata](#hyperdata) of this node |
| data | object | Default = {} | [Data](#data) of this node |
| title_abstract | string | Automatic* | Only meaningful for DOCUMENT nodes: full-text index on title + abstract |
\* Please don't provide any value; the database will generate one automatically.
#### Hyperdata
#### Data
The type-specific data of each node is stored in the `data` field, as a JSON
object. For example, a document's abstract and title are stored here. There is no
......
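For illustration, a DOCUMENT node serialized with the renamed fields might look like the sketch below. The values are made up, and fields not visible in the excerpt above (`id`, `type`, `user_id`) are assumed from the model definition further down.

```python
# Hypothetical serialized DOCUMENT node under the renamed schema.
node = {
    "id": 42,                                 # assigned by the database
    "type": "DOCUMENT",                       # formerly "typename"
    "user_id": 1,
    "parent_id": 7,                           # id of the parent CORPUS node
    "name": "A short label",
    "created": "2016-06-01T12:00:00+00:00",   # formerly "date"
    "data": {                                 # formerly "hyperdata"
        "title": "A short label",
        "abstract": "Type-specific payload goes here.",
    },
    # "title_abstract" is generated by the database; don't send it.
}
```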
@@ -40,7 +40,7 @@ from gargantext.utils.lists import *
from gargantext.utils.dates import datetime, to_datetime
# types & models (nodes, lists, hyperdata, resource) ---------------------------------------------
# types & models (nodes, lists, data, resource) ---------------------------------------------
LISTTYPES = {
'DOCUMENT' : WeightedList,
'GROUPLIST' : Translations, # todo remove "LIST" from name
......
@@ -69,7 +69,7 @@ class DatabasePipeline(object):
self.corpus = self.db.query(CorpusNode).filter_by(id=scraper.corpus).one_or_none()
def close_spider(self, scraper):
resources = self.corpus.hyperdata.get('resources', [])
resources = self.corpus.data.get('resources', [])
resources.append({
"date": datetime.now(),
# TODO Raw files storage and listing in paths
@@ -81,14 +81,15 @@ class DatabasePipeline(object):
})
self.corpus['resources'] = resources
self.corpus.save_hyperdata()
self.corpus.save_data()
self.db.commit()
def process_item(self, item, scraper):
doc = DocumentNode(name=item.get('title')[:DocumentNode.NAME_MAXLEN],
title = item.get('title', '')[:DocumentNode.NAME_MAXLEN]
doc = DocumentNode(name=title,
parent_id=self.corpus.id,
hyperdata=dict(item))
data=dict(item))
self.corpus.related.append(doc)
......
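A minimal sketch of how a scraped item flows through this pipeline under the renamed API. The `item` contents, and the `corpus` and `db` names standing in for the pipeline's corpus node and database session, are assumptions for illustration; imports are omitted.

```python
# Hypothetical item produced by a scraper spider.
item = {"title": "An example title", "abstract": "An example abstract."}

# process_item(): the title becomes the node name (truncated to NAME_MAXLEN)
# and the whole item is stored in the renamed `data` field.
title = item.get("title", "")[:DocumentNode.NAME_MAXLEN]
doc = DocumentNode(name=title, parent_id=corpus.id, data=dict(item))
corpus.related.append(doc)

# close_spider(): resource metadata now lives in `corpus.data` and is
# persisted with save_data() instead of save_hyperdata().
resources = corpus.data.get("resources", [])
resources.append({"date": datetime.now()})
corpus["resources"] = resources
corpus.save_data()
db.commit()
```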
@@ -19,8 +19,8 @@ class NodeType(TypeDecorator):
Values are detailed in `gargantext.constants.NODETYPES`.
"""
impl = Integer
def process_bind_param(self, typename, dialect):
return NODETYPES.index(typename)
def process_bind_param(self, type, dialect):
return NODETYPES.index(type)
def process_result_value(self, typeindex, dialect):
return NODETYPES[typeindex]
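In effect the decorator stores a node's type as its position in `NODETYPES`, so only a small integer reaches the database. A minimal round-trip sketch with a shortened, made-up `NODETYPES` (the real list lives in `gargantext.constants`):

```python
NODETYPES = ['USER', 'FOLDER', 'CORPUS', 'DOCUMENT']   # order is illustrative only

stored = NODETYPES.index('CORPUS')   # what process_bind_param() writes: 2
loaded = NODETYPES[stored]           # what process_result_value() reads back
assert loaded == 'CORPUS'
```

Because rows keep the index rather than the name, the order of `NODETYPES` has to stay stable once data exists.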
@@ -35,36 +35,36 @@ class Node(ValidatorMixin, Base):
The possible types are defined in `gargantext.constants.NODETYPES`.
Thanks to __new__ overriding and SQLAlchemy's polymorphism, every Node
instance is automagically cast to its subclass, assuming a typename
instance is automagically cast to its subclass, assuming a type
is specified.
>>> Node(name='without-type')
<Node(id=None, typename=None, user_id=None, parent_id=None, name='without-type', date=None)>
>>> Node(typename='CORPUS')
<CorpusNode(id=None, typename='CORPUS', user_id=None, parent_id=None, name=None, date=None)>
<Node(id=None, type=None, user_id=None, parent_id=None, name='without-type')>
>>> Node(type='CORPUS')
<CorpusNode(id=None, type='CORPUS', user_id=None, parent_id=None, name=None)>
>>> from gargantext.core.db import session
>>> session.query(Node).filter_by(typename='USER').first() # doctest: +ELLIPSIS
>>> session.query(Node).filter_by(type='USER').first() # doctest: +ELLIPSIS
<UserNode(...)>
But beware, there are some pitfalls with bulk queries. In this case typename
But beware, there are some pitfalls with bulk queries. In this case type
MUST be specified manually.
>>> session.query(UserNode).delete() # doctest: +SKIP
# Wrong: all nodes are deleted!
>>> session.query(UserNode).filter_by(typename='USER').delete() # doctest: +SKIP
>>> session.query(UserNode).filter_by(type='USER').delete() # doctest: +SKIP
# Right: only user nodes are deleted.
"""
NAME_MAXLEN = 255
__tablename__ = 'nodes'
__table_args__ = (
Index('nodes_user_id_typename_parent_id_idx', 'user_id', 'typename', 'parent_id'),
Index('nodes_hyperdata_idx', 'hyperdata', postgresql_using='gin'))
Index('nodes_user_id_type_parent_id_idx', 'user_id', 'type', 'parent_id'),
Index('nodes_data_idx', 'data', postgresql_using='gin'))
id = Column(Integer, primary_key=True)
typename = Column(NodeType, index=True, nullable=False)
__mapper_args__ = { 'polymorphic_on': typename }
type = Column(NodeType, index=True, nullable=False)
__mapper_args__ = { 'polymorphic_on': type }
# foreign keys
user_id = Column(Integer, ForeignKey(User.id, ondelete='CASCADE'),
@@ -76,11 +76,11 @@ class Node(ValidatorMixin, Base):
parent = relationship('Node', remote_side=[id])
name = Column(String(NAME_MAXLEN), nullable=False, server_default='')
date = Column(DateTime(timezone=True), nullable=False,
server_default=text('CURRENT_TIMESTAMP'))
created = Column(DateTime(timezone=True), nullable=False,
server_default=text('CURRENT_TIMESTAMP'))
hyperdata = Column(JSONB, default=dict, nullable=False,
server_default=text("'{}'::jsonb"))
data = Column(JSONB, default=dict, nullable=False,
server_default=text("'{}'::jsonb"))
# Create a TSVECTOR column to use fulltext search feature of PostgreSQL.
# We need to create a trigger to update this column on update and insert,
@@ -96,51 +96,51 @@ class Node(ValidatorMixin, Base):
secondaryjoin='NodeNode.node2_id==Node.id')
def __new__(cls, *args, **kwargs):
if cls is Node and kwargs.get('typename'):
typename = kwargs.pop('typename')
return _NODE_MODELS[typename](*args, **kwargs)
if cls is Node and kwargs.get('type'):
type = kwargs.pop('type')
return _NODE_MODELS[type](*args, **kwargs)
return super(Node, cls).__new__(cls)
def __init__(self, **kwargs):
"""Node's constructor.
Initialize the `hyperdata` as a dictionary if no value was given.
Initialize the `data` as a dictionary if no value was given.
"""
if 'hyperdata' not in kwargs:
kwargs['hyperdata'] = kwargs.get('hyperdata', MutableDict())
if 'data' not in kwargs:
kwargs['data'] = kwargs.get('data', MutableDict())
Base.__init__(self, **kwargs)
def __getitem__(self, key):
"""Allow direct access to hyperdata via the bracket operator."""
return self.hyperdata[key]
"""Allow direct access to data via the bracket operator."""
return self.data[key]
def __setitem__(self, key, value):
"""Allow direct access to hyperdata via the bracket operator."""
old_value = self.hyperdata.get(key, _UNDEFINED)
"""Allow direct access to data via the bracket operator."""
old_value = self.data.get(key, _UNDEFINED)
if old_value != value:
self.hyperdata[key] = value
flag_modified(self, 'hyperdata')
self.data[key] = value
flag_modified(self, 'data')
def __delitem__(self, key):
"""Allow direct access to hyperdata via the bracket operator."""
if key in self.hyperdata:
del self.hyperdata[key]
flag_modified(self, 'hyperdata')
"""Allow direct access to data via the bracket operator."""
if key in self.data:
del self.data[key]
flag_modified(self, 'data')
def __repr__(self):
return '<{0.__class__.__name__}(id={0.id}, typename={0.typename!r}, ' \
return '<{0.__class__.__name__}(id={0.id}, type={0.type!r}, ' \
'user_id={0.user_id}, parent_id={0.parent_id}, ' \
'name={0.name!r}, date={0.date})>'.format(self)
'name={0.name!r}, created={0.created})>'.format(self)
@validates('name')
def validate_name(self, key, value):
return self.enforce_length(key, value)
def save_hyperdata(self):
def save_data(self):
"""This is a necessary, yet ugly trick.
Indeed, PostgreSQL does not yet manage incremental updates (see
https://bashelton.com/2014/03/updating-postgresql-json-fields-via-sqlalchemy/)
"""
flag_modified(self, 'hyperdata')
flag_modified(self, 'data')
class DocumentNode(Node):
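These dunder methods make the `data` dictionary readable and writable through the node itself, and `flag_modified()` tells SQLAlchemy that the JSONB column changed, since PostgreSQL rewrites the whole value rather than patching it. A short usage sketch, assuming an open `session` and an existing `node`:

```python
# Bracket access reads and writes node.data directly.
node['title'] = 'Updated title'   # __setitem__ also calls flag_modified()
print(node['title'])              # __getitem__ returns node.data['title']

# Mutations that bypass __setitem__ are invisible to SQLAlchemy, so mark
# the column dirty before committing; save_data() is just that flag.
node.data['authors'] = ['Someone']
node.save_data()
session.commit()
```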
@@ -163,7 +163,7 @@ class CorpusNode(Node):
'type': 1,
'url': None}]
"""
if 'resources' not in self.hyperdata:
if 'resources' not in self.data:
self['resources'] = MutableList()
return self['resources']
@@ -206,7 +206,7 @@ class NodeNode(Base):
#
# We could manually write a class for every NodeType, or find a way to
# tell SQLAlchemy that it should stick to instantiating a Node when a
# class is not defined for the wanted typename.
# class is not defined for the wanted type.
_ALREADY_IMPLEMENTED_NODE_TYPES = \
set(cls.__mapper_args__.get('polymorphic_identity') for cls in Node.__subclasses__())
......
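The excerpt stops before showing how the node classes that are not written by hand get created. One plausible mechanism, sketched here purely as an assumption, is to generate a bare subclass per remaining type so that `_NODE_MODELS` (used in `__new__` above) covers every entry of `NODETYPES`:

```python
# Sketch only: map every type name to a model class. Hand-written classes
# (DocumentNode, CorpusNode, UserNode, ...) are collected first, then a
# minimal subclass is generated for each type that has none.
_NODE_MODELS = {
    cls.__mapper_args__.get('polymorphic_identity'): cls
    for cls in Node.__subclasses__()
    if cls.__mapper_args__.get('polymorphic_identity')
}

for _type in NODETYPES:
    if _type is None or _type in _ALREADY_IMPLEMENTED_NODE_TYPES:
        continue
    _NODE_MODELS[_type] = type(
        _type.title() + 'Node',      # class name derived from the type name
        (Node,),
        {'__mapper_args__': {'polymorphic_identity': _type}},
    )
```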