Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
e6a6f379
Commit
e6a6f379
authored
Mar 29, 2018
by
sim
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Model: rename typename, date, hyperdata of Node to type, created and data
parent
7110ae5b
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
47 additions
and
46 deletions
+47
-46
REST_API.md
doc/REST_API.md
+2
-2
constants.py
gargantext/constants.py
+1
-1
pipelines.py
gargantext/datasource/pipelines.py
+5
-4
nodes.py
gargantext/models/nodes.py
+39
-39
No files found.
doc/REST_API.md
View file @
e6a6f379
...
...
@@ -89,12 +89,12 @@ is represented as a JSON object with these fields:
| parent_id | int | Yes | Node id of this node's parent |
| name | string | Default = empty string | Label of this node, can be used for different purposes depending on its type |
| created | datetime as a string | Default = now | Creation date of this node |
| data | object | Default = {} | [Hyperdata](#hyperdata) of this node |
| data | object | Default = {} | [Data](#data) of this node |
| title_abstract | string | Automatic* | Only meaningful for DOCUMENT nodes: full-text index on title + abstract |
\* Please don't provide any value, database will generate one automatically.
#### Hyperdata
#### Data
Type-specific data of each node is stored in the `data` field, as a JSON
object. For example, a document's abstract and title are stored here. There is no
...
...
gargantext/constants.py
View file @
e6a6f379
...
...
@@ -40,7 +40,7 @@ from gargantext.utils.lists import *
from
gargantext.utils.dates
import
datetime
,
to_datetime
# types & models (nodes, lists,
hyper
data, resource) ---------------------------------------------
# types & models (nodes, lists, data, resource) ---------------------------------------------
LISTTYPES
=
{
'DOCUMENT'
:
WeightedList
,
'GROUPLIST'
:
Translations
,
# todo remove "LIST" from name
...
...
gargantext/datasource/pipelines.py
View file @
e6a6f379
...
...
@@ -69,7 +69,7 @@ class DatabasePipeline(object):
self
.
corpus
=
self
.
db
.
query
(
CorpusNode
)
.
filter_by
(
id
=
scraper
.
corpus
)
.
one_or_none
()
def
close_spider
(
self
,
scraper
):
resources
=
self
.
corpus
.
hyper
data
.
get
(
'resources'
,
[])
resources
=
self
.
corpus
.
data
.
get
(
'resources'
,
[])
resources
.
append
({
"date"
:
datetime
.
now
(),
# TODO Raw files storage and listing in paths
...
...
@@ -81,14 +81,15 @@ class DatabasePipeline(object):
})
self
.
corpus
[
'resources'
]
=
resources
self
.
corpus
.
save_
hyper
data
()
self
.
corpus
.
save_data
()
self
.
db
.
commit
()
def
process_item
(
self
,
item
,
scraper
):
doc
=
DocumentNode
(
name
=
item
.
get
(
'title'
)[:
DocumentNode
.
NAME_MAXLEN
],
title
=
item
.
get
(
'title'
,
''
)[:
DocumentNode
.
NAME_MAXLEN
]
doc
=
DocumentNode
(
name
=
title
,
parent_id
=
self
.
corpus
.
id
,
hyper
data
=
dict
(
item
))
data
=
dict
(
item
))
self
.
corpus
.
related
.
append
(
doc
)
...
...
gargantext/models/nodes.py
View file @
e6a6f379
...
...
@@ -19,8 +19,8 @@ class NodeType(TypeDecorator):
Values are detailed in `gargantext.constants.NODETYPES`.
"""
impl
=
Integer
def process_bind_param(self, type, dialect):
    """Convert a node type value into what is sent to the database.

    The value returned is the position of ``type`` inside ``NODETYPES``
    (looked up with ``NODETYPES.index``). ``dialect`` is not used.
    """
    # NOTE(review): presumably NODETYPES is a list/tuple, in which case an
    # unknown type raises ValueError here -- confirm against constants.py.
    position = NODETYPES.index(type)
    return position
def process_result_value(self, typeindex, dialect):
    """Convert a value read from the database back into a node type.

    ``typeindex`` is used as a subscript into ``NODETYPES``; ``dialect``
    is not used.
    """
    node_type = NODETYPES[typeindex]
    return node_type
...
...
@@ -35,36 +35,36 @@ class Node(ValidatorMixin, Base):
The possible types are defined in `gargantext.constants.NODETYPES`.
Thanks to __new__ overriding and SQLAlchemy's polymorphism, every Node
instance is automagically casted to its sub-class, assuming a type
name
instance is automagically casted to its sub-class, assuming a type
is specified.
>>> Node(name='without-type')
<Node(id=None, type
name=None, user_id=None, parent_id=None, name='without-type', date=None
)>
>>> Node(type
name
='CORPUS')
<CorpusNode(id=None, type
name='CORPUS', user_id=None, parent_id=None, name=None, dat
e=None)>
<Node(id=None, type
=None, user_id=None, parent_id=None, name='without-type'
)>
>>> Node(type='CORPUS')
<CorpusNode(id=None, type
='CORPUS', user_id=None, parent_id=None, nam
e=None)>
>>> from gargantext.core.db import session
>>> session.query(Node).filter_by(type
name
='USER').first() # doctest: +ELLIPSIS
>>> session.query(Node).filter_by(type='USER').first() # doctest: +ELLIPSIS
<UserNode(...)>
But beware, there are some pitfalls with bulk queries. In this case type
name
But beware, there are some pitfalls with bulk queries. In this case type
MUST be specified manually.
>>> session.query(UserNode).delete() # doctest: +SKIP
# Wrong: all nodes are deleted!
>>> session.query(UserNode).filter_by(type
name
='USER').delete() # doctest: +SKIP
>>> session.query(UserNode).filter_by(type='USER').delete() # doctest: +SKIP
# Right: only user nodes are deleted.
"""
NAME_MAXLEN
=
255
__tablename__
=
'nodes'
__table_args__
=
(
Index
(
'nodes_user_id_type
name_parent_id_idx'
,
'user_id'
,
'typenam
e'
,
'parent_id'
),
Index
(
'nodes_
hyperdata_idx'
,
'hyper
data'
,
postgresql_using
=
'gin'
))
Index
(
'nodes_user_id_type
_parent_id_idx'
,
'user_id'
,
'typ
e'
,
'parent_id'
),
Index
(
'nodes_
data_idx'
,
'
data'
,
postgresql_using
=
'gin'
))
id
=
Column
(
Integer
,
primary_key
=
True
)
type
name
=
Column
(
NodeType
,
index
=
True
,
nullable
=
False
)
__mapper_args__
=
{
'polymorphic_on'
:
type
name
}
type
=
Column
(
NodeType
,
index
=
True
,
nullable
=
False
)
__mapper_args__
=
{
'polymorphic_on'
:
type
}
# foreign keys
user_id
=
Column
(
Integer
,
ForeignKey
(
User
.
id
,
ondelete
=
'CASCADE'
),
...
...
@@ -76,11 +76,11 @@ class Node(ValidatorMixin, Base):
parent
=
relationship
(
'Node'
,
remote_side
=
[
id
])
name
=
Column
(
String
(
NAME_MAXLEN
),
nullable
=
False
,
server_default
=
''
)
date
=
Column
(
DateTime
(
timezone
=
True
),
nullable
=
False
,
server_default
=
text
(
'CURRENT_TIMESTAMP'
))
created
=
Column
(
DateTime
(
timezone
=
True
),
nullable
=
False
,
server_default
=
text
(
'CURRENT_TIMESTAMP'
))
hyper
data
=
Column
(
JSONB
,
default
=
dict
,
nullable
=
False
,
server_default
=
text
(
"'{}'::jsonb"
))
data
=
Column
(
JSONB
,
default
=
dict
,
nullable
=
False
,
server_default
=
text
(
"'{}'::jsonb"
))
# Create a TSVECTOR column to use fulltext search feature of PostgreSQL.
# We need to create a trigger to update this column on update and insert,
...
...
@@ -96,51 +96,51 @@ class Node(ValidatorMixin, Base):
secondaryjoin
=
'NodeNode.node2_id==Node.id'
)
def __new__(cls, *args, **kwargs):
    """Allocate the instance, dispatching to a sub-class when possible.

    When ``Node`` itself is called with a truthy ``type`` keyword, that
    keyword is removed from ``kwargs`` and the class registered under it
    in ``_NODE_MODELS`` is called with the remaining arguments. In every
    other case a bare instance of ``cls`` is allocated.
    """
    dispatch_requested = cls is Node and kwargs.get('type')
    if not dispatch_requested:
        return super(Node, cls).__new__(cls)

    node_type = kwargs.pop('type')
    model = _NODE_MODELS[node_type]
    return model(*args, **kwargs)
def __init__(self, **kwargs):
    """Node's constructor.

    Supply an empty `MutableDict` for `data` when the caller gave none,
    then hand every keyword argument over to `Base.__init__`.
    """
    data_missing = 'data' not in kwargs
    if data_missing:
        kwargs['data'] = MutableDict()
    Base.__init__(self, **kwargs)
def __getitem__(self, key):
    """Read ``key`` from `data`, so that ``node[key]`` works directly."""
    payload = self.data
    return payload[key]
def __setitem__(self, key, value):
    """Write ``value`` under ``key`` in `data` via ``node[key] = value``.

    The current value (or ``_UNDEFINED`` when the key is absent) is
    compared with ``value`` first; `data` is only updated and flagged as
    modified when the two differ.
    """
    current = self.data.get(key, _UNDEFINED)
    changed = current != value
    if not changed:
        return
    self.data[key] = value
    flag_modified(self, 'data')
def __delitem__(self, key):
    """Remove ``key`` from `data` via ``del node[key]``.

    A key that is not present is ignored. When a key is actually removed,
    `data` is flagged as modified.

    The method used to be spelled ``__delitem`` (no trailing underscores).
    Python only binds the ``del obj[key]`` syntax to ``__delitem__``, and a
    name with two leading underscores and no trailing ones is name-mangled
    inside a class body, so the bracket operator never reached it.
    """
    if key in self.data:
        del self.data[key]
        flag_modified(self, 'data')


# Keep the previous (misspelled) name available for any existing caller; in
# the class body it is mangled exactly like the original definition was.
__delitem = __delitem__
def __repr__(self):
    """Return a debug string for this node.

    The string shows the class name followed by `id`, `type`, `user_id`,
    `parent_id`, `name` and `created`.
    """
    template = ('<{0.__class__.__name__}(id={0.id}, type={0.type!r}, '
                'user_id={0.user_id}, parent_id={0.parent_id}, '
                'name={0.name!r}, created={0.created})>')
    return template.format(self)
@validates('name')
def validate_name(self, key, value):
    """Validate `name` by delegating to ``self.enforce_length``.

    Whatever ``enforce_length(key, value)`` returns is returned unchanged.
    """
    checked = self.enforce_length(key, value)
    return checked
def save_data(self):
    """Flag the `data` attribute as modified.

    This is a necessary, yet ugly trick: PostgreSQL does not yet manage
    incremental updates (see
    https://bashelton.com/2014/03/updating-postgresql-json-fields-via-sqlalchemy/)
    """
    attribute = 'data'
    flag_modified(self, attribute)
class
DocumentNode
(
Node
):
...
...
@@ -163,7 +163,7 @@ class CorpusNode(Node):
'type': 1,
'url': None}]
"""
if
'resources'
not
in
self
.
hyper
data
:
if
'resources'
not
in
self
.
data
:
self
[
'resources'
]
=
MutableList
()
return
self
[
'resources'
]
...
...
@@ -206,7 +206,7 @@ class NodeNode(Base):
#
# We could manually write a class for every NodeType, or find a way to
# tell SQLAlchemy that it should stick to instantiate a Node when a
# class is not defined for the wanted type
name
.
# class is not defined for the wanted type.
# Polymorphic identities declared by the direct sub-classes of Node; a
# sub-class without one contributes None (that is what ``.get`` returns).
_ALREADY_IMPLEMENTED_NODE_TYPES = {
    subclass.__mapper_args__.get('polymorphic_identity')
    for subclass in Node.__subclasses__()
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment