Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
e6a6f379
Commit
e6a6f379
authored
Mar 29, 2018
by
sim
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Model: rename typename, date, hyperdata of Node to type, created and data
parent
7110ae5b
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
47 additions
and
46 deletions
+47
-46
REST_API.md
doc/REST_API.md
+2
-2
constants.py
gargantext/constants.py
+1
-1
pipelines.py
gargantext/datasource/pipelines.py
+5
-4
nodes.py
gargantext/models/nodes.py
+39
-39
No files found.
doc/REST_API.md
View file @
e6a6f379
...
@@ -89,12 +89,12 @@ is represented as a JSON object with these fields:
...
@@ -89,12 +89,12 @@ is represented as a JSON object with these fields:
| parent_id | int | Yes | Node id of this node's parent |
| parent_id | int | Yes | Node id of this node's parent |
| name | string | Default = empty string | Label of this node, can be used for different purposes depending on its type |
| name | string | Default = empty string | Label of this node, can be used for different purposes depending on its type |
| created | datetime as a string | Default = now | Creation date of this node |
| created | datetime as a string | Default = now | Creation date of this node |
| data | object | Default = {} |
[
Hyperdata
](
#hyper
data
)
of this node |
| data | object | Default = {} |
[
Data
](
#
data
)
of this node |
| title_abstract | string | Automatic
*
| Only meaningful for DOCUMENT nodes: full-text index on title + abstract |
| title_abstract | string | Automatic
*
| Only meaningful for DOCUMENT nodes: full-text index on title + abstract |
\*
Please don't provide any value, database will generate one automatically.
\*
Please don't provide any value, database will generate one automatically.
####
Hyperd
ata
####
D
ata
Type specific data of each node is stored in the
`data`
field, as a JSON
Type specific data of each node is stored in the
`data`
field, as a JSON
object. For example documents abstract and title are stored here. There is no
object. For example documents abstract and title are stored here. There is no
...
...
gargantext/constants.py
View file @
e6a6f379
...
@@ -40,7 +40,7 @@ from gargantext.utils.lists import *
...
@@ -40,7 +40,7 @@ from gargantext.utils.lists import *
from
gargantext.utils.dates
import
datetime
,
to_datetime
from
gargantext.utils.dates
import
datetime
,
to_datetime
# types & models (nodes, lists,
hyper
data, resource) ---------------------------------------------
# types & models (nodes, lists, data, resource) ---------------------------------------------
LISTTYPES
=
{
LISTTYPES
=
{
'DOCUMENT'
:
WeightedList
,
'DOCUMENT'
:
WeightedList
,
'GROUPLIST'
:
Translations
,
# todo remove "LIST" from name
'GROUPLIST'
:
Translations
,
# todo remove "LIST" from name
...
...
gargantext/datasource/pipelines.py
View file @
e6a6f379
...
@@ -69,7 +69,7 @@ class DatabasePipeline(object):
...
@@ -69,7 +69,7 @@ class DatabasePipeline(object):
self
.
corpus
=
self
.
db
.
query
(
CorpusNode
)
.
filter_by
(
id
=
scraper
.
corpus
)
.
one_or_none
()
self
.
corpus
=
self
.
db
.
query
(
CorpusNode
)
.
filter_by
(
id
=
scraper
.
corpus
)
.
one_or_none
()
def
close_spider
(
self
,
scraper
):
def
close_spider
(
self
,
scraper
):
resources
=
self
.
corpus
.
hyper
data
.
get
(
'resources'
,
[])
resources
=
self
.
corpus
.
data
.
get
(
'resources'
,
[])
resources
.
append
({
resources
.
append
({
"date"
:
datetime
.
now
(),
"date"
:
datetime
.
now
(),
# TODO Raw files storage and listing in paths
# TODO Raw files storage and listing in paths
...
@@ -81,14 +81,15 @@ class DatabasePipeline(object):
...
@@ -81,14 +81,15 @@ class DatabasePipeline(object):
})
})
self
.
corpus
[
'resources'
]
=
resources
self
.
corpus
[
'resources'
]
=
resources
self
.
corpus
.
save_
hyper
data
()
self
.
corpus
.
save_data
()
self
.
db
.
commit
()
self
.
db
.
commit
()
def
process_item
(
self
,
item
,
scraper
):
def
process_item
(
self
,
item
,
scraper
):
doc
=
DocumentNode
(
name
=
item
.
get
(
'title'
)[:
DocumentNode
.
NAME_MAXLEN
],
title
=
item
.
get
(
'title'
,
''
)[:
DocumentNode
.
NAME_MAXLEN
]
doc
=
DocumentNode
(
name
=
title
,
parent_id
=
self
.
corpus
.
id
,
parent_id
=
self
.
corpus
.
id
,
hyper
data
=
dict
(
item
))
data
=
dict
(
item
))
self
.
corpus
.
related
.
append
(
doc
)
self
.
corpus
.
related
.
append
(
doc
)
...
...
gargantext/models/nodes.py
View file @
e6a6f379
...
@@ -19,8 +19,8 @@ class NodeType(TypeDecorator):
...
@@ -19,8 +19,8 @@ class NodeType(TypeDecorator):
Values are detailed in `gargantext.constants.NODETYPES`.
Values are detailed in `gargantext.constants.NODETYPES`.
"""
"""
impl
=
Integer
impl
=
Integer
def
process_bind_param
(
self
,
type
name
,
dialect
):
def
process_bind_param
(
self
,
type
,
dialect
):
return
NODETYPES
.
index
(
type
name
)
return
NODETYPES
.
index
(
type
)
def
process_result_value
(
self
,
typeindex
,
dialect
):
def
process_result_value
(
self
,
typeindex
,
dialect
):
return
NODETYPES
[
typeindex
]
return
NODETYPES
[
typeindex
]
...
@@ -35,36 +35,36 @@ class Node(ValidatorMixin, Base):
...
@@ -35,36 +35,36 @@ class Node(ValidatorMixin, Base):
The possible types are defined in `gargantext.constants.NODETYPES`.
The possible types are defined in `gargantext.constants.NODETYPES`.
Thanks to __new__ overriding and SQLAlchemy's polymorphism, every Node
Thanks to __new__ overriding and SQLAlchemy's polymorphism, every Node
instance is automagically casted to its sub-class, assuming a type
name
instance is automagically casted to its sub-class, assuming a type
is specified.
is specified.
>>> Node(name='without-type')
>>> Node(name='without-type')
<Node(id=None, type
name=None, user_id=None, parent_id=None, name='without-type', date=None
)>
<Node(id=None, type
=None, user_id=None, parent_id=None, name='without-type'
)>
>>> Node(type
name
='CORPUS')
>>> Node(type='CORPUS')
<CorpusNode(id=None, type
name='CORPUS', user_id=None, parent_id=None, name=None, dat
e=None)>
<CorpusNode(id=None, type
='CORPUS', user_id=None, parent_id=None, nam
e=None)>
>>> from gargantext.core.db import session
>>> from gargantext.core.db import session
>>> session.query(Node).filter_by(type
name
='USER').first() # doctest: +ELLIPSIS
>>> session.query(Node).filter_by(type='USER').first() # doctest: +ELLIPSIS
<UserNode(...)>
<UserNode(...)>
But beware, there are some pitfalls with bulk queries. In this case type
name
But beware, there are some pitfalls with bulk queries. In this case type
MUST be specified manually.
MUST be specified manually.
>>> session.query(UserNode).delete() # doctest: +SKIP
>>> session.query(UserNode).delete() # doctest: +SKIP
# Wrong: all nodes are deleted!
# Wrong: all nodes are deleted!
>>> session.query(UserNode).filter_by(type
name
='USER').delete() # doctest: +SKIP
>>> session.query(UserNode).filter_by(type='USER').delete() # doctest: +SKIP
# Right: only user nodes are deleted.
# Right: only user nodes are deleted.
"""
"""
NAME_MAXLEN
=
255
NAME_MAXLEN
=
255
__tablename__
=
'nodes'
__tablename__
=
'nodes'
__table_args__
=
(
__table_args__
=
(
Index
(
'nodes_user_id_type
name_parent_id_idx'
,
'user_id'
,
'typenam
e'
,
'parent_id'
),
Index
(
'nodes_user_id_type
_parent_id_idx'
,
'user_id'
,
'typ
e'
,
'parent_id'
),
Index
(
'nodes_
hyperdata_idx'
,
'hyper
data'
,
postgresql_using
=
'gin'
))
Index
(
'nodes_
data_idx'
,
'
data'
,
postgresql_using
=
'gin'
))
id
=
Column
(
Integer
,
primary_key
=
True
)
id
=
Column
(
Integer
,
primary_key
=
True
)
type
name
=
Column
(
NodeType
,
index
=
True
,
nullable
=
False
)
type
=
Column
(
NodeType
,
index
=
True
,
nullable
=
False
)
__mapper_args__
=
{
'polymorphic_on'
:
type
name
}
__mapper_args__
=
{
'polymorphic_on'
:
type
}
# foreign keys
# foreign keys
user_id
=
Column
(
Integer
,
ForeignKey
(
User
.
id
,
ondelete
=
'CASCADE'
),
user_id
=
Column
(
Integer
,
ForeignKey
(
User
.
id
,
ondelete
=
'CASCADE'
),
...
@@ -76,10 +76,10 @@ class Node(ValidatorMixin, Base):
...
@@ -76,10 +76,10 @@ class Node(ValidatorMixin, Base):
parent
=
relationship
(
'Node'
,
remote_side
=
[
id
])
parent
=
relationship
(
'Node'
,
remote_side
=
[
id
])
name
=
Column
(
String
(
NAME_MAXLEN
),
nullable
=
False
,
server_default
=
''
)
name
=
Column
(
String
(
NAME_MAXLEN
),
nullable
=
False
,
server_default
=
''
)
date
=
Column
(
DateTime
(
timezone
=
True
),
nullable
=
False
,
created
=
Column
(
DateTime
(
timezone
=
True
),
nullable
=
False
,
server_default
=
text
(
'CURRENT_TIMESTAMP'
))
server_default
=
text
(
'CURRENT_TIMESTAMP'
))
hyper
data
=
Column
(
JSONB
,
default
=
dict
,
nullable
=
False
,
data
=
Column
(
JSONB
,
default
=
dict
,
nullable
=
False
,
server_default
=
text
(
"'{}'::jsonb"
))
server_default
=
text
(
"'{}'::jsonb"
))
# Create a TSVECTOR column to use fulltext search feature of PostgreSQL.
# Create a TSVECTOR column to use fulltext search feature of PostgreSQL.
...
@@ -96,51 +96,51 @@ class Node(ValidatorMixin, Base):
...
@@ -96,51 +96,51 @@ class Node(ValidatorMixin, Base):
secondaryjoin
=
'NodeNode.node2_id==Node.id'
)
secondaryjoin
=
'NodeNode.node2_id==Node.id'
)
def
__new__
(
cls
,
*
args
,
**
kwargs
):
def
__new__
(
cls
,
*
args
,
**
kwargs
):
if
cls
is
Node
and
kwargs
.
get
(
'type
name
'
):
if
cls
is
Node
and
kwargs
.
get
(
'type'
):
type
name
=
kwargs
.
pop
(
'typenam
e'
)
type
=
kwargs
.
pop
(
'typ
e'
)
return
_NODE_MODELS
[
type
name
](
*
args
,
**
kwargs
)
return
_NODE_MODELS
[
type
](
*
args
,
**
kwargs
)
return
super
(
Node
,
cls
)
.
__new__
(
cls
)
return
super
(
Node
,
cls
)
.
__new__
(
cls
)
def
__init__
(
self
,
**
kwargs
):
def
__init__
(
self
,
**
kwargs
):
"""Node's constructor.
"""Node's constructor.
Initialize the `
hyper
data` as a dictionary if no value was given.
Initialize the `data` as a dictionary if no value was given.
"""
"""
if
'
hyper
data'
not
in
kwargs
:
if
'data'
not
in
kwargs
:
kwargs
[
'
hyperdata'
]
=
kwargs
.
get
(
'hyper
data'
,
MutableDict
())
kwargs
[
'
data'
]
=
kwargs
.
get
(
'
data'
,
MutableDict
())
Base
.
__init__
(
self
,
**
kwargs
)
Base
.
__init__
(
self
,
**
kwargs
)
def
__getitem__
(
self
,
key
):
def
__getitem__
(
self
,
key
):
"""Allow direct access to
hyper
data via the bracket operator."""
"""Allow direct access to data via the bracket operator."""
return
self
.
hyper
data
[
key
]
return
self
.
data
[
key
]
def
__setitem__
(
self
,
key
,
value
):
def
__setitem__
(
self
,
key
,
value
):
"""Allow direct access to
hyper
data via the bracket operator."""
"""Allow direct access to data via the bracket operator."""
old_value
=
self
.
hyper
data
.
get
(
key
,
_UNDEFINED
)
old_value
=
self
.
data
.
get
(
key
,
_UNDEFINED
)
if
old_value
!=
value
:
if
old_value
!=
value
:
self
.
hyper
data
[
key
]
=
value
self
.
data
[
key
]
=
value
flag_modified
(
self
,
'
hyper
data'
)
flag_modified
(
self
,
'data'
)
def
__delitem
(
self
,
key
):
def
__delitem
(
self
,
key
):
"""Allow direct access to
hyper
data via the bracket operator."""
"""Allow direct access to data via the bracket operator."""
if
key
in
self
.
hyper
data
:
if
key
in
self
.
data
:
del
self
.
hyper
data
[
key
]
del
self
.
data
[
key
]
flag_modified
(
self
,
'
hyper
data'
)
flag_modified
(
self
,
'data'
)
def
__repr__
(
self
):
def
__repr__
(
self
):
return
'<{0.__class__.__name__}(id={0.id}, type
name={0.typenam
e!r}, '
\
return
'<{0.__class__.__name__}(id={0.id}, type
={0.typ
e!r}, '
\
'user_id={0.user_id}, parent_id={0.parent_id}, '
\
'user_id={0.user_id}, parent_id={0.parent_id}, '
\
'name={0.name!r},
date={0.date
})>'
.
format
(
self
)
'name={0.name!r},
created={0.created
})>'
.
format
(
self
)
@
validates
(
'name'
)
@
validates
(
'name'
)
def
validate_name
(
self
,
key
,
value
):
def
validate_name
(
self
,
key
,
value
):
return
self
.
enforce_length
(
key
,
value
)
return
self
.
enforce_length
(
key
,
value
)
def
save_
hyper
data
(
self
):
def
save_data
(
self
):
"""This is a necessary, yet ugly trick.
"""This is a necessary, yet ugly trick.
Indeed, PostgreSQL does not yet manage incremental updates (see
Indeed, PostgreSQL does not yet manage incremental updates (see
https://bashelton.com/2014/03/updating-postgresql-json-fields-via-sqlalchemy/)
https://bashelton.com/2014/03/updating-postgresql-json-fields-via-sqlalchemy/)
"""
"""
flag_modified
(
self
,
'
hyper
data'
)
flag_modified
(
self
,
'data'
)
class
DocumentNode
(
Node
):
class
DocumentNode
(
Node
):
...
@@ -163,7 +163,7 @@ class CorpusNode(Node):
...
@@ -163,7 +163,7 @@ class CorpusNode(Node):
'type': 1,
'type': 1,
'url': None}]
'url': None}]
"""
"""
if
'resources'
not
in
self
.
hyper
data
:
if
'resources'
not
in
self
.
data
:
self
[
'resources'
]
=
MutableList
()
self
[
'resources'
]
=
MutableList
()
return
self
[
'resources'
]
return
self
[
'resources'
]
...
@@ -206,7 +206,7 @@ class NodeNode(Base):
...
@@ -206,7 +206,7 @@ class NodeNode(Base):
#
#
# We could manually write a class for every NodeType, or find a way to
# We could manually write a class for every NodeType, or find a way to
# tell SQLAlchemy that it should stick to instantiate a Node when a
# tell SQLAlchemy that it should stick to instantiate a Node when a
# class is not defined for the wanted type
name
.
# class is not defined for the wanted type.
_ALREADY_IMPLEMENTED_NODE_TYPES
=
\
_ALREADY_IMPLEMENTED_NODE_TYPES
=
\
set
(
cls
.
__mapper_args__
.
get
(
'polymorphic_identity'
)
for
cls
in
Node
.
__subclasses__
())
set
(
cls
.
__mapper_args__
.
get
(
'polymorphic_identity'
)
for
cls
in
Node
.
__subclasses__
())
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment