Commit 69ade62b authored by delanoe's avatar delanoe

Merge branch 'refactoring' into refactoring-alex

parents 4d4384a4 6182a4fa
# WARNING: to ensure consistency and retrocompatibility, lists should keep the # WARNING: to ensure consistency and retrocompatibility, lists should keep the
# initial order (ie., new elements should be appended at the end of the lists) # initial order (ie., new elements should be appended at the end of the lists)
from gargantext.util.lists import *
# Maps a node typename to the class used to represent that node as a list or
# matrix of ngrams; `Node.as_list()` looks the typename up here.
# (The classes presumably come from `gargantext.util.lists` -- TODO confirm.)
LISTTYPES = dict(
    DOCUMENT=WeightedList,
    SYNONYMS=Translations,
    MIAMLIST=UnweightedList,
    STOPLIST=UnweightedList,
    COOCCURRENCES=WeightedMatrix,
)
NODETYPES = [ NODETYPES = [
None, None,
# documents hierarchy
'USER', 'USER',
'PROJECT', 'PROJECT',
'CORPUS', 'CORPUS',
'DOCUMENT', 'DOCUMENT',
# lists
'SYNONYMS',
'MIAMLIST',
'STOPLIST',
'COOCCURRENCES',
] ]
......
from gargantext.util.db import * from gargantext.util.db import *
from gargantext.util.files import upload
from gargantext.constants import *
from .nodes import Node from .nodes import Node
__all__ = ['Ngram', 'NodeNgram'] __all__ = ['Ngram', 'NodeNgram', 'NodeNgramNgram']
class Ngram(Base): class Ngram(Base):
...@@ -19,3 +17,11 @@ class NodeNgram(Base): ...@@ -19,3 +17,11 @@ class NodeNgram(Base):
node_id = Column(Integer, ForeignKey(Node.id, ondelete='CASCADE'), primary_key=True) node_id = Column(Integer, ForeignKey(Node.id, ondelete='CASCADE'), primary_key=True)
ngram_id = Column(Integer, ForeignKey(Ngram.id, ondelete='CASCADE'), primary_key=True) ngram_id = Column(Integer, ForeignKey(Ngram.id, ondelete='CASCADE'), primary_key=True)
weight = Column(Float) weight = Column(Float)
class NodeNgramNgram(Base):
    """Weighted association between two ngrams, attached to a node.

    The primary key is the (node_id, ngram1_id, ngram2_id) triple. Each of
    these columns is a foreign key declared with ``ON DELETE CASCADE``.
    """
    __tablename__ = 'nodes_ngrams_ngrams'

    # node the pair of ngrams is attached to
    node_id = Column(
        Integer,
        ForeignKey(Node.id, ondelete='CASCADE'),
        primary_key=True,
    )
    # first ngram of the pair
    ngram1_id = Column(
        Integer,
        ForeignKey(Ngram.id, ondelete='CASCADE'),
        primary_key=True,
    )
    # second ngram of the pair
    ngram2_id = Column(
        Integer,
        ForeignKey(Ngram.id, ondelete='CASCADE'),
        primary_key=True,
    )
    # weight of the association between the two ngrams for this node
    weight = Column(Float)
...@@ -12,7 +12,7 @@ __all__ = ['Node'] ...@@ -12,7 +12,7 @@ __all__ = ['Node']
class NodeType(TypeDecorator): class NodeType(TypeDecorator):
"""Define a new type of column to describe a Node's type. """Define a new type of column to describe a Node's type.
This column type is implemented as an SQL integer. Internally, this column type is implemented as an SQL integer.
Values are detailed in `gargantext.constants.NODETYPES`. Values are detailed in `gargantext.constants.NODETYPES`.
""" """
impl = Integer impl = Integer
...@@ -22,6 +22,11 @@ class NodeType(TypeDecorator): ...@@ -22,6 +22,11 @@ class NodeType(TypeDecorator):
return NODETYPES[typeindex] return NODETYPES[typeindex]
class Node(Base): class Node(Base):
"""This model can fit many purposes.
It intends to provide a generic model, allowing hierarchical structure
and NoSQL-like data structuring.
The possible types are defined in `gargantext.constants.NODETYPES`.
"""
__tablename__ = 'nodes' __tablename__ = 'nodes'
id = Column(Integer, primary_key=True) id = Column(Integer, primary_key=True)
typename = Column(NodeType, index=True) typename = Column(NodeType, index=True)
...@@ -35,16 +40,51 @@ class Node(Base): ...@@ -35,16 +40,51 @@ class Node(Base):
hyperdata = Column(JSONB, default=dict) hyperdata = Column(JSONB, default=dict)
def __init__(self, **kwargs): def __init__(self, **kwargs):
"""Node's constructor.
Initialize the `hyperdata` as a dictionary if no value was given.
"""
if 'hyperdata' not in kwargs: if 'hyperdata' not in kwargs:
kwargs['hyperdata'] = kwargs.get('hyperdata', {}) kwargs['hyperdata'] = kwargs.get('hyperdata', MutableDict())
Base.__init__(self, **kwargs) Base.__init__(self, **kwargs)
def __getitem__(self, key): def __getitem__(self, key):
"""Allow direct access to hyperdata via the bracket operator.
"""
return self.hyperdata[key] return self.hyperdata[key]
def __setitem__(self, key, value): def __setitem__(self, key, value):
"""Allow direct access to hyperdata via the bracket operator.
"""
self.hyperdata[key] = value self.hyperdata[key] = value
@property
def ngrams(self):
    """Pseudo-attribute giving access to the ngrams attached to this node.

    Returns a query (not a list, so it can be filtered further). Each row
    it yields is a pair: the ngram's weight for this node, and the ngram.
    """
    # imported locally, as in the rest of this package's models
    from . import NodeNgram, Ngram
    weighted_ngrams = session.query(NodeNgram.weight, Ngram)
    weighted_ngrams = weighted_ngrams.select_from(NodeNgram).join(Ngram)
    # keep only the associations belonging to the current node
    return weighted_ngrams.filter(NodeNgram.node_id == self.id)
def as_list(self):
    """Retrieve the current node as a list/matrix of ngrams identifiers.

    The class to instantiate is looked up in
    `gargantext.constants.LISTTYPES` using the node's typename, then called
    with the node's id. See `gargantext.util.lists` for more info.

    Raises `ValueError` when the node's typename has no entry in
    `LISTTYPES`.
    """
    # Guard the lookup only: a KeyError raised while the list itself is
    # being built is a different failure and must not be reported as an
    # unsupported typename.
    try:
        list_class = LISTTYPES[self.typename]
    except KeyError:
        raise ValueError('This node\'s typename is not convertible to a list: %s (accepted values: %s)' % (
            self.typename,
            ', '.join(LISTTYPES)
        ))
    return list_class(self.id)
def save_hyperdata(self): def save_hyperdata(self):
"""This is a necessary, yet ugly trick. """This is a necessary, yet ugly trick.
Indeed, PostgreSQL does not yet manage incremental updates (see Indeed, PostgreSQL does not yet manage incremental updates (see
...@@ -80,17 +120,27 @@ class Node(Base): ...@@ -80,17 +120,27 @@ class Node(Base):
) )
def resources(self): def resources(self):
"""Return all the resources attached to a given node.
Mainly used for corpora.
"""
if 'resources' not in self.hyperdata: if 'resources' not in self.hyperdata:
self['resources'] = MutableList() self['resources'] = MutableList()
return self['resources'] return self['resources']
def add_resource(self, type, path=None, url=None): def add_resource(self, type, path=None, url=None):
"""Attach a resource to a given node.
Mainly used for corpora.
"""
self.resources().append(MutableDict( self.resources().append(MutableDict(
{'type': type, 'path':path, 'url':url, 'extracted': False} {'type': type, 'path':path, 'url':url, 'extracted': False}
)) ))
def status(self, action=None, progress=0, complete=False, error=None): def status(self, action=None, progress=0, complete=False, error=None):
"""Get the status of the given action """Get or update the status of the given action.
If no action is given, the status of the first incomplete or last item
is returned.
The `complete` parameter should be a boolean.
The `error` parameter should be an exception.
""" """
date = datetime.now() date = datetime.now()
# if the hyperdata do not have data about status # if the hyperdata do not have data about status
......
...@@ -9,7 +9,7 @@ class User(Base): ...@@ -9,7 +9,7 @@ class User(Base):
# Do not change! # Do not change!
# The properties below are a reflection of Django's auth module's models. # The properties below are a reflection of Django's auth module's models.
__tablename__ = 'auth_user' __tablename__ = models.User._meta.db_table
id = Column(Integer, primary_key=True) id = Column(Integer, primary_key=True)
password = Column(String(128)) password = Column(String(128))
last_login = DateTime(timezone=False) last_login = DateTime(timezone=False)
...@@ -34,16 +34,14 @@ class User(Base): ...@@ -34,16 +34,14 @@ class User(Base):
def nodes(self, typename=None): def nodes(self, typename=None):
"""get all nodes belonging to the user""" """get all nodes belonging to the user"""
# ↓ this below is a workaround because of Python's lame import system
from .nodes import Node from .nodes import Node
query = (session query = (session
.query(Node) .query(Node)
.filter(Node.user_id == self.id) .filter(Node.user_id == self.id)
.order_by(Node.date)
) )
if typename is not None: if typename is not None:
query = query.filter(Node.typename == typename) query = query.filter(Node.typename == typename)
return query.all() return query
def contacts_nodes(self, typename=None): def contacts_nodes(self, typename=None):
for contact in self.contacts(): for contact in self.contacts():
......
import json import json
import types
import datetime import datetime
import traceback import traceback
import inspect
__all__ = ['json_encoder', 'json_dumps']
class JSONEncoder(json.JSONEncoder): class JSONEncoder(json.JSONEncoder):
def default(self, obj):
    """Convert objects that the standard JSON encoder cannot serialize.

    Handled cases, in order:
    - model instances (`Base`): a dict of their attributes whose name does
      not start with an underscore;
    - `datetime.datetime`: the ISO representation cut to seconds, with a
      trailing 'Z';
    - sets and tuples: a list;
    - exceptions: the list of stripped lines of their formatted traceback;
    - any other iterable that is not a dict: a list.
    Anything else is delegated to the parent class, which raises
    `TypeError`.
    """
    # imported at call time rather than at module level
    # (presumably to avoid a circular import -- TODO confirm)
    from gargantext.util.db import Base
    if isinstance(obj, Base):
        return {
            key: value
            for key, value in obj.__dict__.items()
            if not key.startswith('_')
        }
    elif isinstance(obj, datetime.datetime):
        return obj.isoformat()[:19] + 'Z'
    elif isinstance(obj, (set, tuple)):
        return list(obj)
    elif isinstance(obj, Exception):
        tbe = traceback.TracebackException.from_exception(obj)
        return [line.strip() for line in tbe.format()]
    elif hasattr(obj, '__iter__') and not isinstance(obj, dict):
        return list(obj)
    else:
        # zero-argument super(): `super(self.__class__, self)` would
        # recurse forever as soon as this encoder is subclassed
        return super().default(obj)
......
This diff is collapsed.
...@@ -7,71 +7,80 @@ from gargantext.constants import * ...@@ -7,71 +7,80 @@ from gargantext.constants import *
from gargantext.util.validation import validate from gargantext.util.validation import validate
class NodeListResource(APIView): _node_available_fields = ['id', 'parent_id', 'name', 'typename', 'hyperdata', 'ngrams']
_node_default_fields = ['id', 'parent_id', 'name', 'typename']
_node_available_types = NODETYPES
_fields = ['id', 'parent_id', 'name', 'typename', 'hyperdata']
_types = NODETYPES
def _query_nodes(request, node_id=None):
    """Build and run the node query described by the request's parameters.

    The request parameters are validated first (pagination, fields to
    return, optional `types` and `parent_id` filters). The query starts
    from `user.nodes()` for the user found in the cache under the request's
    username, and is optionally restricted to `node_id`.

    Returns a `(parameters, rows, count)` tuple where:
    - `parameters` are the validated parameters;
    - `rows` is the requested page of results, ordered by the
      `publication_date` found in the hyperdata, then by id;
    - `count` is the number of matching nodes before pagination.

    A `pagination_limit` of -1 means "no limit".
    """
    user = cache.User[request.user.username]
    # parameters validation
    parameters = get_parameters(request)
    parameters = validate(parameters, {'type': dict, 'items': {
        'pagination_limit': {'type': int, 'default': 10},
        'pagination_offset': {'type': int, 'default': 0},
        'fields': {'type': list, 'default': _node_default_fields, 'items': {
            'type': str, 'range': _node_available_fields,
        }},
        # optional filtering parameters
        'types': {'type': list, 'required': False, 'items': {
            'type': str, 'range': _node_available_types,
        }},
        'parent_id': {'type': int, 'required': False},
    }})
    # start the query
    query = user.nodes()
    # filter by id
    if node_id is not None:
        query = query.filter(Node.id == node_id)
    # filter by type
    if 'types' in parameters:
        query = query.filter(Node.typename.in_(parameters['types']))
    # filter by parent
    if 'parent_id' in parameters:
        query = query.filter(Node.parent_id == parameters['parent_id'])
    # count (must happen before pagination to reflect the whole result set)
    count = query.count()
    # order
    query = query.order_by(Node.hyperdata['publication_date'], Node.id)
    # paginate the query
    offset = parameters['pagination_offset']
    limit = parameters['pagination_limit']
    if limit == -1:
        query = query[offset:]
    else:
        # the slice end is an index, not a length: it has to be shifted by
        # the offset, otherwise any non-zero offset shrinks or empties the page
        query = query[offset : offset + limit]
    # return the result!
    return parameters, query, count
class NodeListResource(APIView):
def get(self, request): def get(self, request):
"""Displays the list of nodes corresponding to the query. """Displays the list of nodes corresponding to the query.
""" """
parameters, query, count = self._query(request) parameters, query, count = _query_nodes(request)
return JsonHttpResponse({ return JsonHttpResponse({
'parameters': parameters, 'parameters': parameters,
'count': count, 'count': count,
'records': [dict(zip(parameters['fields'], node)) for node in query] 'records': [
{field: getattr(node, field) for field in parameters['fields']}
for node in query
]
}) })
def post(self, request): def post(self, request):
pass """Create a new node.
NOT IMPLEMENTED
"""
def delete(self, request): def delete(self, request):
"""Removes the list of nodes corresponding to the query. """Removes the list of nodes corresponding to the query.
WARNING! THIS IS TOTALLY UNTESTED!!!!! WARNING! THIS IS TOTALLY UNTESTED!!!!!
""" """
parameters, query, count = self._query(request) parameters, query, count = _query_nodes(request)
for node in query: query.delete()
node.delete()
session.commit() session.commit()
return JsonHttpResponse({ return JsonHttpResponse({
'parameters': parameters, 'parameters': parameters,
...@@ -81,27 +90,20 @@ class NodeListResource(APIView): ...@@ -81,27 +90,20 @@ class NodeListResource(APIView):
class NodeResource(APIView): class NodeResource(APIView):
def _query(self, request, node_id):
user = cache.User[request.user.username]
node = session.query(Node).filter(Node.id == node_id).first()
if node is None:
raise Http404()
if not user.owns(node):
raise HttpResponseForbidden()
return user, node
def get(self, request, node_id): def get(self, request, node_id):
user, node = self._query(request, node_id) parameters, query, count = _query_nodes(request, node_id)
if not len(query):
raise Http404()
node = query[0]
return JsonHttpResponse({ return JsonHttpResponse({
'id': node.id, field: getattr(node, field) for field in parameters['fields']
'parent_id': node.parent_id,
'name': node.name,
'hyperdata': node.hyperdata,
}) })
def delete(self, request, node_id): def delete(self, request, node_id):
parameters, query, count = _query_nodes(request, node_id)
if not len(query):
raise Http404()
from sqlalchemy import delete from sqlalchemy import delete
user, node = self._query(request, node_id)
result = session.execute( result = session.execute(
delete(Node).where(Node.id == node_id) delete(Node).where(Node.id == node_id)
) )
......
...@@ -12,19 +12,22 @@ def _get_user_project_corpus(request, project_id, corpus_id): ...@@ -12,19 +12,22 @@ def _get_user_project_corpus(request, project_id, corpus_id):
"""Helper method to get a corpus, knowing the project's and corpus' ID. """Helper method to get a corpus, knowing the project's and corpus' ID.
Raises HTTP errors when parameters (user, IDs...) are invalid. Raises HTTP errors when parameters (user, IDs...) are invalid.
""" """
user = cache.User[request.user.username] user = cache.User[request.user.id]
project = session.query(Node).filter(Node.id == project_id).first() project = session.query(Node).filter(Node.id == project_id).first()
corpus = session.query(Node).filter(Node.id == corpus_id).filter(Node.parent_id == project_id).first() corpus = session.query(Node).filter(Node.id == corpus_id).filter(Node.parent_id == project_id).first()
if corpus is None: if corpus is None:
raise Http404() raise Http404()
if not user.owns(corpus): if not user.owns(corpus):
raise HttpResponseForbidden() print("CORPORA: invalid user %i (User doesn't own this corpus)" % user.id)
return user, project, corpus return (False, user, project, corpus)
return (True, user, project, corpus)
@requires_auth @requires_auth
def corpus(request, project_id, corpus_id): def corpus(request, project_id, corpus_id):
user, project, corpus = _get_user_project_corpus(request, project_id, corpus_id) authorized, user, project, corpus = _get_user_project_corpus(request, project_id, corpus_id)
if not authorized:
return HttpResponseForbidden()
# response! # response!
return render( return render(
template_name = 'pages/corpora/corpus.html', template_name = 'pages/corpora/corpus.html',
......
...@@ -48,7 +48,7 @@ def overview(request): ...@@ -48,7 +48,7 @@ def overview(request):
'debug': settings.DEBUG, 'debug': settings.DEBUG,
'date': datetime.now(), 'date': datetime.now(),
# projects owned by the user # projects owned by the user
'number': len(user_projects), 'number': user_projects.count(),
'projects': user_projects, 'projects': user_projects,
# projects owned by the user's contacts # projects owned by the user's contacts
'common_users': (contact for contact, projects in contacts_projects), 'common_users': (contact for contact, projects in contacts_projects),
......
...@@ -20,7 +20,7 @@ urlpatterns = [ ...@@ -20,7 +20,7 @@ urlpatterns = [
url(r'^projects/(\d+)/?$', projects.project), url(r'^projects/(\d+)/?$', projects.project),
# corpora # corpora
url(r'^projects/(\d+)/corpora/(\d+)?$', corpora.corpus), url(r'^projects/(\d+)/corpora/(\d+)/?$', corpora.corpus),
url(r'^projects/(\d+)/corpora/(\d+)/chart?$', corpora.chart), url(r'^projects/(\d+)/corpora/(\d+)/chart/?$', corpora.chart),
] ]
amqp==1.4.9 amqp==1.4.9
anyjson==0.3.3 anyjson==0.3.3
billiard==3.3.0.22 billiard==3.3.0.22 # multiprocessing fork
celery==3.1.20 celery==3.1.20
chardet==2.3.0
dateparser==0.3.2 dateparser==0.3.2
Django==1.9.2 Django==1.9.2
django-celery==3.1.17 django-celery==3.1.17
...@@ -10,18 +11,18 @@ django-pgjsonb==0.0.16 ...@@ -10,18 +11,18 @@ django-pgjsonb==0.0.16
djangorestframework==3.3.2 djangorestframework==3.3.2
html5lib==0.9999999 html5lib==0.9999999
jdatetime==1.7.2 jdatetime==1.7.2
kombu==3.0.33 kombu==3.0.33 # messaging
lxml==3.5.0 lxml==3.5.0
nltk==3.1 nltk==3.1
numpy==1.10.4 numpy==1.10.4
psycopg2==2.6.1 psycopg2==2.6.1
pycountry==1.20 pycountry==1.20
python-dateutil==2.4.2 python-dateutil==2.4.2
pytz==2015.7 pytz==2015.7 # timezones
PyYAML==3.11 PyYAML==3.11
RandomWords==0.1.12 RandomWords==0.1.12
six==1.10.0 six==1.10.0
SQLAlchemy==1.1.0b1.dev0 SQLAlchemy==1.1.0b1.dev0
ujson==1.35 ujson==1.35
umalqurra==0.2 umalqurra==0.2 # arabic calendars (?? why use ??)
wheel==0.29.0 wheel==0.29.0
...@@ -532,7 +532,9 @@ $("#corpusdisplayer").hide() ...@@ -532,7 +532,9 @@ $("#corpusdisplayer").hide()
// FIRST portion of code to be EXECUTED: // FIRST portion of code to be EXECUTED:
// (3) Get records and hyperdata for paginator // (3) Get records and hyperdata for paginator
$.ajax({ $.ajax({
url: '/api/nodes?type[]=DOCUMENT&pagination_limit=-1&parent_id=' + corpus_id, url: '/api/nodes?types[]=DOCUMENT&pagination_limit=-1&parent_id='
+ corpus_id
+'&fields[]=parent_id&fields[]=id&fields[]=name&fields[]=typename&fields[]=hyperdata',
success: function(data){ success: function(data){
$("#content_loader").remove(); $("#content_loader").remove();
$.each(data.records, function(i, record){ $.each(data.records, function(i, record){
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment