Commit 69ade62b authored by delanoe's avatar delanoe

Merge branch 'refactoring' into refactoring-alex

parents 4d4384a4 6182a4fa
# WARNING: to ensure consistency and backward compatibility, lists should keep
# their initial order (i.e., new elements should be appended at the end of the lists)
from gargantext.util.lists import *
# Maps a node typename to the class used to view such a node as a list or
# matrix of ngrams. The classes are presumably provided by the star import of
# `gargantext.util.lists` -- confirm there.
LISTTYPES = {
    'DOCUMENT': WeightedList,
    'SYNONYMS': Translations,
    'MIAMLIST': UnweightedList,
    'STOPLIST': UnweightedList,
    'COOCCURRENCES': WeightedMatrix,
}
# Known node typenames. The position of a name in this list matters, which is
# why new entries must only ever be appended at the end.
NODETYPES = [
    None,
    # documents hierarchy
    'USER',
    'PROJECT',
    'CORPUS',
    'DOCUMENT',
    # lists
    'SYNONYMS',
    'MIAMLIST',
    'STOPLIST',
    'COOCCURRENCES',
]
......
from gargantext.util.db import *
from gargantext.util.files import upload
from gargantext.constants import *
from .nodes import Node
__all__ = ['Ngram', 'NodeNgram']
__all__ = ['Ngram', 'NodeNgram', 'NodeNgramNgram']
class Ngram(Base):
......@@ -19,3 +17,11 @@ class NodeNgram(Base):
node_id = Column(Integer, ForeignKey(Node.id, ondelete='CASCADE'), primary_key=True)
ngram_id = Column(Integer, ForeignKey(Ngram.id, ondelete='CASCADE'), primary_key=True)
weight = Column(Float)
class NodeNgramNgram(Base):
    """Weighted association between a node and a pair of ngrams.

    The primary key is the (`node_id`, `ngram1_id`, `ngram2_id`) triple.
    """
    __tablename__ = 'nodes_ngrams_ngrams'
    # Every foreign key is declared with ON DELETE CASCADE.
    node_id = Column(Integer, ForeignKey(Node.id, ondelete='CASCADE'), primary_key=True)
    ngram1_id = Column(Integer, ForeignKey(Ngram.id, ondelete='CASCADE'), primary_key=True)
    ngram2_id = Column(Integer, ForeignKey(Ngram.id, ondelete='CASCADE'), primary_key=True)
    # Weight attached to the (ngram1, ngram2) pair for this node.
    weight = Column(Float)
......@@ -12,7 +12,7 @@ __all__ = ['Node']
class NodeType(TypeDecorator):
"""Define a new type of column to describe a Node's type.
This column type is implemented as an SQL integer.
Internally, this column type is implemented as an SQL integer.
Values are detailed in `gargantext.constants.NODETYPES`.
"""
impl = Integer
......@@ -22,6 +22,11 @@ class NodeType(TypeDecorator):
return NODETYPES[typeindex]
class Node(Base):
"""This model can fit many purposes.
It intends to provide a generic model, allowing hierarchical structure
and NoSQL-like data structuring.
The possible types are defined in `gargantext.constants.NODETYPES`.
"""
__tablename__ = 'nodes'
id = Column(Integer, primary_key=True)
typename = Column(NodeType, index=True)
......@@ -35,16 +40,51 @@ class Node(Base):
hyperdata = Column(JSONB, default=dict)
def __init__(self, **kwargs):
    """Node's constructor.

    Initialize the `hyperdata` as an empty `MutableDict` if no value was
    given; every other keyword argument is forwarded to `Base.__init__`.
    """
    # A new MutableDict is created per instance. Assigning a plain `{}`
    # first (as the superseded line did) made the MutableDict default dead
    # code, so the node ended up with an ordinary dict.
    if 'hyperdata' not in kwargs:
        kwargs['hyperdata'] = MutableDict()
    Base.__init__(self, **kwargs)
def __getitem__(self, key):
    """Read `key` from the node's hyperdata, enabling `node[key]`.

    Any lookup error raised by the hyperdata container is propagated.
    """
    hyperdata = self.hyperdata
    return hyperdata[key]
def __setitem__(self, key, value):
    """Store `value` under `key` in the node's hyperdata, enabling
    `node[key] = value`.
    """
    hyperdata = self.hyperdata
    hyperdata[key] = value
@property
def ngrams(self):
    """Query over the ngrams attached to this node.

    The returned query is not executed here, so callers may filter it
    further. Each row it yields pairs the `NodeNgram` weight with the
    corresponding `Ngram`.
    """
    # Imported at call time, as in the original implementation.
    from . import NodeNgram, Ngram
    weighted_ngrams = session.query(NodeNgram.weight, Ngram)
    weighted_ngrams = weighted_ngrams.select_from(NodeNgram).join(Ngram)
    return weighted_ngrams.filter(NodeNgram.node_id == self.id)
def as_list(self):
    """Retrieve the current node as a list/matrix of ngrams identifiers.

    The class used is looked up in `LISTTYPES` from the node's typename and
    instantiated with the node's id.
    See `gargantext.util.lists` and `gargantext.constants.LISTTYPES`
    for more info.

    Raises `ValueError` if the node's typename has no entry in `LISTTYPES`.
    """
    # Only the lookup is guarded: a KeyError raised while building the list
    # itself must not be reported as an unsupported typename.
    try:
        list_class = LISTTYPES[self.typename]
    except KeyError:
        raise ValueError('This node\'s typename is not convertible to a list: %s (accepted values: %s)' % (
            self.typename,
            ', '.join(LISTTYPES.keys())
        ))
    return list_class(self.id)
def save_hyperdata(self):
"""This is a necessary, yet ugly trick.
Indeed, PostgreSQL does not yet manage incremental updates (see
......@@ -80,17 +120,27 @@ class Node(Base):
)
def resources(self):
    """Return the resources attached to this node, creating the list on
    first access.

    Mainly used for corpora.
    """
    if 'resources' in self.hyperdata:
        return self['resources']
    # First access: store an empty list, then hand back the stored value.
    self['resources'] = MutableList()
    return self['resources']
def add_resource(self, type, path=None, url=None):
    """Append a new resource description to this node's resources.

    The resource records its `type`, `path` and `url`, and starts with
    `extracted` set to False.
    Mainly used for corpora.
    """
    attached = self.resources()
    description = MutableDict(
        dict(type=type, path=path, url=url, extracted=False)
    )
    attached.append(description)
def status(self, action=None, progress=0, complete=False, error=None):
"""Get the status of the given action
"""Get or update the status of the given action.
If no action is given, the status of the first uncomplete or last item
is returned.
The `complete` parameter should be a boolean.
The `error` parameter should be an exception.
"""
date = datetime.now()
# if the hyperdata do not have data about status
......
......@@ -9,7 +9,7 @@ class User(Base):
# Do not change!
# The properties below are a reflection of Django's auth module's models.
__tablename__ = 'auth_user'
__tablename__ = models.User._meta.db_table
id = Column(Integer, primary_key=True)
password = Column(String(128))
last_login = DateTime(timezone=False)
......@@ -34,16 +34,14 @@ class User(Base):
def nodes(self, typename=None):
    """Return a query over the nodes belonging to the user, ordered by date.

    If `typename` is given, only nodes of that type are kept. The query
    itself is returned (not a list), so callers can filter it further or
    call `.count()` on it.
    """
    # Imported at call time; the original author describes this as a
    # workaround for the import system (presumably a circular import --
    # confirm).
    from .nodes import Node
    query = (
        session
        .query(Node)
        .filter(Node.user_id == self.id)
        .order_by(Node.date)
    )
    if typename is not None:
        query = query.filter(Node.typename == typename)
    return query
def contacts_nodes(self, typename=None):
for contact in self.contacts():
......
import json
import types
import datetime
import traceback
import inspect
__all__ = ['json_encoder', 'json_dumps']
class JSONEncoder(json.JSONEncoder):
    """JSON encoder handling a few types the standard encoder rejects."""

    def default(self, obj):
        """Return a JSON-serializable version of `obj`.

        - model instances (`Base`): dict of their public instance attributes
        - `datetime.datetime`: ISO string truncated to seconds, plus 'Z'
        - sets and tuples: list
        - exceptions: list of stripped formatted traceback lines
        - any other non-dict iterable: list
        Anything else is delegated to the parent class.
        """
        # Imported at call time, as in the original implementation.
        from gargantext.util.db import Base
        if isinstance(obj, Base):
            return {
                key: value
                for key, value in obj.__dict__.items()
                if not key.startswith('_')
            }
        elif isinstance(obj, datetime.datetime):
            return obj.isoformat()[:19] + 'Z'
        elif isinstance(obj, (set, tuple)):
            return list(obj)
        elif isinstance(obj, Exception):
            tbe = traceback.TracebackException.from_exception(obj)
            return [line.strip() for line in tbe.format()]
        elif hasattr(obj, '__iter__') and not isinstance(obj, dict):
            return list(obj)
        else:
            # Zero-argument super(): `super(self.__class__, self)` recurses
            # forever as soon as this encoder is subclassed.
            return super().default(obj)
......
This diff is collapsed.
......@@ -7,71 +7,80 @@ from gargantext.constants import *
from gargantext.util.validation import validate
class NodeListResource(APIView):
_node_available_fields = ['id', 'parent_id', 'name', 'typename', 'hyperdata', 'ngrams']
_node_default_fields = ['id', 'parent_id', 'name', 'typename']
_node_available_types = NODETYPES
_fields = ['id', 'parent_id', 'name', 'typename', 'hyperdata']
_types = NODETYPES
def _query(self, request):
    """Validate the request parameters and build the matching node query.

    Returns a `(parameters, rows, count)` tuple: the validated parameters,
    the paginated rows (one column per requested field) and the number of
    matching rows before pagination.
    A `pagination_limit` of -1 disables the upper bound.
    """
    # parameters validation
    parameters = get_parameters(request)
    parameters = validate(parameters, {'type': dict, 'items': {
        'pagination_limit': {'type': int, 'default': 10},
        'pagination_offset': {'type': int, 'default': 0},
        'fields': {'type': list, 'default': self._fields, 'items': {
            'type': str, 'range': self._fields,
        }},
        # optional filtering parameters
        'type': {'type': list, 'default': self._types, 'required': False, 'items': {
            'type': str, 'range': self._types,
        }},
        'parent_id': {'type': int, 'required': False},
    }})
    # start the query
    query = session.query(*tuple(
        getattr(Node, field) for field in parameters['fields']
    ))
    # filter by type
    if 'type' in parameters:
        query = query.filter(Node.typename.in_(parameters['type']))
    # filter by parent
    if 'parent_id' in parameters:
        query = query.filter(Node.parent_id == parameters['parent_id'])
    # count (before pagination)
    count = query.count()
    # order
    query = query.order_by(Node.hyperdata['publication_date'], Node.id)
    # paginate the query: the slice end is an absolute position, so it must
    # be offset + limit (using the bare limit returns too few rows, or none,
    # for any non-zero offset)
    offset = parameters['pagination_offset']
    limit = parameters['pagination_limit']
    if limit == -1:
        query = query[offset:]
    else:
        query = query[offset : offset + limit]
    # return the result!
    return parameters, query, count
def _query_nodes(request, node_id=None):
    """Validate the request parameters and query the current user's nodes.

    The query starts from `user.nodes()` and is optionally restricted to
    `node_id`, to the requested `types` and to `parent_id`.

    Returns a `(parameters, nodes, count)` tuple: the validated parameters,
    the paginated nodes and the number of matching nodes before pagination.
    A `pagination_limit` of -1 disables the upper bound.
    """
    user = cache.User[request.user.username]
    # parameters validation
    parameters = get_parameters(request)
    parameters = validate(parameters, {'type': dict, 'items': {
        'pagination_limit': {'type': int, 'default': 10},
        'pagination_offset': {'type': int, 'default': 0},
        'fields': {'type': list, 'default': _node_default_fields, 'items': {
            'type': str, 'range': _node_available_fields,
        }},
        # optional filtering parameters
        'types': {'type': list, 'required': False, 'items': {
            'type': str, 'range': _node_available_types,
        }},
        'parent_id': {'type': int, 'required': False},
    }})
    # start the query
    query = user.nodes()
    # filter by id
    if node_id is not None:
        query = query.filter(Node.id == node_id)
    # filter by type
    if 'types' in parameters:
        query = query.filter(Node.typename.in_(parameters['types']))
    # filter by parent
    if 'parent_id' in parameters:
        query = query.filter(Node.parent_id == parameters['parent_id'])
    # count (before pagination)
    count = query.count()
    # order
    query = query.order_by(Node.hyperdata['publication_date'], Node.id)
    # paginate the query: the slice end is an absolute position, so it must
    # be offset + limit (using the bare limit returns too few rows, or none,
    # for any non-zero offset)
    offset = parameters['pagination_offset']
    limit = parameters['pagination_limit']
    if limit == -1:
        query = query[offset:]
    else:
        query = query[offset : offset + limit]
    # return the result!
    return parameters, query, count
class NodeListResource(APIView):
def get(self, request):
    """Displays the list of nodes corresponding to the query.

    The response holds the validated `parameters`, the total `count` and
    one record per node, restricted to the requested `fields`.
    """
    parameters, query, count = _query_nodes(request)
    return JsonHttpResponse({
        'parameters': parameters,
        'count': count,
        # rows are node objects, so fields are read by attribute name
        'records': [
            {field: getattr(node, field) for field in parameters['fields']}
            for node in query
        ]
    })
def post(self, request):
pass
"""Create a new node.
NOT IMPLEMENTED
"""
def delete(self, request):
"""Removes the list of nodes corresponding to the query.
WARNING! THIS IS TOTALLY UNTESTED!!!!!
"""
parameters, query, count = self._query(request)
for node in query:
node.delete()
parameters, query, count = _query_nodes(request)
query.delete()
session.commit()
return JsonHttpResponse({
'parameters': parameters,
......@@ -81,27 +90,20 @@ class NodeListResource(APIView):
class NodeResource(APIView):
def _query(self, request, node_id):
user = cache.User[request.user.username]
node = session.query(Node).filter(Node.id == node_id).first()
if node is None:
raise Http404()
if not user.owns(node):
raise HttpResponseForbidden()
return user, node
def get(self, request, node_id):
user, node = self._query(request, node_id)
parameters, query, count = _query_nodes(request, node_id)
if not len(query):
raise Http404()
node = query[0]
return JsonHttpResponse({
'id': node.id,
'parent_id': node.parent_id,
'name': node.name,
'hyperdata': node.hyperdata,
field: getattr(node, field) for field in parameters['fields']
})
def delete(self, request, node_id):
parameters, query, count = _query_nodes(request, node_id)
if not len(query):
raise Http404()
from sqlalchemy import delete
user, node = self._query(request, node_id)
result = session.execute(
delete(Node).where(Node.id == node_id)
)
......
......@@ -12,19 +12,22 @@ def _get_user_project_corpus(request, project_id, corpus_id):
"""Helper method to get a corpus, knowing the project's and corpus' ID.
Raises HTTP errors when parameters (user, IDs...) are invalid.
"""
user = cache.User[request.user.username]
user = cache.User[request.user.id]
project = session.query(Node).filter(Node.id == project_id).first()
corpus = session.query(Node).filter(Node.id == corpus_id).filter(Node.parent_id == project_id).first()
if corpus is None:
raise Http404()
if not user.owns(corpus):
raise HttpResponseForbidden()
return user, project, corpus
print("CORPORA: invalid user %i (User doesn't own this corpus)" % user.id)
return (False, user, project, corpus)
return (True, user, project, corpus)
@requires_auth
def corpus(request, project_id, corpus_id):
user, project, corpus = _get_user_project_corpus(request, project_id, corpus_id)
authorized, user, project, corpus = _get_user_project_corpus(request, project_id, corpus_id)
if not authorized:
return HttpResponseForbidden()
# response!
return render(
template_name = 'pages/corpora/corpus.html',
......
......@@ -48,7 +48,7 @@ def overview(request):
'debug': settings.DEBUG,
'date': datetime.now(),
# projects owned by the user
'number': len(user_projects),
'number': user_projects.count(),
'projects': user_projects,
# projects owned by the user's contacts
'common_users': (contact for contact, projects in contacts_projects),
......
......@@ -20,7 +20,7 @@ urlpatterns = [
url(r'^projects/(\d+)/?$', projects.project),
# corpora
url(r'^projects/(\d+)/corpora/(\d+)?$', corpora.corpus),
url(r'^projects/(\d+)/corpora/(\d+)/chart?$', corpora.chart),
url(r'^projects/(\d+)/corpora/(\d+)/?$', corpora.corpus),
url(r'^projects/(\d+)/corpora/(\d+)/chart/?$', corpora.chart),
]
amqp==1.4.9
anyjson==0.3.3
billiard==3.3.0.22
billiard==3.3.0.22 # multiprocessing fork
celery==3.1.20
chardet==2.3.0
dateparser==0.3.2
Django==1.9.2
django-celery==3.1.17
......@@ -10,18 +11,18 @@ django-pgjsonb==0.0.16
djangorestframework==3.3.2
html5lib==0.9999999
jdatetime==1.7.2
kombu==3.0.33
kombu==3.0.33 # messaging
lxml==3.5.0
nltk==3.1
numpy==1.10.4
psycopg2==2.6.1
pycountry==1.20
python-dateutil==2.4.2
pytz==2015.7
pytz==2015.7 # timezones
PyYAML==3.11
RandomWords==0.1.12
six==1.10.0
SQLAlchemy==1.1.0b1.dev0
ujson==1.35
umalqurra==0.2
umalqurra==0.2 # arabic calendars (?? why use ??)
wheel==0.29.0
......@@ -532,7 +532,9 @@ $("#corpusdisplayer").hide()
// FIRST portion of code to be EXECUTED:
// (3) Get records and hyperdata for paginator
$.ajax({
url: '/api/nodes?type[]=DOCUMENT&pagination_limit=-1&parent_id=' + corpus_id,
url: '/api/nodes?types[]=DOCUMENT&pagination_limit=-1&parent_id='
+ corpus_id
+'&fields[]=parent_id&fields[]=id&fields[]=name&fields[]=typename&fields[]=hyperdata',
success: function(data){
$("#content_loader").remove();
$.each(data.records, function(i, record){
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment