Commit 883a52c1 authored by Mathieu Rodic

[FEAT] implemented lists

[FEAT] linked nodes with lists (cf. `gargantext.constants` and `gargantext.models.Node.as_list()`)
[FEAT] implemented an `ngrams` pseudo-attribute on `gargantext.models.Node` (available from the REST API)
[FEAT] in the REST API, renamed the GET parameter `type[]` to `types[]`
[FEAT] `json_dumps()` can now serialize SQLAlchemy models as well
parent 6e5bf987
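For illustration, the renamed parameter can be exercised as in the sketch below (a minimal example assuming a local development server on port 8000, the `requests` library and an already authenticated session; host, ids and values are placeholders, not part of this commit):

import requests

# list the DOCUMENT nodes under a corpus, using the renamed `types[]` parameter
# (formerly `type[]`); authentication is omitted here
response = requests.get(
    'http://localhost:8000/api/nodes',
    params={
        'types[]': 'DOCUMENT',    # filter on node typename
        'parent_id': 42,          # placeholder corpus id
        'pagination_limit': 5,
    },
)
for record in response.json()['records']:
    print(record['id'], record['name'])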
# WARNING: to ensure consistency and backward compatibility, lists should keep
# their initial order (i.e., new elements should be appended at the end of the lists)
from gargantext.util.lists import *
LISTTYPES = {
'DOCUMENT': WeightedList,
'SYNONYMS': Translations,
'MIAMLIST': UnweightedList,
'STOPLIST': UnweightedList,
'COOCCURRENCES': WeightedMatrix,
}
NODETYPES = [
None,
# document hierarchy
'USER',
'PROJECT',
'CORPUS',
'DOCUMENT',
# lists
'SYNONYMS',
'MIAMLIST',
'STOPLIST',
'COOCCURRENCES',
]
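# Illustration (not part of this file): LISTTYPES maps a node's typename to the
# class used to materialize its content as a list-like object, and those objects
# support a small set algebra. A hedged sketch, assuming existing node ids:
#
#     stoplist = LISTTYPES['STOPLIST'](stoplist_node_id)   # -> UnweightedList
#     miamlist = LISTTYPES['MIAMLIST'](miamlist_node_id)   # -> UnweightedList
#     kept = miamlist - stoplist                           # set difference
#     kept.save(new_list_node_id)                          # writes NodeNgram rows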
......
from gargantext.util.db import *
from gargantext.util.files import upload
from gargantext.constants import *
from .nodes import Node
__all__ = ['Ngram', 'NodeNgram']
__all__ = ['Ngram', 'NodeNgram', 'NodeNgramNgram']
class Ngram(Base):
......@@ -19,3 +17,11 @@ class NodeNgram(Base):
node_id = Column(Integer, ForeignKey(Node.id, ondelete='CASCADE'), primary_key=True)
ngram_id = Column(Integer, ForeignKey(Ngram.id, ondelete='CASCADE'), primary_key=True)
weight = Column(Float)
class NodeNgramNgram(Base):
__tablename__ = 'nodes_ngrams_ngrams'
node_id = Column(Integer, ForeignKey(Node.id, ondelete='CASCADE'), primary_key=True)
ngram1_id = Column(Integer, ForeignKey(Ngram.id, ondelete='CASCADE'), primary_key=True)
ngram2_id = Column(Integer, ForeignKey(Ngram.id, ondelete='CASCADE'), primary_key=True)
weight = Column(Float)
......@@ -45,6 +45,26 @@ class Node(Base):
def __setitem__(self, key, value):
self.hyperdata[key] = value
@property
def ngrams(self):
from . import NodeNgram, Ngram
query = (session
.query(NodeNgram.weight, Ngram)
.select_from(NodeNgram)
.join(Ngram)
.filter(NodeNgram.node_id == self.id)
)
return query
def as_list(self):
try:
return LISTTYPES[self.typename](self.id)
except KeyError:
raise ValueError('This node\'s typename is not convertible to a list: %s (accepted values: %s)' % (
self.typename,
', '.join(LISTTYPES.keys())
))
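# Illustration (not part of this commit): the `ngrams` pseudo-attribute and
# `as_list()` defined above could be used as follows (a hedged sketch, assuming
# an open `session` and existing node ids):
#
#     document = session.query(Node).get(document_id)
#     for weight, ngram in document.ngrams[:10]:
#         print(weight, ngram)
#
#     stop_node = session.query(Node).filter(Node.typename == 'STOPLIST').first()
#     stoplist = stop_node.as_list()    # -> UnweightedList (see gargantext.util.lists)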
def save_hyperdata(self):
"""This is a necessary, yet ugly trick.
Indeed, PostgreSQL does not yet manage incremental updates (see
......
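The truncated docstring above most likely refers to the standard workaround for JSON columns in SQLAlchemy: in-place mutations of `hyperdata` are invisible to the ORM, so the whole column has to be flagged as modified before committing. A minimal sketch of that pattern (an assumption about the intent, not the actual implementation):

from sqlalchemy.orm.attributes import flag_modified

def save_hyperdata_sketch(node, session):
    """Hypothetical helper illustrating the workaround described above."""
    flag_modified(node, 'hyperdata')   # mark the JSON column as dirty
    session.add(node)
    session.commit()                   # emits an UPDATE of the full JSON value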
......@@ -9,7 +9,7 @@ class User(Base):
# Do not change!
# The properties below are a reflection of Django's auth module's models.
__tablename__ = 'auth_user'
__tablename__ = models.User._meta.db_table
id = Column(Integer, primary_key=True)
password = Column(String(128))
last_login = Column(DateTime(timezone=False))
......@@ -34,16 +34,14 @@ class User(Base):
def nodes(self, typename=None):
"""get all nodes belonging to the user"""
# ↓ local import, as a workaround for a circular dependency between model modules
from .nodes import Node
query = (session
.query(Node)
.filter(Node.user_id == self.id)
.order_by(Node.date)
)
if typename is not None:
query = query.filter(Node.typename == typename)
return query.all()
return query
def contacts_nodes(self, typename=None):
for contact in self.contacts():
......
import json
import types
import datetime
import traceback
import inspect
__all__ = ['json_encoder', 'json_dumps']
class JSONEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, datetime.datetime):
from gargantext.util.db import Base
if isinstance(obj, Base):
return {
key: value
for key, value in obj.__dict__.items()
if not key.startswith('_')
}
elif isinstance(obj, datetime.datetime):
return obj.isoformat()[:19] + 'Z'
elif isinstance(obj, (set, tuple)):
return list(obj)
elif isinstance(obj, Exception):
tbe = traceback.TracebackException.from_exception(obj)
return list(line.strip() for line in tbe.format())
elif hasattr(obj, '__iter__') and not isinstance(obj, dict):
return list(obj)
else:
return super().default(obj)
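# Illustration (not part of this file): with the `Base` branch added above, model
# instances and query results serialize directly. A hedged sketch, assuming that
# `json_dumps` wraps `json.dumps(..., cls=JSONEncoder)`:
#
#     node = session.query(Node).first()
#     json_dumps(node)              # -> JSON object of the node's column values
#     json_dumps(node.ngrams[:5])   # -> JSON array (queries and tuples become lists)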
......
"""Allows easier lists management (synonyms, blacklists, whitelists, etc.)
"""
__all__ = ['Translations', 'WeightedMatrix', 'UnweightedList', 'WeightedList']
from gargantext.util.db import session, bulk_insert
from collections import defaultdict
from math import sqrt
class _BaseClass:
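# Binary operators are routed through the *reflected* methods of the left
# operand: e.g. `a + b` calls `a.__add__(b)` below, which delegates to
# `a.__radd__(b)`, so subclasses only implement the reflected variants they support.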
def __add__(self, other):
if hasattr(self, '__radd__'):
return self.__radd__(other)
else:
return NotImplemented
def __sub__(self, other):
if hasattr(self, '__rsub__'):
return self.__rsub__(other)
else:
return NotImplemented
def __mul__(self, other):
if hasattr(self, '__rmul__'):
return self.__rmul__(other)
else:
return NotImplemented
def __truediv__(self, other):
if hasattr(self, '__rtruediv__'):
return self.__rtruediv__(other)
else:
return NotImplemented
def __and__(self, other):
if hasattr(self, '__rand__'):
return self.__rand__(other)
else:
return NotImplemented
def __or__(self, other):
if hasattr(self, '__ror__'):
return self.__ror__(other)
else:
return NotImplemented
def __repr__(self):
items = self.items
if isinstance(items, defaultdict):
if len(items) and isinstance(next(iter(items.values())), defaultdict):
items = {
key: dict(value)
for key, value in items.items()
}
else:
items = dict(items)
return '<%s %s>' % (
self.__class__.__name__,
repr(items),
)
__str__ = __repr__
class Translations(_BaseClass):
def __init__(self, source=None):
self.items = defaultdict(int)
self.groups = defaultdict(set)
if source is None:
return
elif isinstance(source, int):
self.id = source
from gargantext.models import NodeNgramNgram
query = (session
.query(NodeNgramNgram.ngram2_id, NodeNgramNgram.ngram1_id)
.filter(NodeNgramNgram.node_id == source)
)
self.items.update(query)
for key, value in self.items.items():
self.groups[value].add(key)
elif isinstance(source, Translations):
self.items.update(source.items)
self.groups.update(source.groups)
elif hasattr(source, '__iter__'):
self.items.update(source)
for key, value in self.items.items():
self.groups[value].add(key)
else:
raise TypeError
def __rmul__(self, other):
result = NotImplemented
if isinstance(other, UnweightedList):
result = UnweightedList()
result.items = set(
self.items.get(key, key)
for key in other.items
)
elif isinstance(other, WeightedList):
result = WeightedList()
for key, value in other.items.items():
result.items[
self.items.get(key, key)
] += value
elif isinstance(other, Translations):
result = Translations()
items = dict(self.items)  # work on a copy so that `self` is not mutated below
items.update(other.items)
for key, value in items.items():
if value in items:
value = items[value]
if key != value:
result.items[key] = value
result.groups[value].add(key)
return result
def __iter__(self):
for key, value in self.items.items():
yield key, value
def save(self, node_id=None):
from gargantext.models import NodeNgramNgram
if node_id is None:
if hasattr(self, 'id'):
node_id = self.id
else:
raise ValueError('Please provide a node ID to save the list.')
# delete previous data
session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id == node_id).delete()
session.commit()
# insert new data
bulk_insert(
NodeNgramNgram,
('node_id', 'ngram2_id', 'ngram1_id', 'weight'),
((node_id, key, value, 1.0) for key, value in self.items.items())
)
class WeightedMatrix(_BaseClass):
def __init__(self, source=None):
self.items = defaultdict(float)
if source is None:
return
elif isinstance(source, int):
self.id = source
from gargantext.models import NodeNgramNgram
query = (session
.query(NodeNgramNgram.ngram1_id, NodeNgramNgram.ngram2_id, NodeNgramNgram.weight)
.filter(NodeNgramNgram.node_id == source)
)
for key1, key2, value in query:
self.items[key1, key2] = value
elif isinstance(source, WeightedMatrix):
for key1, key2, value in source:
self.items[key1, key2] = value
elif hasattr(source, '__iter__'):
for row in source:
self.items[row[0], row[1]] = row[2]
else:
raise TypeError
def __iter__(self):
for (key1, key2), value in self.items.items():
yield key1, key2, value
def save(self, node_id=None):
from gargantext.models import NodeNgramNgram
if node_id is None:
if hasattr(self, 'id'):
node_id = self.id
else:
raise ValueError('Please provide a node ID to save the list.')
# delete previous data
session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id == node_id).delete()
session.commit()
# insert new data
bulk_insert(
NodeNgramNgram,
('node_id', 'ngram1_id', 'ngram2_id', 'weight'),
((node_id, key1, key2, value) for key1, key2, value in self)
)
def __radd__(self, other):
result = NotImplemented
if isinstance(other, WeightedMatrix):
result = WeightedMatrix()
for key1, key2, value in self:
value = value + other.items[key1, key2]
if value != 0.0:
result.items[key1, key2] = value
return result
def __rsub__(self, other):
result = NotImplemented
if isinstance(other, (UnweightedList, WeightedList)):
result = WeightedMatrix()
for key1, key2, value in self:
if key1 in other.items or key2 in other.items:
continue
result.items[key1, key2] = value
elif isinstance(other, WeightedMatrix):
result = WeightedMatrix()
for key1, key2, value in self:
value = value - other.items[key1, key2]
if value != 0.0:
result.items[key1, key2] = value
return result
def __rand__(self, other):
result = NotImplemented
if isinstance(other, (UnweightedList, WeightedList)):
result = WeightedMatrix()
for key1, key2, value in self:
if key1 not in other.items or key2 not in other.items:
continue
result.items[key1, key2] = value
return result
def __rmul__(self, other):
result = NotImplemented
if isinstance(other, Translations):
result = WeightedMatrix()
for (key1, key2), value in self.items.items():
result.items[key1,
other.items.get(key2, key2)
] += value
elif isinstance(other, UnweightedList):
result = self.__rand__(other)
# elif isinstance(other, WeightedMatrix):
# result = WeightedMatrix()
elif isinstance(other, WeightedList):
result = WeightedMatrix()
for key1, key2, value in self:
if key1 not in other.items or key2 not in other.items:
continue
result.items[key1, key2] = value * sqrt(other.items[key1] * other.items[key2])
return result
def __rtruediv__(self, other):
result = NotImplemented
if isinstance(other, WeightedList):
result = WeightedMatrix()
for key1, key2, value in self:
if key1 not in other.items or key2 not in other.items:
continue
result.items[key1, key2] = value / sqrt(other.items[key1] * other.items[key2])
return result
class UnweightedList(_BaseClass):
def __init__(self, source=None):
self.items = set()
if source is None:
return
elif isinstance(source, int):
self.id = source
from gargantext.models import NodeNgram
query = (session
.query(NodeNgram.ngram_id)
.filter(NodeNgram.node_id == source)
)
self.items.update(row[0] for row in query)
elif isinstance(source, WeightedList):
self.items.update(source.items.keys())
elif isinstance(source, UnweightedList):
self.items.update(source.items)
elif hasattr(source, '__iter__'):
items = tuple(item for item in source)
if len(items) == 0:
return
if hasattr(items[0], '__iter__'):
self.items.update(item[0] for item in items)
else:
self.items.update(items)
else:
raise TypeError
def __radd__(self, other):
result = NotImplemented
if isinstance(other, UnweightedList):
result = UnweightedList(other)
result.items |= self.items
elif isinstance(other, WeightedList):
result = WeightedList(other)
for key in self.items:
result.items[key] += 1.0
return result
def __rsub__(self, other):
result = NotImplemented
if isinstance(other, UnweightedList):
result = UnweightedList(self)
result.items -= other.items
elif isinstance(other, WeightedList):
result = UnweightedList(self)
result.items -= set(other.items.keys())
return result
def __ror__(self, other):
result = NotImplemented
if isinstance(other, UnweightedList):
result = UnweightedList(self)
result.items |= other.items
elif isinstance(other, WeightedList):
result = UnweightedList(self)
result.items |= set(other.items.keys())
return result
def __rand__(self, other):
result = NotImplemented
if isinstance(other, UnweightedList):
result = UnweightedList(self)
result.items &= other.items
elif isinstance(other, WeightedList):
result = UnweightedList(self)
result.items &= set(other.items)
return result
def __rmul__(self, other):
result = NotImplemented
if isinstance(other, Translations):
result = UnweightedList()
result.items = set(
other.items.get(key, key)
for key in self.items
)
elif isinstance(other, UnweightedList):
result = WeightedList(self)
result.items = {key: 1.0 for key in self.items & other.items}
elif isinstance(other, WeightedList):
result = WeightedList()
result.items = {key: value for key, value in other.items.items() if key in self.items}
return result
def save(self, node_id=None):
from gargantext.models import NodeNgram
if node_id is None:
if hasattr(self, 'id'):
node_id = self.id
else:
raise ValueError('Please provide a node ID to save the list.')
# delete previous data
session.query(NodeNgram).filter(NodeNgram.node_id == node_id).delete()
session.commit()
# insert new data
bulk_insert(
NodeNgram,
('node_id', 'ngram_id', 'weight'),
((node_id, key, 1.0) for key in self.items)
)
class WeightedList(_BaseClass):
def __init__(self, source=None):
self.items = defaultdict(float)
if source is None:
return
elif isinstance(source, int):
self.id = source
from gargantext.models import NodeNgram
query = (session
.query(NodeNgram.ngram_id, NodeNgram.weight)
.filter(NodeNgram.node_id == source)
)
self.items.update(query)
elif isinstance(source, WeightedList):
self.items = source.items.copy()
elif isinstance(source, UnweightedList):
for key in source.items:
self.items[key] = 1.0
elif hasattr(source, '__iter__'):
self.items.update(source)
else:
raise TypeError
def __iter__(self):
for key, value in self.items.items():
yield key, value
def __radd__(self, other):
result = NotImplemented
if isinstance(other, WeightedList):
result = WeightedList(self)
for key, value in other.items.items():
result.items[key] += value
elif isinstance(other, UnweightedList):
result = WeightedList(self)
for key in other.items:
result.items[key] += 1.0
return result
def __rsub__(self, other):
"""Remove elements of the other list from the current one
"""
result = NotImplemented
if isinstance(other, UnweightedList):
result = WeightedList()
result.items = {key: value for key, value in self.items.items() if key not in other.items}
elif isinstance(other, WeightedList):
result = WeightedList(self)
for key, value in other.items.items():
if key in result.items and result.items[key] == value:
result.items.pop(key)
else:
result.items[key] -= value
return result
def __ror__(self, other):
result = NotImplemented
if isinstance(other, UnweightedList):
result = UnweightedList(self)
result.items |= other.items
elif isinstance(other, WeightedList):
result = UnweightedList(self)
result.items |= set(other.items.keys())
return result
def __rmul__(self, other):
result = NotImplemented
if isinstance(other, WeightedList):
result = WeightedList()
result.items = {
key: value * other.items[key]
for key, value
in self.items.items()
if key in other.items
}
elif isinstance(other, UnweightedList):
result = WeightedList()
result.items = {
key: value
for key, value
in self.items.items()
if key in other.items
}
elif isinstance(other, Translations):
result = WeightedList()
for key, value in self.items.items():
result.items[
other.items.get(key, key)
] += value
return result
def __rand__(self, other):
result = NotImplemented
if isinstance(other, UnweightedList):
result = UnweightedList(self)
result.items &= other.items
elif isinstance(other, WeightedList):
result = UnweightedList(self)
result.items &= set(other.items.keys())
return result
def save(self, node_id=None):
from gargantext.models import NodeNgram
if node_id is None:
if hasattr(self, 'id'):
node_id = self.id
else:
raise ValueError('Please provide a node ID to save the list.')
# delete previous data
session.query(NodeNgram).filter(NodeNgram.node_id == node_id).delete()
session.commit()
# insert new data
bulk_insert(
NodeNgram,
('node_id', 'ngram_id', 'weight'),
((node_id, key, value) for key, value in self.items.items())
)
def test():
from collections import OrderedDict
# define operands
operands = OrderedDict()
operands['wm'] = WeightedMatrix(((1, 2, .5), (1, 3, .75), (2, 3, .6), (3, 3, 1), ))
operands['ul'] = UnweightedList((1, 2, 3, 4, 5))
# operands['ul'] = UnweightedList(82986)
# operands['ul2'] = UnweightedList((1, 2, 3, 6))
# operands['ul2'].save(5)
# operands['ul3'] = UnweightedList(5)
operands['wl'] = WeightedList({1:.7, 2:.8, 7: 1.1})
# operands['wl1'].save(5)
# operands['wl2'] = WeightedList(5)
# operands['t1'] = Translations({1:2, 4:5})
operands['t'] = Translations({3:2, 4:5})
# operands['t2'].save(5)
# operands['t3'] = Translations(5)
# define operators
operators = OrderedDict()
operators['+'] = '__add__'
operators['-'] = '__sub__'
operators['*'] = '__mul__'
operators['|'] = '__or__'
operators['&'] = '__and__'
# show operands
for operand_name, operand in operands.items():
print('%4s = %s' % (operand_name, operand))
# show operations results
for operator_name, operator in operators.items():
print()
for operand1_name, operand1 in operands.items():
for operand2_name, operand2 in operands.items():
if hasattr(operand1, operator):
result = getattr(operand1, operator)(operand2)
else:
result = '?'
print('%4s %s %-4s = %s' % (
operand1_name,
operator_name,
operand2_name,
'?' if result is NotImplemented else result,
))
if __name__ == '__main__':
test()
......@@ -7,71 +7,80 @@ from gargantext.constants import *
from gargantext.util.validation import validate
class NodeListResource(APIView):
_node_available_fields = ['id', 'parent_id', 'name', 'typename', 'hyperdata', 'ngrams']
_node_default_fields = ['id', 'parent_id', 'name', 'typename']
_node_available_types = NODETYPES
_fields = ['id', 'parent_id', 'name', 'typename', 'hyperdata']
_types = NODETYPES
def _query(self, request):
# parameters validation
parameters = get_parameters(request)
parameters = validate(parameters, {'type': dict, 'items': {
'pagination_limit': {'type': int, 'default': 10},
'pagination_offset': {'type': int, 'default': 0},
'fields': {'type': list, 'default': self._fields, 'items': {
'type': str, 'range': self._fields,
}},
# optional filtering parameters
'type': {'type': list, 'default': self._types, 'required': False, 'items': {
'type': str, 'range': self._types,
}},
'parent_id': {'type': int, 'required': False},
}})
# start the query
query = session.query(*tuple(
getattr(Node, field) for field in parameters['fields']
))
# filter by type
if 'type' in parameters:
query = query.filter(Node.typename.in_(parameters['type']))
# filter by parent
if 'parent_id' in parameters:
query = query.filter(Node.parent_id == parameters['parent_id'])
# count
count = query.count()
# order
query = query.order_by(Node.hyperdata['publication_date'], Node.id)
# paginate the query
if parameters['pagination_limit'] == -1:
query = query[parameters['pagination_offset']:]
else:
query = query[
parameters['pagination_offset'] :
parameters['pagination_offset'] + parameters['pagination_limit']
]
# return the result!
return parameters, query, count
def _query_nodes(request, node_id=None):
user = cache.User[request.user.username]
# parameters validation
parameters = get_parameters(request)
parameters = validate(parameters, {'type': dict, 'items': {
'pagination_limit': {'type': int, 'default': 10},
'pagination_offset': {'type': int, 'default': 0},
'fields': {'type': list, 'default': _node_default_fields, 'items': {
'type': str, 'range': _node_available_fields,
}},
# optional filtering parameters
'types': {'type': list, 'required': False, 'items': {
'type': str, 'range': _node_available_types,
}},
'parent_id': {'type': int, 'required': False},
}})
# start the query
query = user.nodes()
# filter by id
if node_id is not None:
query = query.filter(Node.id == node_id)
# filter by type
if 'types' in parameters:
query = query.filter(Node.typename.in_(parameters['types']))
# filter by parent
if 'parent_id' in parameters:
query = query.filter(Node.parent_id == parameters['parent_id'])
# count
count = query.count()
# order
query = query.order_by(Node.hyperdata['publication_date'], Node.id)
# paginate the query
if parameters['pagination_limit'] == -1:
query = query[parameters['pagination_offset']:]
else:
query = query[
parameters['pagination_offset'] :
parameters['pagination_offset'] + parameters['pagination_limit']
]
# return the result!
return parameters, query, count
class NodeListResource(APIView):
def get(self, request):
"""Displays the list of nodes corresponding to the query.
"""
parameters, query, count = self._query(request)
parameters, query, count = _query_nodes(request)
return JsonHttpResponse({
'parameters': parameters,
'count': count,
'records': [dict(zip(parameters['fields'], node)) for node in query]
'records': [
{field: getattr(node, field) for field in parameters['fields']}
for node in query
]
})
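# Illustrative response shape for GET /api/nodes (values are placeholders, not real data):
#
#     {
#       "parameters": {"pagination_limit": 10, "pagination_offset": 0,
#                      "fields": ["id", "parent_id", "name", "typename"]},
#       "count": 1,
#       "records": [{"id": 123, "parent_id": 45, "name": "...", "typename": "DOCUMENT"}]
#     }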
def post(self, request):
pass
"""Create a new node.
NOT IMPLEMENTED
"""
def delete(self, request):
"""Removes the list of nodes corresponding to the query.
WARNING! THIS IS TOTALLY UNTESTED!!!!!
"""
parameters, query, count = self._query(request)
for node in query:
node.delete()
parameters, query, count = _query_nodes(request)
query.delete()
session.commit()
return JsonHttpResponse({
'parameters': parameters,
......@@ -81,27 +90,20 @@ class NodeListResource(APIView):
class NodeResource(APIView):
def _query(self, request, node_id):
user = cache.User[request.user.username]
node = session.query(Node).filter(Node.id == node_id).first()
if node is None:
raise Http404()
if not user.owns(node):
raise HttpResponseForbidden()
return user, node
def get(self, request, node_id):
user, node = self._query(request, node_id)
parameters, query, count = _query_nodes(request, node_id)
if not len(query):
raise Http404()
node = query[0]
return JsonHttpResponse({
'id': node.id,
'parent_id': node.parent_id,
'name': node.name,
'hyperdata': node.hyperdata,
field: getattr(node, field) for field in parameters['fields']
})
def delete(self, request, node_id):
parameters, query, count = _query_nodes(request, node_id)
if not len(query):
raise Http404()
from sqlalchemy import delete
user, node = self._query(request, node_id)
result = session.execute(
delete(Node).where(Node.id == node_id)
)
......
......@@ -532,7 +532,7 @@ $("#corpusdisplayer").hide()
// FIRST portion of code to be EXECUTED:
// (3) Get records and hyperdata for paginator
$.ajax({
url: '/api/nodes?type[]=DOCUMENT&pagination_limit=-1&parent_id=' + corpus_id,
url: '/api/nodes?types[]=DOCUMENT&pagination_limit=-1&parent_id=' + corpus_id,
success: function(data){
$("#content_loader").remove();
$.each(data.records, function(i, record){
......