Commit 0b4a7eae authored by Administrator's avatar Administrator

Merge branch 'unstable-mat' into unstable

parents d8d27f26 b7ccc435
from collections import defaultdict
from math import sqrt
from gargantext_web.db import session, NodeNgram, NodeNgramNgram, bulk_insert
class Translations:
class BaseClass:
    """Shared operator plumbing and text rendering for the list/matrix types.

    Every forward binary operator forwards to the reflected hook of the same
    instance (``a + b`` calls ``a.__radd__(b)``), so a subclass only defines
    the ``__r*__`` methods it supports.  When the hook is missing,
    ``NotImplemented`` is returned and Python continues with its regular
    operator resolution.

    ``__repr__`` reads the ``items`` attribute, which subclasses are expected
    to set.
    """

    def _forward(self, hook_name, other):
        """Call the reflected hook `hook_name` with `other` when it exists."""
        if hasattr(self, hook_name):
            return getattr(self, hook_name)(other)
        return NotImplemented

    def __add__(self, other):
        return self._forward('__radd__', other)

    def __sub__(self, other):
        return self._forward('__rsub__', other)

    def __mul__(self, other):
        return self._forward('__rmul__', other)

    def __div__(self, other):
        return self._forward('__rdiv__', other)

    def __truediv__(self, other):
        # Python 3 resolves `/` through __truediv__ and never looks at
        # __div__; route it to the same hook so `a / b` works there too.
        return self.__div__(other)

    def __and__(self, other):
        return self._forward('__rand__', other)

    def __or__(self, other):
        return self._forward('__ror__', other)

    def __repr__(self):
        """Render as ``<ClassName {...}>``, showing defaultdicts as plain dicts."""
        items = self.items
        if isinstance(items, defaultdict):
            # Two-level mappings are recognised by looking at the first value.
            if len(items) and isinstance(next(iter(items.values())), defaultdict):
                items = {
                    key: dict(value)
                    for key, value in items.items()
                }
            else:
                items = dict(items)
        return '<%s %s>' % (
            self.__class__.__name__,
            repr(items),
        )

    __str__ = __repr__
class Translations(BaseClass):
def __init__(self, other=None):
if other is None:
self.items = defaultdict(int)
self.groups = defaultdict(set)
elif isinstance(other, int):
query = (session
.query(NodeNgramNgram.ngramy_id, NodeNgramNgram.ngramx_id)
.filter(NodeNgramNgram.node_id == other)
)
self.items = defaultdict(int, query)
self.groups = defaultdict(set)
for key, value in self.items.items():
self.groups[value].add(key)
elif isinstance(other, Translations):
self.items = other.items.copy()
self.groups = other.groups.copy()
......@@ -18,39 +86,69 @@ class Translations:
else:
raise TypeError
def __add__(self, other):
result = self.__class__(self)
result.items.update(other)
for key, value in other.groups:
result.groups[key] += value
return result
def __sub__(self, other):
result = self.__class__(self)
if isinstance(other, Translations):
def __rmul__(self, other):
    """Combine this translation table with `other` (backs ``self * other``).

    * ``UnweightedList``: returns a new ``UnweightedList`` in which every
      key that has a translation is replaced by its target.
    * ``WeightedList``: returns a new ``WeightedList``; the weights of keys
      translated to the same target are summed.
    * ``Translations``: returns a new ``Translations`` merging both tables
      (entries of `other` win on a shared key) and following one extra hop
      when a target is itself a translated key.  Identity mappings are
      dropped.

    Any other operand yields ``NotImplemented``.
    """
    result = NotImplemented
    if isinstance(other, UnweightedList):
        result = UnweightedList()
        result.items = {self.items.get(key, key) for key in other.items}
    elif isinstance(other, WeightedList):
        result = WeightedList()
        for key, value in other.items.items():
            result.items[self.items.get(key, key)] += value
    elif isinstance(other, Translations):
        result = Translations()
        # Merge into a copy: updating self.items directly would silently
        # rewrite the left operand each time two tables are multiplied.
        items = dict(self.items)
        items.update(other.items)
        for key, value in items.items():
            if value in items:
                value = items[value]
            if key != value:
                result.items[key] = value
                result.groups[value].add(key)
    return result
def __iter__(self):
for key, value in self.items.items():
yield key, value
def save(self, node_id):
    """Replace the rows stored for `node_id` with this translation table.

    The ``NodeNgramNgram`` rows of the node are deleted and that deletion is
    committed; one row per (key, value) pair is then bulk-inserted with a
    score of 1.0.
    """
    # wipe whatever was stored for this node
    stale_rows = session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id == node_id)
    stale_rows.delete()
    session.commit()
    # write the current content
    columns = ('node_id', 'ngramy_id', 'ngramx_id', 'score')
    rows = ((node_id, key, value, 1.0) for key, value in self.items.items())
    bulk_insert(NodeNgramNgram, columns, rows)
class WeightedMatrix:
class WeightedMatrix(BaseClass):
def __init__(self, other=None):
if other is None:
self.items = defaultdict(lambda: defaultdict(float))
elif isinstance(other, int):
query = (session
.query(NodeNgramNgram.ngramx_id, NodeNgramNgram.ngramy_id, NodeNgramNgram.score)
.filter(NodeNgramNgram.node_id == other)
)
self.items = defaultdict(lambda: defaultdict(float))
for key1, key2, value in self.items.items():
self.items[key1][key2] = value
elif isinstance(other, WeightedMatrix):
self.items = other.items.copy()
self.items = defaultdict(lambda: defaultdict(float))
for key1, key2, value in other:
self.items[key1][key2] = value
elif hasattr(other, '__iter__'):
self.items = defaultdict(lambda: defaultdict(float))
for row in other:
self.items[other[0]][other[1]] = [other[2]]
self.items[row[0]][row[1]] = row[2]
else:
raise TypeError
......@@ -59,36 +157,103 @@ class WeightedMatrix:
for key2, value in key2_value.items():
yield key1, key2, value
def __sub__(self, other):
"""Remove elements of the other list from the current one
Can only be substracted to another list of coocurrences.
"""
pass
def save(self, node_id):
    """Persist the matrix as the ``NodeNgramNgram`` rows of `node_id`.

    Rows already stored for the node are deleted and that deletion is
    committed; one row per (key1, key2, value) triple obtained by iterating
    the matrix is then bulk-inserted.
    """
    # drop the previous content of the node
    (session
        .query(NodeNgramNgram)
        .filter(NodeNgramNgram.node_id == node_id)
        .delete())
    session.commit()
    # store the current cells
    fields = ('node_id', 'ngramx_id', 'ngramy_id', 'score')
    cells = ((node_id, key1, key2, value) for key1, key2, value in self)
    bulk_insert(NodeNgramNgram, fields, cells)
def __mul__(self, other):
    """Translate both keys of every cell through a ``Translations`` table.

    Keys without a translation are kept as they are; values of cells that
    end up on the same (key1, key2) pair are summed.

    Raises ``TypeError`` when `other` is not a ``Translations``.
    """
    if isinstance(other, Translations):
        result = WeightedMatrix()
        for key1, key2_value in self.items.items():
            key1 = other.items.get(key1, key1)
            # walk the inner mapping of this row (the previous code looped
            # over self.items again and used an undefined name `key`)
            for key2, value in key2_value.items():
                result.items[key1][
                    other.items.get(key2, key2)
                ] += value
    else:
        raise TypeError
    return result
def __radd__(self, other):
result = NotImplemented
if isinstance(other, WeightedMatrix):
result = WeightedMatrix()
for key1, key2, value in self:
value = value + other.items[key1][key2]
if value != 0.0:
result.items[key1][key2] = value
return result
def __rsub__(self, other):
    """Subtract `other` from this matrix (backs ``self - other``).

    * ``UnweightedList`` / ``WeightedList``: keeps only the cells whose two
      keys are both absent from the list.
    * ``WeightedMatrix``: subtracts from every cell of this matrix the value
      of the same cell in `other` (0.0 when missing) and drops the cells
      that end up at 0.0.  Cells present only in `other` are not reported.

    Any other operand yields ``NotImplemented``.
    """
    result = NotImplemented
    if isinstance(other, (UnweightedList, WeightedList)):
        result = WeightedMatrix()
        for key1, key2, value in self:
            if key1 in other.items or key2 in other.items:
                continue
            result.items[key1][key2] = value
    elif isinstance(other, WeightedMatrix):
        result = WeightedMatrix()
        for key1, key2, value in self:
            # .get() rather than [] so that probing a missing cell does not
            # create empty default entries inside `other`
            value = value - other.items.get(key1, {}).get(key2, 0.0)
            if value != 0.0:
                result.items[key1][key2] = value
    return result
def __rand__(self, other):
    """Restrict the matrix to the keys of a list (backs ``self & other``).

    For an ``UnweightedList`` or a ``WeightedList``, returns a new matrix
    holding the cells whose two keys both belong to the list.  Any other
    operand yields ``NotImplemented``.
    """
    if not isinstance(other, (UnweightedList, WeightedList)):
        return NotImplemented
    allowed = other.items
    restricted = WeightedMatrix()
    for key1, key2, value in self:
        if key1 in allowed and key2 in allowed:
            restricted.items[key1][key2] = value
    return restricted
def __rmul__(self, other):
    """Multiply the matrix by `other` (backs ``self * other``).

    * ``Translations``: both keys of every cell are translated (untranslated
      keys are kept) and values landing on the same cell are summed.
    * ``UnweightedList``: same result as ``self.__rand__(other)``.
    * ``WeightedList``: keeps the cells whose two keys are in the list and
      scales each value by the square root of the product of both weights.

    NOTE: there is no branch for a ``WeightedMatrix`` operand; like any other
    unsupported operand it yields ``NotImplemented``.
    """
    if isinstance(other, Translations):
        translate = other.items.get
        product = WeightedMatrix()
        for source1, row in self.items.items():
            target1 = translate(source1, source1)
            for source2, value in row.items():
                product.items[target1][translate(source2, source2)] += value
        return product
    if isinstance(other, UnweightedList):
        return self.__rand__(other)
    if isinstance(other, WeightedList):
        weights = other.items
        product = WeightedMatrix()
        for key1, key2, value in self:
            if key1 in weights and key2 in weights:
                product.items[key1][key2] = value * sqrt(weights[key1] * weights[key2])
        return product
    return NotImplemented
def __rdiv__(self, other):
    """Divide the matrix by a ``WeightedList`` (division hook).

    Keeps the cells whose two keys are in the list and divides each value by
    the square root of the product of both weights.  Any other operand
    yields ``NotImplemented``.
    """
    if not isinstance(other, WeightedList):
        return NotImplemented
    weights = other.items
    quotient = WeightedMatrix()
    for key1, key2, value in self:
        if key1 in weights and key2 in weights:
            quotient.items[key1][key2] = value / sqrt(weights[key1] * weights[key2])
    return quotient
class UnweightedList:
class UnweightedList(BaseClass):
def __init__(self, other=None):
if other is None:
self.items = set()
elif isinstance(other, int):
query = (session
.query(NodeNgram.ngram_id)
.filter(NodeNgram.node_id == other)
)
self.items = {row[0] for row in query}
elif isinstance(other, WeightedList):
self.items = set(other.items.keys())
elif isinstance(other, UnweightedList):
self.items = other.items.copy()
elif hasattr(other, '__iter__'):
items = (item for item in other)
items = tuple(item for item in other)
if len(items) == 0:
self.items = set()
else:
......@@ -99,44 +264,86 @@ class UnweightedList:
else:
raise TypeError
def __add__(self, other):
result = self.__class__(self)
def __radd__(self, other):
result = NotImplemented
if isinstance(other, UnweightedList):
result.items |= other.items
result = UnweightedList(other)
result.items |= self.items
elif isinstance(other, WeightedList):
result.items |= set(other.items.keys())
else:
raise TypeError
result = WeightedList(other)
for key in self.items:
result.items[key] += 1.0
return result
__or__ = __add__
def __sub__(self, other):
result = self.__class__(self)
def __rsub__(self, other):
result = NotImplemented
if isinstance(other, UnweightedList):
result = UnweightedList(self)
result.items -= other.items
elif isinstance(other, WeightedList):
result = UnweightedList(self)
result.items -= set(other.items.keys())
else:
raise TypeError
return result
def __and__(self, other):
result = self.__class__(self)
def __ror__(self, other):
    """Union of this list with another list (backs ``self | other``).

    Accepts an ``UnweightedList`` or a ``WeightedList`` (only its keys are
    used) and returns a new ``UnweightedList``.  Any other operand yields
    ``NotImplemented``.
    """
    if isinstance(other, UnweightedList):
        extra = other.items
    elif isinstance(other, WeightedList):
        extra = set(other.items.keys())
    else:
        return NotImplemented
    union = UnweightedList(self)
    union.items |= extra
    return union
def __rand__(self, other):
result = NotImplemented
if isinstance(other, UnweightedList):
result = UnweightedList(self)
result.items &= other.items
elif isinstance(other, WeightedList):
result.items &= set(other.items.keys())
else:
raise TypeError
result = UnweightedList(self)
result.items &= set(other.items)
return result
def __rmul__(self, other):
    """Multiply this list by `other` (backs ``self * other``).

    * ``Translations``: returns an ``UnweightedList`` in which every key that
      has a translation is replaced by its target.
    * ``UnweightedList``: returns a ``WeightedList`` giving a weight of 1.0
      to every key common to both lists.
    * ``WeightedList``: returns a ``WeightedList`` with the entries of
      `other` whose key belongs to this list.

    Any other operand yields ``NotImplemented``.
    """
    result = NotImplemented
    if isinstance(other, Translations):
        result = UnweightedList()
        result.items = {other.items.get(key, key) for key in self.items}
    elif isinstance(other, UnweightedList):
        result = WeightedList()
        # Keep a defaultdict(float) like every other WeightedList, so that
        # later `items[key] += x` on a new key keeps working.
        result.items = defaultdict(
            float,
            ((key, 1.0) for key in self.items & other.items),
        )
    elif isinstance(other, WeightedList):
        result = WeightedList()
        result.items = defaultdict(
            float,
            (
                (key, value)
                for key, value in other.items.items()
                if key in self.items
            ),
        )
    return result
def save(self, node_id):
    """Replace the ``NodeNgram`` rows of `node_id` with this list.

    The rows of the node are deleted and that deletion is committed; one row
    per key is then bulk-inserted with a weight of 1.0.
    """
    # wipe whatever was stored for this node
    stale_rows = session.query(NodeNgram).filter(NodeNgram.node_id == node_id)
    stale_rows.delete()
    session.commit()
    # write the current content
    columns = ('node_id', 'ngram_id', 'weight')
    rows = ((node_id, key, 1.0) for key in self.items)
    bulk_insert(NodeNgram, columns, rows)
class WeightedList:
class WeightedList(BaseClass):
def __init__(self, other=None):
if other is None:
self.items = defaultdict(float)
elif isinstance(other, int):
query = (session
.query(NodeNgram.ngram_id, NodeNgram.weight)
.filter(NodeNgram.node_id == other)
)
self.items = defaultdict(float, query)
elif isinstance(other, WeightedList):
self.items = other.items.copy()
elif isinstance(other, UnweightedList):
......@@ -144,7 +351,7 @@ class WeightedList:
for key in other.items:
self.items[key] = 1.0
elif hasattr(other, '__iter__'):
self.items = defaultdict(float, items)
self.items = defaultdict(float, other)
else:
raise TypeError
......@@ -152,60 +359,135 @@ class WeightedList:
for key, value in self.items.items():
yield key, value
def __add__(self, other):
"""Add elements from the other list to the current one
"""
result = self.__class__(self)
if isinstance(other, UnweightedList):
for key, value in other.items:
result.items[key] += 1.0
elif isinstance(other, WeightedList):
for key, value in other.items:
def __radd__(self, other):
result = NotImplemented
if isinstance(other, WeightedList):
result = WeightedList(self)
for key, value in other.items.items():
result.items[key] += value
else:
raise TypeError
elif isinstance(other, UnweightedList):
result = WeightedList(self)
for key in other.items:
result.items[key] += 1.0
return result
def __sub__(self, other):
def __rsub__(self, other):
"""Remove elements of the other list from the current one
"""
result = self.__class__(self)
result = NotImplemented
if isinstance(other, UnweightedList):
for key in other.items:
result.items.pop(key, None)
else:
raise TypeError
result = WeightedList()
result.items = {key: value for key, value in self.items.items() if key not in other.items}
elif isinstance(other, WeightedList):
result = WeightedList(self)
for key, value in other.items.items():
if key in result.items and result.items[key] == value:
result.items.pop(key)
else:
result.items[key] -= value
return result
def __and__(self, other):
def __ror__(self, other):
result = NotImplemented
if isinstance(other, UnweightedList):
result = defaultdict(float)
for key, value in self.items.items():
if item in other.items:
result[key] = value
else:
raise TypeError
result = UnweightedList(self)
result.items |= other.items
elif isinstance(other, WeightedList):
result = UnweightedList(self)
result.items |= set(other.items.keys())
return result
def __mul__(self, other):
if isinstance(other, Translations):
def __rmul__(self, other):
result = NotImplemented
if isinstance(other, WeightedList):
result = WeightedList()
result.items = {
key: value * other.items[key]
for key, value
in self.items.items()
if key in other.items
}
if isinstance(other, UnweightedList):
result = WeightedList()
for key, value in self.items:
result.items = {
key: value
for key, value
in self.items.items()
if key in other.items
}
elif isinstance(other, Translations):
result = WeightedList()
for key, value in self.items.items():
result.items[
other.items.get(key, key)
] += value
else:
raise TypeError
return result
def __rand__(self, other):
    """Intersection of this list with another list (backs ``self & other``).

    Accepts an ``UnweightedList`` or a ``WeightedList`` (only its keys are
    used) and returns an ``UnweightedList`` built from this list and reduced
    to the shared keys.  Any other operand yields ``NotImplemented``.
    """
    if isinstance(other, UnweightedList):
        shared = other.items
    elif isinstance(other, WeightedList):
        shared = set(other.items.keys())
    else:
        return NotImplemented
    common = UnweightedList(self)
    common.items &= shared
    return common
def save(self, node_id):
    """Replace the ``NodeNgram`` rows of `node_id` with this weighted list.

    The rows of the node are deleted and that deletion is committed; one row
    per (key, weight) pair is then bulk-inserted.
    """
    # wipe whatever was stored for this node
    stale_rows = session.query(NodeNgram).filter(NodeNgram.node_id == node_id)
    stale_rows.delete()
    session.commit()
    # write the current content
    columns = ('node_id', 'ngram_id', 'weight')
    rows = ((node_id, key, value) for key, value in self.items.items())
    bulk_insert(NodeNgram, columns, rows)
def test():
    """Print every sample operand, then each operator applied to each pair.

    One line is printed per (left operand, operator, right operand)
    combination.  '?' is shown when the left operand lacks the operator
    method or when the call returns ``NotImplemented``.
    """
    from collections import OrderedDict
    # define operands
    operands = OrderedDict()
    operands['wm'] = WeightedMatrix(((1, 2, .5), (1, 3, .75), (2, 3, .6), (3, 3, 1), ))
    operands['ul'] = UnweightedList((1, 2, 3, 4, 5))
    # operands['ul2'] = UnweightedList((1, 2, 3, 6))
    # operands['ul2'].save(5)
    # operands['ul3'] = UnweightedList(5)
    operands['wl'] = WeightedList({1:.7, 2:.8, 7: 1.1})
    # operands['wl1'].save(5)
    # operands['wl2'] = WeightedList(5)
    # operands['t1'] = Translations({1:2, 4:5})
    operands['t'] = Translations({3:2, 4:5})
    # operands['t2'].save(5)
    # operands['t3'] = Translations(5)
    # define operators
    operators = OrderedDict()
    operators['+'] = '__add__'
    operators['-'] = '__sub__'
    operators['*'] = '__mul__'
    operators['|'] = '__or__'
    operators['&'] = '__and__'
    # show operands
    for operand_name, operand in operands.items():
        print('%4s = %s' % (operand_name, operand))
    # show operations results
    for operator_name, operator in operators.items():
        print()
        for operand1_name, operand1 in operands.items():
            for operand2_name, operand2 in operands.items():
                if hasattr(operand1, operator):
                    result = getattr(operand1, operator)(operand2)
                else:
                    result = '?'
                print('%4s %s %-4s = %s' % (
                    operand1_name,
                    operator_name,
                    operand2_name,
                    # NotImplemented is a singleton: test identity, so that
                    # no __eq__ of a result object is ever involved
                    '?' if result is NotImplemented else result,
                ))
if __name__ == '__main__':
test()
# if __name__ == '__main__':
# l = Coocurrences()
# l = List()
# for i in l:
# print(i)
# t1 = Translations()
# t2 = Translations()
# t2.items = {1: 2}
# for i in t1 + t2:
# print(i)
/* app css stylesheet */
/*
* Class names corresponding to server-side list names
* To display another list name, please add a new class under this
*/
.MiamList {
color: black;
background-color: rgba(60, 118, 61, 0.5);
cursor: pointer;
}
.StopList {
color: black;
background-color: rgba(169, 68, 66, 0.2);
cursor: pointer;
}
.delete-keyword, .occurrences {
vertical-align: super;
font-size: 70%;
......@@ -27,27 +43,9 @@
border-bottom: none;
}
.miamword {
color: black;
background-color: rgba(60, 118, 61, 0.5);
cursor: pointer;
}
.stopword {
color: black;
background-color: rgba(169, 68, 66, 0.2);
cursor: pointer;
}
.global-stopword {
color: black;
background-color: rgba(169, 68, 66, 0.05);
cursor: pointer;
}
.main-panel, .text-panel, .words-panel {
height: 800px;
margin: 10px 0px;
margin: 10px 0;
}
.text-panel {
......@@ -59,11 +57,7 @@
height: 250px;
}
.keyword-container {
/*display: inline-block;*/
}
.keyword {
.keyword-text {
word-break: break-all;
}
......
(function () {
'use strict';
/*
* Django STATIC_URL given to angular to load async resources
*/
var S = window.STATIC_URL;
window.annotationsApp = angular.module('annotationsApp', ['annotationsAppHttp']);
/*
* Angular Templates must not conflict with Django's
*/
window.annotationsApp.config(function($interpolateProvider) {
$interpolateProvider.startSymbol('{[{');
$interpolateProvider.endSymbol('}]}');
});
/*
* Template of the ngram element displayed in the flat lists (called "extra-text")
*/
window.annotationsApp.directive('keywordTemplate', function () {
return {
templateUrl: function ($element, $attributes) {
......@@ -18,25 +27,36 @@
};
});
/*
* For ngram elements displayed in the flat lists (called "extra-text")
*/
window.annotationsApp.controller('ExtraAnnotationController',
['$scope', '$rootScope', '$element', 'NgramHttpService',
function ($scope, $rootScope, $element, NgramHttpService) {
// TODO use the tooltip ?
/*
* Click on the 'delete' cross button
*/
$scope.onDeleteClick = function () {
NgramHttpService.delete({
'listId': $scope.keyword.list_id,
'ngramId': $scope.keyword.uuid
}).$promise.then(function(data) {
}, function(data) {
$.each($rootScope.annotations, function(index, element) {
if (element.list_id == $scope.keyword.list_id && element.uuid == $scope.keyword.uuid) {
$rootScope.annotations.splice(index, 1);
return false;
}
});
}, function(data) {
console.log(data);
console.error("unable to delete the Ngram " + $scope.keyword.uuid);
});
}
};
}]);
/*
* For mouse selection on the text
*/
window.annotationsApp.controller('AnnotationController',
['$scope', '$rootScope', '$element',
function ($scope, $rootScope, $element) {
......@@ -57,14 +77,9 @@
};
}]);
window.annotationsApp.directive('selectionTemplate', function () {
return {
templateUrl: function ($element, $attributes) {
return S + 'annotations/selection_tpl.html';
}
};
});
/*
* Controller of the menu over the current mouse selection
*/
window.annotationsApp.controller('AnnotationMenuController',
['$scope', '$rootScope', '$element', '$timeout', 'NgramHttpService',
function ($scope, $rootScope, $element, $timeout, NgramHttpService) {
......@@ -87,9 +102,12 @@
}
return false;
}
// we only need one singleton at a time
var selection = getSelected();
/*
* When mouse selection is started, we highlight it
*/
function toggleSelectionHighlight(text) {
if (text.trim() !== "") {
$(".text-panel").addClass("selection");
......@@ -98,53 +116,68 @@
}
}
/*
* Dynamically construct the selection menu scope
*/
function toggleMenu(context, annotation) {
$timeout(function() {
$scope.$apply(function() {
var miamlist_id = _.invert($rootScope.activeLists).MiamList;
var stoplist_id = _.invert($rootScope.activeLists).StopList;
// variable used in onClick
$scope.selection_text = angular.copy(annotation);
if (angular.isObject(annotation)) {
$scope.level = angular.copy(annotation.level || 'global');
$scope.category = $rootScope.lists[annotation.list_id].toLowerCase();
$scope.listId = angular.copy(annotation.list_id);
// used in onClick
$scope.selection_text = angular.copy(annotation);
if ($scope.category == "miamlist") {
$scope.local_miamlist = false;
$scope.global_stoplist = true;
$scope.local_stoplist = true;
} else if ($scope.category == "stoplist") {
if ($scope.level == "local") {
$scope.local_stoplist = false;
$scope.global_stoplist = true;
// existing ngram
// Delete from the current list
$scope.menuItems = [
{
'action': 'delete',
'listId': annotation.list_id,
'verb': 'Delete from',
'listName': $rootScope.lists[annotation.list_id]
}
if ($scope.level == "global") {
$scope.global_stoplist = false;
$scope.local_stoplist = true;
}
$scope.local_miamlist = true;
];
if ($rootScope.lists[annotation.list_id] == "MiamList") {
// Add to the alternative list
$scope.menuItems.push({
'action': 'post',
'listId': stoplist_id,
'verb': 'Add to',
'listName': $rootScope.lists[stoplist_id]
});
} else if ($rootScope.lists[annotation.list_id] == "StopList") {
// Add to the alternative list
$scope.menuItems.push({
'action': 'post',
'listId': miamlist_id,
'verb': 'Add to',
'listName': $rootScope.lists[miamlist_id]
});
}
// show menu
// show the menu
$element.fadeIn(100);
}
else if (annotation.trim() !== "") {
$scope.selection_text = angular.copy(annotation);
$scope.level = "New Ngram from selection";
$scope.category = null;
$scope.local_miamlist = true;
$scope.local_stoplist = true;
$scope.global_stoplist = true;
// show menu
} else if (annotation.trim() !== "") {
// new ngram
$scope.menuItems = [
{
'action': 'post',
'listId': miamlist_id,
'verb': 'Add to',
'listName': $rootScope.activeLists[miamlist_id]
}
];
// show the menu
$element.fadeIn(100);
} else {
// close menu
$scope.menuItems = [];
// close the menu
$element.fadeOut(100);
}
});
});
}
var elt = $(".text-panel")[0];
var pos = $(".text-panel").position();
function positionElement(context, x, y) {
......@@ -157,18 +190,26 @@
positionElement(null, e.pageX, e.pageY);
}
// TODO is mousedown necessary ?
$(".text-panel").mousedown(function(){
$(".text-panel").mousemove(positionMenu);
/*
* Dynamically position the menu
*/
$(".text-container").mousedown(function(){
$(".text-container").mousemove(positionMenu);
});
$(".text-panel").mouseup(function(){
$(".text-panel").unbind("mousemove", positionMenu);
/*
* Finish positioning the menu then display the menu
*/
$(".text-container").mouseup(function(){
$(".text-container").unbind("mousemove", positionMenu);
toggleSelectionHighlight(selection.toString().trim());
toggleMenu(null, selection.toString().trim());
});
$(".text-panel").delegate(':not("#selection")', "click", function(e) {
/*
* Toggle the menu when clicking on an existing ngram keyword
*/
$(".text-container").delegate(':not("#selection")', "click", function(e) {
if ($(e.target).hasClass("keyword-inline")) return;
positionMenu(e);
toggleSelectionHighlight(selection.toString().trim());
......@@ -178,69 +219,94 @@
$rootScope.$on("positionAnnotationMenu", positionElement);
$rootScope.$on("toggleAnnotationMenu", toggleMenu);
$scope.onClick = function($event, action, listId, level) {
/*
* Menu click action
*/
$scope.onMenuClick = function($event, action, listId) {
if (angular.isObject($scope.selection_text)) {
// delete from the current list
// action on an existing Ngram
NgramHttpService[action]({
'listId': listId,
'ngramId': $scope.selection_text.uuid
}).$promise.then(function(data) {
}, function(data) {
$.each($rootScope.annotations, function(index, element) {
if (element.list_id == listId && element.uuid == $scope.selection_text.uuid) {
$rootScope.annotations.splice(index, 1);
return false;
}
});
});
}, function(data) {
console.log(data);
console.error("unable to edit the Ngram " + $scope.selection_text);
}
);
} else if ($scope.selection_text.trim() !== "") {
// new annotation from selection
NgramHttpService.post(
{
'listId': listId
'listId': listId,
'ngramId': 'new'
},
{'annotation' : {'text': $scope.selection_text.trim()}}
).$promise.then(function(data) {
$rootScope.annotations.push(data);
});
{
'annotation' : {'text': $scope.selection_text.trim()}
}, function(data) {
$rootScope.annotations.push(data);
}, function(data) {
console.log(data);
console.error("unable to edit the Ngram " + $scope.selection_text);
}
);
}
// hide selection highlighted text and the menu
// hide the highlighted text and the menu
$(".text-panel").removeClass("selection");
$element.fadeOut(100);
};
}
]);
/*
* Text highlighting controller
*/
window.annotationsApp.controller('IntraTextController',
['$scope', '$rootScope', '$compile', 'NgramHttpService',
function ($scope, $rootScope, $compile, NgramHttpService) {
$scope.extra_stoplist = [];
$scope.extra_miamlist = [];
$scope.currentStopPage = 0;
$scope.currentMiamPage = 0;
$scope.pageSize = 15;
var counter = 0;
/*
* Replace the text by and html template
* Replace the text by an html template for ngram keywords
*/
function replaceTextByTemplate(text, annotation, template, pattern, lists) {
function replaceTextByTemplate(text, ngram, template, pattern, lists) {
return text.replace(pattern, function(matched) {
var tpl = angular.element(template);
tpl.append(matched);
tpl.attr('title', annotation.tooltip_content);
tpl.attr('uuid', annotation.uuid);
if ('MiamList' == lists[annotation.list_id]) tpl.addClass("miamword");
if ('StopList' == lists[annotation.list_id]) tpl.addClass("stopword");
//if (annotation.category == 'stoplist' && annotation.level == 'global') tpl.addClass("global-stopword");
tpl.attr('title', ngram.tooltip_content);
tpl.attr('uuid', ngram.uuid);
/*
* Add CSS class depending on the list the ngram is into
* FIXME Lists names and css classes are fixed, can do better
*/
tpl.addClass(ngram.listName);
return tpl.get(0).outerHTML;
});
}
function compileText(annotations, fullText, abstractText, $rootScope) {
/*
* Sorts annotations on the number of words
* Required for overlapping ngrams
*/
function lengthSort(listitems, valuekey) {
    // Number of space-separated words in the compared field of an item.
    function wordCount(item) {
        return item[valuekey].split(" ").length;
    }
    // Sorts in place, items with more words first. Equal counts compare as 0:
    // the previous comparator returned 1 for ties, which made it inconsistent
    // (both a > b and b > a) and the order of ties engine-dependent.
    listitems.sort(function(a, b) {
        return wordCount(b) - wordCount(a);
    });
    return listitems;
}
/*
* Match and replace Ngram into the text
*/
function compileNgramsHtml(annotations, textMapping, $rootScope) {
// TODO remove this debug counter
counter = 0;
var templateBegin = "<span ng-controller='AnnotationController' ng-click='onClick($event)' class='keyword-inline'>";
var templateBeginRegexp = "<span ng-controller='AnnotationController' ng-click='onClick\(\$event\)' class='keyword-inline'>";
......@@ -251,97 +317,96 @@
var startPattern = "\\b((?:"+templateBeginRegexp+")*";
var middlePattern = "(?:<\/span>)*\\s(?:"+templateBeginRegexp+")*";
var endPattern = "(?:<\/span>)*)\\b";
/*
* Sorts annotations on the number of words
*/
function lengthSort(listitems, valuekey) {
    // Number of space-separated words in the compared field of an item.
    function wordCount(item) {
        return item[valuekey].split(" ").length;
    }
    // Sorts in place, items with more words first. Equal counts compare as 0:
    // the previous comparator returned 1 for ties, which made it inconsistent
    // (both a > b and b > a) and the order of ties engine-dependent.
    listitems.sort(function(a, b) {
        return wordCount(b) - wordCount(a);
    });
    return listitems;
}
var sortedSizeAnnotations = lengthSort(annotations, "text");
var extra_stoplist = [],
extra_miamlist = [];
var sortedSizeAnnotations = lengthSort(annotations, "text"),
extraNgramList = angular.copy($rootScope.extraNgramList);
_.each(sortedSizeAnnotations, function (annotation) {
// TODO better split to manage two-words with minus sign
annotation.category = $rootScope.lists[annotation.list_id].toLowerCase();
// reinitialize an empty list
extraNgramList = angular.forEach(extraNgramList, function(name, id) {
extraNgramList[id] = [];
});
angular.forEach(sortedSizeAnnotations, function (annotation) {
// exclude ngrams that are into inactive lists
if ($rootScope.activeLists[annotation.list_id] === undefined) return;
// used to setup css class
annotation.listName = $rootScope.lists[annotation.list_id];
// regexps
var words = annotation.text.split(" ");
var pattern = new RegExp(startPattern + words.join(middlePattern) + endPattern, 'gmi');
var textRegexp = new RegExp("\\b"+annotation.text+"\\b", 'igm');
var isDisplayedIntraText = false;
// highlight text as html
angular.forEach(textMapping, function(text, eltId) {
if (pattern.test(text) === true) {
textMapping[eltId] = replaceTextByTemplate(text, annotation, template, pattern, $rootScope.lists);
// TODO remove debug
counter++;
isDisplayedIntraText = true;
}
});
if (pattern.test(fullText) === true) {
fullText = replaceTextByTemplate(fullText, annotation, template, pattern, $rootScope.lists);
// TODO remove debug
counter++;
} else if (pattern.test(abstractText) === true) {
abstractText = replaceTextByTemplate(abstractText, annotation, template, pattern, $rootScope.lists);
counter++;
} else if (!textRegexp.test($rootScope.full_text) && !textRegexp.test($rootScope.abstract_text)) {
if (annotation.category == "stoplist") {
// Deactivated stoplist for the moment
// if ($.inArray(annotation.uuid, $scope.extra_stoplist.map(function (item) {
// return item.uuid;
// })) == -1) {
// extra_stoplist = lengthSort(extra_stoplist.concat(annotation), "text");
// }
} else if (annotation.category == "miamlist") {
if ($.inArray(annotation.uuid, $scope.extra_miamlist.map(function (item) {
return item.uuid;
})) == -1) {
extra_miamlist = lengthSort(extra_miamlist.concat(annotation), "text");
}
if (!isDisplayedIntraText) {
// add extra-text ngrams that are not already displayed
if ($.inArray(annotation.uuid, extraNgramList[annotation.list_id].map(function (item) {
return item.uuid;
})) == -1) {
// push the ngram and sort
extraNgramList[annotation.list_id] = extraNgramList[annotation.list_id].concat(annotation);
}
}
});
$scope.extra_stoplist = extra_stoplist;
$scope.extra_miamlist = extra_miamlist;
return {
'fullTextHtml': fullText,
'abstractTextHtml': abstractText
};
// update extraNgramList
$rootScope.extraNgramList = angular.forEach(extraNgramList, function(name, id) {
extraNgramList[id] = lengthSort(extraNgramList[id], 'text');
});
// return the object of element ID with the corresponding HTML
return textMapping;
}
/*
* Listen changes on the ngram data
*/
$rootScope.$watchCollection('annotations', function (newValue, oldValue) {
if ($rootScope.annotations === undefined) return;
if (angular.equals(newValue, oldValue)) return;
$rootScope.miamListId = _.invert($rootScope.lists)['MiamList'];
$rootScope.stopListId = _.invert($rootScope.lists)['StopList'];
// initialize extraNgramList
var extraNgramList = {};
$rootScope.extraNgramList = angular.forEach($rootScope.activeLists, function(name, id) {
this[id] = [];
}, extraNgramList);
$rootScope.extraNgramList = extraNgramList;
$scope.extra_stoplist = [];
$scope.extra_miamlist = [];
var result = compileText(
/*
* Transform text into HTML with highlighted ngrams
*/
var result = compileNgramsHtml(
$rootScope.annotations,
angular.copy($rootScope.full_text),
angular.copy($rootScope.abstract_text),
{
'#full-text': angular.copy($rootScope.full_text),
'#abstract-text': angular.copy($rootScope.abstract_text),
'#title': angular.copy($rootScope.title)
},
$rootScope
);
$.each($rootScope.annotations, function(index, element) {
if (element.list_id == $rootScope.stopListId) {
$scope.extra_stoplist.push(element);
} else if (element.list_id == $rootScope.miamListId) {
$scope.extra_miamlist.push(element);
}
// inject highlighted HTML
angular.forEach(result, function(html, eltId) {
angular.element(eltId).html(html);
});
angular.element('#full-text').html(result.fullTextHtml);
angular.element('#abstract-text').html(result.abstractTextHtml);
// inject one Angular controller on every highlighted text element
angular.element('.text-container').find('[ng-controller=AnnotationController]').each(function(idx, elt) {
angular.element(elt).replaceWith($compile(elt)($rootScope.$new(true)));
});
});
function submitNewAnnotation($event, inputEltId, listId) {
/*
* Add a new NGram from the user input in the extra-text list
*/
$scope.onListSubmit = function ($event, listId) {
var inputEltId = "#"+ listId +"-input";
if ($event.keyCode !== undefined && $event.keyCode != 13) return;
var value = $(inputEltId).val().trim();
if (value === "") return;
......@@ -350,47 +415,54 @@
'listId': listId,
'ngramId': 'new'
},
{'annotation' : {'text': value}},
{
'annotation' : {'text': value}
},
function(data) {
// on success
if (data) {
$rootScope.annotations.push(data);
$(inputEltId).val("");
}
});
}, function(data) {
// on error
$(inputEltId).parent().addClass("has-error");
console.error("error adding Ngram "+ value);
}
);
};
$(inputEltId).val("");
}
}
]);
$scope.onMiamlistSubmit = function ($event) {
submitNewAnnotation($event, "#miamlist-input", _.invert($rootScope.lists)['MiamList']);
};
// TODO refactor
$scope.onStoplistSubmit = function ($event) {
submitNewAnnotation($event, "#stoplist-input", _.invert($rootScope.lists)['MiamList']);
};
$scope.numStopPages = function () {
if ($scope.extra_stoplist === undefined) return 0;
return Math.ceil($scope.extra_stoplist.length / $scope.pageSize);
};
$scope.numMiamPages = function () {
if ($scope.extra_miamlist === undefined) return 0;
return Math.ceil($scope.extra_miamlist.length / $scope.pageSize);
};
$scope.nextMiamPage = function() {
$scope.currentMiamPage = $scope.currentMiamPage + 1;
};
$scope.previousMiamPage = function() {
$scope.currentMiamPage = $scope.currentMiamPage - 1;
/*
* Controller for one List Tab displaying extra-text ngram
*/
window.annotationsApp.controller('ExtraTextPaginationController',
['$scope', '$rootScope', function ($scope, $rootScope) {
$rootScope.$watchCollection('extraNgramList', function (newValue, oldValue) {
$scope.currentListPage = 0;
$scope.pageSize = 15;
$scope.nextListPage = function() {
$scope.currentListPage = $scope.currentListPage + 1;
};
$scope.nextStopPage = function() {
$scope.currentStopPage = $scope.currentStopPage + 1;
$scope.previousListPage = function() {
$scope.currentListPage = $scope.currentListPage - 1;
};
$scope.previousStopPage = function() {
$scope.currentStopPage = $scope.currentStopPage - 1;
$scope.totalListPages = function (listId) {
if ($rootScope.extraNgramList[listId] === undefined) return 0;
return Math.ceil($rootScope.extraNgramList[listId].length / $scope.pageSize);
};
}
]);
});
}]);
/*
* Filter used in Ngram flat lists pagination (extra-text panel)
*/
window.annotationsApp.filter('startFrom', function () {
return function (input, start) {
if (input === undefined) return;
......@@ -405,28 +477,32 @@
$rootScope.documentResource = DocumentHttpService.get(
{'docId': $rootScope.docId},
function(data, responseHeaders) {
$scope.title = data.title;
$scope.authors = data.authors;
$scope.journal = data.journal;
$scope.publication_date = data.publication_date;
// TODO this data has to be deleted
//$scope.current_page_number = data.current_page_number;
//$scope.last_page_number = data.last_page_number;
// put in rootScope because used by many components
$rootScope.title = data.title;
$rootScope.docId = data.id;
$rootScope.full_text = data.full_text;
$rootScope.abstract_text = data.abstract_text;
// GET the annotations
// TODO
// GET the annotations
$rootScope.annotationsResource = NgramListHttpService.get(
{'corpusId': $rootScope.corpusId, 'docId': $rootScope.docId}
).$promise.then(function(data) {
$rootScope.annotations = data[$rootScope.corpusId.toString()][$rootScope.docId.toString()];
$rootScope.lists = data[$rootScope.corpusId.toString()]['lists'];
});
{
'corpusId': $rootScope.corpusId,
'docId': $rootScope.docId
},
function(data) {
$rootScope.annotations = data[$rootScope.corpusId.toString()][$rootScope.docId.toString()];
$rootScope.lists = data[$rootScope.corpusId.toString()].lists;
// TODO active list selection controller
$rootScope.activeLists = angular.copy($rootScope.lists);
$rootScope.mainListId = _.invert($rootScope.activeLists).MiamList;
}
);
});
// TODO setup pagination client-side
// TODO setup article pagination
$scope.onPreviousClick = function () {
DocumentHttpService.get($scope.docId - 1);
};
......@@ -435,8 +511,11 @@
};
}]);
/*
* Main function
* GET the document node and all its ngrams
*/
window.annotationsApp.run(function ($rootScope) {
/* GET the document node and all the annotations in the list associated */
var path = window.location.pathname.match(/\/project\/(.*)\/corpus\/(.*)\/document\/(.*)\//);
$rootScope.projectId = path[1];
$rootScope.corpusId = path[2];
......
......@@ -57,11 +57,11 @@
{
post: {
method: 'POST',
params: {'listId': '@listId', 'ngramId': ''}
params: {'listId': '@listId', 'ngramId': '@ngramId'}
},
delete: {
method: 'DELETE',
params: {'listId': '@listId', 'ngramId': '@id'}
params: {'listId': '@listId', 'ngramId': '@ngramId'}
}
}
);
......
<span ng-if="keyword.category == 'miamlist'" ng-click='onDeleteClick()' class="delete-keyword" data-keyword-id="{[{keyword.uuid}]}" data-keyword-text="{[{keyword.text}]}" data-keyword-category="miamlist">×</span>
<span ng-if="keyword.category == 'miamlist'" data-toggle="tooltip" class="keyword miamword">{[{keyword.text}]}</span>
<span ng-if="keyword.category == 'stoplist'" ng-click='onDeleteClick()' class="delete-keyword" data-keyword-id="{[{keyword.uuid}]}" data-keyword-text="{[{keyword.text}]}" data-keyword-category="stoplist">×</span>
<span ng-if="keyword.category == 'stoplist'" data-toggle="tooltip" class="keyword stopword">{[{keyword.text}]}</span>
<span ng-click='onDeleteClick()' class="delete-keyword">×</span>
<span data-toggle="tooltip" class="keyword-text {[{keyword.listName}]}">{[{keyword.text}]}</span>
<span class="occurrences" data-keyword-id="{[{keyword.uuid}]}">{[{keyword.occurrences}]}</span>
<ul class="noselection">
<li class="miamword" ng-if="local_miamlist === true" ng-click="onClick($event, 'post', miamListId, 'local')">add to miam-list</li>
<li class="miamword" ng-if="local_miamlist === false" ng-click="onClick($event, 'delete', miamListId, 'local')">remove from miam-list</li>
<li class="stopword" ng-if="local_stoplist === true" ng-click="onClick($event, 'post', stopListId, 'local')">add to stop-list</li>
<li class="stopword" ng-if="local_stoplist === false" ng-click="onClick($event, 'delete', stopListId, 'local')">remove from stop-list</li>
<!--<li class="stopword" ng-if="global_stoplist === true" ng-click="onClick($event, 'post', 'stoplist', 'global')">add to global stop-list</li>
<li class="stopword" ng-if="global_stoplist === false" ng-click="onClick($event, 'delete', 'stoplist', 'global')">remove from global stop-list</li>-->
</ul>
......@@ -9,8 +9,8 @@
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<title>Gargantext corpus annotations editor</title>
<meta name="description" content="">
<title>Gargantext article editor</title>
<meta name="description" content="Gargantext">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="{% static 'bower_components/bootstrap/dist/css/bootstrap.min.css' %}">
<link rel="stylesheet" href="{% static 'bower_components/angular/angular-csp.css' %}">
......@@ -18,51 +18,38 @@
<script src="{% static 'bower_components/jquery/dist/jquery.min.js' %}"></script>
</head>
<body>
<!-- TODO integrate this later into the corpus.html django template -->
<!-- TODO integrate this later into any other Django template -->
<div id="annotationsApp">
<div class="container-fluid">
<div class="row-fluid main-panel" ng-controller="IntraTextController">
<div class="col-md-4 col-xs-4 tabbable words-panel">
<ul class="nav nav-pills nav-justified">
<li class="active"><a href="#tab1" data-toggle="tab">Miamwords</a></li>
<li><a href="#tab2" data-toggle="tab">Stopwords</a></li>
<li ng-repeat="(listId, listName) in activeLists" ng-class="{active: $first == true}">
<a href="#tab-{[{listId}]}" data-toggle="tab">{[{listName}]}</a>
</li>
</ul>
<div class="tab-content">
<div class="tab-pane active" id="tab1">
<div ng-if="extra_miamlist.length == 0" class="alert alert-info" role="alert">No extra text miam-word yet</div>
<ul class="list-group words-list">
<li ng-repeat="keyword in extra_miamlist | startFrom:currentMiamPage*pageSize | limitTo:pageSize" class="list-group-item">
<div ng-controller="ExtraAnnotationController" keyword-template class="keyword-container"></div>
</li>
</ul>
<nav ng-class="{invisible: numMiamPages() - 1 == 0}" class="clearfix">
<ul class="pagination pagination-s pull-right words-pagination">
<li ng-class="{disabled: currentMiamPage == 0}"><a ng-click="previousMiamPage()" class="glyphicon glyphicon-backward"></a></li>
<li ng-class="{disabled: currentMiamPage >= numMiamPages()-1}"><a ng-click="nextMiamPage()" class="glyphicon glyphicon-forward"></a></li>
</ul>
</nav>
<div class="form-group">
<input type="text" class="form-control" id="miamlist-input" ng-keypress="onMiamlistSubmit($event)">
<button type="submit" class="btn btn-default btn-primary" ng-click="onMiamlistSubmit($event)">Add</button>
<div ng-controller="ExtraTextPaginationController" ng-repeat="(listId, listName) in activeLists" ng-class="{active: $first == true}" class="tab-pane" id="tab-{[{listId}]}">
<div ng-if="extraNgramList[listId].length == 0" class="alert alert-info" role="alert">
Input any keyword you want to link to this article and the list named '{[{listName}]}'
</div>
</div>
<div class="tab-pane" id="tab2">
<div ng-if="extra_stoplist.length == 0" class="alert alert-info" role="alert">No extra text stop-word yet</div>
<ul class="list-group words-list">
<li ng-repeat="keyword in extra_stoplist | startFrom:currentStopPage*pageSize | limitTo:pageSize" class="list-group-item">
<ul class="list-group words-list clearfix">
<li ng-repeat="keyword in extraNgramList[listId] | startFrom:currentListPage * pageSize | limitTo:pageSize" class="list-group-item">
<div ng-controller="ExtraAnnotationController" keyword-template class="keyword-container"></div>
</li>
</ul>
<nav ng-class="{invisible: numStopPages() - 1 == 0}" class="clearfix">
<nav ng-class="{invisible: totalListPages - 1 == 0}" class="clearfix">
<ul class="pagination pagination-s pull-right words-pagination">
<li ng-class="{disabled: currentStopPage == 0}"><a ng-click="previousMiamPage()" class="glyphicon glyphicon-backward"></a></li>
<li ng-class="{disabled: currentStopPage >= numStopPages()-1}"><a ng-click="nextStopPage()" class="glyphicon glyphicon-forward"></a></li>
<li ng-class="{disabled: currentListPage == 0}"><a ng-click="previousListPage()" class="glyphicon glyphicon-backward"></a></li>
<li ng-class="{disabled: currentListPage >= totalListPages - 1}"><a ng-click="nextListPage()" class="glyphicon glyphicon-forward"></a></li>
</ul>
</nav>
<div class="form-group">
<input type="text" class="form-control" id="stoplist-input" ng-keypress="onStoplistSubmit($event)">
<button type="submit" class="btn btn-default btn-primary" ng-click="onStoplistSubmit($event)">Add</button>
<input type="text" class="form-control" id="{[{ listId }]}-input" ng-keypress="onListSubmit($event, listId)">
<button type="submit" class="form-control btn btn-default btn-primary" ng-click="onListSubmit($event, listId)">Add to {[{listName}]}</button>
</div>
</div>
</div>
......@@ -70,7 +57,7 @@
<div class="col-md-8 col-xs-8 text-panel" ng-controller="DocController" id="document">
<div class="row-fluid clearfix">
<div class="col-md-7 col-xs-7">
<h3>{[{title}]}</h3>
<h3 class="text-container" id="title">{[{title}]}</h3>
</div>
<div class="col-md-5 col-xs-5">
<nav>
......@@ -90,17 +77,21 @@
</div>
<h4 ng-if="abstract_text != null">Abstract</h4>
<p id="abstract-text" class="text-container">
<div ng-if="abstract_text == null" class="alert alert-info" role="alert">No abstract text</div>
<div ng-if="abstract_text == null" class="alert alert-info" role="alert">Empty abstract text</div>
</p>
<h4 ng-if="full_text != null">Full Article</h4>
<p id="full-text" class="text-container">
<div ng-if="full_text == null" class="alert alert-info" role="alert">No full text</div>
<div ng-if="full_text == null" class="alert alert-info" role="alert">Empty full text</div>
</p>
</div>
</div> <!-- end of the main row -->
</div>
<!-- this menu is over the text -->
<div ng-controller="AnnotationMenuController" id="selection" class="selection-menu" selection-template></div>
<!-- this menu is over the text on mouse selection -->
<div ng-controller="AnnotationMenuController" id="selection" class="selection-menu">
<ul class="noselection">
<li ng-repeat="item in menuItems" class="{[{item.listName}]}" ng-click="onMenuClick($event, item.action, item.listId)">{[{item.verb}]} {[{item.listName}]}</li>
</ul>
</div>
</div>
<!--[if lt IE 7]>
<p class="browsehappy">You are using an <strong>outdated</strong> browser. Please <a href="http://browsehappy.com/">upgrade your browser</a> to improve your experience.</p>
......
......@@ -40,15 +40,16 @@ class NgramList(APIView):
"""Get All for a doc id"""
corpus_id = int(corpus_id)
doc_id = int(doc_id)
lists = dict()
lists = {}
for list_type in ['MiamList', 'StopList']:
list_id = list()
list_id = listIds(user_id=request.user.id, corpus_id=int(corpus_id), typeList=list_type)
lists["%s" % list_id[0][0]] = list_type
# ngrams of list_id of corpus_id:
doc_ngram_list = listNgramIds(corpus_id=corpus_id, doc_id=doc_id, user_id=request.user.id)
#doc_ngram_list = [(1, 'miam', 2, 1931), (2, 'stop', 2, 1932), (3, 'Potassium channels', 4, 1931)]
doc_ngram_list = [(i, 'miam', i, 1931) for i in range(500)]
doc_ngram_list += [(i, 'stop', i, 1932) for i in range(501, 600)]
# doc_ngram_list = [(1, 'miam', 2, 1931), (2, 'stop', 2, 1932), (3, 'Potassium channels', 4, 1931)]
data = { '%s' % corpus_id : {
'%s' % doc_id : [
......
......@@ -7,17 +7,29 @@ from sqlalchemy import text, distinct, or_
from sqlalchemy.sql import func
from sqlalchemy.orm import aliased
import datetime
import copy
from gargantext_web.views import move_to_trash
from gargantext_web.db import *
from gargantext_web.validation import validate, ValidationException
from node import models
def DebugHttpResponse(data):
return HttpResponse('<html><body style="background:#000;color:#FFF"><pre>%s</pre></body></html>' % (str(data), ))
import json
class JSONEncoder(json.JSONEncoder):
    """JSON encoder that also knows how to serialize ``datetime.datetime``.

    A datetime is rendered as the first 19 characters of its ISO-8601 form
    (``YYYY-MM-DDTHH:MM:SS``) followed by a literal ``Z``. Any other value the
    standard encoder cannot handle is deferred to the base class, which raises
    ``TypeError``.
    """

    def default(self, obj):
        """Return a JSON-serializable replacement for ``obj``."""
        if isinstance(obj, datetime.datetime):
            # Slicing to 19 characters drops microseconds and any UTC offset.
            return obj.isoformat()[:19] + 'Z'
        # Name the class explicitly: ``super(self.__class__, self)`` resolves
        # to this very method again as soon as the encoder is subclassed,
        # which ends in unbounded recursion.
        return super(JSONEncoder, self).default(obj)
json_encoder = JSONEncoder(indent=4)
def JsonHttpResponse(data, status=200):
return HttpResponse(
content = json.dumps(data, indent=4),
content = json_encoder.encode(data),
content_type = 'application/json; charset=utf-8',
status = status
)
......@@ -54,7 +66,7 @@ class APIException(_APIException):
self.detail = message
_operators = {
_operators_dict = {
"=": lambda field, value: (field == value),
"!=": lambda field, value: (field != value),
"<": lambda field, value: (field < value),
......@@ -65,6 +77,14 @@ _operators = {
"contains": lambda field, value: (field.contains(value)),
"startswith": lambda field, value: (field.startswith(value)),
}
_hyperdata_list = [
hyperdata
for hyperdata in session.query(Hyperdata).order_by(Hyperdata.name)
]
_hyperdata_dict = {
hyperdata.name: hyperdata
for hyperdata in _hyperdata_list
}
from rest_framework.decorators import api_view
......@@ -75,6 +95,7 @@ def Root(request, format=None):
'snippets': reverse('snippet-list', request=request, format=format)
})
class NodesChildrenNgrams(APIView):
def get(self, request, node_id):
......@@ -117,6 +138,7 @@ class NodesChildrenNgrams(APIView):
],
})
class NodesChildrenDuplicates(APIView):
def _fetch_duplicates(self, request, node_id, extra_columns=None, min_count=1):
......@@ -205,6 +227,7 @@ class NodesChildrenDuplicates(APIView):
'deleted': count
})
class NodesChildrenMetatadata(APIView):
def get(self, request, node_id):
......@@ -264,49 +287,98 @@ class NodesChildrenMetatadata(APIView):
'data': collection,
})
class NodesChildrenQueries(APIView):
def _parse_filter(self, filter):
    """Check one filter description and split it into its usable parts.

    ``filter`` must hold exactly the keys "field", "operator" and "value".

    Returns a 3-tuple: the field name split on dots (a list of strings), the
    entry registered for the operator in ``_operators``, and the value as
    given.

    Raises ``APIException`` (constructed with 400) when the keys, the
    operator, the value type or the field name are not acceptable.
    """
    # the filter must carry these keys, no more, no less
    filter_keys = {'field', 'operator', 'value'}
    if set(filter) != filter_keys:
        raise APIException('Every filter should have exactly %d keys: "%s"'% (len(filter_keys), '", "'.join(filter_keys)), 400)
    raw_field = filter['field']
    operator = filter['operator']
    value = filter['value']

    # only registered operators are accepted
    if operator not in _operators:
        raise APIException('Invalid operator: "%s"'% (operator, ), 400)

    # "in" expects an array of scalars, every other operator a single scalar
    scalar_types = (int, float, str)
    if operator != 'in':
        if not isinstance(value, scalar_types):
            raise APIException('Parameter "value" should be a number or string when using operator "%s"'% (operator, ), 400)
    elif not isinstance(value, list):
        raise APIException('Parameter "value" should be an array when using operator "%s"'% (operator, ), 400)
    elif not all(isinstance(item, scalar_types) for item in value):
        raise APIException('Parameter "value" should be an array of numbers or strings when using operator "%s"'% (operator, ), 400)

    # the field reads "object.key"; None means any key is allowed for that object
    field_objects = {
        'hyperdata': None,
        'ngrams': ['terms', 'n'],
    }
    field_parts = raw_field.split('.')
    if len(field_parts) < 2 or field_parts[0] not in field_objects:
        raise APIException('Parameter "field" should be a in the form "object.key", where "object" takes one of the following values: "%s". "%s" was found instead' % ('", "'.join(field_objects), '.'.join(field_parts)), 400)
    allowed_keys = field_objects[field_parts[0]]
    if allowed_keys is not None and field_parts[1] not in allowed_keys:
        raise APIException('Invalid key for "%s" in parameter "field", should be one of the following values: "%s". "%s" was found instead' % (field_parts[0], '", "'.join(allowed_keys), field_parts[1]), 400)

    return field_parts, _operators[operator], value
def _count_documents(self, query):
return {
'fields': []
}
def _sql(self, input, node_id):
    """Answer a query through SQLAlchemy, over the children of ``node_id``.

    ``input`` is read as a dict with these entries:
      - "retrieve": holds "fields" (list of field names to select) and
        "aggregate" (when truthy, group by every selected field whose name
        does not end with ".count");
      - "filters": list of dicts with "field", "operator" and "value";
      - "sort": list of field names, each optionally suffixed with "+" or
        "-" ("-" sorts in descending order);
      - "pagination": holds "offset" and "limit" (a falsy limit means
        "everything from the offset on").

    Only nodes whose ``parent_id`` equals ``node_id`` are considered.

    Returns a deep copy of ``input`` with ``pagination["total"]`` set to
    ``query.count()`` and ``results`` set to the requested slice of rows.
    """
    fields = {}
    # A set literal: set('nodes') would yield the single characters instead.
    tables = {'nodes'}
    hyperdata_aliases = {}

    # Split the optional direction marker off every sort entry once, so the
    # same bare name is used both to resolve the field and to look it up.
    sort_specs = []
    for sort_name in input['sort']:
        marker = sort_name[-1:]
        if marker in ('+', '-'):
            sort_name = sort_name[:-1]
        sort_specs.append((sort_name, marker == '-'))

    # retrieve all unique fields names
    fields_names = set(input['retrieve']['fields'])
    fields_names.update(filter_['field'] for filter_ in input['filters'])
    fields_names.update(sort_name for sort_name, _ in sort_specs)

    # relate fields to their respective ORM counterparts
    for field_name in fields_names:
        parts = field_name.split('.')
        field = None
        if len(parts) == 1:
            field = getattr(Node, field_name)
        elif field_name == 'nodes.count':
            field = func.count(Node.id)
        elif field_name == 'ngrams.count':
            # Must be tested before the generic "ngrams" case, which would
            # otherwise try to read a "count" column on Ngram.
            field = func.count(Ngram.id)
            tables.add('ngrams')
        elif parts[0] == 'ngrams':
            # The column name is the part after the dot, not the full name.
            field = getattr(Ngram, parts[1])
            tables.add('ngrams')
        elif parts[0] == 'hyperdata':
            hyperdata = _hyperdata_dict[parts[1]]
            # One alias of Node_Hyperdata per hyperdata, shared by all fields
            # that refer to it.
            if hyperdata not in hyperdata_aliases:
                hyperdata_aliases[hyperdata] = aliased(Node_Hyperdata)
            hyperdata_alias = hyperdata_aliases[hyperdata]
            field = getattr(hyperdata_alias, 'value_%s' % hyperdata.type)
            if len(parts) == 3:
                # Third part is the truncation unit, e.g. "day".
                field = func.date_trunc(parts[2], field)
        fields[field_name] = field

    # build query: selected fields
    query = session.query(
        *(fields[field_name] for field_name in input['retrieve']['fields'])
    )

    # build query: selected tables
    query = query.select_from(Node)
    if 'ngrams' in tables:
        query = (query
            .join(Node_Ngram, Node_Ngram.node_id == Node.id)
            .join(Ngram, Ngram.id == Node_Ngram.ngram_id)
        )
    for hyperdata, hyperdata_alias in hyperdata_aliases.items():
        query = (query
            .join(hyperdata_alias, hyperdata_alias.node_id == Node.id)
            .filter(hyperdata_alias.hyperdata_id == hyperdata.id)
        )

    # build query: filtering
    query = query.filter(Node.parent_id == node_id)
    for filter_ in input['filters']:
        compare = _operators_dict[filter_['operator']]
        query = query.filter(compare(fields[filter_['field']], filter_['value']))

    # build query: aggregations
    if input['retrieve']['aggregate']:
        for field_name in input['retrieve']['fields']:
            if not field_name.endswith('.count'):
                query = query.group_by(fields[field_name])

    # build query: sorting
    for sort_name, descending in sort_specs:
        sort_field = fields[sort_name]
        query = query.order_by(sort_field.desc() if descending else sort_field)

    # build and return result
    offset = input['pagination']['offset']
    limit = input['pagination']['limit']
    output = copy.deepcopy(input)
    output['pagination']['total'] = query.count()
    output['results'] = list(
        query[offset:offset + limit] if limit else query[offset:]
    )
    return output
def _haskell(self, input, node_id):
output = copy.deepcopy(input)
output['pagination']['total'] = 0
output['results'] = list()
return output
def post(self, request, node_id):
""" Query the children of the given node.
......@@ -348,199 +420,53 @@ class NodesChildrenQueries(APIView):
}
"""
hyperdata_aliases = {}
# validate query
query_fields = {'pagination', 'retrieve', 'sort', 'filters'}
for key in request.DATA:
if key not in query_fields:
raise APIException('Unrecognized field "%s" in query object. Accepted fields are: "%s"' % (key, '", "'.join(query_fields)), 400)
# selecting info
if 'retrieve' not in request.DATA:
raise APIException('The query should have a "retrieve" parameter.', 400)
retrieve = request.DATA['retrieve']
retrieve_types = {'fields', 'aggregates'}
if 'type' not in retrieve:
raise APIException('In the query\'s "retrieve" parameter, a "type" should be specified. Possible values are: "%s".' % ('", "'.join(retrieve_types), ), 400)
if 'list' not in retrieve or not isinstance(retrieve['list'], list):
raise APIException('In the query\'s "retrieve" parameter, a "list" should be provided as an array', 400)
if retrieve['type'] not in retrieve_types:
raise APIException('Unrecognized "type": "%s" in the query\'s "retrieve" parameter. Possible values are: "%s".' % (retrieve['type'], '", "'.join(retrieve_types), ), 400)
if retrieve['type'] == 'fields':
fields_names = ['id'] + retrieve['list'] if 'id' not in retrieve['list'] else retrieve['list']
elif retrieve['type'] == 'aggregates':
fields_names = list(retrieve['list'])
fields_list = []
for field_name in fields_names:
split_field_name = field_name.split('.')
if split_field_name[0] == 'hyperdata':
hyperdata = session.query(Hyperdata).filter(Hyperdata.name == split_field_name[1]).first()
if hyperdata is None:
hyperdata_query = session.query(Hyperdata.name).order_by(Hyperdata.name)
hyperdata_names = [hyperdata.name for hyperdata in hyperdata_query.all()]
raise APIException('Invalid key for "%s" in parameter "field", should be one of the following values: "%s". "%s" was found instead' % (field[0], '", "'.join(hyperdata_names), field[1]), 400)
# check or create Node_Hyperdata alias; join if necessary
if hyperdata.id in hyperdata_aliases:
hyperdata_alias = hyperdata_aliases[hyperdata.id]
else:
hyperdata_alias = hyperdata_aliases[hyperdata.id] = aliased(Node_Hyperdata)
field = getattr(hyperdata_alias, 'value_' + hyperdata.type)
# operation on field
if len(split_field_name) > 2:
# datetime truncation
if hyperdata.type == 'datetime':
datepart = split_field_name[2]
accepted_dateparts = ['year', 'month', 'day', 'hour', 'minute']
if datepart not in accepted_dateparts:
raise APIException('Invalid date truncation for "%s": "%s". Accepted values are: "%s".' % (split_field_name[1], split_field_name[2], '", "'.join(accepted_dateparts), ), 400)
# field = extract(datepart, field)
field = func.date_trunc(datepart, field)
# field = func.date_trunc(text('"%s"'% (datepart,)), field)
else:
authorized_field_names = {'id', 'name', }
authorized_aggregates = {
'nodes.count': func.count(Node.id),
'ngrams.count': func.count(Ngram.id),
}
if retrieve['type'] == 'aggregates' and field_name in authorized_aggregates:
field = authorized_aggregates[field_name]
elif field_name in authorized_field_names:
field = getattr(Node, field_name)
else:
raise APIException('Unrecognized "field": "%s" in the query\'s "retrieve" parameter. Possible values are: "%s".' % (field_name, '", "'.join(authorized_field_names), ))
fields_list.append(
field.label(
field_name if '.' in field_name else 'node.' + field_name
)
)
# starting the query!
document_type_id = cache.NodeType['Document'].id ##session.query(NodeType.id).filter(NodeType.name == 'Document').scalar()
query = (session
.query(*fields_list)
.select_from(Node)
.filter(Node.type_id == document_type_id)
.filter(Node.parent_id == node_id)
)
# join ngrams if necessary
if 'ngrams.count' in fields_names:
query = (query
.join(Node_Ngram, Node_Ngram.node_id == Node.id)
.join(Ngram, Ngram.id == Node_Ngram.ngram_id)
)
# join hyperdata aliases
for hyperdata_id, hyperdata_alias in hyperdata_aliases.items():
query = (query
.join(hyperdata_alias, hyperdata_alias.node_id == Node.id)
.filter(hyperdata_alias.hyperdata_id == hyperdata_id)
)
# filtering
for filter in request.DATA.get('filters', []):
# parameters extraction & validation
field, operator, value = self._parse_filter(filter)
#
if field[0] == 'hyperdata':
# which hyperdata?
hyperdata = session.query(Hyperdata).filter(Hyperdata.name == field[1]).first()
if hyperdata is None:
hyperdata_query = session.query(Hyperdata.name).order_by(Hyperdata.name)
hyperdata_names = [hyperdata.name for hyperdata in hyperdata_query.all()]
raise APIException('Invalid key for "%s" in parameter "field", should be one of the following values: "%s". "%s" was found instead' % (field[0], '", "'.join(hyperdata_names), field[1]), 400)
# check or create Node_Hyperdata alias; join if necessary
if hyperdata.id in hyperdata_aliases:
hyperdata_alias = hyperdata_aliases[hyperdata.id]
else:
hyperdata_alias = hyperdata_aliases[hyperdata.id] = aliased(Node_Hyperdata)
query = (query
.join(hyperdata_alias, hyperdata_alias.node_id == Node.id)
.filter(hyperdata_alias.hyperdata_id == hyperdata.id)
)
# adjust date
if hyperdata.type == 'datetime':
value = value + '2000-01-01T00:00:00Z'[len(value):]
# filter query
query = query.filter(operator(
getattr(hyperdata_alias, 'value_' + hyperdata.type),
value
))
elif field[0] == 'ngrams':
query = query.filter(
Node.id.in_(session
.query(Node_Ngram.node_id)
.join(Ngram, Ngram.id == Node_Ngram.ngram_id)
.filter(operator(
getattr(Ngram, field[1]),
map(lambda x: x.replace('-', ' '), value)
))
)
)
# authorized field names
sql_fields = set({
'id', 'name',
'nodes.count', 'ngrams.count',
'ngrams.terms', 'ngrams.n',
})
for hyperdata in _hyperdata_list:
sql_fields.add('hyperdata.' + hyperdata.name)
if hyperdata.type == 'datetime':
for part in ['year', 'month', 'day', 'hour', 'minute']:
sql_fields.add('hyperdata.' + hyperdata.name + '.' + part)
# authorized field names: Haskell
haskell_fields = set({
'haskell.test',
})
# TODO: date_trunc (psql) -> index also
# authorized field names: all of them
authorized_fields = sql_fields | haskell_fields
# grouping
for field_name in fields_names:
if field_name not in authorized_aggregates:
# query = query.group_by(text(field_name))
query = query.group_by('"%s"' % (
field_name if '.' in field_name else 'node.' + field_name
, ))
# sorting
sort_fields_names = request.DATA.get('sort', ['id'])
if not isinstance(sort_fields_names, list):
raise APIException('The query\'s "sort" parameter should be an array', 400)
sort_fields_list = []
for sort_field_name in sort_fields_names:
try:
desc = sort_field_name[0] == '-'
if sort_field_name[0] in {'-', '+'}:
sort_field_name = sort_field_name[1:]
field = fields_list[fields_names.index(sort_field_name)]
if desc:
field = field.desc()
sort_fields_list.append(field)
except:
raise APIException('Unrecognized field "%s" in the query\'s "sort" parameter. Accepted values are: "%s"' % (sort_field_name, '", "'.join(fields_names)), 400)
query = query.order_by(*sort_fields_list)
# input validation
input = validate(request.DATA, {'type': dict, 'items': {
'pagination': {'type': dict, 'items': {
'limit': {'type': int, 'default': 0},
'offset': {'type': int, 'default': 0},
}, 'default': {'limit': 0, 'offset': 0}},
'filters': {'type': list, 'items': {'type': dict, 'items': {
'field': {'type': str, 'required': True, 'range': authorized_fields},
'operator': {'type': str, 'required': True, 'range': list(_operators_dict.keys())},
'value': {'required': True},
}}, 'default': list()},
'retrieve': {'type': dict, 'required': True, 'items': {
'aggregate': {'type': bool, 'default': False},
'fields': {'type': list, 'items': {'type': str, 'range': authorized_fields}, 'range': (1, )},
}},
'sort': {'type': list, 'items': {'type': str}, 'default': list()},
}})
# return result, depending on the queried fields
if set(input['retrieve']['fields']) <= sql_fields:
method = self._sql
elif set(input['retrieve']['fields']) <= haskell_fields:
method = self._haskell
else:
raise ValidationException('queried fields are mixing incompatible types of fields')
return JsonHttpResponse(method(input, node_id), 201)
# pagination
pagination = request.DATA.get('pagination', {})
for key, value in pagination.items():
if key not in {'limit', 'offset'}:
raise APIException('Unrecognized parameter in "pagination": "%s"' % (key, ), 400)
if not isinstance(value, int):
raise APIException('In "pagination", "%s" should be an integer.' % (key, ), 400)
if 'offset' not in pagination:
pagination['offset'] = 0
if 'limit' not in pagination:
pagination['limit'] = 0
# respond to client!
# return DebugHttpResponse(str(query))
# return DebugHttpResponse(literalquery(query))
results = [
list(row)
# dict(zip(fields_names, row))
for row in (
query[pagination["offset"]:pagination["offset"]+pagination["limit"]]
if pagination['limit']
else query[pagination["offset"]:]
)
]
pagination["total"] = query.count()
return Response({
"pagination": pagination,
"retrieve": fields_names,
"sorted": sort_fields_names,
"results": results,
}, 201)
class NodesList(APIView):
authentication_classes = (SessionAuthentication, BasicAuthentication)
......@@ -598,6 +524,7 @@ class Nodes(APIView):
except Exception as error:
msgres ="error deleting : " + node_id + str(error)
class CorpusController:
@classmethod
......
from rest_framework.exceptions import APIException
from datetime import datetime
__all__ = ['validate']
_types_names = {
bool: 'boolean',
int: 'integer',
float: 'float',
str: 'string',
dict: 'object',
list: 'array',
datetime: 'datetime',
}
# Exception raised by validate() when a value does not match its schema.
# It derives from rest_framework's APIException and carries status code 400.
class ValidationException(APIException):
status_code = 400
# Detail text used when no explicit message is supplied.
default_detail = 'Bad request!'
def validate(value, expected, path='input'):
    """Check ``value`` against the schema ``expected`` and return it, coerced.

    ``expected`` is a dict; the recognized keys are:
      - 'type': the Python type the value must have. Values of another type
        are coerced when the type is bool, int, float, str or datetime
        (partial datetime strings are completed from '2000-01-01T00:00:00Z').
        When absent, the value's own type is used for the other checks.
      - 'range': a tuple ``(minimum[, maximum])`` bounding the value
        (int, float) or its length (str, list); or a list, set or dict the
        value has to belong to.
      - 'translate': a callable applied to the value, or a mapping the value
        is looked up in. 'translate_fallback_keep', 'required' and 'default'
        decide what happens when the lookup fails.
      - 'items': for lists, the schema of every element; for dicts, a mapping
        from key to schema, where each sub-schema may carry 'required' and
        'default'.

    ``path`` names the value in error messages. Lists and dicts are updated
    in place and returned.

    Raises ``ValidationException`` when a check fails.
    """
    # Local import: defaults are copied before being handed out, so that a
    # caller mutating the result cannot alter the schema for the next call.
    import copy

    # Is the expected type respected?
    if 'type' in expected:
        expected_type = expected['type']
        if not isinstance(value, expected_type):
            if expected_type not in (bool, int, float, str, datetime, ):
                raise ValidationException('%s should be a JSON %s' % (path, _types_names[expected_type], ))
            try:
                if expected_type is bool:
                    value = value not in {0, 0.0, '', '0', 'false'}
                elif expected_type is datetime:
                    # complete a partial date with the tail of a full template
                    value = value + '2000-01-01T00:00:00Z'[len(value):]
                    value = datetime.strptime(value, '%Y-%m-%dT%H:%M:%SZ')
                else:
                    value = expected_type(value)
            except (TypeError, ValueError):
                # TypeError covers inputs such as None, lists or numbers that
                # cannot be converted at all (e.g. int(None), len(5)).
                raise ValidationException('%s should be a JSON %s, but could not be parsed as such' % (path, _types_names[expected_type], ))
    else:
        expected_type = type(value)

    # Is the value in the expected range?
    if 'range' in expected:
        expected_range = expected['range']
        if isinstance(expected_range, tuple):
            if expected_type in (int, float):
                tested_value, tested_name = value, 'value'
            elif expected_type in (str, list):
                tested_value, tested_name = len(value), 'length'
            else:
                # Neither a number nor a sized string/array: there is nothing
                # meaningful to compare against the bounds.
                raise ValidationException('%s has a type that cannot be checked against a range' % (path, ))
            if tested_value < expected_range[0]:
                raise ValidationException('%s should have a minimum %s of %d' % (path, tested_name, expected_range[0], ))
            if len(expected_range) > 1 and tested_value > expected_range[1]:
                raise ValidationException('%s should have a maximum %s of %d' % (path, tested_name, expected_range[1], ))
        elif isinstance(expected_range, (list, set, dict, )) and value not in expected_range:
            # Plain iteration yields the elements of a list or set and the
            # keys of a dict; sets have no .keys() method.
            expected_values = [
                str(candidate)
                for candidate in expected_range
                if isinstance(candidate, expected_type)
            ]
            if isinstance(expected_range, set):
                # sets are unordered: sort to keep the message stable
                expected_values.sort()
            if len(expected_values) < 16:
                expected_values_str = '", "'.join(expected_values)
                expected_values_str = '"' + expected_values_str + '"'
            else:
                expected_values_str = '", "'.join(expected_values[:16])
                expected_values_str = '"' + expected_values_str + '"...'
            raise ValidationException('%s should take one of the following values: %s' % (path, expected_values_str, ))

    # Do we have to translate through a dictionary?
    if 'translate' in expected:
        translate = expected['translate']
        if callable(translate):
            value = translate(value)
            if value is None and expected.get('required', False):
                raise ValidationException('%s has been given an invalid value' % (path, ))
            return value
        try:
            value = translate[value]
        except (KeyError, TypeError):
            # TypeError: the value is unhashable, hence cannot be a key either
            if expected.get('translate_fallback_keep', False):
                return value
            if expected.get('required', False):
                raise ValidationException('%s has been given an invalid value' % (path, ))
            if 'default' in expected:
                return copy.deepcopy(expected['default'])
            return value

    # Are we handling an iterable?
    if expected_type in (list, dict) and 'items' in expected:
        expected_items = expected['items']
        if expected_type == list:
            for i, element in enumerate(value):
                value[i] = validate(element, expected_items, '%s[%d]' % (path, i, ))
        elif expected_items:
            # unknown keys are rejected before anything is modified
            for key in value:
                if key not in expected_items:
                    raise ValidationException('%s should not have a "%s" key.' % (path, key, ))
            for expected_key, expected_value in expected_items.items():
                if expected_key in value:
                    value[expected_key] = validate(value[expected_key], expected_value, '%s["%s"]' % (path, expected_key, ))
                elif expected_value.get('required', False):
                    raise ValidationException('%s should have a "%s" key.' % (path, expected_key, ))
                elif 'default' in expected_value:
                    value[expected_key] = copy.deepcopy(expected_value['default'])

    # Let's return the proper value!
    return value
......@@ -259,11 +259,21 @@ gargantext.controller("DatasetController", function($scope, $http) {
$scope.corpora = [];
$http.get('/api/nodes?type=Project', {cache: true}).success(function(response){
$scope.projects = response.data;
// Initially set to what is indicated in the URL
if (/^\/project\/\d+\/corpus\/\d+/.test(location.pathname)) {
$scope.projectId = parseInt(location.pathname.split('/')[2]);
$scope.updateCorpora();
}
});
// update corpora according to the selected parent project
$scope.updateCorpora = function() {
$http.get('/api/nodes?type=Corpus&parent=' + $scope.projectId, {cache: true}).success(function(response){
$scope.corpora = response.data;
// Initially set to what is indicated in the URL
if (/^\/project\/\d+\/corpus\/\d+/.test(location.pathname)) {
$scope.corpusId = parseInt(location.pathname.split('/')[4]);
$scope.updateEntities();
}
});
};
// update entities depending on the selected corpus
......@@ -522,8 +532,8 @@ gargantext.controller("GraphController", function($scope, $http, $element) {
filters: query.filters,
sort: ['hyperdata.publication_date.day'],
retrieve: {
type: 'aggregates',
list: ['hyperdata.publication_date.day', query.mesured]
aggregate: true,
fields: ['hyperdata.publication_date.day', query.mesured]
}
};
// request to the server
......@@ -588,4 +598,4 @@ setTimeout(function(){
// // $('button.refresh').first().click();
}, 500);
}, 250);
*/
\ No newline at end of file
*/
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment