Commit 08539c33 authored by PkSM3

Merge branch 'unstable' of ssh://delanoe.org:1979/gargantext into samuel

parents 08dc8379 addc6e19
from collections import defaultdict
class Translations:
    """Key -> value mapping that also keeps a reverse index.

    ``items`` maps every key to its value; ``groups`` maps every value to
    the set of keys currently pointing at it. Both attributes are kept in
    sync by the constructor and by the ``+`` / ``-`` operators.
    """

    def __init__(self, other=None):
        """Create an empty mapping, copy ``other``, or load it from pairs.

        ``other`` may be ``None``, another ``Translations`` (copied), or any
        object exposing ``__iter__`` that ``defaultdict`` accepts as initial
        content (a mapping or an iterable of ``(key, value)`` pairs).

        Raises ``TypeError`` for any other argument.
        """
        if other is None:
            self.items = defaultdict(int)
            self.groups = defaultdict(set)
        elif isinstance(other, Translations):
            self.items = other.items.copy()
            # Copy each set as well: a shallow copy of ``groups`` would share
            # the sets, so editing the copy would silently edit ``other``.
            self.groups = defaultdict(set)
            for value, keys in other.groups.items():
                self.groups[value] = set(keys)
        elif hasattr(other, '__iter__'):
            self.items = defaultdict(int, other)
            self.groups = defaultdict(set)
            for key, value in self.items.items():
                self.groups[value].add(key)
        else:
            raise TypeError

    def _unlink(self, key):
        """Drop ``key`` from ``items`` and from the group of its value."""
        if key not in self.items:
            return
        value = self.items.pop(key)
        members = self.groups.get(value)
        if members is not None:
            members.discard(key)
            if not members:
                # Never leave an empty group behind.
                del self.groups[value]

    def __add__(self, other):
        """Return a new mapping holding ``self`` updated with ``other``.

        When a key exists on both sides, the value from ``other`` wins and
        the key is moved to the matching group.

        Raises ``TypeError`` if ``other`` is not a ``Translations``.
        """
        if not isinstance(other, Translations):
            raise TypeError
        result = self.__class__(self)
        # Rebuild from the pairs of ``other`` rather than from its ``groups``
        # so the reverse index of the result is always consistent.
        for key, value in other:
            result._unlink(key)
            result.items[key] = value
            result.groups[value].add(key)
        return result

    def __sub__(self, other):
        """Return a new mapping without the keys present in ``other``.

        Keys are removed whatever value ``other`` associates with them.
        If ``other`` is not a ``Translations``, an unchanged copy of
        ``self`` is returned.
        """
        result = self.__class__(self)
        if isinstance(other, Translations):
            for key, _ in other:
                result._unlink(key)
        return result

    def __iter__(self):
        """Yield every ``(key, value)`` pair of the mapping."""
        for key, value in self.items.items():
            yield key, value
class WeightedMatrix:
    """Sparse two-level matrix of float weights.

    ``items`` is a ``defaultdict`` of ``defaultdict(float)``: the weight of
    the cell ``(key1, key2)`` is stored at ``items[key1][key2]``.
    """

    @staticmethod
    def _new_items():
        """Return an empty two-level weight container."""
        return defaultdict(lambda: defaultdict(float))

    def __init__(self, other=None):
        """Create an empty matrix, copy ``other``, or load it from triples.

        ``other`` may be ``None``, another ``WeightedMatrix`` (copied), or
        an iterable of ``(key1, key2, value)`` triples.

        Raises ``TypeError`` for any other argument.
        """
        if other is None:
            self.items = self._new_items()
        elif isinstance(other, WeightedMatrix):
            # Copy every row: a shallow copy would share the inner dicts
            # between the two matrices.
            self.items = self._new_items()
            for key1, key2_value in other.items.items():
                self.items[key1] = defaultdict(float, key2_value)
        elif hasattr(other, '__iter__'):
            self.items = self._new_items()
            for key1, key2, value in other:
                self.items[key1][key2] = value
        else:
            raise TypeError

    def __iter__(self):
        """Yield every cell as a ``(key1, key2, value)`` triple."""
        for key1, key2_value in self.items.items():
            for key2, value in key2_value.items():
                yield key1, key2, value

    def __sub__(self, other):
        """Return a new matrix without the cells present in ``other``.

        Can only be subtracted from another matrix of co-occurrences: a
        cell is removed when ``other`` holds the same ``(key1, key2)``
        pair, whatever its weight. Rows left empty are dropped.

        Raises ``TypeError`` if ``other`` is not a ``WeightedMatrix``.
        """
        if not isinstance(other, WeightedMatrix):
            raise TypeError
        result = self.__class__(self)
        for key1, key2, _ in other:
            row = result.items.get(key1)
            if row is None:
                continue
            row.pop(key2, None)
            if not row:
                del result.items[key1]
        return result

    def __mul__(self, other):
        """Return a new matrix with both keys mapped through ``other``.

        ``other`` must be a ``Translations``; keys it does not know are
        kept unchanged. Cells that end up on the same coordinates have
        their weights summed.

        Raises ``TypeError`` if ``other`` is not a ``Translations``.
        """
        if isinstance(other, Translations):
            result = WeightedMatrix()
            for key1, key2_value in self.items.items():
                new_key1 = other.items.get(key1, key1)
                for key2, value in key2_value.items():
                    result.items[new_key1][
                        other.items.get(key2, key2)
                    ] += value
        else:
            raise TypeError
        return result
class UnweightedList:
    """Set of keys supporting union, difference and intersection.

    ``items`` is a plain ``set``. Every operator accepts either another
    ``UnweightedList`` or a ``WeightedList`` (only its keys are used) and
    returns a new object, leaving both operands untouched.
    """

    def __init__(self, other=None):
        """Create an empty list, copy ``other``, or load it from an iterable.

        ``other`` may be ``None``, an ``UnweightedList`` (copied), a
        ``WeightedList`` (its keys are taken), or any iterable. Iterable
        elements that are themselves iterable (e.g. ``(key, weight)``
        pairs) contribute their first element; strings and bytes are kept
        whole.

        Raises ``TypeError`` for any other argument.
        """
        if other is None:
            self.items = set()
        elif isinstance(other, UnweightedList):
            self.items = other.items.copy()
        elif isinstance(other, WeightedList):
            self.items = set(other.items.keys())
        elif hasattr(other, '__iter__'):
            # Build the set in one pass: generators have no len() and
            # cannot be indexed, so nothing may rely on either.
            self.items = set()
            for item in other:
                is_nested = (
                    hasattr(item, '__iter__')
                    and not isinstance(item, (str, bytes))
                )
                self.items.add(item[0] if is_nested else item)
        else:
            raise TypeError

    @staticmethod
    def _keys_of(other):
        """Return the keys held by ``other``, or raise ``TypeError``."""
        if isinstance(other, UnweightedList):
            return other.items
        if isinstance(other, WeightedList):
            return set(other.items.keys())
        raise TypeError

    def __add__(self, other):
        """Return the union of ``self`` and the keys of ``other``."""
        result = self.__class__(self)
        result.items |= self._keys_of(other)
        return result

    __or__ = __add__

    def __sub__(self, other):
        """Return ``self`` without the keys of ``other``."""
        result = self.__class__(self)
        result.items -= self._keys_of(other)
        return result

    def __and__(self, other):
        """Return the keys shared by ``self`` and ``other``."""
        result = self.__class__(self)
        result.items &= self._keys_of(other)
        return result
class WeightedList:
    """Mapping of keys to float weights.

    ``items`` is a ``defaultdict(float)``. Operators return new objects and
    leave their operands untouched.
    """

    def __init__(self, other=None):
        """Create an empty list, copy ``other``, or load it from pairs.

        ``other`` may be ``None``, a ``WeightedList`` (copied), an
        ``UnweightedList`` (every key gets the weight ``1.0``), or any
        object exposing ``__iter__`` that ``defaultdict`` accepts as
        initial content (a mapping or an iterable of ``(key, weight)``
        pairs).

        Raises ``TypeError`` for any other argument.
        """
        if other is None:
            self.items = defaultdict(float)
        elif isinstance(other, WeightedList):
            self.items = other.items.copy()
        elif isinstance(other, UnweightedList):
            self.items = defaultdict(float)
            for key in other.items:
                self.items[key] = 1.0
        elif hasattr(other, '__iter__'):
            self.items = defaultdict(float, other)
        else:
            raise TypeError

    def __iter__(self):
        """Yield every ``(key, weight)`` pair."""
        for key, value in self.items.items():
            yield key, value

    def __add__(self, other):
        """Add elements from the other list to the current one.

        Weights of a ``WeightedList`` are summed key by key; every key of
        an ``UnweightedList`` adds ``1.0``.

        Raises ``TypeError`` for any other operand.
        """
        result = self.__class__(self)
        if isinstance(other, WeightedList):
            for key, value in other.items.items():
                result.items[key] += value
        elif isinstance(other, UnweightedList):
            # ``other.items`` is a set: it yields bare keys, not pairs.
            for key in other.items:
                result.items[key] += 1.0
        else:
            raise TypeError
        return result

    def __sub__(self, other):
        """Remove elements of the other list from the current one.

        Raises ``TypeError`` if ``other`` is not an ``UnweightedList``.
        """
        result = self.__class__(self)
        if isinstance(other, UnweightedList):
            for key in other.items:
                result.items.pop(key, None)
        else:
            raise TypeError
        return result

    def __and__(self, other):
        """Return the weights of the keys also present in ``other``.

        The result is a plain ``defaultdict(float)``, not a
        ``WeightedList``.

        Raises ``TypeError`` if ``other`` is not an ``UnweightedList``.
        """
        if isinstance(other, UnweightedList):
            result = defaultdict(float)
            for key, value in self.items.items():
                if key in other.items:
                    result[key] = value
        else:
            raise TypeError
        return result

    def __mul__(self, other):
        """Return a new list with every key mapped through ``other``.

        ``other`` must be a ``Translations``; keys it does not know are
        kept unchanged. Keys mapped to the same target have their weights
        summed.

        Raises ``TypeError`` if ``other`` is not a ``Translations``.
        """
        if isinstance(other, Translations):
            result = WeightedList()
            for key, value in self.items.items():
                result.items[
                    other.items.get(key, key)
                ] += value
        else:
            raise TypeError
        return result
# if __name__ == '__main__':
# l = Coocurrences()
# l = List()
# for i in l:
# print(i)
# t1 = Translations()
# t2 = Translations()
# t2.items = {1: 2}
# for i in t1 + t2:
# print(i)
...@@ -8,7 +8,8 @@ ...@@ -8,7 +8,8 @@
"angular": "~1.2.x", "angular": "~1.2.x",
"angular-loader": "~1.2.x", "angular-loader": "~1.2.x",
"angular-resource": "~1.2.x", "angular-resource": "~1.2.x",
"bootstrap": "~3.x" "bootstrap": "~3.x",
"angular-cookies": "1.2"
}, },
"resolutions": { "resolutions": {
"angular": "~1.2.x" "angular": "~1.2.x"
......
...@@ -46,7 +46,7 @@ ...@@ -46,7 +46,7 @@
} }
.main-panel, .text-panel, .words-panel { .main-panel, .text-panel, .words-panel {
height: 400px; height: 800px;
margin: 10px 0px; margin: 10px 0px;
} }
......
...@@ -18,30 +18,43 @@ ...@@ -18,30 +18,43 @@
}; };
}); });
window.annotationsApp.controller('ExtraAnnotationController', ['$scope', '$rootScope', '$element', 'NgramHttpService', window.annotationsApp.controller('ExtraAnnotationController',
['$scope', '$rootScope', '$element', 'NgramHttpService',
function ($scope, $rootScope, $element, NgramHttpService) { function ($scope, $rootScope, $element, NgramHttpService) {
// TODO use the tooltip ? // TODO use the tooltip ?
$scope.onDeleteClick = function () { $scope.onDeleteClick = function () {
NgramHttpService.delete($scope.keyword, $rootScope); NgramHttpService.delete({
}; 'listId': $scope.keyword.list_id,
'ngramId': $scope.keyword.uuid
}).$promise.then(function(data) {
NgramListHttpService.get(
{'corpusId': $rootScope.corpusId, 'docId': $rootScope.docId}
).$promise.then(function(data) {
$rootScope.annotations = data[$rootScope.corpusId.toString()][$rootScope.docId.toString()];
$rootScope.lists = data[$rootScope.corpusId.toString()]['lists'];
});
});
};
}]); }]);
window.annotationsApp.controller('AnnotationController', ['$scope', '$rootScope', '$element', function ($scope, $rootScope, $element) { window.annotationsApp.controller('AnnotationController',
// FIXME maybe use angular.copy of the annotation ['$scope', '$rootScope', '$element',
var keyword = _.find( function ($scope, $rootScope, $element) {
$rootScope.annotations, // FIXME maybe use angular.copy of the annotation
function(annotation) { return annotation.uuid.toString() === $element[0].getAttribute('uuid').toString(); } var keyword = _.find(
); $rootScope.annotations,
// attach the annotation scope dynamically function(annotation) { return annotation.uuid.toString() === $element[0].getAttribute('uuid').toString(); }
if (keyword) { );
$scope.keyword = keyword; // attach the annotation scope dynamically
} if (keyword) {
$scope.keyword = keyword;
}
$scope.onClick = function(e) { $scope.onClick = function(e) {
$rootScope.$emit("positionAnnotationMenu", e.pageX, e.pageY); $rootScope.$emit("positionAnnotationMenu", e.pageX, e.pageY);
$rootScope.$emit("toggleAnnotationMenu", $scope.keyword); $rootScope.$emit("toggleAnnotationMenu", $scope.keyword);
e.stopPropagation(); e.stopPropagation();
}; };
}]); }]);
window.annotationsApp.directive('selectionTemplate', function () { window.annotationsApp.directive('selectionTemplate', function () {
...@@ -52,310 +65,326 @@ ...@@ -52,310 +65,326 @@
}; };
}); });
window.annotationsApp.controller('AnnotationMenuController', ['$scope', '$rootScope', '$element', '$timeout', 'NgramHttpService', window.annotationsApp.controller('AnnotationMenuController',
['$scope', '$rootScope', '$element', '$timeout', 'NgramHttpService',
function ($scope, $rootScope, $element, $timeout, NgramHttpService) { function ($scope, $rootScope, $element, $timeout, NgramHttpService) {
/* /*
* Universal text selection * Universal text selection
*/ */
function getSelected() { function getSelected() {
if (window.getSelection) { if (window.getSelection) {
return window.getSelection(); return window.getSelection();
} }
else if (document.getSelection) { else if (document.getSelection) {
return document.getSelection(); return document.getSelection();
} }
else { else {
var selection = document.selection && document.selection.createRange(); var selection = document.selection && document.selection.createRange();
if (selection.text) { if (selection.text) {
return selection.text; return selection.text;
} }
return false; return false;
} }
return false; return false;
} }
var selection = getSelected(); var selection = getSelected();
function toggleSelectionHighlight(text) { function toggleSelectionHighlight(text) {
if (text.trim() !== "") { if (text.trim() !== "") {
$(".text-panel").addClass("selection"); $(".text-panel").addClass("selection");
} else { } else {
$(".text-panel").removeClass("selection"); $(".text-panel").removeClass("selection");
}
} }
}
function toggleMenu(context, annotation){ function toggleMenu(context, annotation) {
$timeout(function() { $timeout(function() {
$scope.$apply(function() { $scope.$apply(function() {
if (angular.isObject(annotation)) {
$scope.level = angular.copy(annotation.level);
$scope.category = angular.copy(annotation.category);
// used in onClick
$scope.selection_text = angular.copy(annotation);
if ($scope.category == "miamlist") {
$scope.local_miamlist = false;
$scope.global_stoplist = true;
$scope.local_stoplist = true;
} else if ($scope.category == "stoplist") {
if ($scope.level == "local") { if (angular.isObject(annotation)) {
$scope.local_stoplist = false; $scope.level = angular.copy(annotation.level || 'global');
$scope.category = $rootScope.lists[annotation.list_id].toLowerCase();
$scope.listId = angular.copy(annotation.list_id);
// used in onClick
$scope.selection_text = angular.copy(annotation);
if ($scope.category == "miamlist") {
$scope.local_miamlist = false;
$scope.global_stoplist = true; $scope.global_stoplist = true;
}
if ($scope.level == "global") {
$scope.global_stoplist = false;
$scope.local_stoplist = true; $scope.local_stoplist = true;
} else if ($scope.category == "stoplist") {
if ($scope.level == "local") {
$scope.local_stoplist = false;
$scope.global_stoplist = true;
}
if ($scope.level == "global") {
$scope.global_stoplist = false;
$scope.local_stoplist = true;
}
$scope.local_miamlist = true;
} }
// show menu
$element.fadeIn(100);
}
else if (annotation.trim() !== "") {
$scope.selection_text = angular.copy(annotation);
$scope.level = "New Ngram from selection";
$scope.category = null;
$scope.local_miamlist = true; $scope.local_miamlist = true;
$scope.local_stoplist = true;
$scope.global_stoplist = true;
// show menu
$element.fadeIn(100);
} else {
// close menu
$element.fadeOut(100);
} }
// show menu });
$element.fadeIn(100);
}
else if (annotation.trim() !== "") {
$scope.selection_text = angular.copy(annotation);
$scope.level = "Create from current selection";
$scope.category = null;
$scope.local_miamlist = true;
$scope.local_stoplist = true;
$scope.global_stoplist = true;
// show menu
$element.fadeIn(100);
} else {
// close menu
$element.fadeOut(100);
}
}); });
}); }
} var elt = $(".text-panel")[0];
var elt = $(".text-panel")[0]; var pos = $(".text-panel").position();
var pos = $(".text-panel").position();
function positionElement(context, x, y) {
// todo try bootstrap popover component
$element.css('left', x + 10);
$element.css('top', y + 10);
}
function positionMenu(e) { function positionElement(context, x, y) {
positionElement(null, e.pageX, e.pageY); // todo try bootstrap popover component
} $element.css('left', x + 10);
$element.css('top', y + 10);
}
// TODO is mousedown necessary ? function positionMenu(e) {
$(".text-panel").mousedown(function(){ positionElement(null, e.pageX, e.pageY);
$(".text-panel").mousemove(positionMenu);
});
$(".text-panel").mouseup(function(){
$(".text-panel").unbind("mousemove", positionMenu);
toggleSelectionHighlight(selection.toString().trim());
toggleMenu(null, selection.toString().trim());
});
$(".text-panel").delegate(':not("#selection")', "click", function(e) {
if ($(e.target).hasClass("keyword-inline")) return;
positionMenu(e);
toggleSelectionHighlight(selection.toString().trim());
toggleMenu(null, selection.toString().trim());
});
$rootScope.$on("positionAnnotationMenu", positionElement);
$rootScope.$on("toggleAnnotationMenu", toggleMenu);
$scope.onClick = function($event, action, category, level) {
if (angular.isObject($scope.selection_text)) {
// change the status of an existing Ngram
$scope.selection_text.category = category;
$scope.selection_text.level = level;
NgramHttpService[action](
{
'listId': $rootScope.listId,
'ngramId': $scope.selection_text.uuid
},
{'annotation': $scope.selection_text}
);
} else if ($scope.selection_text.trim() !== "") {
// new annotation from selection
NgramHttpService.post(
{
'listId': $rootScope.listId
},
{'annotation' : {'text': $scope.selection_text.trim(), 'category': category, 'level': level}}
);
} }
// hide selection highlighted text and the menu
$(".text-panel").removeClass("selection");
$element.fadeOut(100);
};
}]); // TODO is mousedown necessary ?
$(".text-panel").mousedown(function(){
$(".text-panel").mousemove(positionMenu);
});
window.annotationsApp.controller('IntraTextController', ['$scope', '$rootScope', '$compile', 'NgramHttpService', $(".text-panel").mouseup(function(){
function ($scope, $rootScope, $compile, NgramHttpService) { $(".text-panel").unbind("mousemove", positionMenu);
toggleSelectionHighlight(selection.toString().trim());
toggleMenu(null, selection.toString().trim());
});
$scope.extra_stoplist = []; $(".text-panel").delegate(':not("#selection")', "click", function(e) {
$scope.extra_miamlist = []; if ($(e.target).hasClass("keyword-inline")) return;
$scope.currentStopPage = 0; positionMenu(e);
$scope.currentMiamPage = 0; toggleSelectionHighlight(selection.toString().trim());
$scope.pageSize = 15; toggleMenu(null, selection.toString().trim());
var counter = 0;
/*
* Replace the text by and html template
*/
function replaceTextByTemplate(text, annotation, template, pattern) {
return text.replace(pattern, function(matched) {
var tpl = angular.element(template);
tpl.append(matched);
tpl.attr('title', annotation.tooltip_content);
tpl.attr('uuid', annotation.uuid);
if (annotation.category == 'miamlist') tpl.addClass("miamword");
if (annotation.category == 'stoplist' && annotation.level == 'local') tpl.addClass("stopword");
//if (annotation.category == 'stoplist' && annotation.level == 'global') tpl.addClass("global-stopword");
return tpl.get(0).outerHTML;
}); });
$rootScope.$on("positionAnnotationMenu", positionElement);
$rootScope.$on("toggleAnnotationMenu", toggleMenu);
$scope.onClick = function($event, action, listId, level) {
if (angular.isObject($scope.selection_text)) {
// delete from the current list
NgramHttpService[action]({
'listId': listId,
'ngramId': $scope.selection_text.uuid
}).$promise.then(function(data) {
$.each($rootScope.annotations, function(index, element) {
if (element.list_id == listId && element.uuid == $scope.selection_text.uuid) {
$rootScope.annotations.splice(index, 1);
return false;
}
});
});
} else if ($scope.selection_text.trim() !== "") {
// new annotation from selection
NgramHttpService.post(
{
'listId': listId
},
{'annotation' : {'text': $scope.selection_text.trim()}}
).$promise.then(function(data) {
$rootScope.annotations.push(data);
});
}
// hide selection highlighted text and the menu
$(".text-panel").removeClass("selection");
$element.fadeOut(100);
};
} }
]);
function compileText(annotations, fullText, abstractText, $rootScope) { window.annotationsApp.controller('IntraTextController',
counter = 0; ['$scope', '$rootScope', '$compile', 'NgramHttpService',
var templateBegin = "<span ng-controller='AnnotationController' ng-click='onClick($event)' class='keyword-inline'>"; function ($scope, $rootScope, $compile, NgramHttpService) {
var templateBeginRegexp = "<span ng-controller='AnnotationController' ng-click='onClick\(\$event\)' class='keyword-inline'>";
var templateEnd = "</span>"; $scope.extra_stoplist = [];
var template = templateBegin + templateEnd; $scope.extra_miamlist = [];
$scope.currentStopPage = 0;
$scope.currentMiamPage = 0;
$scope.pageSize = 15;
var counter = 0;
var startPattern = "\\b((?:"+templateBeginRegexp+")*";
var middlePattern = "(?:<\/span>)*\\s(?:"+templateBeginRegexp+")*";
var endPattern = "(?:<\/span>)*)\\b";
/* /*
* Sorts annotations on the number of words * Replace the text by and html template
*/ */
function lengthSort(listitems, valuekey) { function replaceTextByTemplate(text, annotation, template, pattern, lists) {
listitems.sort(function(a, b) { return text.replace(pattern, function(matched) {
var compA = a[valuekey].split(" ").length; var tpl = angular.element(template);
var compB = b[valuekey].split(" ").length; tpl.append(matched);
return (compA > compB) ? -1 : (compA <= compB) ? 1 : 0; tpl.attr('title', annotation.tooltip_content);
}); tpl.attr('uuid', annotation.uuid);
return listitems;
if ('MiamList' == lists[annotation.list_id]) tpl.addClass("miamword");
if ('StopList' == lists[annotation.list_id]) tpl.addClass("stopword");
//if (annotation.category == 'stoplist' && annotation.level == 'global') tpl.addClass("global-stopword");
return tpl.get(0).outerHTML;
});
} }
var sortedSizeAnnotations = lengthSort(annotations, "text"); function compileText(annotations, fullText, abstractText, $rootScope) {
var extra_stoplist = [], counter = 0;
extra_miamlist = []; var templateBegin = "<span ng-controller='AnnotationController' ng-click='onClick($event)' class='keyword-inline'>";
var templateBeginRegexp = "<span ng-controller='AnnotationController' ng-click='onClick\(\$event\)' class='keyword-inline'>";
_.each(sortedSizeAnnotations, function (annotation) {
// TODO better split to manage two-words with minus sign var templateEnd = "</span>";
var words = annotation.text.split(" "); var template = templateBegin + templateEnd;
var pattern = new RegExp(startPattern + words.join(middlePattern) + endPattern, 'gmi');
var textRegexp = new RegExp("\\b"+annotation.text+"\\b", 'igm'); var startPattern = "\\b((?:"+templateBeginRegexp+")*";
var middlePattern = "(?:<\/span>)*\\s(?:"+templateBeginRegexp+")*";
if (pattern.test(fullText) === true) { var endPattern = "(?:<\/span>)*)\\b";
fullText = replaceTextByTemplate(fullText, annotation, template, pattern); /*
// TODO remove debug * Sorts annotations on the number of words
counter++; */
} else if (pattern.test(abstractText) === true) { function lengthSort(listitems, valuekey) {
abstractText = replaceTextByTemplate(abstractText, annotation, template, pattern); listitems.sort(function(a, b) {
counter++; var compA = a[valuekey].split(" ").length;
} else if (!textRegexp.test($rootScope.full_text) && !textRegexp.test($rootScope.abstract_text)) { var compB = b[valuekey].split(" ").length;
if (annotation.category == "stoplist" && annotation.level == 'local') { return (compA > compB) ? -1 : (compA <= compB) ? 1 : 0;
// if ($.inArray(annotation.uuid, $scope.extra_stoplist.map(function (item) { });
// return item.uuid; return listitems;
// })) == -1) {
extra_stoplist = lengthSort(extra_stoplist.concat(annotation), "text");
// }
} else if (annotation.category == "miamlist") {
// if ($.inArray(annotation.uuid, $scope.extra_miamlist.map(function (item) {
// return item.uuid;
// })) == -1) {
extra_miamlist = lengthSort(extra_miamlist.concat(annotation), "text");
// }
}
} }
});
$scope.extra_stoplist = extra_stoplist;
$scope.extra_miamlist = extra_miamlist;
return { var sortedSizeAnnotations = lengthSort(annotations, "text");
'fullTextHtml': fullText, var extra_stoplist = [],
'abstractTextHtml': abstractText extra_miamlist = [];
};
} _.each(sortedSizeAnnotations, function (annotation) {
// TODO better split to manage two-words with minus sign
annotation.category = $rootScope.lists[annotation.list_id].toLowerCase();
var words = annotation.text.split(" ");
var pattern = new RegExp(startPattern + words.join(middlePattern) + endPattern, 'gmi');
var textRegexp = new RegExp("\\b"+annotation.text+"\\b", 'igm');
if (pattern.test(fullText) === true) {
fullText = replaceTextByTemplate(fullText, annotation, template, pattern, $rootScope.lists);
// TODO remove debug
counter++;
} else if (pattern.test(abstractText) === true) {
abstractText = replaceTextByTemplate(abstractText, annotation, template, pattern, $rootScope.lists);
counter++;
} else if (!textRegexp.test($rootScope.full_text) && !textRegexp.test($rootScope.abstract_text)) {
if (annotation.category == "stoplist") {
// Deactivated stoplist for the moment
// if ($.inArray(annotation.uuid, $scope.extra_stoplist.map(function (item) {
// return item.uuid;
// })) == -1) {
// extra_stoplist = lengthSort(extra_stoplist.concat(annotation), "text");
// }
} else if (annotation.category == "miamlist") {
if ($.inArray(annotation.uuid, $scope.extra_miamlist.map(function (item) {
return item.uuid;
})) == -1) {
extra_miamlist = lengthSort(extra_miamlist.concat(annotation), "text");
}
}
}
});
$scope.extra_stoplist = extra_stoplist;
$scope.extra_miamlist = extra_miamlist;
$rootScope.$watchCollection('annotations', function (newValue, oldValue) { return {
if ($rootScope.annotations === undefined) return; 'fullTextHtml': fullText,
if (angular.equals(newValue, oldValue)) return; 'abstractTextHtml': abstractText
};
}
$scope.extra_stoplist = []; $rootScope.$watchCollection('annotations', function (newValue, oldValue) {
$scope.extra_miamlist = []; if ($rootScope.annotations === undefined) return;
if (angular.equals(newValue, oldValue)) return;
var result = compileText( $rootScope.miamListId = _.invert($rootScope.lists)['MiamList'];
$rootScope.annotations, $rootScope.stopListId = _.invert($rootScope.lists)['StopList'];
angular.copy($rootScope.full_text),
angular.copy($rootScope.abstract_text),
$rootScope);
console.log($rootScope.annotations.length); $scope.extra_stoplist = [];
console.log(counter); $scope.extra_miamlist = [];
angular.element('#full-text').html(result.fullTextHtml); var result = compileText(
angular.element('#abstract-text').html(result.abstractTextHtml); $rootScope.annotations,
angular.copy($rootScope.full_text),
angular.copy($rootScope.abstract_text),
$rootScope
);
angular.element('.text-container').find('[ng-controller=AnnotationController]').each(function(idx, elt) { console.log($rootScope.annotations.length);
angular.element(elt).replaceWith($compile(elt)($rootScope.$new(true))); console.log(counter);
});
}); angular.element('#full-text').html(result.fullTextHtml);
angular.element('#abstract-text').html(result.abstractTextHtml);
function submitNewAnnotation($event, inputEltId, category) {
if ($event.keyCode !== undefined && $event.keyCode != 13) return; angular.element('.text-container').find('[ng-controller=AnnotationController]').each(function(idx, elt) {
var value = $(inputEltId).val().trim(); angular.element(elt).replaceWith($compile(elt)($rootScope.$new(true)));
if (value === "") return; });
NgramHttpService.post(
{
'listId': $rootScope.listId
},
{'annotation' : {'text': value, 'category': category, 'level': 'local'}},
function(data) {
// on success
if (data) {
$rootScope.annotations.push(data);
}
}); });
$(inputEltId).val(""); function submitNewAnnotation($event, inputEltId, listId) {
} if ($event.keyCode !== undefined && $event.keyCode != 13) return;
var value = $(inputEltId).val().trim();
if (value === "") return;
$scope.onMiamlistSubmit = function ($event) { NgramHttpService.post(
submitNewAnnotation($event, "#miamlist-input", "miamlist"); {
}; 'listId': listId,
// TODO refactor 'ngramId': 'new'
$scope.onStoplistSubmit = function ($event) { },
submitNewAnnotation($event, "#stoplist-input", "stoplist"); {'annotation' : {'text': value}},
}; function(data) {
$scope.numStopPages = function () { // on success
if ($scope.extra_stoplist === undefined) return 0; if (data) {
return Math.ceil($scope.extra_stoplist.length / $scope.pageSize); $rootScope.annotations.push(data);
}; }
$scope.numMiamPages = function () { });
if ($scope.extra_miamlist === undefined) return 0;
return Math.ceil($scope.extra_miamlist.length / $scope.pageSize); $(inputEltId).val("");
}; }
$scope.nextMiamPage = function() {
$scope.currentMiamPage = $scope.currentMiamPage + 1; $scope.onMiamlistSubmit = function ($event) {
}; submitNewAnnotation($event, "#miamlist-input", _.invert($rootScope.lists)['MiamList']);
$scope.previousMiamPage = function() { };
$scope.currentMiamPage = $scope.currentMiamPage - 1; // TODO refactor
}; $scope.onStoplistSubmit = function ($event) {
$scope.nextStopPage = function() { submitNewAnnotation($event, "#stoplist-input", _.invert($rootScope.lists)['MiamList']);
$scope.currentStopPage = $scope.currentStopPage + 1; };
}; $scope.numStopPages = function () {
$scope.previousStopPage = function() { if ($scope.extra_stoplist === undefined) return 0;
$scope.currentStopPage = $scope.currentStopPage - 1; return Math.ceil($scope.extra_stoplist.length / $scope.pageSize);
}; };
}]); $scope.numMiamPages = function () {
if ($scope.extra_miamlist === undefined) return 0;
return Math.ceil($scope.extra_miamlist.length / $scope.pageSize);
};
$scope.nextMiamPage = function() {
$scope.currentMiamPage = $scope.currentMiamPage + 1;
};
$scope.previousMiamPage = function() {
$scope.currentMiamPage = $scope.currentMiamPage - 1;
};
$scope.nextStopPage = function() {
$scope.currentStopPage = $scope.currentStopPage + 1;
};
$scope.previousStopPage = function() {
$scope.currentStopPage = $scope.currentStopPage - 1;
};
}
]);
window.annotationsApp.filter('startFrom', function () { window.annotationsApp.filter('startFrom', function () {
return function (input, start) { return function (input, start) {
...@@ -365,29 +394,33 @@ ...@@ -365,29 +394,33 @@
}; };
}); });
window.annotationsApp.controller('DocController', ['$scope', '$rootScope', 'NgramListHttpService', 'DocumentHttpService', window.annotationsApp.controller('DocController',
function ($scope, $rootScope, NgramListHttpService, DocumentHttpService) { ['$scope', '$rootScope', 'NgramListHttpService', 'DocumentHttpService',
//$rootScope.$on('loadNewDoc', function (event, listId, docId) { function ($scope, $rootScope, NgramListHttpService, DocumentHttpService) {
$rootScope.documentResource = DocumentHttpService.get({docId: $rootScope.docId}, function(data, responseHeaders) { $rootScope.documentResource = DocumentHttpService.get(
$scope.title = data.title; {'docId': $rootScope.docId},
$scope.authors = data.authors; function(data, responseHeaders) {
$scope.journal = data.journal; $scope.title = data.title;
$scope.publication_date = data.publication_date; $scope.authors = data.authors;
// TODO this data have to be deleted $scope.journal = data.journal;
$scope.current_page_number = data.current_page_number; $scope.publication_date = data.publication_date;
$scope.last_page_number = data.last_page_number; // TODO this data have to be deleted
// put in rootScope because used by many components //$scope.current_page_number = data.current_page_number;
$rootScope.docId = data.id; //$scope.last_page_number = data.last_page_number;
$rootScope.full_text = data.full_text; // put in rootScope because used by many components
$rootScope.abstract_text = data.abstract_text; $rootScope.docId = data.id;
// GET the annotations $rootScope.full_text = data.full_text;
$rootScope.annotationsResource = NgramListHttpService.get( $rootScope.abstract_text = data.abstract_text;
{'listId': $rootScope.listId, 'docId': $rootScope.docId} // GET the annotations
).$promise.then(function(data) { // TODO
$rootScope.annotations = data[$rootScope.listId.toString()][$rootScope.docId.toString()]; $rootScope.annotationsResource = NgramListHttpService.get(
}); {'corpusId': $rootScope.corpusId, 'docId': $rootScope.docId}
).$promise.then(function(data) {
$rootScope.annotations = data[$rootScope.corpusId.toString()][$rootScope.docId.toString()];
$rootScope.lists = data[$rootScope.corpusId.toString()]['lists'];
});
}); });
//});
// TODO setup pagination client-side // TODO setup pagination client-side
$scope.onPreviousClick = function () { $scope.onPreviousClick = function () {
DocumentHttpService.get($scope.docId - 1); DocumentHttpService.get($scope.docId - 1);
...@@ -399,9 +432,10 @@ ...@@ -399,9 +432,10 @@
window.annotationsApp.run(function ($rootScope) { window.annotationsApp.run(function ($rootScope) {
/* GET the document node and all the annotations in the list associated */ /* GET the document node and all the annotations in the list associated */
// TODO debug var path = window.location.pathname.match(/\/project\/(.*)\/corpus\/(.*)\/document\/(.*)\//);
$rootScope.docId = 4; $rootScope.projectId = path[1];
$rootScope.listId = 1; $rootScope.corpusId = path[2];
$rootScope.docId = path[3];
}); });
})(window); })(window);
(function () { (function () {
'use strict'; 'use strict';
var http = angular.module('annotationsAppHttp', ['ngResource']); var http = angular.module('annotationsAppHttp', ['ngResource', 'ngCookies']);
/* http.config(['$httpProvider', function($httpProvider){
* Read Document $httpProvider.defaults.xsrfHeaderName = 'X-CSRFToken';
*/ $httpProvider.defaults.xsrfCookieName = 'csrftoken';
http.factory('DocumentHttpService', function($resource) { }]);
return $resource( /*
window.ANNOTATION_API_URL + "document" + '/:docId/', * Read Document
{ */
docId: '@docId' http.factory('DocumentHttpService', function($resource) {
}, return $resource(
{ window.ANNOTATION_API_URL + "document/:docId/",
get: { {
method: 'GET', docId: '@docId'
params: {docId: '@docId'} },
} {
get: {
method: 'GET',
params: {docId: '@docId'}
} }
); }
}); );
});
/* /*
* Read Ngram Lists * Read all Ngrams
*/ */
http.factory('NgramListHttpService', function ($resource) { http.factory('NgramListHttpService', function ($resource) {
return $resource( return $resource(
window.ANNOTATION_API_URL + 'lists' + '/:listId/', window.ANNOTATION_API_URL + 'corpus/:corpusId/document/:docId',
{ {
listId: '@listId' corpusId: '@corpusId',
}, docId: '@docId'
{ },
get: { {
method: 'GET', get: {
params: {listId: '@listId'} method: 'GET',
} params: {}
} }
); }
}); );
});
/* /*
* Create, modify or delete on Ngram of a list * Create, modify or delete 1 Ngram
*/ */
http.factory('NgramHttpService', function ($resource) { http.factory('NgramHttpService', function ($resource) {
return $resource( return $resource(
window.ANNOTATION_API_URL + 'lists' + '/:listId/ngrams/' + ':ngramId/', window.ANNOTATION_API_URL + 'lists/:listId/ngrams/:ngramId',
{ {
listId: '@listId' listId: '@listId',
ngramId: '@id'
},
{
post: {
method: 'POST',
params: {'listId': '@listId', 'ngramId': ''}
}, },
{ delete: {
post: { method: 'DELETE',
method: 'POST', params: {'listId': '@listId', 'ngramId': '@id'}
params: {'listId': '@listId', 'ngramId': '@ngramId'}
},
delete: {
method: 'DELETE',
params: {'listId': '@listId', 'ngramId': '@ngramId'}
}
} }
); }
}); );
// return { });
// newAnnotationObject: function(text, category, level) {
// return {
// 'text': text.trim(),
// 'category': category,
// 'level': level
// };
// },
// create: function(keyword, $rootScope) {
// if ($rootScope.annotations === undefined) $rootScope.annotations = [];
// // find duplicate by text
// var existing = _.find(
// $rootScope.annotations,
// function(annotation) { return annotation.text.trim().toLowerCase() === keyword.text.trim().toLowerCase(); }
// );
// // delete existing conflicting data before adding new
// if (existing) {
// if (existing.category == keyword.category && existing.level == keyword.level) return;
// this.delete(existing, $rootScope);
// }
// // TODO remove server mocking
// var mock = _.extend(keyword, {
// 'uuid': jQuery.now().toString(),
// 'occurrences': 322
// });
//
// $timeout(function() {
// $rootScope.$apply(function() {
// $rootScope.annotations.push(mock);
// });
// });
//
// return mock;
// },
// delete: function(keyword, $rootScope) {
// var filtered = _.filter($rootScope.annotations, function(item) {
// if (item.uuid == keyword.uuid) {
// return false;
// } else {
// return true;
// }
// });
// $timeout(function() {
// $rootScope.$apply(function() {
// $rootScope.annotations = filtered;
// });
// });
// }
// };
})(window); })(window);
<span ng-if="keyword.category == 'miamlist'" ng-click='onDeleteClick()' class="delete-keyword" data-keyword-id="{{keyword.uuid}}" data-keyword-text="{{keyword.text}}" data-keyword-category="miamlist">×</span> <span ng-if="keyword.category == 'miamlist'" ng-click='onDeleteClick()' class="delete-keyword" data-keyword-id="{[{keyword.uuid}]}" data-keyword-text="{[{keyword.text}]}" data-keyword-category="miamlist">×</span>
<a ng-if="keyword.category == 'miamlist'" href="#" data-toggle="tooltip" class="keyword miamword">{{keyword.text}}</a> <span ng-if="keyword.category == 'miamlist'" data-toggle="tooltip" class="keyword miamword">{[{keyword.text}]}</span>
<span ng-if="keyword.category == 'stoplist'" ng-click='onDeleteClick()' class="delete-keyword" data-keyword-id="{{keyword.uuid}}" data-keyword-text="{{keyword.text}}" data-keyword-category="stoplist">×</span> <span ng-if="keyword.category == 'stoplist'" ng-click='onDeleteClick()' class="delete-keyword" data-keyword-id="{[{keyword.uuid}]}" data-keyword-text="{[{keyword.text}]}" data-keyword-category="stoplist">×</span>
<a ng-if="keyword.category == 'stoplist'" href="#" data-toggle="tooltip" class="keyword stopword">{{keyword.text}}</a> <span ng-if="keyword.category == 'stoplist'" data-toggle="tooltip" class="keyword stopword">{[{keyword.text}]}</span>
<span class="occurrences" data-keyword-id="{{keyword.uuid}}">{{keyword.occurrences}}</span> <span class="occurrences" data-keyword-id="{[{keyword.uuid}]}">{[{keyword.occurrences}]}</span>
...@@ -24,6 +24,7 @@ $script([ ...@@ -24,6 +24,7 @@ $script([
//'bower_components/angular-route/angular-route.js', //'bower_components/angular-route/angular-route.js',
], function() { ], function() {
$script([ $script([
S + 'bower_components/angular-cookies/angular-cookies.min.js',
S + 'bower_components/angular-resource/angular-resource.min.js'], function() { S + 'bower_components/angular-resource/angular-resource.min.js'], function() {
$script([S + 'annotations/http.js', S + 'annotations/app.js'], function() { $script([S + 'annotations/http.js', S + 'annotations/app.js'], function() {
// when all is done, execute bootstrap angular application (replace ng-app directive) // when all is done, execute bootstrap angular application (replace ng-app directive)
......
<ul class="noselection"> <ul class="noselection">
<li>{{level}}<span ng-if="category !== null"> {{category}}</span></li> <li>{[{level}]}<span ng-if="category !== null"> {[{category}]}</span></li>
<li class="miamword" ng-if="local_miamlist === true" ng-click="onClick($event, 'post', 'miamlist', 'local')">add to miam-list</li> <li class="miamword" ng-if="local_miamlist === true" ng-click="onClick($event, 'post', miamListId, 'local')">add to miam-list</li>
<li class="miamword" ng-if="local_miamlist === false" ng-click="onClick($event, 'delete', 'miamlist', 'local')">remove from miam-list</li> <li class="miamword" ng-if="local_miamlist === false" ng-click="onClick($event, 'delete', miamListId, 'local')">remove from miam-list</li>
<li class="stopword" ng-if="local_stoplist === true" ng-click="onClick($event, 'post', 'stoplist', 'local')">add to local stop-list</li> <li class="stopword" ng-if="local_stoplist === true" ng-click="onClick($event, 'post', stopListId, 'local')">add to local stop-list</li>
<li class="stopword" ng-if="local_stoplist === false" ng-click="onClick($event, 'delete', 'stoplist', 'local')">remove from local stop-list</li> <li class="stopword" ng-if="local_stoplist === false" ng-click="onClick($event, 'delete', stopListId, 'local')">remove from local stop-list</li>
<li class="stopword" ng-if="global_stoplist === true" ng-click="onClick($event, 'post', 'stoplist', 'global')">add to global stop-list</li> <!--<li class="stopword" ng-if="global_stoplist === true" ng-click="onClick($event, 'post', 'stoplist', 'global')">add to global stop-list</li>
<li class="stopword" ng-if="global_stoplist === false" ng-click="onClick($event, 'delete', 'stoplist', 'global')">remove from global stop-list</li> <li class="stopword" ng-if="global_stoplist === false" ng-click="onClick($event, 'delete', 'stoplist', 'global')">remove from global stop-list</li>-->
</ul> </ul>
/*
AngularJS v1.2.28
(c) 2010-2014 Google, Inc. http://angularjs.org
License: MIT
*/
(function(p,f,n){'use strict';f.module("ngCookies",["ng"]).factory("$cookies",["$rootScope","$browser",function(e,b){var c={},g={},h,k=!1,l=f.copy,m=f.isUndefined;b.addPollFn(function(){var a=b.cookies();h!=a&&(h=a,l(a,g),l(a,c),k&&e.$apply())})();k=!0;e.$watch(function(){var a,d,e;for(a in g)m(c[a])&&b.cookies(a,n);for(a in c)d=c[a],f.isString(d)||(d=""+d,c[a]=d),d!==g[a]&&(b.cookies(a,d),e=!0);if(e)for(a in d=b.cookies(),c)c[a]!==d[a]&&(m(d[a])?delete c[a]:c[a]=d[a])});return c}]).factory("$cookieStore",
["$cookies",function(e){return{get:function(b){return(b=e[b])?f.fromJson(b):b},put:function(b,c){e[b]=f.toJson(c)},remove:function(b){delete e[b]}}}])})(window,window.angular);
//# sourceMappingURL=angular-cookies.min.js.map
...@@ -23,14 +23,14 @@ ...@@ -23,14 +23,14 @@
<div class="container-fluid"> <div class="container-fluid">
<div class="row-fluid main-panel" ng-controller="IntraTextController"> <div class="row-fluid main-panel" ng-controller="IntraTextController">
<div class="col-md-4 col-xs-4 tabbable words-panel"> <div class="col-md-4 col-xs-4 tabbable words-panel">
<ul class="nav nav-tabs"> <ul class="nav nav-pills nav-justified">
<li class="active"><a href="#tab1" data-toggle="tab">Miamwords</a></li> <li class="active"><a href="#tab1" data-toggle="tab"><span class="glyphicon glyphicon-tags"></span>&nbsp;&nbsp;Miamwords</a></li>
<li><a href="#tab2" data-toggle="tab">Local stopwords</a></li> <!--<li><a href="#tab2" data-toggle="tab">Local stopwords</a></li>-->
</ul> </ul>
<div class="tab-content"> <div class="tab-content">
<div class="tab-pane active" id="tab1"> <div class="tab-pane active" id="tab1">
<ul class="list-group words-list"> <ul class="list-group words-list">
<div ng-if="extra_miamlist.length == 0" class="alert alert-info" role="alert">No extra-text miam-word yet</div> <div ng-if="extra_miamlist.length == 0" class="alert alert-info" role="alert">No extra text miam-word yet</div>
<li ng-repeat="keyword in extra_miamlist | startFrom:currentMiamPage*pageSize | limitTo:pageSize" class="list-group-item"> <li ng-repeat="keyword in extra_miamlist | startFrom:currentMiamPage*pageSize | limitTo:pageSize" class="list-group-item">
<div ng-controller="ExtraAnnotationController" keyword-template class="keyword-container"></div> <div ng-controller="ExtraAnnotationController" keyword-template class="keyword-container"></div>
...@@ -47,7 +47,7 @@ ...@@ -47,7 +47,7 @@
<button type="submit" class="btn btn-default btn-primary" ng-click="onMiamlistSubmit($event)">Add</button> <button type="submit" class="btn btn-default btn-primary" ng-click="onMiamlistSubmit($event)">Add</button>
</div> </div>
</div> </div>
<div class="tab-pane" id="tab2"> <!--<div class="tab-pane" id="tab2">
<ul class="list-group words-list clearfix"> <ul class="list-group words-list clearfix">
<div ng-if="extra_stoplist.length == 0" class="alert alert-info" role="alert">No extra-text stop-word yet</div> <div ng-if="extra_stoplist.length == 0" class="alert alert-info" role="alert">No extra-text stop-word yet</div>
<li ng-repeat="keyword in extra_stoplist | startFrom:currentStopPage*pageSize | limitTo:pageSize" class="list-group-item"><div ng-controller="ExtraAnnotationController" keyword-template></div></li> <li ng-repeat="keyword in extra_stoplist | startFrom:currentStopPage*pageSize | limitTo:pageSize" class="list-group-item"><div ng-controller="ExtraAnnotationController" keyword-template></div></li>
...@@ -62,7 +62,7 @@ ...@@ -62,7 +62,7 @@
<input type="text" class="form-control" id="stoplist-input" ng-keypress="onStoplistSubmit($event)"> <input type="text" class="form-control" id="stoplist-input" ng-keypress="onStoplistSubmit($event)">
<button type="submit" class="btn btn-default" ng-click="onStoplistSubmit($event)">Exclude</button> <button type="submit" class="btn btn-default" ng-click="onStoplistSubmit($event)">Exclude</button>
</div> </div>
</div> </div>-->
</div> </div>
</div> </div>
<div class="col-md-8 col-xs-8 text-panel" ng-controller="DocController" id="document"> <div class="col-md-8 col-xs-8 text-panel" ng-controller="DocController" id="document">
...@@ -86,10 +86,14 @@ ...@@ -86,10 +86,14 @@
<li class="active pull-right">{[{publication_date}]}</li> <li class="active pull-right">{[{publication_date}]}</li>
</ul> </ul>
</div> </div>
<h4>Abstract</h4> <h4 ng-if="abstract_text != null">Abstract</h4>
<p id="abstract-text" class="text-container"></p> <p id="abstract-text" class="text-container">
<h4>Article</h4> <div ng-if="abstract_text == null" class="alert alert-info" role="alert">No abstract text</div>
<p id="full-text" class="text-container"></p> </p>
<h4 ng-if="full_text != null">Full Article</h4>
<p id="full-text" class="text-container">
<div ng-if="full_text == null" class="alert alert-info" role="alert">No full text</div>
</p>
</div> </div>
</div> <!-- end of the main row --> </div> <!-- end of the main row -->
</div> </div>
...@@ -100,8 +104,10 @@ ...@@ -100,8 +104,10 @@
<p class="browsehappy">You are using an <strong>outdated</strong> browser. Please <a href="http://browsehappy.com/">upgrade your browser</a> to improve your experience.</p> <p class="browsehappy">You are using an <strong>outdated</strong> browser. Please <a href="http://browsehappy.com/">upgrade your browser</a> to improve your experience.</p>
<![endif]--> <![endif]-->
<script type="application/javascript"> <script type="application/javascript">
/* Constants required for annotations app JS to work */
window.STATIC_URL = "{% static '' %}"; window.STATIC_URL = "{% static '' %}";
window.ANNOTATION_API_URL = "{{ api_url }}"; window.ANNOTATION_API_URL = "{{ api_url }}";
window.NODES_API_URL = "{{ nodes_api_url }}";
</script> </script>
<script src="{% static 'annotations/main.js' %}"></script> <script src="{% static 'annotations/main.js' %}"></script>
......
...@@ -3,9 +3,7 @@ from annotations import views ...@@ -3,9 +3,7 @@ from annotations import views
urlpatterns = patterns('', urlpatterns = patterns('',
url(r'^demo/$', views.demo),
url(r'^document/(?P<doc_id>[0-9]+)$', views.Document.as_view()), # document view url(r'^document/(?P<doc_id>[0-9]+)$', views.Document.as_view()), # document view
#url(r'^document/(?P<doc_id>[0-9]+)/ngrams/(?P<ngram_id>[0-9]+)$', views.DocumentNgram.as_view()), # actions on ngram from a document url(r'^corpus/(?P<corpus_id>[0-9]+)/document/(?P<doc_id>[0-9]+)$', views.NgramList.as_view()), # the list associated with an ngram
url(r'^lists/(?P<list_id>[0-9]+)$', views.NgramList.as_view()), # actions on list filtered by document url(r'^lists/(?P<list_id>[0-9]+)/ngrams(?:/(?P<ngram_id>[0-9]+))?$', views.NgramEdit.as_view()), #
url(r'^lists/(?P<list_id>[0-9]+)/ngrams(?:/(?P<ngram_id>[0-9]+))?$', views.Ngram.as_view()), # actions on ngram from a list optionally filtered by document
) )
from urllib.parse import urljoin from urllib.parse import urljoin
import json import json
import datetime
from django.shortcuts import render_to_response from django.shortcuts import render_to_response
from django.template import RequestContext from django.template import RequestContext
from django.contrib.auth.decorators import login_required
from rest_framework.views import APIView from rest_framework.views import APIView
from rest_framework.response import Response from rest_framework.response import Response
from rest_framework.renderers import JSONRenderer from rest_framework.renderers import JSONRenderer
from rest_framework.exceptions import APIException
from rest_framework.authentication import SessionAuthentication, BasicAuthentication
from node.models import Node from node.models import Node
from gargantext_web.db import * from gargantext_web.db import *
from ngram.lists import listIds, listNgramIds, ngramList from ngram.lists import listIds, listNgramIds, ngramList
import sqlalchemy
from sqlalchemy.sql import func
from sqlalchemy import desc, asc, or_, and_, Date, cast, select
from sqlalchemy import literal_column
from sqlalchemy.orm import aliased
def demo(request): @login_required
"""Demo page, temporary""" def main(request, project_id, corpus_id, document_id):
return render_to_response('annotations/demo.html', { """
'api_url': urljoin(request.get_host(), '/annotations/') Full page view
"""
return render_to_response('annotations/main.html', {
# TODO use reverse()
'api_url': urljoin(request.get_host(), '/annotations/'),
'nodes_api_url': urljoin(request.get_host(), '/api/'),
}, context_instance=RequestContext(request)) }, context_instance=RequestContext(request))
# NOTE: the class below duplicates the Nodes class defined in
# /srv/gargantext/gargantext_web/api.py.
# All the information you need from Nodes in api.py is in hyperdata.
# You may modify api.py (keeping compatibility) for your own needs.
# See the urls module for the URL pattern to use.
class Document(APIView):
    """Read-only JSON view of a single document node."""

    renderer_classes = (JSONRenderer,)

    def get(self, request, doc_id):
        """Return selected hyperdata fields of the node whose id is ``doc_id``.

        Responds with HTTP 404 when no node has that id, instead of failing
        with an ``AttributeError`` on ``None``.
        """
        node = session.query(Node).filter(Node.id == doc_id).first()
        if node is None:
            # .first() yields None when the query matches no row.
            return Response(
                {'detail': 'Document %s not found' % doc_id},
                status=404,
            )
        # TODO also answer 404 when the node exists but is not a Document

        hyperdata = node.hyperdata
        data = {
            'title': hyperdata.get('title'),
            'authors': hyperdata.get('authors'),
            'journal': hyperdata.get('journal'),
            'publication_date': hyperdata.get('publication_date'),
            'full_text': hyperdata.get('full_text'),
            # the client-facing key is 'abstract_text'; hyperdata stores 'abstract'
            'abstract_text': hyperdata.get('abstract'),
            'id': node.id,
            'current_page_number': 4,  # TODO remove, this is client side
            'last_page_number': 30  # TODO remove, this is client side
        }
        # return formatted result
        return Response(data)
class NgramList(APIView): class NgramList(APIView):
"""Read and Write Annotations""" """Read and Write Annotations"""
renderer_classes = (JSONRenderer,) renderer_classes = (JSONRenderer,)
def get(self, request, list_id): def get(self, request, corpus_id, doc_id):
"""Get All for on List ID""" """Get All for a doc id"""
doc_id = request.GET.get('docId') corpus_id = int(corpus_id)
# TODO DB query doc_id = int(doc_id)
# Example with 'MiamList', same with 'StopList' lists = dict()
corpus_id = session.query(Node.parent_id).filter(Node.id == doc_id).first() for list_type in ['MiamList', 'StopList']:
miamlist_ids = listIds(user_id=request.user.id, list_id = list()
corpus_id=corpus_id, list_id = listIds(user_id=request.user.id, corpus_id=int(corpus_id), typeList=list_type)
typeList='MiamList') lists["%s" % list_id[0][0]] = list_type
miamlist_id, miamlist_name = miamlist_ids[0]
# ngrams of list_id of corpus_id:
corpus_ngram_miam_list = listNgramIds(list_id=miamList_id)
# ngrams of list_id of corpus_id: # ngrams of list_id of corpus_id:
doc_ngram_miam_list = listNgramIds(list_id=miamList_id, doc_id=doc_id) doc_ngram_list = listNgramIds(corpus_id=corpus_id, doc_id=doc_id, user_id=request.user.id)
#doc_ngram_list = [(1, 'miam', 2, 1931), (2, 'stop', 2, 1932), (3, 'Potassium channels', 4, 1931)]
# now you can model your dict as you want (for doc or corpus level):
ngram_id, ngram_text, ngram_occurrences = doc_ngram_miam_list[0] data = { '%s' % corpus_id : {
'%s' % doc_id : [
data = { '%s' % list_id : { '%s' % doc_id : [ {
{ 'uuid': ngram_id,
'uuid': '1', 'text': ngram_text,
'text': 'what', 'occurrences': ngram_occurrences,
'category': 'stoplist', 'list_id': list_id,
'level': 'global', }
'occurrences': 1 for ngram_id, ngram_text, ngram_occurrences, list_id in doc_ngram_list],
}, 'lists': lists
{ }}
'uuid': '2',
'text': 'rotations',
'category': 'miamlist',
'level': 'local',
'occurrences': 1
},
{
'uuid': '3',
'text': 'etsy',
'category': 'stoplist',
'level': 'local',
'occurrences': 1
},
{
'uuid': '4',
'text': 'employees',
'category': 'miamlist',
'level': 'local',
'occurrences': 1
},
{
'uuid': '5',
'text': '2010',
'category': 'stoplist',
'level': 'global',
'occurrences': 1
},
{
'uuid': '6',
'text': 'stoplist keyword',
'category': 'stoplist',
'level': 'local',
'occurrences': 255
},
{
'uuid': '7',
'text': 'another stoplist keyword',
'category': 'stoplist',
'level': 'local',
'occurrences': 23
},
{
'uuid': '8',
'text': 'dmc-gm5',
'category': 'miamlist',
'level': 'local',
'occurrences': 1
},
{
'uuid': '9',
'text': 'scale of the GM-series',
'category': 'miamlist',
'level': 'local',
'occurrences': 1
},
{
'uuid': '10',
'text': 'engineering rotations',
'category': 'miamlist',
'level': 'local',
'occurrences': 1
},
{
'uuid': '11',
'text': 'pixel electronic viewfinder',
'category': 'miamlist',
'level': 'local',
'occurrences': 1
},
{
'uuid': '12',
'text': 'viewfinder',
'category': 'miamlist',
'level': 'local',
'occurrences': 1
},
{
'uuid': '13',
'text': 'pixel electronic',
'category': 'miamlist',
'level': 'local',
'occurrences': 1
},
{
'uuid': '14',
'text': 'GM',
'category': 'miamlist',
'level': 'local',
'occurrences': 1
},
{
'uuid': '15',
'text': 'support rotations',
'category': 'miamlist',
'level': 'local',
'occurrences': 1
},
{
'uuid': '16',
'text': 'miamlist keyword',
'category': 'miamlist',
'level': 'local',
'occurrences': 1
},
{
'uuid': '17',
'text': 'miamlist keyword',
'category': 'miamlist',
'level': 'local',
'occurrences': 1
},
{
'uuid': '18',
'text': 'another miamlist keyword',
'category': 'miamlist',
'level': 'local',
'occurrences': 3
}
]}}
return Response(data) return Response(data)
class Ngram(APIView): class NgramEdit(APIView):
"""Read and Write Annotations""" """
Actions on one Ngram in one list
"""
renderer_classes = (JSONRenderer,) renderer_classes = (JSONRenderer,)
authentication_classes = (SessionAuthentication, BasicAuthentication)
def delete(self, request, list_id, ngram_id): def post(self, request, list_id, ngram_id):
""" """
TODO Delete one annotation by id Add a ngram in a list
associated with one Document (remove edge)
""" """
doc_id = request.GET.get('docId') # TODO - if Ngram is in miam-list, and adding it to stop-list,
annotationId = request.GET.get("annotationId") # then remove it from the previous list
print(annotationDict) list_id = int(list_id)
# TODO DB query # format the ngram's text
# Use the ngramList function in ngram.lists.py for that ngram_text = request.data.get('annotation', {}).get('text', None)
# It can return True or False ngram_text = ngram_text.strip().lower()
ngramList(do='del', ngram_ids=[ngram_id,], list_id=list_id) ngram_text = ' '.join(ngram_text.split())
# retrieve the ngram's id
return Response({}) ngram = session.query(Ngram).filter(Ngram.terms == ngram_text).first()
if ngram is None:
ngram = Ngram(n=len(ngram_text.split()), terms=ngram_text)
session.add(ngram)
session.commit()
ngram_id = ngram.id
# add the ngram to the list if not already done
node_ngram = session.query(Node_Ngram).filter(Node_Ngram.node_id==list_id).filter(Node_Ngram.ngram_id==ngram_id).first()
if node_ngram is None:
node_ngram = Node_Ngram(node_id=list_id, ngram_id=ngram_id, weight=1.0)
session.add(node_ngram)
session.commit()
ngram_occurrences = node_ngram.weight
# return the response
return Response({
'uuid': ngram_id,
'text': ngram_text,
'occurrences': ngram_occurrences,
'list_id': list_id,
})
def post(self, request, list_id, ngram_id): def delete(self, request, list_id, ngram_id):
""" """
TODO update one annotation (document level) Delete a ngram from a list
associated with one Document (add edge)
""" """
doc_id = request.GET.get('docId') session.query(Node_Ngram).filter(Node_Ngram.node_id==list_id).filter(Node_Ngram.ngram_id==ngram_id).delete()
annotationDict = json.loads(request.POST.get("annotation")) return Response(None, 204)
print(annotationDict)
# There is 2 main actions:
# 1) add ngram to the miamList : this step is tricky if the ngram does
# exist yet, it is experimental in this case.
# But according to your function, you have the ngram_id already
# The function is:
ngramList(do='add', ngram_ids=[ngram_id,], list_id=list_id)
#ngramList(do='add', ngram_ids=[ngram_id,], list_id=list_id)
# Note : depending on the list, maybe I should adapt the function to
# delete from a list when added to a specific type of list
# 2) get the list of ngrams of one miamList: for this step see above class Document(APIView):
# Use the ngramList function in ngram.lists.py for that """
Read-only Document view, similar to /api/nodes/
"""
renderer_classes = (JSONRenderer,)
def get(self, request, doc_id):
"""Document by ID"""
node = session.query(Node).filter(Node.id == doc_id).first()
if node is None:
raise APIException('This node does not exist', 404)
try:
pub_date = datetime.datetime.strptime(node.hyperdata.get('publication_date'),
"%Y-%m-%d %H:%M:%S")
pub_date = pub_date.strftime("%x")
except ValueError:
pub_date = node.hyperdata.get('publication_date')
# TODO DB query data = {
return Response(annotationDict) 'title': node.hyperdata.get('title'),
'authors': node.hyperdata.get('authors'),
'journal': node.hyperdata.get('journal'),
'publication_date': pub_date,
'full_text': node.hyperdata.get('full_text'),
'abstract_text': node.hyperdata.get('abstract'),
'id': node.id
}
return Response(data)
...@@ -562,6 +562,7 @@ class NodesList(APIView): ...@@ -562,6 +562,7 @@ class NodesList(APIView):
for node in query.all() for node in query.all()
]}) ]})
class Nodes(APIView): class Nodes(APIView):
def get(self, request, node_id): def get(self, request, node_id):
...@@ -652,39 +653,3 @@ class CorpusController: ...@@ -652,39 +653,3 @@ class CorpusController:
) )
else: else:
raise ValidationError('Unrecognized "format=%s", should be "csv" or "json"' % (format, )) raise ValidationError('Unrecognized "format=%s", should be "csv" or "json"' % (format, ))
from ngram.lists import listIds, ngramList
class ListManagement(APIView):
    """Look up a corpus' MiamList/StopList ids and add/remove ngrams in a list."""
    #authentication_classes = (SessionAuthentication, BasicAuthentication)
    # TODO: Be careful, authentication is needed!

    def get(self, request, corpus_id):
        """Return the ids of the requesting user's MiamList and StopList.

        The user is resolved by username from ``request.user``; the lists are
        looked up with ``listIds`` for the given ``corpus_id``.
        """
        user_id = session.query(User.id).filter(User.username==str(request.user)).first()[0]
        lists = dict()
        for list_type in ['MiamList', 'StopList']:
            list_id = listIds(user_id=user_id, corpus_id=int(corpus_id), typeList=list_type)
            # first element of the first result row is used as the list id
            lists[list_type] = int(list_id[0][0])
            # lists[list_type]['id']['name'] = r[0][1]

        return JsonHttpResponse({
            'MiamList' : lists['MiamList'],
            'StopList' : lists['StopList']
        })

    def post(self, request, corpus_id):
        """Add the posted ``ngram_ids`` to the posted ``list_id``.

        Returns a JSON echo of the parameters; a view handler must return a
        response object, returning ``None`` makes the request fail.
        """
        list_id = request.POST.get('list_id')
        ngram_ids = request.POST.get('ngram_ids')
        ngramList(do='add', ngram_ids=ngram_ids, list_id=list_id)
        return JsonHttpResponse({'list_id': list_id, 'ngram_ids': ngram_ids})

    def delete(self, request, corpus_id):
        """Remove the given ``ngram_ids`` from the given ``list_id``.

        Returns a JSON echo of the parameters (see ``post``).
        """
        # NOTE(review): parameters are read from request.POST even though this
        # handles DELETE -- confirm the client actually sends them that way.
        list_id = request.POST.get('list_id')
        ngram_ids = request.POST.get('ngram_ids')
        ngramList(do='del', ngram_ids=ngram_ids, list_id=list_id)
        return JsonHttpResponse({'list_id': list_id, 'ngram_ids': ngram_ids})
...@@ -17,6 +17,7 @@ def apply_sum(x, y): ...@@ -17,6 +17,7 @@ def apply_sum(x, y):
from parsing.corpustools import add_resource, parse_resources, extract_ngrams, compute_tfidf from parsing.corpustools import add_resource, parse_resources, extract_ngrams, compute_tfidf
from ngram.lists import ngrams2miam
from admin.utils import PrintException from admin.utils import PrintException
...@@ -34,13 +35,14 @@ def apply_workflow(corpus_id): ...@@ -34,13 +35,14 @@ def apply_workflow(corpus_id):
update_processing(corpus, 1) update_processing(corpus, 1)
parse_resources(corpus) parse_resources(corpus)
update_processing(corpus, 2) update_processing(corpus, 2)
extract_ngrams(corpus, ['title', 'abstract']) extract_ngrams(corpus, ['title', 'abstract'])
update_processing(corpus, 3) update_processing(corpus, 3)
compute_tfidf(corpus) compute_tfidf(corpus)
ngrams2miam(user_id=corpus.user_id, corpus_id=corpus_id)
update_processing(corpus, 0) update_processing(corpus, 0)
......
...@@ -55,7 +55,6 @@ MAINTENANCE = False ...@@ -55,7 +55,6 @@ MAINTENANCE = False
TEMPLATE_DEBUG = False TEMPLATE_DEBUG = False
TEMPLATE_DIRS = ( TEMPLATE_DIRS = (
# Put strings here, like "/home/html/django_templates" or "C:/www/django/templates". # Put strings here, like "/home/html/django_templates" or "C:/www/django/templates".
# Always use forward slashes # Always use forward slashes
...@@ -187,6 +186,7 @@ TEMPLATE_CONTEXT_PROCESSORS = ( ...@@ -187,6 +186,7 @@ TEMPLATE_CONTEXT_PROCESSORS = (
"django.core.context_processors.static", "django.core.context_processors.static",
) )
LOGIN_URL = '/auth/'
# grappelli custom # grappelli custom
GRAPPELLI_ADMIN_TITLE = "Gargantext" GRAPPELLI_ADMIN_TITLE = "Gargantext"
......
from celery import shared_task
from parsing.corpustools import add_resource, parse_resources, extract_ngrams, compute_tfidf
@shared_task
def apply_workflow(corpus):
    """Run the corpus pipeline: parse resources, extract ngrams, compute tf-idf.

    Ngrams are extracted from the 'title' field only.
    """
    ngram_fields = ['title']
    parse_resources(corpus)
    extract_ngrams(corpus, ngram_fields)
    compute_tfidf(corpus)
...@@ -5,6 +5,8 @@ from django.contrib.auth.views import login ...@@ -5,6 +5,8 @@ from django.contrib.auth.views import login
from gargantext_web import views, views_optimized from gargantext_web import views, views_optimized
from annotations import urls as annotations_urls from annotations import urls as annotations_urls
from annotations.views import main as annotations_main_view
import gargantext_web.api import gargantext_web.api
import scrappers.scrap_pubmed.views as pubmedscrapper import scrappers.scrap_pubmed.views as pubmedscrapper
...@@ -39,6 +41,11 @@ urlpatterns = patterns('', ...@@ -39,6 +41,11 @@ urlpatterns = patterns('',
# Corpus management # Corpus management
url(r'^project/(\d+)/corpus/(\d+)/$', views.corpus), url(r'^project/(\d+)/corpus/(\d+)/$', views.corpus),
# annotations App
url(r'^project/(\d+)/corpus/(\d+)/document/(\d+)/$', annotations_main_view),
url(r'^annotations/', include(annotations_urls)),
#
url(r'^project/(\d+)/corpus/(\d+)/corpus.csv$', views.corpus_csv), url(r'^project/(\d+)/corpus/(\d+)/corpus.csv$', views.corpus_csv),
url(r'^project/(\d+)/corpus/(tests_mvc_listdocuments+)/corpus.tests_mvc_listdocuments$', views.corpus_csv), url(r'^project/(\d+)/corpus/(tests_mvc_listdocuments+)/corpus.tests_mvc_listdocuments$', views.corpus_csv),
...@@ -63,13 +70,8 @@ urlpatterns = patterns('', ...@@ -63,13 +70,8 @@ urlpatterns = patterns('',
url(r'^api/nodes/(\d+)/children/queries$', gargantext_web.api.NodesChildrenQueries.as_view()), url(r'^api/nodes/(\d+)/children/queries$', gargantext_web.api.NodesChildrenQueries.as_view()),
url(r'^api/nodes/(\d+)/children/duplicates$', gargantext_web.api.NodesChildrenDuplicates.as_view()), url(r'^api/nodes/(\d+)/children/duplicates$', gargantext_web.api.NodesChildrenDuplicates.as_view()),
# url(r'^api/nodes/(\d+)/children/duplicates/delete$', gargantext_web.api.NodesChildrenDuplicates.delete ), # url(r'^api/nodes/(\d+)/children/duplicates/delete$', gargantext_web.api.NodesChildrenDuplicates.delete ),
url(r'^api/corpus/(\d+)/lists$', gargantext_web.api.ListManagement.as_view()),
url(r'^api/nodes/(\d+)/ngrams$', gargantext_web.api.CorpusController.ngrams), url(r'^api/nodes/(\d+)/ngrams$', gargantext_web.api.CorpusController.ngrams),
url(r'^annotations/', include(annotations_urls)),
# Provisory tests # Provisory tests
url(r'^ngrams$', views.ngrams), # to be removed url(r'^ngrams$', views.ngrams), # to be removed
url(r'^nodeinfo/(\d+)$', views.nodeinfo), # to be removed ? url(r'^nodeinfo/(\d+)$', views.nodeinfo), # to be removed ?
...@@ -100,12 +102,12 @@ if settings.DEBUG: ...@@ -100,12 +102,12 @@ if settings.DEBUG:
if settings.MAINTENANCE: if settings.MAINTENANCE:
urlpatterns = patterns('', urlpatterns = patterns('',
url(r'^img/logo.svg$', views.logo), url(r'^img/logo.svg$', views.logo),
url(r'^css/bootstrap.css$', views.css), url(r'^css/bootstrap.css$', views.css),
url(r'^$', views.home_view), url(r'^$', views.home_view),
url(r'^about/', views.get_about), url(r'^about/', views.get_about),
url(r'^admin/', include(admin.site.urls)), url(r'^admin/', include(admin.site.urls)),
url(r'^.*', views.get_maintenance), url(r'^.*', views.get_maintenance),
) )
...@@ -36,7 +36,7 @@ def project(request, project_id): ...@@ -36,7 +36,7 @@ def project(request, project_id):
project_id = int(project_id) project_id = int(project_id)
except ValueError: except ValueError:
raise Http404() raise Http404()
# do we have a valid project? # do we have a valid project?
project = (session project = (session
.query(Node) .query(Node)
...@@ -74,7 +74,7 @@ def project(request, project_id): ...@@ -74,7 +74,7 @@ def project(request, project_id):
documents_count_by_resourcetype = defaultdict(int) documents_count_by_resourcetype = defaultdict(int)
corpora_count = 0 corpora_count = 0
corpusID_dict = {} corpusID_dict = {}
for corpus_id, corpus_name, document_count, processing in corpus_query: for corpus_id, corpus_name, document_count, processing in corpus_query:
#print(corpus_id, processing) #print(corpus_id, processing)
...@@ -84,7 +84,7 @@ def project(request, project_id): ...@@ -84,7 +84,7 @@ def project(request, project_id):
.join(Node, Node.id == Node_Resource.node_id ) .join(Node, Node.id == Node_Resource.node_id )
.filter(Node.id==corpus_id) .filter(Node.id==corpus_id)
.first())[0] .first())[0]
if not corpus_id in corpusID_dict: if not corpus_id in corpusID_dict:
if resource_type_id is None: if resource_type_id is None:
resourcetype_name = '(no resource)' resourcetype_name = '(no resource)'
...@@ -104,7 +104,7 @@ def project(request, project_id): ...@@ -104,7 +104,7 @@ def project(request, project_id):
# do the donut # do the donut
total_documents_count = sum(documents_count_by_resourcetype.values()) total_documents_count = sum(documents_count_by_resourcetype.values())
donut = [ donut = [
{ 'source': re.sub(' \(.*$', '', key), { 'source': re.sub(' \(.*$', '', key),
'count': value, 'count': value,
'part' : round(value * 100 / total_documents_count) if total_documents_count else 0, 'part' : round(value * 100 / total_documents_count) if total_documents_count else 0,
} }
...@@ -116,12 +116,12 @@ def project(request, project_id): ...@@ -116,12 +116,12 @@ def project(request, project_id):
# form validation # form validation
form = CustomForm(request.POST, request.FILES) form = CustomForm(request.POST, request.FILES)
if form.is_valid(): if form.is_valid():
# extract information from the form # extract information from the form
name = form.cleaned_data['name'] name = form.cleaned_data['name']
thefile = form.cleaned_data['file'] thefile = form.cleaned_data['file']
resourcetype = cache.ResourceType[form.cleaned_data['type']] resourcetype = cache.ResourceType[form.cleaned_data['type']]
# which default language shall be used? # which default language shall be used?
if resourcetype.name == "Europress (French)": if resourcetype.name == "Europress (French)":
language_id = cache.Language['fr'].id language_id = cache.Language['fr'].id
...@@ -129,7 +129,7 @@ def project(request, project_id): ...@@ -129,7 +129,7 @@ def project(request, project_id):
language_id = cache.Language['en'].id language_id = cache.Language['en'].id
else: else:
language_id = None language_id = None
# corpus node instanciation as a Django model # corpus node instanciation as a Django model
corpus = Node( corpus = Node(
name = name, name = name,
...@@ -141,10 +141,10 @@ def project(request, project_id): ...@@ -141,10 +141,10 @@ def project(request, project_id):
) )
session.add(corpus) session.add(corpus)
session.commit() session.commit()
# If user is new, folder does not exist yet, create it then # If user is new, folder does not exist yet, create it then
ensure_dir(request.user) ensure_dir(request.user)
# Save the uploaded file # Save the uploaded file
filepath = '%s/corpora/%s/%s' % (MEDIA_ROOT, request.user.username, thefile._name) filepath = '%s/corpora/%s/%s' % (MEDIA_ROOT, request.user.username, thefile._name)
f = open(filepath, 'wb') f = open(filepath, 'wb')
...@@ -159,7 +159,7 @@ def project(request, project_id): ...@@ -159,7 +159,7 @@ def project(request, project_id):
# let's start the workflow # let's start the workflow
try: try:
if DEBUG is False: if DEBUG is False:
apply_workflow((corpus.id,),) apply_workflow.apply_async((corpus.id,),)
else: else:
#apply_workflow(corpus) #apply_workflow(corpus)
thread = Thread(target=apply_workflow, args=(corpus.id, ), daemon=True) thread = Thread(target=apply_workflow, args=(corpus.id, ), daemon=True)
...@@ -168,7 +168,7 @@ def project(request, project_id): ...@@ -168,7 +168,7 @@ def project(request, project_id):
print('WORKFLOW ERROR') print('WORKFLOW ERROR')
print(error) print(error)
# redirect to the main project page # redirect to the main project page
# TODO need to wait before response (need corpus update) # TODO need to wait before response (need corpus update)
sleep(2) sleep(2)
return HttpResponseRedirect('/project/' + str(project_id)) return HttpResponseRedirect('/project/' + str(project_id))
else: else:
...@@ -226,5 +226,5 @@ def tfidf(request, corpus_id, ngram_ids): ...@@ -226,5 +226,5 @@ def tfidf(request, corpus_id, ngram_ids):
nodes_list.append(node_dict) nodes_list.append(node_dict)
# print("= = = = = = = = \n") # print("= = = = = = = = \n")
data = json.dumps(nodes_list) data = json.dumps(nodes_list)
return JsonHttpResponse(data) return JsonHttpResponse(data)
...@@ -39,9 +39,9 @@ for name_, type_ in hyperdata.items(): ...@@ -39,9 +39,9 @@ for name_, type_ in hyperdata.items():
).first() ).first()
) )
if hyperdata is None: if data is None:
print('Hyper Data' + name + 'does not existe, creating it') print('Hyper Data' + name_ + 'does not existe, creating it')
hyperdata = Hyperdata(name=name, type=type_name) hyperdata = Hyperdata(name=name_, type=type_)
session.add(hyperdata) session.add(hyperdata)
session.commit() session.commit()
......
import sys
from admin.utils import PrintException from admin.utils import PrintException
from gargantext_web.db import NodeNgram from gargantext_web.db import NodeNgram
...@@ -11,9 +10,9 @@ from sqlalchemy import desc, asc, or_, and_, Date, cast, select ...@@ -11,9 +10,9 @@ from sqlalchemy import desc, asc, or_, and_, Date, cast, select
from sqlalchemy import literal_column from sqlalchemy import literal_column
from sqlalchemy.orm import aliased from sqlalchemy.orm import aliased
# from gargantext_web.db import Node, get_cursor
def listIds(user_id=None, corpus_id=None, typeList='MiamList'):
def listIds(typeList=None, user_id=None, corpus_id=None):
''' '''
nodeList : get or create NodeList. nodeList : get or create NodeList.
nodeList :: Integer -> Integer -> String -> [Node] nodeList :: Integer -> Integer -> String -> [Node]
...@@ -22,6 +21,9 @@ def listIds(user_id=None, corpus_id=None, typeList='MiamList'): ...@@ -22,6 +21,9 @@ def listIds(user_id=None, corpus_id=None, typeList='MiamList'):
typeList :: String, Type of the Node that should be created typeList :: String, Type of the Node that should be created
[Node] :: List of Int, returned or created by the function [Node] :: List of Int, returned or created by the function
''' '''
if typeList is None:
typeList = 'MiamList'
if corpus_id is not None and user_id is not None: if corpus_id is not None and user_id is not None:
# Nodes are either in root_list or user_list # Nodes are either in root_list or user_list
...@@ -39,9 +41,7 @@ def listIds(user_id=None, corpus_id=None, typeList='MiamList'): ...@@ -39,9 +41,7 @@ def listIds(user_id=None, corpus_id=None, typeList='MiamList'):
Node.type_id == cache.NodeType[typeList].id Node.type_id == cache.NodeType[typeList].id
).order_by(desc(Node.id)).all() ).order_by(desc(Node.id)).all()
else: else:
print('typeList not supported yet') raise Exception("typeList %s not supported yet" % typeList)
sys.exit(0)
if nodes == []: if nodes == []:
node = Node(user_id = user_id, node = Node(user_id = user_id,
...@@ -56,12 +56,12 @@ def listIds(user_id=None, corpus_id=None, typeList='MiamList'): ...@@ -56,12 +56,12 @@ def listIds(user_id=None, corpus_id=None, typeList='MiamList'):
return([(node.id, node.name) for node in nodes]) return([(node.id, node.name) for node in nodes])
else: else:
print("Usage (Warning): Need corpus_id and user_id") raise Exception("Usage (Warning): Need corpus_id and user_id")
# Some functions to manage ngrams according to the lists # Some functions to manage ngrams according to the lists
def listNgramIds(list_id=None, typeList=None, def listNgramIds(list_id=None, typeList=None,
corpus_id=None, doc_id=None, user_id=None): corpus_id=None, doc_id=None, user_id=None):
''' '''
listNgramsIds :: Int | String, Int, Int, Int -> [(Int, String, Int)] listNgramsIds :: Int | String, Int, Int, Int -> [(Int, String, Int)]
return has types: [(ngram_id, ngram_terms, occurrences)] return has types: [(ngram_id, ngram_terms, occurrences)]
...@@ -75,49 +75,49 @@ def listNgramIds(list_id=None, typeList=None, ...@@ -75,49 +75,49 @@ def listNgramIds(list_id=None, typeList=None,
doc_id : to get specific ngrams related to a document with Node.id=doc_id doc_id : to get specific ngrams related to a document with Node.id=doc_id
user_id : needed to create list if it does not exist user_id : needed to create list if it does not exist
''' '''
if typeList is None:
typeList = ['MiamList', 'StopList']
elif isinstance(typeList, string):
typeList = [typeList]
if list_id is None : if list_id is None and corpus_id is None:
if corpus_id is not None : raise Exception('Need a listId or corpusId to query')
if typeList is not None :
if user_id is not None : if user_id is None:
try: raise Exception("Need a user_id to create list if needed")
list_id = listIds(user_id=user_id,
corpus_id=corpus_id, # iterate over every list in a corpus
typeList=typeList)[0][0] try:
except: allLists = []
PrintException() for aType in typeList:
else: allLists += listIds(user_id=user_id, corpus_id=corpus_id, typeList=aType)
print('Need a user_id to create list if needed') except Exception as exc:
sys.exit() PrintException()
else: raise exc
print('Need a typeList parameter')
sys.exit() ListNgram = aliased(NodeNgram)
else: or_args = [ListNgram.node_id == l[0] for l in allLists]
print('Need a node_id to take default list of type' + typeList) query = (session.query(Ngram.id, Ngram.terms, func.count(), ListNgram.node_id)
sys.exit() .join(ListNgram, ListNgram.ngram_id == Ngram.id)
else: .filter(or_(*or_args))
ListNgram = aliased(NodeNgram) .group_by(Ngram.id, ListNgram)
query = (session.query(Ngram.id, Ngram.terms, func.count()) )
.join(ListNgram, ListNgram.ngram_id == Ngram.id)
.filter(ListNgram.node_id == list_id)
.group_by(Ngram.id)
)
if doc_id is not None :
Doc = aliased(Node)
DocNgram = aliased(NodeNgram)
query = (query
.join(DocNgram, DocNgram.ngram_id == Ngram.id)
.join(Doc, Doc.id == doc_id)
.filter(DocNgram.node_id == Doc.id)
)
return(query.all()) if doc_id is not None:
Doc = aliased(Node)
DocNgram = aliased(NodeNgram)
query = (query
.join(DocNgram, DocNgram.ngram_id == Ngram.id)
.join(Doc, Doc.id == doc_id)
.filter(DocNgram.node_id == Doc.id)
)
return(query.all())
def ngramList(do=None, ngram_ids=[], list_id=None) :
def ngramList(do, list_id, ngram_ids=None) :
''' '''
,gramList :: ([Int], Int, String) -> Bool ngramList :: ([Int], Int, String) -> Bool
Do (delete | add) [ngram_id] (from | to) the list_id Do (delete | add) [ngram_id] (from | to) the list_id
options: options:
...@@ -125,69 +125,97 @@ def ngramList(do=None, ngram_ids=[], list_id=None) : ...@@ -125,69 +125,97 @@ def ngramList(do=None, ngram_ids=[], list_id=None) :
ngram_id = [Int] : list of Ngrams id (Ngrams.id) ngram_id = [Int] : list of Ngrams id (Ngrams.id)
list_id = Int : list id (Node.id) list_id = Int : list id (Node.id)
''' '''
if do is None or ngram_ids == [] or list_id is None : results = []
print('Need more options: do, ngram_id, list_id')
sys.exit(0) if do == 'create':
else: terms = copy(ngram_ids)
try: ngram_ids = []
node_type_id = (session.query(Node.type_id) for ngram_term in terms:
.filter(Node.id == list_id) # TODO set the language correctly
.first() ngram = Ngram.objects.get_or_create(terms=ngram_term, n=len(terms.split()),
) language='en')
ngram_ids += [ngram.id]
for ngram_id in ngram_ids:
# First we test to know if ngram exist in database already # TODO there should not be a try/except here, let the code crash as soon as possible
#ngram = (session.query(Ngram).filter(Ngram.id == ngram_id).first() try:
# Need to be optimized with list of ids for ngram_id in ngram_ids:
node_ngram = (session.query(NodeNgram) # Fetch the ngram from database
.filter(NodeNgram.ngram_id == ngram_id) ngram = session.query(Ngram.id, Ngram.terms, func.count()).filter(Ngram.id == ngram_id).first()
.filter(NodeNgram.node_id == list_id) # Need to be optimized with list of ids
.first() node_ngram = (session.query(NodeNgram)
) .filter(NodeNgram.ngram_id == ngram_id)
if node_ngram is None : .filter(NodeNgram.node_id == list_id)
node_ngram = NodeNgram(node_id = list_id, .first()
ngram_id=ngram_id, )
weight=1) # create NodeNgram if does not exists
if do == 'add' : if node_ngram is None :
session.add(node_ngram) node_ngram = NodeNgram(node_id = list_id, ngram_id=ngram_id,
elif do == 'del' : weight=1)
session.delete(node_ngram) if do == 'add' :
session.add(node_ngram)
results += [ngram]
session.commit() elif do == 'del' :
return(True) session.delete(node_ngram)
except: session.commit()
PrintException() return(results)
return(False)
except Exception as exc:
PrintException()
raise exc
# Some functions to manage automatically the lists # Some functions to manage automatically the lists
def doStopList(user_id=None, corpus_id=None, def doStopList(user_id=None, corpus_id=None, stop_id=None, reset=False, limit=None):
stop_id=None,
reset=False, limit=None
):
''' '''
Compute automatically the stopList and returns its Node.id Compute automatically the stopList and returns its Node.id
Algo: TODO tfidf according type of corpora Algo: TODO tfidf according type of corpora
''' '''
if stop_id is None: if stop_id is None:
stop_id = nodeListIds(user_id=user_id, stop_id = listNgramIds(user_id=user_id,
corpus_id=corpus_id, corpus_id=corpus_id,
typeList='StopList')[0] typeList='StopList')[0]
# according to type of corpus, choose the right default stopList # according to type of corpus, choose the right default stopList
def ngrams2miam(user_id=None, corpus_id=None):
    '''
    Fill the MiamList of a corpus with the ngrams of its documents.

    user_id   :: Int, forwarded to listIds to look up the list
    corpus_id :: Int, Node.id of the corpus; also forwarded to listIds

    The id of the first 'MiamList' returned by listIds is used as target.
    Every Ngram linked (through NodeNgram) to a Node whose parent is the
    corpus and whose type is 'Document' yields one row
    (miam_id, Ngram.id, number of joined rows), which is bulk-inserted into
    NodeNgram as (node_id, ngram_id, weight).

    Prints the MiamList id and returns nothing.
    '''
    # listIds yields (id, name) pairs; keep the id of the first one
    miam_lists = listIds(typeList='MiamList', user_id=user_id, corpus_id=corpus_id)
    miam_id = miam_lists[0][0]
    print(miam_id)

    # constant first column: every inserted row targets the same list node
    selected_columns = (
        literal_column(str(miam_id)).label("node_id"),
        Ngram.id,
        func.count(),
    )

    ngram_counts = session.query(*selected_columns).select_from(Ngram)
    ngram_counts = ngram_counts.join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
    ngram_counts = ngram_counts.join(Node, NodeNgram.node_id == Node.id)
    # restrict to the document nodes that are children of this corpus
    ngram_counts = ngram_counts.filter(Node.parent_id == corpus_id)
    ngram_counts = ngram_counts.filter(Node.type_id == cache.NodeType['Document'].id)
    rows = ngram_counts.group_by(Ngram.id).all()

    bulk_insert(NodeNgram, ['node_id', 'ngram_id', 'weight'], rows)
def doList( def doList(
type_list='miam', type_list='MiamList',
user_id=None, corpus_id=None, user_id=None, corpus_id=None,
miam_id=None, stop_id=None, main_id=None, miam_id=None, stop_id=None, main_id=None,
lem_id=None, stem_id=None, cvalue_id=None, group_id=None, lem_id=None, stem_id=None, cvalue_id=None, group_id=None,
reset=True, limit=None reset=True, limit=None
): ):
''' '''
Compute the miamList and returns its Node.id Compute the miamList and returns its Node.id
miamList = allList - stopList miamList = allList - stopList
...@@ -206,9 +234,8 @@ def doList( ...@@ -206,9 +234,8 @@ def doList(
cvalue = equivalent N-Words according to C-Value (but the main form) cvalue = equivalent N-Words according to C-Value (but the main form)
''' '''
if type_list not in ['miam', 'main']: if type_list not in ['MiamList', 'MainList']:
print('Type List supported: \'miam\' or \'main\'') raise Exception("Type List (%s) not supported, try: \'MiamList\' or \'MainList\'" % type_list)
sys.exit(0)
try: try:
list_dict = { list_dict = {
...@@ -228,7 +255,7 @@ def doList( ...@@ -228,7 +255,7 @@ def doList(
for list_ in list_dict.keys(): for list_ in list_dict.keys():
if list_dict[list_]['id'] is None: if list_dict[list_]['id'] is None:
list_dict[list_]['id'] = nodeListIds(user_id=user_id, list_dict[list_]['id'] = listNgramIds(user_id=user_id,
corpus_id=corpus_id, corpus_id=corpus_id,
typeList=list_dict[list_]['type'])[0][0] typeList=list_dict[list_]['type'])[0][0]
# Delete previous List ? # Delete previous List ?
...@@ -241,10 +268,9 @@ def doList( ...@@ -241,10 +268,9 @@ def doList(
except: except:
PrintException() PrintException()
stopNgram = aliased(NodeNgram) stopNgram = aliased(NodeNgram)
if type_list == 'MiamList' :
if 'miam' == type_list:
query = (session.query( query = (session.query(
literal_column(str(list_dict['miam']['id'])).label("node_id"), literal_column(str(list_dict['miam']['id'])).label("node_id"),
Ngram.id, Ngram.id,
...@@ -264,7 +290,7 @@ def doList( ...@@ -264,7 +290,7 @@ def doList(
.group_by(Ngram.id) .group_by(Ngram.id)
) )
elif 'main' == type_list: elif type_list == 'MainList' :
# Query to get Ngrams for main list # Query to get Ngrams for main list
query = (session.query( query = (session.query(
literal_column(str(list_dict['main']['id'])).label("node_id"), literal_column(str(list_dict['main']['id'])).label("node_id"),
...@@ -314,4 +340,3 @@ def doList( ...@@ -314,4 +340,3 @@ def doList(
bulk_insert(NodeNgram, ['node_id', 'ngram_id', 'weight'], query) bulk_insert(NodeNgram, ['node_id', 'ngram_id', 'weight'], query)
return(list_dict[type_list]['id']) return(list_dict[type_list]['id'])
...@@ -9,7 +9,6 @@ from gargantext_web.db import * ...@@ -9,7 +9,6 @@ from gargantext_web.db import *
from .parsers_config import parsers as _parsers from .parsers_config import parsers as _parsers
class DebugTime: class DebugTime:
def __init__(self, prefix): def __init__(self, prefix):
...@@ -29,7 +28,6 @@ class DebugTime: ...@@ -29,7 +28,6 @@ class DebugTime:
# keep all the parsers in a cache # keep all the parsers in a cache
class Parsers(defaultdict): class Parsers(defaultdict):
def __init__(self): def __init__(self):
self._parsers = _parsers self._parsers = _parsers
...@@ -44,9 +42,7 @@ class Parsers(defaultdict): ...@@ -44,9 +42,7 @@ class Parsers(defaultdict):
parsers = Parsers() parsers = Parsers()
# resources management # resources management
def add_resource(corpus, **kwargs): def add_resource(corpus, **kwargs):
# only for tests # only for tests
session = Session() session = Session()
...@@ -83,7 +79,6 @@ def add_resource(corpus, **kwargs): ...@@ -83,7 +79,6 @@ def add_resource(corpus, **kwargs):
# return result # return result
return resource return resource
def parse_resources(corpus, user=None, user_id=None): def parse_resources(corpus, user=None, user_id=None):
dbg = DebugTime('Corpus #%d - parsing' % corpus.id) dbg = DebugTime('Corpus #%d - parsing' % corpus.id)
session = Session() session = Session()
...@@ -102,8 +97,7 @@ def parse_resources(corpus, user=None, user_id=None): ...@@ -102,8 +97,7 @@ def parse_resources(corpus, user=None, user_id=None):
.filter(Node_Resource.parsed == False) .filter(Node_Resource.parsed == False)
) )
# make a new node for every parsed document of the corpus # make a new node for every parsed document of the corpus
print("HERE MOFOs") # print(resources_query)
print(resources_query)
dbg.show('analyze documents') dbg.show('analyze documents')
nodes = list() nodes = list()
for resource, resourcetype in resources_query: for resource, resourcetype in resources_query:
...@@ -147,13 +141,13 @@ def parse_resources(corpus, user=None, user_id=None): ...@@ -147,13 +141,13 @@ def parse_resources(corpus, user=None, user_id=None):
hyperdata.name: hyperdata hyperdata.name: hyperdata
for hyperdata in session.query(Hyperdata) for hyperdata in session.query(Hyperdata)
} }
#print('hyperdata_types', hyperdata_types)
for node in nodes: for node in nodes:
node_id = node.id node_id = node.id
for hyperdata_key, hyperdata_value in node.hyperdata.items(): for hyperdata_key, hyperdata_value in node.hyperdata.items():
try: try:
hyperdata = hyperdata_types[hyperdata_key] hyperdata = hyperdata_types[hyperdata_key]
except KeyError: except KeyError:
# Why silent continue here ?
continue continue
if hyperdata.type == 'string': if hyperdata.type == 'string':
hyperdata_value = hyperdata_value[:255] hyperdata_value = hyperdata_value[:255]
...@@ -163,16 +157,17 @@ def parse_resources(corpus, user=None, user_id=None): ...@@ -163,16 +157,17 @@ def parse_resources(corpus, user=None, user_id=None):
hyperdata_value, hyperdata_value,
)) ))
#print('I am here', node_hyperdata_lists.items())
for key, values in node_hyperdata_lists.items(): for key, values in node_hyperdata_lists.items():
#print('here', key, values)
bulk_insert(Node_Hyperdata, ['node_id', 'hyperdata_id', 'value_'+key], values) bulk_insert(Node_Hyperdata, ['node_id', 'hyperdata_id', 'value_'+key], values)
# mark the corpus as parsed # mark the corpus as parsed
corpus.parsed = True corpus.parsed = True
# ngrams extraction # ngrams extraction
from .NgramsExtractors import EnglishNgramsExtractor, FrenchNgramsExtractor, NgramsExtractor from .NgramsExtractors import EnglishNgramsExtractor, FrenchNgramsExtractor, NgramsExtractor
class NgramsExtractors(defaultdict): class NgramsExtractors(defaultdict):
def __init__(self): def __init__(self):
......
...@@ -118,7 +118,11 @@ function Final_UpdateTable( action ) { ...@@ -118,7 +118,11 @@ function Final_UpdateTable( action ) {
// Get all the duplicates using the Django-Garg API // Get all the duplicates using the Django-Garg API
var current_docs = {} var current_docs = {}
var BIS_dict = {} var BIS_dict = {}
var corpusid = window.location.href.split("corpus")[1].replace(/\//g, '')//replace all the slashes
var path = window.location.pathname.match(/\/project\/(.*)\/corpus\/(.*)\//);
var projectid = path[1]
var corpusid = path[2]
var theurl = "/api/nodes/"+corpusid+"/children/duplicates?keys=title&limit=9999" var theurl = "/api/nodes/"+corpusid+"/children/duplicates?keys=title&limit=9999"
// $.ajax({ // $.ajax({
// url: theurl, // url: theurl,
...@@ -231,7 +235,7 @@ function ulWriter(rowIndex, record, columns, cellWriter) { ...@@ -231,7 +235,7 @@ function ulWriter(rowIndex, record, columns, cellWriter) {
var orig_id = parseInt(data.records[i].id) var orig_id = parseInt(data.records[i].id)
var arr_id = parseInt(i) var arr_id = parseInt(i)
RecDict[orig_id] = arr_id; RecDict[orig_id] = arr_id;
data.records[i]["name"] = '<a target="_blank" href="/nodeinfo/'+orig_id+'">'+data.records[i]["name"]+'</a>' data.records[i]["name"] = '<a target="_blank" href="/project/'+projectid+'/corpus/'+ corpusid + '/document/'+orig_id+'">'+data.records[i]["name"]+'</a>'
data.records[i]["del"] = false data.records[i]["del"] = false
var date = data.records[i]["date"]; var date = data.records[i]["date"];
......
...@@ -8,7 +8,7 @@ from ngram.lists import * ...@@ -8,7 +8,7 @@ from ngram.lists import *
#from gargantext_web.views import empty_trash #from gargantext_web.views import empty_trash
#empty_trash() #empty_trash()
#
#user = session.query(User).all()[0] #user = session.query(User).all()[0]
user = session.query(User).filter(User.username=='alexandre').first() user = session.query(User).filter(User.username=='alexandre').first()
...@@ -36,6 +36,8 @@ if project is None: ...@@ -36,6 +36,8 @@ if project is None:
corpus = session.query(Node).filter(Node.parent_id == project.id, corpus = session.query(Node).filter(Node.parent_id == project.id,
Node.type_id == cache.NodeType['Corpus'].id).first() Node.type_id == cache.NodeType['Corpus'].id).first()
print('Corpus is', corpus)
if corpus is None: if corpus is None:
corpus = Node( corpus = Node(
parent_id = project.id, parent_id = project.id,
...@@ -66,14 +68,14 @@ print('Working on corpus:', corpus.id, corpus.name) ...@@ -66,14 +68,14 @@ print('Working on corpus:', corpus.id, corpus.name)
stem_id = stem_corpus(corpus_id=corpus.id) stem_id = stem_corpus(corpus_id=corpus.id)
print('Stem Node.id is', stem_id) print('Stem Node.id is', stem_id)
for typeList in ['MiamList', 'StopList', 'MainList', 'Group']: #for typeList in ['MiamList', 'StopList', 'MainList', 'Group']:
n = listIds(user_id=user.id, # n = listIds(user_id=user.id,
corpus_id=corpus.id, # corpus_id=corpus.id,
typeList=typeList) # typeList=typeList)
#print(n[0][0]) # #print(n[0][0])
print('Test having list_id') # print('Test having list_id')
print(n, listNgramIds(list_id=n[0][0])[:3]) # print(n, listNgramIds(list_id=n[0][0])[:3])
#
stop_list_id = listIds(user_id=user.id, stop_list_id = listIds(user_id=user.id,
corpus_id=corpus.id, corpus_id=corpus.id,
...@@ -87,30 +89,53 @@ miam_list_id = listIds(user_id=user.id, ...@@ -87,30 +89,53 @@ miam_list_id = listIds(user_id=user.id,
print('Stop List', stop_list_id) print('Stop List', stop_list_id)
print('Miam List', miam_list_id) print('Miam List', miam_list_id)
ngram_id = listNgramIds(list_id=miam_list_id)[0][0]
print('ngram_id', ngram_id)
ngramList(do='add', ngram_ids=[ngram_id,], list_id=stop_list_id) ngrams2miam(user_id=user.id, corpus_id=corpus.id)
print(listNgramIds(list_id=miam_list_id, user_id=user.id, corpus_id=corpus.id))
#type_list='MiamList'
#try:
# d = doList(type_list=type_list, user_id = user.id, corpus_id = corpus.id, limit=150)
## print('Size of the ' + type_list + ' list:',
## session.query(NodeNgram).filter(NodeNgram.node_id == d).count()
## )
#except:
# PrintException()
##
#print(listNgramIds(list_id=miam_list_id, user_id=user.id, corpus_id=corpus.id))
#
#ngram_id = listNgramIds(list_id=miam_list_id, user_id=user.id, corpus_id=corpus.id)[0][0]
#print('ngram_id', ngram_id)
# #
#ngramList(do='add', ngram_ids=[ngram_id,], list_id=stop_list_id)
# print('Test having typeList and corpus.id') # print('Test having typeList and corpus.id')
# print(n, listNgramIds(typeList=typeList, corpus_id=corpus.id, user_id=user.id)[:3]) # print(n, listNgramIds(typeList=typeList, corpus_id=corpus.id, user_id=user.id)[:3])
## ##
# print('Test having typeList and corpus.id and doc_id') # print('Test having typeList and corpus.id and doc_id')
# print(n, listNgramIds(typeList=typeList, corpus_id=corpus.id, doc_id=doc_id, user_id=user.id)[:3]) # print(n, listNgramIds(typeList=typeList, corpus_id=corpus.id, doc_id=doc_id, user_id=user.id)[:3])
#
#
#type_list='miam'
#try:
# d = doList(type_list=type_list, user_id = user.id, corpus_id = corpus.id, stem_id=stem_id, limit=150)
# print('Size of the ' + type_list + ' list:',
# session.query(NodeNgram).filter(NodeNgram.node_id == d).count()
# )
#except:
# PrintException()
#
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment