Commit 08539c33 authored by PkSM3's avatar PkSM3

Merge branch 'unstable' of ssh://delanoe.org:1979/gargantext into samuel

parents 08dc8379 addc6e19
from collections import defaultdict
class Translations:
    """Mapping of keys to the key they are translated to.

    Attributes are plain containers that other classes of this module read
    directly:

    * ``items``  -- ``defaultdict(int)`` mapping each key to its target;
    * ``groups`` -- ``defaultdict(set)``, the reverse index mapping each
      target to the set of keys translated to it.
    """

    def __init__(self, other=None):
        """Build an empty instance, a copy, or an instance from pairs.

        Args:
            other: ``None`` (empty), another ``Translations`` (independent
                copy), or anything accepted by ``dict`` construction
                (a mapping or an iterable of ``(key, target)`` pairs).

        Raises:
            TypeError: if ``other`` is none of the above.
        """
        if other is None:
            self.items = defaultdict(int)
            self.groups = defaultdict(set)
        elif isinstance(other, Translations):
            self.items = defaultdict(int, other.items)
            # Copy every set: a shallow dict copy would share the sets and
            # let operations on the copy silently alter the original.
            self.groups = defaultdict(
                set,
                ((target, set(keys)) for target, keys in other.groups.items()),
            )
        elif hasattr(other, '__iter__'):
            self.items = defaultdict(int, other)
            self.groups = defaultdict(set)
            for key, value in self.items.items():
                self.groups[value].add(key)
        else:
            raise TypeError

    def _unlink(self, key, target):
        """Remove ``key`` from the group of ``target``, dropping empty groups."""
        keys = self.groups.get(target)
        if keys is None:
            return
        keys.discard(key)
        if not keys:
            del self.groups[target]

    def __add__(self, other):
        """Return a new instance holding ``self`` updated with ``other``.

        Translations found in ``other`` take precedence over those of
        ``self``.  The reverse index is rebuilt from the key/target pairs,
        so it stays consistent even when a key changes target.
        """
        result = self.__class__(self)
        pairs = other.items if isinstance(other, Translations) else dict(other)
        for key, value in pairs.items():
            if key in result.items:
                # The key is being re-mapped: forget its previous group.
                result._unlink(key, result.items[key])
            result.items[key] = value
            result.groups[value].add(key)
        return result

    def __sub__(self, other):
        """Return a new instance without the keys translated by ``other``.

        Only the keys of ``other`` matter; a key is removed whatever target
        it has in ``self``.  When ``other`` is not a ``Translations`` the
        result is an unchanged copy of ``self``.
        """
        result = self.__class__(self)
        if isinstance(other, Translations):
            for key in other.items:
                if key in result.items:
                    # Unlink using the target stored in *result*, which may
                    # differ from the one recorded in ``other``.
                    result._unlink(key, result.items.pop(key))
        return result

    def __iter__(self):
        """Yield ``(key, target)`` pairs."""
        yield from self.items.items()
class WeightedMatrix:
    """Sparse matrix of float weights indexed by a pair of keys.

    ``items`` is a two-level ``defaultdict``: ``items[key1][key2]`` is the
    weight of the pair, missing entries defaulting to ``0.0``.
    """

    def __init__(self, other=None):
        """Build an empty matrix, a copy, or a matrix from triples.

        Args:
            other: ``None`` (empty), another ``WeightedMatrix`` (independent
                copy), or an iterable of ``(key1, key2, weight)`` triples.

        Raises:
            TypeError: if ``other`` is none of the above.
        """
        if other is not None and not (
            isinstance(other, WeightedMatrix) or hasattr(other, '__iter__')
        ):
            raise TypeError
        self.items = defaultdict(lambda: defaultdict(float))
        if other is None:
            return
        # A WeightedMatrix iterates as triples too, so both cases are filled
        # the same way; this also gives the copy its own inner rows instead
        # of sharing them with the source.
        for key1, key2, value in other:
            self.items[key1][key2] = value

    def __iter__(self):
        """Yield ``(key1, key2, weight)`` triples."""
        for key1, key2_value in self.items.items():
            for key2, value in key2_value.items():
                yield key1, key2, value

    def __sub__(self, other):
        """Remove elements of the other matrix from the current one.

        Can only be subtracted from another matrix of cooccurrences.

        TODO: not implemented yet -- currently returns ``None``.
        """
        pass

    def __mul__(self, other):
        """Return a new matrix whose keys are translated through ``other``.

        Both keys of every pair are replaced by their translation when one
        exists (untranslated keys are kept); weights of pairs that end up
        on the same translated pair are summed.

        Raises:
            TypeError: if ``other`` is not a ``Translations``.
        """
        if not isinstance(other, Translations):
            raise TypeError
        result = WeightedMatrix()
        # ``dict.get`` does not insert missing keys into the defaultdict.
        translate = other.items.get
        for key1, key2, value in self:
            result.items[translate(key1, key1)][translate(key2, key2)] += value
        return result
class UnweightedList:
    """Set of keys supporting union, difference and intersection.

    ``items`` is a plain ``set``.  Operators accept an ``UnweightedList`` or
    a ``WeightedList`` (whose keys are used) and always return a new
    instance, leaving both operands untouched.
    """

    def __init__(self, other=None):
        """Build an empty list, a copy, or a list from an iterable.

        Args:
            other: ``None`` (empty), an ``UnweightedList`` (copy), a
                ``WeightedList`` (its keys), or an iterable.  When the
                first element of the iterable is itself iterable (e.g. a
                ``(key, weight)`` tuple) the first component of every
                element is kept; otherwise the elements themselves are.

        Raises:
            TypeError: if ``other`` is none of the above.
        """
        if other is None:
            self.items = set()
        elif isinstance(other, UnweightedList):
            self.items = other.items.copy()
        elif isinstance(other, WeightedList):
            self.items = set(other.items.keys())
        elif hasattr(other, '__iter__'):
            # Materialise first: a generator has neither len() nor indexing.
            entries = list(other)
            first = entries[0] if entries else None
            # Strings are iterable but are single keys, not (key, ...) rows.
            if hasattr(first, '__iter__') and not isinstance(first, (str, bytes)):
                self.items = {entry[0] for entry in entries}
            else:
                self.items = set(entries)
        else:
            raise TypeError

    @staticmethod
    def _keys_of(other):
        """Return the keys of a list operand as a set, or raise TypeError."""
        if isinstance(other, UnweightedList):
            return other.items
        if isinstance(other, WeightedList):
            return set(other.items.keys())
        raise TypeError

    def __add__(self, other):
        """Return the union of both lists."""
        result = self.__class__(self)
        result.items |= self._keys_of(other)
        return result

    __or__ = __add__

    def __sub__(self, other):
        """Return the keys of this list that are absent from ``other``."""
        result = self.__class__(self)
        result.items -= self._keys_of(other)
        return result

    def __and__(self, other):
        """Return the keys present in both lists."""
        result = self.__class__(self)
        result.items &= self._keys_of(other)
        return result
class WeightedList:
    """Mapping of keys to float weights.

    ``items`` is a ``defaultdict(float)``, so a missing key weighs ``0.0``.
    Operators return new objects and leave both operands untouched.
    """

    def __init__(self, other=None):
        """Build an empty list, a copy, or a list from other data.

        Args:
            other: ``None`` (empty), a ``WeightedList`` (copy), an
                ``UnweightedList`` (every key gets weight ``1.0``), or
                anything accepted by ``dict`` construction (a mapping or an
                iterable of ``(key, weight)`` pairs).

        Raises:
            TypeError: if ``other`` is none of the above.
        """
        if other is None:
            self.items = defaultdict(float)
        elif isinstance(other, WeightedList):
            self.items = other.items.copy()
        elif isinstance(other, UnweightedList):
            self.items = defaultdict(float)
            for key in other.items:
                self.items[key] = 1.0
        elif hasattr(other, '__iter__'):
            self.items = defaultdict(float, other)
        else:
            raise TypeError

    def __iter__(self):
        """Yield ``(key, weight)`` pairs."""
        for key, value in self.items.items():
            yield key, value

    def __add__(self, other):
        """Add elements from the other list to the current one.

        Weights of a ``WeightedList`` are summed key by key; every key of
        an ``UnweightedList`` counts for ``1.0``.

        Raises:
            TypeError: if ``other`` is neither kind of list.
        """
        result = self.__class__(self)
        if isinstance(other, WeightedList):
            for key, value in other.items.items():
                result.items[key] += value
        elif isinstance(other, UnweightedList):
            # ``other.items`` is a set of bare keys, not of pairs.
            for key in other.items:
                result.items[key] += 1.0
        else:
            raise TypeError
        return result

    def __sub__(self, other):
        """Remove elements of the other list from the current one.

        Raises:
            TypeError: if ``other`` is not an ``UnweightedList``.
        """
        result = self.__class__(self)
        if isinstance(other, UnweightedList):
            for key in other.items:
                result.items.pop(key, None)
        else:
            raise TypeError
        return result

    def __and__(self, other):
        """Keep only the elements whose key also belongs to ``other``.

        Returns:
            A ``defaultdict(float)`` of the retained ``key: weight`` pairs.
            NOTE(review): unlike the other operators this returns the bare
            mapping rather than a ``WeightedList``; kept as is for callers.

        Raises:
            TypeError: if ``other`` is not an ``UnweightedList``.
        """
        if not isinstance(other, UnweightedList):
            raise TypeError
        result = defaultdict(float)
        for key, value in self.items.items():
            if key in other.items:
                result[key] = value
        return result

    def __mul__(self, other):
        """Return a new list whose keys are translated through ``other``.

        Untranslated keys are kept; weights of keys sharing the same
        translation are summed.

        Raises:
            TypeError: if ``other`` is not a ``Translations``.
        """
        if not isinstance(other, Translations):
            raise TypeError
        result = WeightedList()
        for key, value in self.items.items():
            # ``dict.get`` does not insert missing keys into the defaultdict.
            result.items[other.items.get(key, key)] += value
        return result
# if __name__ == '__main__':
# l = Coocurrences()
# l = List()
# for i in l:
# print(i)
# t1 = Translations()
# t2 = Translations()
# t2.items = {1: 2}
# for i in t1 + t2:
# print(i)
......@@ -8,7 +8,8 @@
"angular": "~1.2.x",
"angular-loader": "~1.2.x",
"angular-resource": "~1.2.x",
"bootstrap": "~3.x"
"bootstrap": "~3.x",
"angular-cookies": "1.2"
},
"resolutions": {
"angular": "~1.2.x"
......
......@@ -46,7 +46,7 @@
}
.main-panel, .text-panel, .words-panel {
height: 400px;
height: 800px;
margin: 10px 0px;
}
......
......@@ -18,15 +18,28 @@
};
});
/*
 * Controller of one keyword shown outside the text (extra list).
 * Deleting the keyword removes it from its list on the server, then reloads
 * the annotations and the lists of the current document.
 */
window.annotationsApp.controller('ExtraAnnotationController',
    ['$scope', '$rootScope', '$element', 'NgramHttpService', 'NgramListHttpService',
    function ($scope, $rootScope, $element, NgramHttpService, NgramListHttpService) {
        // TODO use the tooltip ?
        $scope.onDeleteClick = function () {
            NgramHttpService.delete({
                'listId': $scope.keyword.list_id,
                'ngramId': $scope.keyword.uuid
            }).$promise.then(function() {
                // NgramListHttpService must be injected above, otherwise this
                // refresh throws a ReferenceError once the deletion succeeds.
                return NgramListHttpService.get(
                    {'corpusId': $rootScope.corpusId, 'docId': $rootScope.docId}
                ).$promise;
            }).then(function(data) {
                var corpusData = data[$rootScope.corpusId.toString()];
                $rootScope.annotations = corpusData[$rootScope.docId.toString()];
                $rootScope.lists = corpusData['lists'];
            });
        };
    }
]);
window.annotationsApp.controller('AnnotationController', ['$scope', '$rootScope', '$element', function ($scope, $rootScope, $element) {
window.annotationsApp.controller('AnnotationController',
['$scope', '$rootScope', '$element',
function ($scope, $rootScope, $element) {
// FIXME maybe use angular.copy of the annotation
var keyword = _.find(
$rootScope.annotations,
......@@ -52,7 +65,8 @@
};
});
window.annotationsApp.controller('AnnotationMenuController', ['$scope', '$rootScope', '$element', '$timeout', 'NgramHttpService',
window.annotationsApp.controller('AnnotationMenuController',
['$scope', '$rootScope', '$element', '$timeout', 'NgramHttpService',
function ($scope, $rootScope, $element, $timeout, NgramHttpService) {
/*
* Universal text selection
......@@ -84,12 +98,14 @@
}
}
function toggleMenu(context, annotation){
function toggleMenu(context, annotation) {
$timeout(function() {
$scope.$apply(function() {
if (angular.isObject(annotation)) {
$scope.level = angular.copy(annotation.level);
$scope.category = angular.copy(annotation.category);
$scope.level = angular.copy(annotation.level || 'global');
$scope.category = $rootScope.lists[annotation.list_id].toLowerCase();
$scope.listId = angular.copy(annotation.list_id);
// used in onClick
$scope.selection_text = angular.copy(annotation);
......@@ -114,7 +130,7 @@
}
else if (annotation.trim() !== "") {
$scope.selection_text = angular.copy(annotation);
$scope.level = "Create from current selection";
$scope.level = "New Ngram from selection";
$scope.category = null;
$scope.local_miamlist = true;
$scope.local_stoplist = true;
......@@ -162,36 +178,41 @@
$rootScope.$on("positionAnnotationMenu", positionElement);
$rootScope.$on("toggleAnnotationMenu", toggleMenu);
$scope.onClick = function($event, action, category, level) {
$scope.onClick = function($event, action, listId, level) {
if (angular.isObject($scope.selection_text)) {
// change the status of an existing Ngram
$scope.selection_text.category = category;
$scope.selection_text.level = level;
NgramHttpService[action](
{
'listId': $rootScope.listId,
// delete from the current list
NgramHttpService[action]({
'listId': listId,
'ngramId': $scope.selection_text.uuid
},
{'annotation': $scope.selection_text}
);
}).$promise.then(function(data) {
$.each($rootScope.annotations, function(index, element) {
if (element.list_id == listId && element.uuid == $scope.selection_text.uuid) {
$rootScope.annotations.splice(index, 1);
return false;
}
});
});
} else if ($scope.selection_text.trim() !== "") {
// new annotation from selection
NgramHttpService.post(
{
'listId': $rootScope.listId
'listId': listId
},
{'annotation' : {'text': $scope.selection_text.trim(), 'category': category, 'level': level}}
);
{'annotation' : {'text': $scope.selection_text.trim()}}
).$promise.then(function(data) {
$rootScope.annotations.push(data);
});
}
// hide selection highlighted text and the menu
$(".text-panel").removeClass("selection");
$element.fadeOut(100);
};
}
]);
}]);
window.annotationsApp.controller('IntraTextController', ['$scope', '$rootScope', '$compile', 'NgramHttpService',
window.annotationsApp.controller('IntraTextController',
['$scope', '$rootScope', '$compile', 'NgramHttpService',
function ($scope, $rootScope, $compile, NgramHttpService) {
$scope.extra_stoplist = [];
......@@ -204,15 +225,15 @@
/*
* Replace the text by and html template
*/
function replaceTextByTemplate(text, annotation, template, pattern) {
function replaceTextByTemplate(text, annotation, template, pattern, lists) {
return text.replace(pattern, function(matched) {
var tpl = angular.element(template);
tpl.append(matched);
tpl.attr('title', annotation.tooltip_content);
tpl.attr('uuid', annotation.uuid);
if (annotation.category == 'miamlist') tpl.addClass("miamword");
if (annotation.category == 'stoplist' && annotation.level == 'local') tpl.addClass("stopword");
if ('MiamList' == lists[annotation.list_id]) tpl.addClass("miamword");
if ('StopList' == lists[annotation.list_id]) tpl.addClass("stopword");
//if (annotation.category == 'stoplist' && annotation.level == 'global') tpl.addClass("global-stopword");
return tpl.get(0).outerHTML;
......@@ -248,30 +269,32 @@
_.each(sortedSizeAnnotations, function (annotation) {
// TODO better split to manage two-words with minus sign
annotation.category = $rootScope.lists[annotation.list_id].toLowerCase();
var words = annotation.text.split(" ");
var pattern = new RegExp(startPattern + words.join(middlePattern) + endPattern, 'gmi');
var textRegexp = new RegExp("\\b"+annotation.text+"\\b", 'igm');
if (pattern.test(fullText) === true) {
fullText = replaceTextByTemplate(fullText, annotation, template, pattern);
fullText = replaceTextByTemplate(fullText, annotation, template, pattern, $rootScope.lists);
// TODO remove debug
counter++;
} else if (pattern.test(abstractText) === true) {
abstractText = replaceTextByTemplate(abstractText, annotation, template, pattern);
abstractText = replaceTextByTemplate(abstractText, annotation, template, pattern, $rootScope.lists);
counter++;
} else if (!textRegexp.test($rootScope.full_text) && !textRegexp.test($rootScope.abstract_text)) {
if (annotation.category == "stoplist" && annotation.level == 'local') {
if (annotation.category == "stoplist") {
// Deactivated stoplist for the moment
// if ($.inArray(annotation.uuid, $scope.extra_stoplist.map(function (item) {
// return item.uuid;
// })) == -1) {
extra_stoplist = lengthSort(extra_stoplist.concat(annotation), "text");
// extra_stoplist = lengthSort(extra_stoplist.concat(annotation), "text");
// }
} else if (annotation.category == "miamlist") {
// if ($.inArray(annotation.uuid, $scope.extra_miamlist.map(function (item) {
// return item.uuid;
// })) == -1) {
if ($.inArray(annotation.uuid, $scope.extra_miamlist.map(function (item) {
return item.uuid;
})) == -1) {
extra_miamlist = lengthSort(extra_miamlist.concat(annotation), "text");
// }
}
}
}
});
......@@ -288,6 +311,9 @@
if ($rootScope.annotations === undefined) return;
if (angular.equals(newValue, oldValue)) return;
$rootScope.miamListId = _.invert($rootScope.lists)['MiamList'];
$rootScope.stopListId = _.invert($rootScope.lists)['StopList'];
$scope.extra_stoplist = [];
$scope.extra_miamlist = [];
......@@ -295,7 +321,8 @@
$rootScope.annotations,
angular.copy($rootScope.full_text),
angular.copy($rootScope.abstract_text),
$rootScope);
$rootScope
);
console.log($rootScope.annotations.length);
console.log(counter);
......@@ -308,16 +335,17 @@
});
});
function submitNewAnnotation($event, inputEltId, category) {
function submitNewAnnotation($event, inputEltId, listId) {
if ($event.keyCode !== undefined && $event.keyCode != 13) return;
var value = $(inputEltId).val().trim();
if (value === "") return;
NgramHttpService.post(
{
'listId': $rootScope.listId
'listId': listId,
'ngramId': 'new'
},
{'annotation' : {'text': value, 'category': category, 'level': 'local'}},
{'annotation' : {'text': value}},
function(data) {
// on success
if (data) {
......@@ -329,11 +357,11 @@
}
// Submit the content of the miam-list input as a new ngram of the MiamList.
$scope.onMiamlistSubmit = function ($event) {
    // submitNewAnnotation expects a list id, not a category name; $rootScope.lists
    // maps list ids to list type names, hence the inverted lookup.
    submitNewAnnotation($event, "#miamlist-input", _.invert($rootScope.lists)['MiamList']);
};
// TODO refactor
// Submit the content of the stop-list input as a new ngram of the StopList.
$scope.onStoplistSubmit = function ($event) {
    // Look up the id of the 'StopList' (it previously resolved 'MiamList',
    // which stored stop-words in the miam-list).
    submitNewAnnotation($event, "#stoplist-input", _.invert($rootScope.lists)['StopList']);
};
$scope.numStopPages = function () {
if ($scope.extra_stoplist === undefined) return 0;
......@@ -355,7 +383,8 @@
$scope.previousStopPage = function() {
$scope.currentStopPage = $scope.currentStopPage - 1;
};
}]);
}
]);
window.annotationsApp.filter('startFrom', function () {
return function (input, start) {
......@@ -365,29 +394,33 @@
};
});
window.annotationsApp.controller('DocController', ['$scope', '$rootScope', 'NgramListHttpService', 'DocumentHttpService',
window.annotationsApp.controller('DocController',
['$scope', '$rootScope', 'NgramListHttpService', 'DocumentHttpService',
function ($scope, $rootScope, NgramListHttpService, DocumentHttpService) {
//$rootScope.$on('loadNewDoc', function (event, listId, docId) {
$rootScope.documentResource = DocumentHttpService.get({docId: $rootScope.docId}, function(data, responseHeaders) {
$rootScope.documentResource = DocumentHttpService.get(
{'docId': $rootScope.docId},
function(data, responseHeaders) {
$scope.title = data.title;
$scope.authors = data.authors;
$scope.journal = data.journal;
$scope.publication_date = data.publication_date;
// TODO this data have to be deleted
$scope.current_page_number = data.current_page_number;
$scope.last_page_number = data.last_page_number;
//$scope.current_page_number = data.current_page_number;
//$scope.last_page_number = data.last_page_number;
// put in rootScope because used by many components
$rootScope.docId = data.id;
$rootScope.full_text = data.full_text;
$rootScope.abstract_text = data.abstract_text;
// GET the annotations
// TODO
$rootScope.annotationsResource = NgramListHttpService.get(
{'listId': $rootScope.listId, 'docId': $rootScope.docId}
{'corpusId': $rootScope.corpusId, 'docId': $rootScope.docId}
).$promise.then(function(data) {
$rootScope.annotations = data[$rootScope.listId.toString()][$rootScope.docId.toString()];
$rootScope.annotations = data[$rootScope.corpusId.toString()][$rootScope.docId.toString()];
$rootScope.lists = data[$rootScope.corpusId.toString()]['lists'];
});
});
//});
// TODO setup pagination client-side
$scope.onPreviousClick = function () {
DocumentHttpService.get($scope.docId - 1);
......@@ -399,9 +432,10 @@
window.annotationsApp.run(function ($rootScope) {
    /* GET the document node and all the annotations in the list associated */
    // The project, corpus and document ids are read from the page URL,
    // expected to look like /project/<id>/corpus/<id>/document/<id>/
    var path = window.location.pathname.match(/\/project\/(.*)\/corpus\/(.*)\/document\/(.*)\//);
    if (path === null) {
        // match() returns null on any other URL; indexing it would throw
        // and abort the bootstrap of the whole application.
        console.error('annotations: cannot read project/corpus/document ids from ' + window.location.pathname);
        return;
    }
    $rootScope.projectId = path[1];
    $rootScope.corpusId = path[2];
    $rootScope.docId = path[3];
});
})(window);
(function () {
'use strict';
var http = angular.module('annotationsAppHttp', ['ngResource']);
var http = angular.module('annotationsAppHttp', ['ngResource', 'ngCookies']);
// Configure the default CSRF names used by $http: the token is read from the
// 'csrftoken' cookie and sent back in the 'X-CSRFToken' request header.
// NOTE(review): these look like the default names of Django's CSRF
// protection -- confirm against the server settings.
http.config(['$httpProvider', function($httpProvider){
$httpProvider.defaults.xsrfHeaderName = 'X-CSRFToken';
$httpProvider.defaults.xsrfCookieName = 'csrftoken';
}]);
/*
* Read Document
*/
http.factory('DocumentHttpService', function($resource) {
return $resource(
window.ANNOTATION_API_URL + "document" + '/:docId/',
window.ANNOTATION_API_URL + "document/:docId/",
{
docId: '@docId'
},
......@@ -22,92 +26,44 @@
});
/*
* Read Ngram Lists
* Read all Ngrams
*/
http.factory('NgramListHttpService', function ($resource) {
return $resource(
window.ANNOTATION_API_URL + 'lists' + '/:listId/',
window.ANNOTATION_API_URL + 'corpus/:corpusId/document/:docId',
{
listId: '@listId'
corpusId: '@corpusId',
docId: '@docId'
},
{
get: {
method: 'GET',
params: {listId: '@listId'}
params: {}
}
}
);
});
/*
* Create, modify or delete on Ngram of a list
* Create, modify or delete 1 Ngram
*/
http.factory('NgramHttpService', function ($resource) {
return $resource(
window.ANNOTATION_API_URL + 'lists' + '/:listId/ngrams/' + ':ngramId/',
window.ANNOTATION_API_URL + 'lists/:listId/ngrams/:ngramId',
{
listId: '@listId'
listId: '@listId',
ngramId: '@id'
},
{
post: {
method: 'POST',
params: {'listId': '@listId', 'ngramId': '@ngramId'}
params: {'listId': '@listId', 'ngramId': ''}
},
delete: {
method: 'DELETE',
params: {'listId': '@listId', 'ngramId': '@ngramId'}
params: {'listId': '@listId', 'ngramId': '@id'}
}
}
);
});
// return {
// newAnnotationObject: function(text, category, level) {
// return {
// 'text': text.trim(),
// 'category': category,
// 'level': level
// };
// },
// create: function(keyword, $rootScope) {
// if ($rootScope.annotations === undefined) $rootScope.annotations = [];
// // find duplicate by text
// var existing = _.find(
// $rootScope.annotations,
// function(annotation) { return annotation.text.trim().toLowerCase() === keyword.text.trim().toLowerCase(); }
// );
// // delete existing conflicting data before adding new
// if (existing) {
// if (existing.category == keyword.category && existing.level == keyword.level) return;
// this.delete(existing, $rootScope);
// }
// // TODO remove server mocking
// var mock = _.extend(keyword, {
// 'uuid': jQuery.now().toString(),
// 'occurrences': 322
// });
//
// $timeout(function() {
// $rootScope.$apply(function() {
// $rootScope.annotations.push(mock);
// });
// });
//
// return mock;
// },
// delete: function(keyword, $rootScope) {
// var filtered = _.filter($rootScope.annotations, function(item) {
// if (item.uuid == keyword.uuid) {
// return false;
// } else {
// return true;
// }
// });
// $timeout(function() {
// $rootScope.$apply(function() {
// $rootScope.annotations = filtered;
// });
// });
// }
// };
})(window);
<span ng-if="keyword.category == 'miamlist'" ng-click='onDeleteClick()' class="delete-keyword" data-keyword-id="{{keyword.uuid}}" data-keyword-text="{{keyword.text}}" data-keyword-category="miamlist">×</span>
<a ng-if="keyword.category == 'miamlist'" href="#" data-toggle="tooltip" class="keyword miamword">{{keyword.text}}</a>
<span ng-if="keyword.category == 'stoplist'" ng-click='onDeleteClick()' class="delete-keyword" data-keyword-id="{{keyword.uuid}}" data-keyword-text="{{keyword.text}}" data-keyword-category="stoplist">×</span>
<a ng-if="keyword.category == 'stoplist'" href="#" data-toggle="tooltip" class="keyword stopword">{{keyword.text}}</a>
<span class="occurrences" data-keyword-id="{{keyword.uuid}}">{{keyword.occurrences}}</span>
<span ng-if="keyword.category == 'miamlist'" ng-click='onDeleteClick()' class="delete-keyword" data-keyword-id="{[{keyword.uuid}]}" data-keyword-text="{[{keyword.text}]}" data-keyword-category="miamlist">×</span>
<span ng-if="keyword.category == 'miamlist'" data-toggle="tooltip" class="keyword miamword">{[{keyword.text}]}</span>
<span ng-if="keyword.category == 'stoplist'" ng-click='onDeleteClick()' class="delete-keyword" data-keyword-id="{[{keyword.uuid}]}" data-keyword-text="{[{keyword.text}]}" data-keyword-category="stoplist">×</span>
<span ng-if="keyword.category == 'stoplist'" data-toggle="tooltip" class="keyword stopword">{[{keyword.text}]}</span>
<span class="occurrences" data-keyword-id="{[{keyword.uuid}]}">{[{keyword.occurrences}]}</span>
......@@ -24,6 +24,7 @@ $script([
//'bower_components/angular-route/angular-route.js',
], function() {
$script([
S + 'bower_components/angular-cookies/angular-cookies.min.js',
S + 'bower_components/angular-resource/angular-resource.min.js'], function() {
$script([S + 'annotations/http.js', S + 'annotations/app.js'], function() {
// when all is done, execute bootstrap angular application (replace ng-app directive)
......
<ul class="noselection">
<li>{{level}}<span ng-if="category !== null"> {{category}}</span></li>
<li class="miamword" ng-if="local_miamlist === true" ng-click="onClick($event, 'post', 'miamlist', 'local')">add to miam-list</li>
<li class="miamword" ng-if="local_miamlist === false" ng-click="onClick($event, 'delete', 'miamlist', 'local')">remove from miam-list</li>
<li>{[{level}]}<span ng-if="category !== null"> {[{category}]}</span></li>
<li class="miamword" ng-if="local_miamlist === true" ng-click="onClick($event, 'post', miamListId, 'local')">add to miam-list</li>
<li class="miamword" ng-if="local_miamlist === false" ng-click="onClick($event, 'delete', miamListId, 'local')">remove from miam-list</li>
<li class="stopword" ng-if="local_stoplist === true" ng-click="onClick($event, 'post', 'stoplist', 'local')">add to local stop-list</li>
<li class="stopword" ng-if="local_stoplist === false" ng-click="onClick($event, 'delete', 'stoplist', 'local')">remove from local stop-list</li>
<li class="stopword" ng-if="local_stoplist === true" ng-click="onClick($event, 'post', stopListId, 'local')">add to local stop-list</li>
<li class="stopword" ng-if="local_stoplist === false" ng-click="onClick($event, 'delete', stopListId, 'local')">remove from local stop-list</li>
<li class="stopword" ng-if="global_stoplist === true" ng-click="onClick($event, 'post', 'stoplist', 'global')">add to global stop-list</li>
<li class="stopword" ng-if="global_stoplist === false" ng-click="onClick($event, 'delete', 'stoplist', 'global')">remove from global stop-list</li>
<!--<li class="stopword" ng-if="global_stoplist === true" ng-click="onClick($event, 'post', 'stoplist', 'global')">add to global stop-list</li>
<li class="stopword" ng-if="global_stoplist === false" ng-click="onClick($event, 'delete', 'stoplist', 'global')">remove from global stop-list</li>-->
</ul>
/*
AngularJS v1.2.28
(c) 2010-2014 Google, Inc. http://angularjs.org
License: MIT
*/
(function(p,f,n){'use strict';f.module("ngCookies",["ng"]).factory("$cookies",["$rootScope","$browser",function(e,b){var c={},g={},h,k=!1,l=f.copy,m=f.isUndefined;b.addPollFn(function(){var a=b.cookies();h!=a&&(h=a,l(a,g),l(a,c),k&&e.$apply())})();k=!0;e.$watch(function(){var a,d,e;for(a in g)m(c[a])&&b.cookies(a,n);for(a in c)d=c[a],f.isString(d)||(d=""+d,c[a]=d),d!==g[a]&&(b.cookies(a,d),e=!0);if(e)for(a in d=b.cookies(),c)c[a]!==d[a]&&(m(d[a])?delete c[a]:c[a]=d[a])});return c}]).factory("$cookieStore",
["$cookies",function(e){return{get:function(b){return(b=e[b])?f.fromJson(b):b},put:function(b,c){e[b]=f.toJson(c)},remove:function(b){delete e[b]}}}])})(window,window.angular);
//# sourceMappingURL=angular-cookies.min.js.map
......@@ -23,14 +23,14 @@
<div class="container-fluid">
<div class="row-fluid main-panel" ng-controller="IntraTextController">
<div class="col-md-4 col-xs-4 tabbable words-panel">
<ul class="nav nav-tabs">
<li class="active"><a href="#tab1" data-toggle="tab">Miamwords</a></li>
<li><a href="#tab2" data-toggle="tab">Local stopwords</a></li>
<ul class="nav nav-pills nav-justified">
<li class="active"><a href="#tab1" data-toggle="tab"><span class="glyphicon glyphicon-tags"></span>&nbsp;&nbsp;Miamwords</a></li>
<!--<li><a href="#tab2" data-toggle="tab">Local stopwords</a></li>-->
</ul>
<div class="tab-content">
<div class="tab-pane active" id="tab1">
<ul class="list-group words-list">
<div ng-if="extra_miamlist.length == 0" class="alert alert-info" role="alert">No extra-text miam-word yet</div>
<div ng-if="extra_miamlist.length == 0" class="alert alert-info" role="alert">No extra text miam-word yet</div>
<li ng-repeat="keyword in extra_miamlist | startFrom:currentMiamPage*pageSize | limitTo:pageSize" class="list-group-item">
<div ng-controller="ExtraAnnotationController" keyword-template class="keyword-container"></div>
......@@ -47,7 +47,7 @@
<button type="submit" class="btn btn-default btn-primary" ng-click="onMiamlistSubmit($event)">Add</button>
</div>
</div>
<div class="tab-pane" id="tab2">
<!--<div class="tab-pane" id="tab2">
<ul class="list-group words-list clearfix">
<div ng-if="extra_stoplist.length == 0" class="alert alert-info" role="alert">No extra-text stop-word yet</div>
<li ng-repeat="keyword in extra_stoplist | startFrom:currentStopPage*pageSize | limitTo:pageSize" class="list-group-item"><div ng-controller="ExtraAnnotationController" keyword-template></div></li>
......@@ -62,7 +62,7 @@
<input type="text" class="form-control" id="stoplist-input" ng-keypress="onStoplistSubmit($event)">
<button type="submit" class="btn btn-default" ng-click="onStoplistSubmit($event)">Exclude</button>
</div>
</div>
</div>-->
</div>
</div>
<div class="col-md-8 col-xs-8 text-panel" ng-controller="DocController" id="document">
......@@ -86,10 +86,14 @@
<li class="active pull-right">{[{publication_date}]}</li>
</ul>
</div>
<h4>Abstract</h4>
<p id="abstract-text" class="text-container"></p>
<h4>Article</h4>
<p id="full-text" class="text-container"></p>
<h4 ng-if="abstract_text != null">Abstract</h4>
<p id="abstract-text" class="text-container">
<div ng-if="abstract_text == null" class="alert alert-info" role="alert">No abstract text</div>
</p>
<h4 ng-if="full_text != null">Full Article</h4>
<p id="full-text" class="text-container">
<div ng-if="full_text == null" class="alert alert-info" role="alert">No full text</div>
</p>
</div>
</div> <!-- end of the main row -->
</div>
......@@ -100,8 +104,10 @@
<p class="browsehappy">You are using an <strong>outdated</strong> browser. Please <a href="http://browsehappy.com/">upgrade your browser</a> to improve your experience.</p>
<![endif]-->
<script type="application/javascript">
/* Constants required for annotations app JS to work */
window.STATIC_URL = "{% static '' %}";
window.ANNOTATION_API_URL = "{{ api_url }}";
window.NODES_API_URL = "{{ nodes_api_url }}";
</script>
<script src="{% static 'annotations/main.js' %}"></script>
......
......@@ -3,9 +3,7 @@ from annotations import views
urlpatterns = patterns('',
url(r'^demo/$', views.demo),
url(r'^document/(?P<doc_id>[0-9]+)$', views.Document.as_view()), # document view
#url(r'^document/(?P<doc_id>[0-9]+)/ngrams/(?P<ngram_id>[0-9]+)$', views.DocumentNgram.as_view()), # actions on ngram from a document
url(r'^lists/(?P<list_id>[0-9]+)$', views.NgramList.as_view()), # actions on list filtered by document
url(r'^lists/(?P<list_id>[0-9]+)/ngrams(?:/(?P<ngram_id>[0-9]+))?$', views.Ngram.as_view()), # actions on ngram from a list optionally filtered by document
url(r'^corpus/(?P<corpus_id>[0-9]+)/document/(?P<doc_id>[0-9]+)$', views.NgramList.as_view()), # the list associated with an ngram
url(r'^lists/(?P<list_id>[0-9]+)/ngrams(?:/(?P<ngram_id>[0-9]+))?$', views.NgramEdit.as_view()), #
)
from urllib.parse import urljoin
import json
import datetime
from django.shortcuts import render_to_response
from django.template import RequestContext
from django.contrib.auth.decorators import login_required
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework.renderers import JSONRenderer
from rest_framework.exceptions import APIException
from rest_framework.authentication import SessionAuthentication, BasicAuthentication
from node.models import Node
from gargantext_web.db import *
from ngram.lists import listIds, listNgramIds, ngramList
import sqlalchemy
from sqlalchemy.sql import func
from sqlalchemy import desc, asc, or_, and_, Date, cast, select
from sqlalchemy import literal_column
from sqlalchemy.orm import aliased
def demo(request):
"""Demo page, temporary"""
return render_to_response('annotations/demo.html', {
'api_url': urljoin(request.get_host(), '/annotations/')
@login_required
def main(request, project_id, corpus_id, document_id):
    """Render the full annotation page.

    The template receives the base URLs of the annotations API and of the
    nodes API.  ``project_id``, ``corpus_id`` and ``document_id`` come from
    the URL but are not used here.
    """
    host = request.get_host()
    # TODO use reverse()
    template_values = {
        'api_url': urljoin(host, '/annotations/'),
        'nodes_api_url': urljoin(host, '/api/'),
    }
    return render_to_response(
        'annotations/main.html',
        template_values,
        context_instance=RequestContext(request),
    )
# This class below is a duplicate with the class Nodes in
# /srv/gargantext/gargantext_web/api.py
# All information you need from Nodes in api.py is in hyperdata
# You may modify api.py (keeping compatibility) for your own needs
# See in urls the url pattern to use
class Document(APIView):
    """Read-only Document"""
    renderer_classes = (JSONRenderer,)

    def get(self, request, doc_id):
        """Return the document node ``doc_id`` as a JSON-serialisable dict.

        Args:
            request: the incoming REST framework request (unused).
            doc_id: identifier of the node, as captured from the URL.

        Returns:
            A ``Response`` holding the title, authors, journal, publication
            date, full text and abstract read from the node hyperdata,
            plus the node id and two placeholder pagination values.

        Raises:
            Http404: when no node has this id (answered as HTTP 404).
        """
        # Imported locally so that the block does not depend on a new
        # module-level import.
        from django.http import Http404

        node = session.query(Node).filter(Node.id == doc_id).first()
        if node is None:
            # first() returns None on an empty result; without this guard
            # the attribute access below failed with an HTTP 500.
            raise Http404("Document %s not found" % doc_id)
        # TODO also answer 404 when the node is not a Document
        hyperdata = node.hyperdata
        data = {
            'title': hyperdata.get('title'),
            'authors': hyperdata.get('authors'),
            'journal': hyperdata.get('journal'),
            'publication_date': hyperdata.get('publication_date'),
            'full_text': hyperdata.get('full_text'),
            'abstract_text': hyperdata.get('abstract'),
            'id': node.id,
            'current_page_number': 4,  # TODO remove, this is client side
            'last_page_number': 30  # TODO remove, this is client side
        }
        # return formatted result
        return Response(data)
class NgramList(APIView):
"""Read and Write Annotations"""
renderer_classes = (JSONRenderer,)
def get(self, request, list_id):
"""Get All for on List ID"""
doc_id = request.GET.get('docId')
# TODO DB query
# Example with 'MiamList', same with 'StopList'
corpus_id = session.query(Node.parent_id).filter(Node.id == doc_id).first()
miamlist_ids = listIds(user_id=request.user.id,
corpus_id=corpus_id,
typeList='MiamList')
miamlist_id, miamlist_name = miamlist_ids[0]
# ngrams of list_id of corpus_id:
corpus_ngram_miam_list = listNgramIds(list_id=miamList_id)
def get(self, request, corpus_id, doc_id):
"""Get All for a doc id"""
corpus_id = int(corpus_id)
doc_id = int(doc_id)
lists = dict()
for list_type in ['MiamList', 'StopList']:
list_id = list()
list_id = listIds(user_id=request.user.id, corpus_id=int(corpus_id), typeList=list_type)
lists["%s" % list_id[0][0]] = list_type
# ngrams of list_id of corpus_id:
doc_ngram_miam_list = listNgramIds(list_id=miamList_id, doc_id=doc_id)
# now you can model your dict as you want (for doc or corpus level):
ngram_id, ngram_text, ngram_occurrences = doc_ngram_miam_list[0]
doc_ngram_list = listNgramIds(corpus_id=corpus_id, doc_id=doc_id, user_id=request.user.id)
#doc_ngram_list = [(1, 'miam', 2, 1931), (2, 'stop', 2, 1932), (3, 'Potassium channels', 4, 1931)]
data = { '%s' % list_id : { '%s' % doc_id : [
{
'uuid': '1',
'text': 'what',
'category': 'stoplist',
'level': 'global',
'occurrences': 1
},
{
'uuid': '2',
'text': 'rotations',
'category': 'miamlist',
'level': 'local',
'occurrences': 1
},
{
'uuid': '3',
'text': 'etsy',
'category': 'stoplist',
'level': 'local',
'occurrences': 1
},
{
'uuid': '4',
'text': 'employees',
'category': 'miamlist',
'level': 'local',
'occurrences': 1
},
{
'uuid': '5',
'text': '2010',
'category': 'stoplist',
'level': 'global',
'occurrences': 1
},
{
'uuid': '6',
'text': 'stoplist keyword',
'category': 'stoplist',
'level': 'local',
'occurrences': 255
},
{
'uuid': '7',
'text': 'another stoplist keyword',
'category': 'stoplist',
'level': 'local',
'occurrences': 23
},
{
'uuid': '8',
'text': 'dmc-gm5',
'category': 'miamlist',
'level': 'local',
'occurrences': 1
},
data = { '%s' % corpus_id : {
'%s' % doc_id : [
{
'uuid': '9',
'text': 'scale of the GM-series',
'category': 'miamlist',
'level': 'local',
'occurrences': 1
},
{
'uuid': '10',
'text': 'engineering rotations',
'category': 'miamlist',
'level': 'local',
'occurrences': 1
},
{
'uuid': '11',
'text': 'pixel electronic viewfinder',
'category': 'miamlist',
'level': 'local',
'occurrences': 1
},
{
'uuid': '12',
'text': 'viewfinder',
'category': 'miamlist',
'level': 'local',
'occurrences': 1
},
{
'uuid': '13',
'text': 'pixel electronic',
'category': 'miamlist',
'level': 'local',
'occurrences': 1
},
{
'uuid': '14',
'text': 'GM',
'category': 'miamlist',
'level': 'local',
'occurrences': 1
},
{
'uuid': '15',
'text': 'support rotations',
'category': 'miamlist',
'level': 'local',
'occurrences': 1
},
{
'uuid': '16',
'text': 'miamlist keyword',
'category': 'miamlist',
'level': 'local',
'occurrences': 1
},
{
'uuid': '17',
'text': 'miamlist keyword',
'category': 'miamlist',
'level': 'local',
'occurrences': 1
},
{
'uuid': '18',
'text': 'another miamlist keyword',
'category': 'miamlist',
'level': 'local',
'occurrences': 3
'uuid': ngram_id,
'text': ngram_text,
'occurrences': ngram_occurrences,
'list_id': list_id,
}
]}}
for ngram_id, ngram_text, ngram_occurrences, list_id in doc_ngram_list],
'lists': lists
}}
return Response(data)
class Ngram(APIView):
"""Read and Write Annotations"""
class NgramEdit(APIView):
"""
Actions on one Ngram in one list
"""
renderer_classes = (JSONRenderer,)
authentication_classes = (SessionAuthentication, BasicAuthentication)
def delete(self, request, list_id, ngram_id):
def post(self, request, list_id, ngram_id):
"""
TODO Delete one annotation by id
associated with one Document (remove edge)
Add a ngram in a list
"""
doc_id = request.GET.get('docId')
annotationId = request.GET.get("annotationId")
print(annotationDict)
# TODO DB query
# Use the ngramList function in ngram.lists.py for that
# It can return True or False
ngramList(do='del', ngram_ids=[ngram_id,], list_id=list_id)
# TODO - if Ngram is in miam-list, and adding it to stop-list,
# then remove it from the previous list
list_id = int(list_id)
# format the ngram's text
ngram_text = request.data.get('annotation', {}).get('text', None)
ngram_text = ngram_text.strip().lower()
ngram_text = ' '.join(ngram_text.split())
# retrieve the ngram's id
ngram = session.query(Ngram).filter(Ngram.terms == ngram_text).first()
if ngram is None:
ngram = Ngram(n=len(ngram_text.split()), terms=ngram_text)
session.add(ngram)
session.commit()
ngram_id = ngram.id
# add the ngram to the list if not already done
node_ngram = session.query(Node_Ngram).filter(Node_Ngram.node_id==list_id).filter(Node_Ngram.ngram_id==ngram_id).first()
if node_ngram is None:
node_ngram = Node_Ngram(node_id=list_id, ngram_id=ngram_id, weight=1.0)
session.add(node_ngram)
session.commit()
ngram_occurrences = node_ngram.weight
# return the response
return Response({
'uuid': ngram_id,
'text': ngram_text,
'occurrences': ngram_occurrences,
'list_id': list_id,
})
return Response({})
def post(self, request, list_id, ngram_id):
def delete(self, request, list_id, ngram_id):
"""
TODO update one annotation (document level)
associated with one Document (add edge)
Delete a ngram from a list
"""
doc_id = request.GET.get('docId')
annotationDict = json.loads(request.POST.get("annotation"))
print(annotationDict)
session.query(Node_Ngram).filter(Node_Ngram.node_id==list_id).filter(Node_Ngram.ngram_id==ngram_id).delete()
return Response(None, 204)
# There is 2 main actions:
# 1) add ngram to the miamList : this step is tricky if the ngram does
# exist yet, it is experimental in this case.
# But according to your function, you have the ngram_id already
# The function is:
ngramList(do='add', ngram_ids=[ngram_id,], list_id=list_id)
#ngramList(do='add', ngram_ids=[ngram_id,], list_id=list_id)
# Note : depending on the list, maybe I should adapt the function to
# delete from a list when added to a specific type of list
# 2) get the list of ngrams of one miamList: for this step see above
# Use the ngramList function in ngram.lists.py for that
class Document(APIView):
    """
    Read-only Document view, similar to /api/nodes/
    """
    renderer_classes = (JSONRenderer,)

    def get(self, request, doc_id):
        """Return the hyperdata of the node whose id is ``doc_id``.

        ``publication_date`` is re-formatted with ``strftime("%x")`` when the
        stored value parses as ``"%Y-%m-%d %H:%M:%S"``; otherwise the stored
        value (possibly ``None``) is returned unchanged.

        Raises APIException (constructed with 404) when no node matches.
        """
        node = session.query(Node).filter(Node.id == doc_id).first()
        if node is None:
            raise APIException('This node does not exist', 404)

        raw_date = node.hyperdata.get('publication_date')
        try:
            pub_date = datetime.datetime.strptime(raw_date, "%Y-%m-%d %H:%M:%S")
            pub_date = pub_date.strftime("%x")
        except (TypeError, ValueError):
            # TypeError: the key is absent, so raw_date is None (or not a str).
            # ValueError: the stored string does not match the expected format.
            pub_date = raw_date

        data = {
            'title': node.hyperdata.get('title'),
            'authors': node.hyperdata.get('authors'),
            'journal': node.hyperdata.get('journal'),
            'publication_date': pub_date,
            'full_text': node.hyperdata.get('full_text'),
            'abstract_text': node.hyperdata.get('abstract'),
            'id': node.id
        }
        return Response(data)
......@@ -562,6 +562,7 @@ class NodesList(APIView):
for node in query.all()
]})
class Nodes(APIView):
def get(self, request, node_id):
......@@ -652,39 +653,3 @@ class CorpusController:
)
else:
raise ValidationError('Unrecognized "format=%s", should be "csv" or "json"' % (format, ))
from ngram.lists import listIds, ngramList
class ListManagement(APIView):
    """Expose the MiamList / StopList ids of a corpus and edit their ngrams."""
    #authentication_classes = (SessionAuthentication, BasicAuthentication)
    # TODO: Be careful, this view needs authentication!

    def get(self, request, corpus_id):
        """Return the ids of the 'MiamList' and 'StopList' of ``corpus_id``.

        The user is looked up by the username of ``request.user``.
        """
        user_id = session.query(User.id).filter(User.username==str(request.user)).first()[0]
        lists = dict()
        for list_type in ['MiamList', 'StopList']:
            # listIds returns (id, name) tuples; keep the id of the first one
            list_id = listIds(user_id=user_id, corpus_id=int(corpus_id), typeList=list_type)
            lists[list_type] = int(list_id[0][0])
            # lists[list_type]['id']['name'] = r[0][1]
        return JsonHttpResponse({
            'MiamList' : lists['MiamList'],
            'StopList' : lists['StopList']
        })

    def _update_list(self, request, do):
        """Apply ``do`` ('add' or 'del') to the posted ngram ids and list id."""
        list_id = request.POST.get('list_id')
        ngram_ids = request.POST.get('ngram_ids')
        ngramList(do=do, ngram_ids=ngram_ids, list_id=list_id)
        # A view must return a response object; returning None made every
        # call fail after the list had already been modified.
        return JsonHttpResponse({
            'list_id' : list_id,
            'ngram_ids' : ngram_ids
        })

    def post(self, request, corpus_id):
        """Add the posted ``ngram_ids`` to the posted ``list_id``."""
        return self._update_list(request, 'add')

    def delete(self, request, corpus_id):
        """Remove the posted ``ngram_ids`` from the posted ``list_id``."""
        return self._update_list(request, 'del')
......@@ -17,6 +17,7 @@ def apply_sum(x, y):
from parsing.corpustools import add_resource, parse_resources, extract_ngrams, compute_tfidf
from ngram.lists import ngrams2miam
from admin.utils import PrintException
......@@ -41,6 +42,7 @@ def apply_workflow(corpus_id):
update_processing(corpus, 3)
compute_tfidf(corpus)
ngrams2miam(user_id=corpus.user_id, corpus_id=corpus_id)
update_processing(corpus, 0)
......
......@@ -55,7 +55,6 @@ MAINTENANCE = False
TEMPLATE_DEBUG = False
TEMPLATE_DIRS = (
# Put strings here, like "/home/html/django_templates" or "C:/www/django/templates".
# Always use forward slashes
......@@ -187,6 +186,7 @@ TEMPLATE_CONTEXT_PROCESSORS = (
"django.core.context_processors.static",
)
LOGIN_URL = '/auth/'
# grappelli custom
GRAPPELLI_ADMIN_TITLE = "Gargantext"
......
from celery import shared_task
from parsing.corpustools import add_resource, parse_resources, extract_ngrams, compute_tfidf
@shared_task
def apply_workflow(corpus):
    """Run the processing steps on ``corpus``, in order.

    The steps are: parse its resources, extract its ngrams, then compute
    tf-idf. Registered as a celery task through ``shared_task``.
    """
    parse_resources(corpus)
    # NOTE(review): ['title'] is presumably the list of hyperdata keys the
    # ngrams are extracted from -- confirm against extract_ngrams.
    extract_ngrams(corpus, ['title'])
    compute_tfidf(corpus)
......@@ -5,6 +5,8 @@ from django.contrib.auth.views import login
from gargantext_web import views, views_optimized
from annotations import urls as annotations_urls
from annotations.views import main as annotations_main_view
import gargantext_web.api
import scrappers.scrap_pubmed.views as pubmedscrapper
......@@ -39,6 +41,11 @@ urlpatterns = patterns('',
# Corpus management
url(r'^project/(\d+)/corpus/(\d+)/$', views.corpus),
# annotations App
url(r'^project/(\d+)/corpus/(\d+)/document/(\d+)/$', annotations_main_view),
url(r'^annotations/', include(annotations_urls)),
#
url(r'^project/(\d+)/corpus/(\d+)/corpus.csv$', views.corpus_csv),
url(r'^project/(\d+)/corpus/(tests_mvc_listdocuments+)/corpus.tests_mvc_listdocuments$', views.corpus_csv),
......@@ -63,13 +70,8 @@ urlpatterns = patterns('',
url(r'^api/nodes/(\d+)/children/queries$', gargantext_web.api.NodesChildrenQueries.as_view()),
url(r'^api/nodes/(\d+)/children/duplicates$', gargantext_web.api.NodesChildrenDuplicates.as_view()),
# url(r'^api/nodes/(\d+)/children/duplicates/delete$', gargantext_web.api.NodesChildrenDuplicates.delete ),
url(r'^api/corpus/(\d+)/lists$', gargantext_web.api.ListManagement.as_view()),
url(r'^api/nodes/(\d+)/ngrams$', gargantext_web.api.CorpusController.ngrams),
url(r'^annotations/', include(annotations_urls)),
# Provisory tests
url(r'^ngrams$', views.ngrams), # to be removed
url(r'^nodeinfo/(\d+)$', views.nodeinfo), # to be removed ?
......
......@@ -159,7 +159,7 @@ def project(request, project_id):
# let's start the workflow
try:
if DEBUG is False:
apply_workflow((corpus.id,),)
apply_workflow.apply_async((corpus.id,),)
else:
#apply_workflow(corpus)
thread = Thread(target=apply_workflow, args=(corpus.id, ), daemon=True)
......
......@@ -39,9 +39,9 @@ for name_, type_ in hyperdata.items():
).first()
)
if hyperdata is None:
print('Hyper Data' + name + 'does not existe, creating it')
hyperdata = Hyperdata(name=name, type=type_name)
if data is None:
print('Hyper Data' + name_ + 'does not existe, creating it')
hyperdata = Hyperdata(name=name_, type=type_)
session.add(hyperdata)
session.commit()
......
import sys
from admin.utils import PrintException
from gargantext_web.db import NodeNgram
......@@ -11,9 +10,9 @@ from sqlalchemy import desc, asc, or_, and_, Date, cast, select
from sqlalchemy import literal_column
from sqlalchemy.orm import aliased
# from gargantext_web.db import Node, get_cursor
def listIds(user_id=None, corpus_id=None, typeList='MiamList'):
def listIds(typeList=None, user_id=None, corpus_id=None):
'''
nodeList : get or create NodeList.
nodeList :: Integer -> Integer -> String -> [Node]
......@@ -22,6 +21,9 @@ def listIds(user_id=None, corpus_id=None, typeList='MiamList'):
typeList :: String, Type of the Node that should be created
[Node] :: List of Int, returned or created by the function
'''
if typeList is None:
typeList = 'MiamList'
if corpus_id is not None and user_id is not None:
# Nodes are either in root_list or user_list
......@@ -39,9 +41,7 @@ def listIds(user_id=None, corpus_id=None, typeList='MiamList'):
Node.type_id == cache.NodeType[typeList].id
).order_by(desc(Node.id)).all()
else:
print('typeList not supported yet')
sys.exit(0)
raise Exception("typeList %s not supported yet" % typeList)
if nodes == []:
node = Node(user_id = user_id,
......@@ -56,7 +56,7 @@ def listIds(user_id=None, corpus_id=None, typeList='MiamList'):
return([(node.id, node.name) for node in nodes])
else:
print("Usage (Warning): Need corpus_id and user_id")
raise Exception("Usage (Warning): Need corpus_id and user_id")
# Some functions to manage ngrams according to the lists
......@@ -75,37 +75,37 @@ def listNgramIds(list_id=None, typeList=None,
doc_id : to get specific ngrams related to a document with Node.id=doc_id
user_id : needed to create list if it does not exist
'''
if typeList is None:
typeList = ['MiamList', 'StopList']
elif isinstance(typeList, string):
typeList = [typeList]
if list_id is None :
if corpus_id is not None :
if typeList is not None :
if user_id is not None :
if list_id is None and corpus_id is None:
raise Exception('Need a listId or corpusId to query')
if user_id is None:
raise Exception("Need a user_id to create list if needed")
# iterate over every list in a corpus
try:
list_id = listIds(user_id=user_id,
corpus_id=corpus_id,
typeList=typeList)[0][0]
except:
allLists = []
for aType in typeList:
allLists += listIds(user_id=user_id, corpus_id=corpus_id, typeList=aType)
except Exception as exc:
PrintException()
else:
print('Need a user_id to create list if needed')
sys.exit()
else:
print('Need a typeList parameter')
sys.exit()
else:
print('Need a node_id to take default list of type' + typeList)
sys.exit()
else:
raise exc
ListNgram = aliased(NodeNgram)
query = (session.query(Ngram.id, Ngram.terms, func.count())
or_args = [ListNgram.node_id == l[0] for l in allLists]
query = (session.query(Ngram.id, Ngram.terms, func.count(), ListNgram.node_id)
.join(ListNgram, ListNgram.ngram_id == Ngram.id)
.filter(ListNgram.node_id == list_id)
.group_by(Ngram.id)
.filter(or_(*or_args))
.group_by(Ngram.id, ListNgram)
)
if doc_id is not None :
if doc_id is not None:
Doc = aliased(Node)
DocNgram = aliased(NodeNgram)
query = (query
.join(DocNgram, DocNgram.ngram_id == Ngram.id)
.join(Doc, Doc.id == doc_id)
......@@ -115,9 +115,9 @@ def listNgramIds(list_id=None, typeList=None,
return(query.all())
def ngramList(do=None, ngram_ids=[], list_id=None) :
def ngramList(do, list_id, ngram_ids=None) :
'''
,gramList :: ([Int], Int, String) -> Bool
ngramList :: ([Int], Int, String) -> Bool
Do (delete | add) [ngram_id] (from | to) the list_id
options:
......@@ -125,64 +125,92 @@ def ngramList(do=None, ngram_ids=[], list_id=None) :
ngram_id = [Int] : list of Ngrams id (Ngrams.id)
list_id = Int : list id (Node.id)
'''
if do is None or ngram_ids == [] or list_id is None :
print('Need more options: do, ngram_id, list_id')
sys.exit(0)
else:
results = []
if do == 'create':
terms = copy(ngram_ids)
ngram_ids = []
for ngram_term in terms:
# TODO set the language correctly
ngram = Ngram.objects.get_or_create(terms=ngram_term, n=len(terms.split()),
language='en')
ngram_ids += [ngram.id]
# TODO there should not be a try/except here, let the code crash as soon as possible
try:
node_type_id = (session.query(Node.type_id)
.filter(Node.id == list_id)
.first()
)
for ngram_id in ngram_ids:
# First we test to know if ngram exist in database already
#ngram = (session.query(Ngram).filter(Ngram.id == ngram_id).first()
# Fetch the ngram from database
ngram = session.query(Ngram.id, Ngram.terms, func.count()).filter(Ngram.id == ngram_id).first()
# Need to be optimized with list of ids
node_ngram = (session.query(NodeNgram)
.filter(NodeNgram.ngram_id == ngram_id)
.filter(NodeNgram.node_id == list_id)
.first()
)
# create NodeNgram if does not exists
if node_ngram is None :
node_ngram = NodeNgram(node_id = list_id,
ngram_id=ngram_id,
node_ngram = NodeNgram(node_id = list_id, ngram_id=ngram_id,
weight=1)
if do == 'add' :
session.add(node_ngram)
results += [ngram]
elif do == 'del' :
session.delete(node_ngram)
session.commit()
return(True)
return(results)
except:
except Exception as exc:
PrintException()
return(False)
raise exc
# Some functions to manage automatically the lists
def doStopList(user_id=None, corpus_id=None,
stop_id=None,
reset=False, limit=None
):
def doStopList(user_id=None, corpus_id=None, stop_id=None, reset=False, limit=None):
'''
Compute automatically the stopList and returns its Node.id
Algo: TODO tfidf according type of corpora
'''
if stop_id is None:
stop_id = nodeListIds(user_id=user_id,
stop_id = listNgramIds(user_id=user_id,
corpus_id=corpus_id,
typeList='StopList')[0]
# according to type of corpus, choose the right default stopList
def ngrams2miam(user_id=None, corpus_id=None):
    '''
    Fill the MiamList of a corpus from the ngrams of its documents.

    The first 'MiamList' id returned by listIds for (user_id, corpus_id) is
    used as target node. For every ngram linked to a 'Document' node whose
    parent is corpus_id, one (miam list id, ngram id, row count) tuple is
    bulk-inserted into NodeNgram as (node_id, ngram_id, weight).
    '''
    miam_lists = listIds(typeList='MiamList', user_id=user_id, corpus_id=corpus_id)
    miam_id = miam_lists[0][0]
    print(miam_id)

    # the target list id is injected as a constant column of every row
    columns = (
        literal_column(str(miam_id)).label("node_id"),
        Ngram.id,
        func.count(),
    )
    document_type_id = cache.NodeType['Document'].id

    rows = (session.query(*columns)
        .select_from(Ngram)
        .join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
        .join(Node, NodeNgram.node_id == Node.id)
        .filter(Node.parent_id == corpus_id)
        .filter(Node.type_id == document_type_id)
        .group_by(Ngram.id)
        .all()
    )
    bulk_insert(NodeNgram, ['node_id', 'ngram_id', 'weight'], rows)
def doList(
type_list='miam',
type_list='MiamList',
user_id=None, corpus_id=None,
miam_id=None, stop_id=None, main_id=None,
lem_id=None, stem_id=None, cvalue_id=None, group_id=None,
......@@ -206,9 +234,8 @@ def doList(
cvalue = equivalent N-Words according to C-Value (but the main form)
'''
if type_list not in ['miam', 'main']:
print('Type List supported: \'miam\' or \'main\'')
sys.exit(0)
if type_list not in ['MiamList', 'MainList']:
raise Exception("Type List (%s) not supported, try: \'MiamList\' or \'MainList\'" % type_list)
try:
list_dict = {
......@@ -228,7 +255,7 @@ def doList(
for list_ in list_dict.keys():
if list_dict[list_]['id'] is None:
list_dict[list_]['id'] = nodeListIds(user_id=user_id,
list_dict[list_]['id'] = listNgramIds(user_id=user_id,
corpus_id=corpus_id,
typeList=list_dict[list_]['type'])[0][0]
# Delete previous List ?
......@@ -243,8 +270,7 @@ def doList(
stopNgram = aliased(NodeNgram)
if 'miam' == type_list:
if type_list == 'MiamList' :
query = (session.query(
literal_column(str(list_dict['miam']['id'])).label("node_id"),
Ngram.id,
......@@ -264,7 +290,7 @@ def doList(
.group_by(Ngram.id)
)
elif 'main' == type_list:
elif type_list == 'MainList' :
# Query to get Ngrams for main list
query = (session.query(
literal_column(str(list_dict['main']['id'])).label("node_id"),
......@@ -314,4 +340,3 @@ def doList(
bulk_insert(NodeNgram, ['node_id', 'ngram_id', 'weight'], query)
return(list_dict[type_list]['id'])
......@@ -9,7 +9,6 @@ from gargantext_web.db import *
from .parsers_config import parsers as _parsers
class DebugTime:
def __init__(self, prefix):
......@@ -29,7 +28,6 @@ class DebugTime:
# keep all the parsers in a cache
class Parsers(defaultdict):
def __init__(self):
self._parsers = _parsers
......@@ -44,9 +42,7 @@ class Parsers(defaultdict):
parsers = Parsers()
# resources management
def add_resource(corpus, **kwargs):
# only for tests
session = Session()
......@@ -83,7 +79,6 @@ def add_resource(corpus, **kwargs):
# return result
return resource
def parse_resources(corpus, user=None, user_id=None):
dbg = DebugTime('Corpus #%d - parsing' % corpus.id)
session = Session()
......@@ -102,8 +97,7 @@ def parse_resources(corpus, user=None, user_id=None):
.filter(Node_Resource.parsed == False)
)
# make a new node for every parsed document of the corpus
print("HERE MOFOs")
print(resources_query)
# print(resources_query)
dbg.show('analyze documents')
nodes = list()
for resource, resourcetype in resources_query:
......@@ -147,13 +141,13 @@ def parse_resources(corpus, user=None, user_id=None):
hyperdata.name: hyperdata
for hyperdata in session.query(Hyperdata)
}
#print('hyperdata_types', hyperdata_types)
for node in nodes:
node_id = node.id
for hyperdata_key, hyperdata_value in node.hyperdata.items():
try:
hyperdata = hyperdata_types[hyperdata_key]
except KeyError:
# Why silent continue here ?
continue
if hyperdata.type == 'string':
hyperdata_value = hyperdata_value[:255]
......@@ -163,16 +157,17 @@ def parse_resources(corpus, user=None, user_id=None):
hyperdata_value,
))
#print('I am here', node_hyperdata_lists.items())
for key, values in node_hyperdata_lists.items():
#print('here', key, values)
bulk_insert(Node_Hyperdata, ['node_id', 'hyperdata_id', 'value_'+key], values)
# mark the corpus as parsed
corpus.parsed = True
# ngrams extraction
from .NgramsExtractors import EnglishNgramsExtractor, FrenchNgramsExtractor, NgramsExtractor
class NgramsExtractors(defaultdict):
def __init__(self):
......
......@@ -118,7 +118,11 @@ function Final_UpdateTable( action ) {
// Get all the duplicates using the Django-Garg API
var current_docs = {}
var BIS_dict = {}
var corpusid = window.location.href.split("corpus")[1].replace(/\//g, '')//replace all the slashes
var path = window.location.pathname.match(/\/project\/(.*)\/corpus\/(.*)\//);
var projectid = path[1]
var corpusid = path[2]
var theurl = "/api/nodes/"+corpusid+"/children/duplicates?keys=title&limit=9999"
// $.ajax({
// url: theurl,
......@@ -231,7 +235,7 @@ function ulWriter(rowIndex, record, columns, cellWriter) {
var orig_id = parseInt(data.records[i].id)
var arr_id = parseInt(i)
RecDict[orig_id] = arr_id;
data.records[i]["name"] = '<a target="_blank" href="/nodeinfo/'+orig_id+'">'+data.records[i]["name"]+'</a>'
data.records[i]["name"] = '<a target="_blank" href="/project/'+projectid+'/corpus/'+ corpusid + '/document/'+orig_id+'">'+data.records[i]["name"]+'</a>'
data.records[i]["del"] = false
var date = data.records[i]["date"];
......
......@@ -8,7 +8,7 @@ from ngram.lists import *
#from gargantext_web.views import empty_trash
#empty_trash()
#
#user = session.query(User).all()[0]
user = session.query(User).filter(User.username=='alexandre').first()
......@@ -36,6 +36,8 @@ if project is None:
corpus = session.query(Node).filter(Node.parent_id == project.id,
Node.type_id == cache.NodeType['Corpus'].id).first()
print('Corpus is', corpus)
if corpus is None:
corpus = Node(
parent_id = project.id,
......@@ -66,14 +68,14 @@ print('Working on corpus:', corpus.id, corpus.name)
stem_id = stem_corpus(corpus_id=corpus.id)
print('Stem Node.id is', stem_id)
for typeList in ['MiamList', 'StopList', 'MainList', 'Group']:
n = listIds(user_id=user.id,
corpus_id=corpus.id,
typeList=typeList)
#print(n[0][0])
print('Test having list_id')
print(n, listNgramIds(list_id=n[0][0])[:3])
#for typeList in ['MiamList', 'StopList', 'MainList', 'Group']:
# n = listIds(user_id=user.id,
# corpus_id=corpus.id,
# typeList=typeList)
# #print(n[0][0])
# print('Test having list_id')
# print(n, listNgramIds(list_id=n[0][0])[:3])
#
stop_list_id = listIds(user_id=user.id,
corpus_id=corpus.id,
......@@ -87,30 +89,53 @@ miam_list_id = listIds(user_id=user.id,
print('Stop List', stop_list_id)
print('Miam List', miam_list_id)
ngram_id = listNgramIds(list_id=miam_list_id)[0][0]
print('ngram_id', ngram_id)
ngramList(do='add', ngram_ids=[ngram_id,], list_id=stop_list_id)
ngrams2miam(user_id=user.id, corpus_id=corpus.id)
print(listNgramIds(list_id=miam_list_id, user_id=user.id, corpus_id=corpus.id))
#type_list='MiamList'
#try:
# d = doList(type_list=type_list, user_id = user.id, corpus_id = corpus.id, limit=150)
## print('Size of the ' + type_list + ' list:',
## session.query(NodeNgram).filter(NodeNgram.node_id == d).count()
## )
#except:
# PrintException()
##
#print(listNgramIds(list_id=miam_list_id, user_id=user.id, corpus_id=corpus.id))
#
#ngram_id = listNgramIds(list_id=miam_list_id, user_id=user.id, corpus_id=corpus.id)[0][0]
#print('ngram_id', ngram_id)
#
#ngramList(do='add', ngram_ids=[ngram_id,], list_id=stop_list_id)
# print('Test having typeList and corpus.id')
# print(n, listNgramIds(typeList=typeList, corpus_id=corpus.id, user_id=user.id)[:3])
##
# print('Test having typeList and corpus.id and doc_id')
# print(n, listNgramIds(typeList=typeList, corpus_id=corpus.id, doc_id=doc_id, user_id=user.id)[:3])
#
#
#type_list='miam'
#try:
# d = doList(type_list=type_list, user_id = user.id, corpus_id = corpus.id, stem_id=stem_id, limit=150)
# print('Size of the ' + type_list + ' list:',
# session.query(NodeNgram).filter(NodeNgram.node_id == d).count()
# )
#except:
# PrintException()
#
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment