Commit 08539c33 authored by PkSM3

Merge branch 'unstable' of ssh://delanoe.org:1979/gargantext into samuel

parents 08dc8379 addc6e19
from collections import defaultdict
class Translations:
    """Mapping of keys to the value that replaces them, with a reverse index.

    Attributes:
        items: ``key -> value`` mapping (a ``defaultdict(int)`` unless built
            from another instance whose ``items`` was replaced).
        groups: reverse index, ``value -> set of keys`` translated to it.

    The operators return new instances and never mutate their operands.
    """

    def __init__(self, other=None):
        """Build an empty instance, a copy of ``other``, or one from pairs.

        Args:
            other: ``None``, another ``Translations``, or anything accepted
                by ``dict()`` (a mapping or an iterable of key/value pairs).

        Raises:
            TypeError: if ``other`` is neither of the above.
        """
        if other is None:
            self.items = defaultdict(int)
            self.groups = defaultdict(set)
        elif isinstance(other, Translations):
            self.items = other.items.copy()
            # Copy every set as well: a shallow copy of ``groups`` would share
            # the sets, so editing the copy would corrupt the original.
            self.groups = defaultdict(
                set,
                ((value, set(keys)) for value, keys in other.groups.items()),
            )
        elif hasattr(other, '__iter__'):
            self.items = defaultdict(int, other)
            self.groups = defaultdict(set)
            for key, value in self.items.items():
                self.groups[value].add(key)
        else:
            raise TypeError

    def _unlink(self, key):
        """Forget ``key``: drop it from ``items`` and from its group."""
        if key not in self.items:
            return
        value = self.items.pop(key)
        members = self.groups.get(value)
        if members is not None:
            members.discard(key)
            if not members:
                # Do not keep empty groups around.
                del self.groups[value]

    def __add__(self, other):
        """Return a copy of ``self`` updated with the translations of ``other``.

        Translations of ``other`` win over those of ``self`` for shared keys.
        ``other`` may be a ``Translations`` or anything accepted by ``dict()``.
        """
        result = self.__class__(self)
        if isinstance(other, Translations):
            pairs = other.items.items()
        else:
            pairs = dict(other).items()
        for key, value in pairs:
            # Remove any previous translation first so that the reverse index
            # never lists a key under two different values.
            result._unlink(key)
            result.items[key] = value
            result.groups[value].add(key)
        return result

    def __sub__(self, other):
        """Return a copy of ``self`` without the keys translated by ``other``.

        Keys are removed whatever value they map to in ``other``. When
        ``other`` is not a ``Translations`` the copy is returned unchanged.
        """
        result = self.__class__(self)
        if isinstance(other, Translations):
            for key in other.items:
                result._unlink(key)
        return result

    def __iter__(self):
        """Yield ``(key, value)`` pairs."""
        for key, value in self.items.items():
            yield key, value
class WeightedMatrix:
    """Sparse matrix of weights stored as nested dictionaries.

    Attributes:
        items: ``key1 -> key2 -> weight``; both levels are ``defaultdict``
            instances, missing weights default to ``0.0``.
    """

    def __init__(self, other=None):
        """Build an empty matrix, a copy of ``other``, or one from triples.

        Args:
            other: ``None``, another ``WeightedMatrix``, or an iterable of
                ``(key1, key2, weight)`` rows.

        Raises:
            TypeError: if ``other`` is neither of the above.
        """
        self.items = defaultdict(lambda: defaultdict(float))
        if other is None:
            return
        if isinstance(other, WeightedMatrix):
            # Copy row by row so that the two matrices share no inner dict.
            for key1, row in other.items.items():
                self.items[key1].update(row)
        elif hasattr(other, '__iter__'):
            for row in other:
                self.items[row[0]][row[1]] = row[2]
        else:
            raise TypeError

    def __iter__(self):
        """Yield ``(key1, key2, weight)`` triples."""
        for key1, key2_value in self.items.items():
            for key2, value in key2_value.items():
                yield key1, key2, value

    def __sub__(self, other):
        """Remove elements of the other matrix from the current one.

        Can only be subtracted from another matrix of cooccurrences.

        TODO: not implemented yet; currently returns ``None``.
        """
        return None

    def __mul__(self, other):
        """Return a new matrix whose keys are translated through ``other``.

        Keys without a translation are kept as they are; weights of cells
        that end up on the same translated pair of keys are summed.

        Raises:
            TypeError: if ``other`` is not a ``Translations``.
        """
        if not isinstance(other, Translations):
            raise TypeError
        result = WeightedMatrix()
        translate = other.items.get
        for key1, key2_value in self.items.items():
            new_key1 = translate(key1, key1)
            for key2, value in key2_value.items():
                result.items[new_key1][translate(key2, key2)] += value
        return result
class UnweightedList:
    """Set of keys supporting union, difference and intersection.

    Attributes:
        items: the ``set`` of keys held by the list.

    The operators accept an ``UnweightedList`` or a ``WeightedList`` (whose
    keys are used) and return a new instance without mutating the operands.
    """

    def __init__(self, other=None):
        """Build an empty list, a copy of ``other``, or one from an iterable.

        Args:
            other: ``None``, an ``UnweightedList``, a ``WeightedList`` (its
                keys are kept), or an iterable. When the first element of
                the iterable is itself iterable (and not a string), the
                first item of each element is used as the key.

        Raises:
            TypeError: if ``other`` is neither of the above.
        """
        if other is None:
            self.items = set()
        elif isinstance(other, UnweightedList):
            self.items = other.items.copy()
        elif isinstance(other, WeightedList):
            self.items = set(other.items.keys())
        elif hasattr(other, '__iter__'):
            # Materialise first: a generator supports neither len() nor [0].
            elements = list(other)
            first_is_row = (
                bool(elements)
                and hasattr(elements[0], '__iter__')
                and not isinstance(elements[0], (str, bytes))
            )
            if first_is_row:
                self.items = set(element[0] for element in elements)
            else:
                self.items = set(elements)
        else:
            raise TypeError

    @staticmethod
    def _keys_of(other):
        """Return the keys of a list operand as a set, or raise TypeError."""
        if isinstance(other, UnweightedList):
            return other.items
        if isinstance(other, WeightedList):
            return set(other.items.keys())
        raise TypeError

    def __add__(self, other):
        """Return the union of both lists."""
        result = self.__class__(self)
        result.items |= self._keys_of(other)
        return result

    __or__ = __add__

    def __sub__(self, other):
        """Return the keys of ``self`` that are not in ``other``."""
        result = self.__class__(self)
        result.items -= self._keys_of(other)
        return result

    def __and__(self, other):
        """Return the keys present in both lists."""
        result = self.__class__(self)
        result.items &= self._keys_of(other)
        return result
class WeightedList:
    """Mapping of keys to a float weight.

    Attributes:
        items: ``key -> weight`` mapping; a ``defaultdict(float)``, so
            missing keys weigh ``0.0``.

    The operators return new objects and never mutate their operands.
    """

    def __init__(self, other=None):
        """Build an empty list, a copy of ``other``, or one from pairs.

        Args:
            other: ``None``, a ``WeightedList``, an ``UnweightedList`` (every
                key gets the weight ``1.0``), or anything accepted by
                ``dict()`` (a mapping or an iterable of key/weight pairs).

        Raises:
            TypeError: if ``other`` is neither of the above.
        """
        if other is None:
            self.items = defaultdict(float)
        elif isinstance(other, WeightedList):
            self.items = other.items.copy()
        elif isinstance(other, UnweightedList):
            self.items = defaultdict(float)
            for key in other.items:
                self.items[key] = 1.0
        elif hasattr(other, '__iter__'):
            self.items = defaultdict(float, other)
        else:
            raise TypeError

    def __iter__(self):
        """Yield ``(key, weight)`` pairs."""
        for key, value in self.items.items():
            yield key, value

    def __add__(self, other):
        """Add elements from the other list to the current one.

        Weights of a ``WeightedList`` are summed key by key; every key of an
        ``UnweightedList`` adds ``1.0``.

        Raises:
            TypeError: if ``other`` is neither kind of list.
        """
        result = self.__class__(self)
        if isinstance(other, WeightedList):
            for key, value in other.items.items():
                result.items[key] += value
        elif isinstance(other, UnweightedList):
            for key in other.items:
                result.items[key] += 1.0
        else:
            raise TypeError
        return result

    def __sub__(self, other):
        """Remove elements of the other list from the current one.

        Raises:
            TypeError: if ``other`` is not an ``UnweightedList``.
        """
        result = self.__class__(self)
        if isinstance(other, UnweightedList):
            for key in other.items:
                result.items.pop(key, None)
        else:
            raise TypeError
        return result

    def __and__(self, other):
        """Return the weights of the keys that are also in ``other``.

        NOTE: unlike the other operators this returns a plain
        ``defaultdict(float)`` rather than a ``WeightedList``; the return
        type is kept as it was for callers that rely on it.

        Raises:
            TypeError: if ``other`` is not an ``UnweightedList``.
        """
        if not isinstance(other, UnweightedList):
            raise TypeError
        result = defaultdict(float)
        for key, value in self.items.items():
            if key in other.items:
                result[key] = value
        return result

    def __mul__(self, other):
        """Return a new list whose keys are translated through ``other``.

        Keys without a translation are kept as they are; weights of keys
        translated to the same value are summed.

        Raises:
            TypeError: if ``other`` is not a ``Translations``.
        """
        if not isinstance(other, Translations):
            raise TypeError
        result = WeightedList()
        for key, value in self.items.items():
            result.items[other.items.get(key, key)] += value
        return result
# if __name__ == '__main__':
# l = Coocurrences()
# l = List()
# for i in l:
# print(i)
# t1 = Translations()
# t2 = Translations()
# t2.items = {1: 2}
# for i in t1 + t2:
# print(i)
......@@ -8,7 +8,8 @@
"angular": "~1.2.x",
"angular-loader": "~1.2.x",
"angular-resource": "~1.2.x",
"bootstrap": "~3.x"
"bootstrap": "~3.x",
"angular-cookies": "1.2"
},
"resolutions": {
"angular": "~1.2.x"
......
......@@ -46,7 +46,7 @@
}
.main-panel, .text-panel, .words-panel {
height: 400px;
height: 800px;
margin: 10px 0px;
}
......
This diff is collapsed.
(function () {
'use strict';
var http = angular.module('annotationsAppHttp', ['ngResource']);
var http = angular.module('annotationsAppHttp', ['ngResource', 'ngCookies']);
/*
* Read Document
*/
http.factory('DocumentHttpService', function($resource) {
return $resource(
window.ANNOTATION_API_URL + "document" + '/:docId/',
{
docId: '@docId'
},
{
get: {
method: 'GET',
params: {docId: '@docId'}
}
/*
 * Configure $http XSRF handling: the token is read from the 'csrftoken'
 * cookie and sent back in the 'X-CSRFToken' request header.
 * NOTE(review): presumably chosen to match the Django backend's CSRF
 * cookie/header names - confirm against the server settings.
 */
http.config(['$httpProvider', function($httpProvider){
$httpProvider.defaults.xsrfHeaderName = 'X-CSRFToken';
$httpProvider.defaults.xsrfCookieName = 'csrftoken';
}]);
/*
* Read Document
*/
http.factory('DocumentHttpService', function($resource) {
return $resource(
window.ANNOTATION_API_URL + "document/:docId/",
{
docId: '@docId'
},
{
get: {
method: 'GET',
params: {docId: '@docId'}
}
);
});
}
);
});
/*
* Read Ngram Lists
*/
// Registers the 'NgramListHttpService' factory: a $resource bound to
// ANNOTATION_API_URL + 'lists/:listId/' exposing a single GET action.
http.factory('NgramListHttpService', function ($resource) {
return $resource(
window.ANNOTATION_API_URL + 'lists' + '/:listId/',
{
// default: take :listId from the 'listId' property of the data object
listId: '@listId'
},
{
get: {
method: 'GET',
params: {listId: '@listId'}
}
}
);
});
/*
* Read all Ngrams
*/
// Registers the 'NgramListHttpService' factory: a $resource bound to
// ANNOTATION_API_URL + 'corpus/:corpusId/document/:docId' exposing a single
// GET action with no extra parameters.
// NOTE(review): the same factory name is also registered for the
// 'lists/:listId/' URL in this file - confirm which registration is meant
// to remain.
http.factory('NgramListHttpService', function ($resource) {
return $resource(
window.ANNOTATION_API_URL + 'corpus/:corpusId/document/:docId',
{
// defaults: URL parameters are taken from the data object's properties
corpusId: '@corpusId',
docId: '@docId'
},
{
get: {
method: 'GET',
params: {}
}
}
);
});
/*
* Create, modify or delete on Ngram of a list
*/
http.factory('NgramHttpService', function ($resource) {
return $resource(
window.ANNOTATION_API_URL + 'lists' + '/:listId/ngrams/' + ':ngramId/',
{
listId: '@listId'
/*
* Create, modify or delete 1 Ngram
*/
http.factory('NgramHttpService', function ($resource) {
return $resource(
window.ANNOTATION_API_URL + 'lists/:listId/ngrams/:ngramId',
{
listId: '@listId',
ngramId: '@id'
},
{
post: {
method: 'POST',
params: {'listId': '@listId', 'ngramId': ''}
},
{
post: {
method: 'POST',
params: {'listId': '@listId', 'ngramId': '@ngramId'}
},
delete: {
method: 'DELETE',
params: {'listId': '@listId', 'ngramId': '@ngramId'}
}
delete: {
method: 'DELETE',
params: {'listId': '@listId', 'ngramId': '@id'}
}
);
});
// return {
// newAnnotationObject: function(text, category, level) {
// return {
// 'text': text.trim(),
// 'category': category,
// 'level': level
// };
// },
// create: function(keyword, $rootScope) {
// if ($rootScope.annotations === undefined) $rootScope.annotations = [];
// // find duplicate by text
// var existing = _.find(
// $rootScope.annotations,
// function(annotation) { return annotation.text.trim().toLowerCase() === keyword.text.trim().toLowerCase(); }
// );
// // delete existing conflicting data before adding new
// if (existing) {
// if (existing.category == keyword.category && existing.level == keyword.level) return;
// this.delete(existing, $rootScope);
// }
// // TODO remove server mocking
// var mock = _.extend(keyword, {
// 'uuid': jQuery.now().toString(),
// 'occurrences': 322
// });
//
// $timeout(function() {
// $rootScope.$apply(function() {
// $rootScope.annotations.push(mock);
// });
// });
//
// return mock;
// },
// delete: function(keyword, $rootScope) {
// var filtered = _.filter($rootScope.annotations, function(item) {
// if (item.uuid == keyword.uuid) {
// return false;
// } else {
// return true;
// }
// });
// $timeout(function() {
// $rootScope.$apply(function() {
// $rootScope.annotations = filtered;
// });
// });
// }
// };
}
);
});
})(window);
<span ng-if="keyword.category == 'miamlist'" ng-click='onDeleteClick()' class="delete-keyword" data-keyword-id="{{keyword.uuid}}" data-keyword-text="{{keyword.text}}" data-keyword-category="miamlist">×</span>
<a ng-if="keyword.category == 'miamlist'" href="#" data-toggle="tooltip" class="keyword miamword">{{keyword.text}}</a>
<span ng-if="keyword.category == 'stoplist'" ng-click='onDeleteClick()' class="delete-keyword" data-keyword-id="{{keyword.uuid}}" data-keyword-text="{{keyword.text}}" data-keyword-category="stoplist">×</span>
<a ng-if="keyword.category == 'stoplist'" href="#" data-toggle="tooltip" class="keyword stopword">{{keyword.text}}</a>
<span class="occurrences" data-keyword-id="{{keyword.uuid}}">{{keyword.occurrences}}</span>
<span ng-if="keyword.category == 'miamlist'" ng-click='onDeleteClick()' class="delete-keyword" data-keyword-id="{[{keyword.uuid}]}" data-keyword-text="{[{keyword.text}]}" data-keyword-category="miamlist">×</span>
<span ng-if="keyword.category == 'miamlist'" data-toggle="tooltip" class="keyword miamword">{[{keyword.text}]}</span>
<span ng-if="keyword.category == 'stoplist'" ng-click='onDeleteClick()' class="delete-keyword" data-keyword-id="{[{keyword.uuid}]}" data-keyword-text="{[{keyword.text}]}" data-keyword-category="stoplist">×</span>
<span ng-if="keyword.category == 'stoplist'" data-toggle="tooltip" class="keyword stopword">{[{keyword.text}]}</span>
<span class="occurrences" data-keyword-id="{[{keyword.uuid}]}">{[{keyword.occurrences}]}</span>
......@@ -24,6 +24,7 @@ $script([
//'bower_components/angular-route/angular-route.js',
], function() {
$script([
S + 'bower_components/angular-cookies/angular-cookies.min.js',
S + 'bower_components/angular-resource/angular-resource.min.js'], function() {
$script([S + 'annotations/http.js', S + 'annotations/app.js'], function() {
// when all is done, execute bootstrap angular application (replace ng-app directive)
......
<ul class="noselection">
<li>{{level}}<span ng-if="category !== null"> {{category}}</span></li>
<li class="miamword" ng-if="local_miamlist === true" ng-click="onClick($event, 'post', 'miamlist', 'local')">add to miam-list</li>
<li class="miamword" ng-if="local_miamlist === false" ng-click="onClick($event, 'delete', 'miamlist', 'local')">remove from miam-list</li>
<li>{[{level}]}<span ng-if="category !== null"> {[{category}]}</span></li>
<li class="miamword" ng-if="local_miamlist === true" ng-click="onClick($event, 'post', miamListId, 'local')">add to miam-list</li>
<li class="miamword" ng-if="local_miamlist === false" ng-click="onClick($event, 'delete', miamListId, 'local')">remove from miam-list</li>
<li class="stopword" ng-if="local_stoplist === true" ng-click="onClick($event, 'post', 'stoplist', 'local')">add to local stop-list</li>
<li class="stopword" ng-if="local_stoplist === false" ng-click="onClick($event, 'delete', 'stoplist', 'local')">remove from local stop-list</li>
<li class="stopword" ng-if="local_stoplist === true" ng-click="onClick($event, 'post', stopListId, 'local')">add to local stop-list</li>
<li class="stopword" ng-if="local_stoplist === false" ng-click="onClick($event, 'delete', stopListId, 'local')">remove from local stop-list</li>
<li class="stopword" ng-if="global_stoplist === true" ng-click="onClick($event, 'post', 'stoplist', 'global')">add to global stop-list</li>
<li class="stopword" ng-if="global_stoplist === false" ng-click="onClick($event, 'delete', 'stoplist', 'global')">remove from global stop-list</li>
<!--<li class="stopword" ng-if="global_stoplist === true" ng-click="onClick($event, 'post', 'stoplist', 'global')">add to global stop-list</li>
<li class="stopword" ng-if="global_stoplist === false" ng-click="onClick($event, 'delete', 'stoplist', 'global')">remove from global stop-list</li>-->
</ul>
/*
AngularJS v1.2.28
(c) 2010-2014 Google, Inc. http://angularjs.org
License: MIT
*/
(function(p,f,n){'use strict';f.module("ngCookies",["ng"]).factory("$cookies",["$rootScope","$browser",function(e,b){var c={},g={},h,k=!1,l=f.copy,m=f.isUndefined;b.addPollFn(function(){var a=b.cookies();h!=a&&(h=a,l(a,g),l(a,c),k&&e.$apply())})();k=!0;e.$watch(function(){var a,d,e;for(a in g)m(c[a])&&b.cookies(a,n);for(a in c)d=c[a],f.isString(d)||(d=""+d,c[a]=d),d!==g[a]&&(b.cookies(a,d),e=!0);if(e)for(a in d=b.cookies(),c)c[a]!==d[a]&&(m(d[a])?delete c[a]:c[a]=d[a])});return c}]).factory("$cookieStore",
["$cookies",function(e){return{get:function(b){return(b=e[b])?f.fromJson(b):b},put:function(b,c){e[b]=f.toJson(c)},remove:function(b){delete e[b]}}}])})(window,window.angular);
//# sourceMappingURL=angular-cookies.min.js.map
......@@ -23,14 +23,14 @@
<div class="container-fluid">
<div class="row-fluid main-panel" ng-controller="IntraTextController">
<div class="col-md-4 col-xs-4 tabbable words-panel">
<ul class="nav nav-tabs">
<li class="active"><a href="#tab1" data-toggle="tab">Miamwords</a></li>
<li><a href="#tab2" data-toggle="tab">Local stopwords</a></li>
<ul class="nav nav-pills nav-justified">
<li class="active"><a href="#tab1" data-toggle="tab"><span class="glyphicon glyphicon-tags"></span>&nbsp;&nbsp;Miamwords</a></li>
<!--<li><a href="#tab2" data-toggle="tab">Local stopwords</a></li>-->
</ul>
<div class="tab-content">
<div class="tab-pane active" id="tab1">
<ul class="list-group words-list">
<div ng-if="extra_miamlist.length == 0" class="alert alert-info" role="alert">No extra-text miam-word yet</div>
<div ng-if="extra_miamlist.length == 0" class="alert alert-info" role="alert">No extra text miam-word yet</div>
<li ng-repeat="keyword in extra_miamlist | startFrom:currentMiamPage*pageSize | limitTo:pageSize" class="list-group-item">
<div ng-controller="ExtraAnnotationController" keyword-template class="keyword-container"></div>
......@@ -47,7 +47,7 @@
<button type="submit" class="btn btn-default btn-primary" ng-click="onMiamlistSubmit($event)">Add</button>
</div>
</div>
<div class="tab-pane" id="tab2">
<!--<div class="tab-pane" id="tab2">
<ul class="list-group words-list clearfix">
<div ng-if="extra_stoplist.length == 0" class="alert alert-info" role="alert">No extra-text stop-word yet</div>
<li ng-repeat="keyword in extra_stoplist | startFrom:currentStopPage*pageSize | limitTo:pageSize" class="list-group-item"><div ng-controller="ExtraAnnotationController" keyword-template></div></li>
......@@ -62,7 +62,7 @@
<input type="text" class="form-control" id="stoplist-input" ng-keypress="onStoplistSubmit($event)">
<button type="submit" class="btn btn-default" ng-click="onStoplistSubmit($event)">Exclude</button>
</div>
</div>
</div>-->
</div>
</div>
<div class="col-md-8 col-xs-8 text-panel" ng-controller="DocController" id="document">
......@@ -86,10 +86,14 @@
<li class="active pull-right">{[{publication_date}]}</li>
</ul>
</div>
<h4>Abstract</h4>
<p id="abstract-text" class="text-container"></p>
<h4>Article</h4>
<p id="full-text" class="text-container"></p>
<h4 ng-if="abstract_text != null">Abstract</h4>
<p id="abstract-text" class="text-container">
<div ng-if="abstract_text == null" class="alert alert-info" role="alert">No abstract text</div>
</p>
<h4 ng-if="full_text != null">Full Article</h4>
<p id="full-text" class="text-container">
<div ng-if="full_text == null" class="alert alert-info" role="alert">No full text</div>
</p>
</div>
</div> <!-- end of the main row -->
</div>
......@@ -100,8 +104,10 @@
<p class="browsehappy">You are using an <strong>outdated</strong> browser. Please <a href="http://browsehappy.com/">upgrade your browser</a> to improve your experience.</p>
<![endif]-->
<script type="application/javascript">
/* Constants required for annotations app JS to work */
window.STATIC_URL = "{% static '' %}";
window.ANNOTATION_API_URL = "{{ api_url }}";
window.NODES_API_URL = "{{ nodes_api_url }}";
</script>
<script src="{% static 'annotations/main.js' %}"></script>
......
......@@ -3,9 +3,7 @@ from annotations import views
urlpatterns = patterns('',
url(r'^demo/$', views.demo),
url(r'^document/(?P<doc_id>[0-9]+)$', views.Document.as_view()), # document view
#url(r'^document/(?P<doc_id>[0-9]+)/ngrams/(?P<ngram_id>[0-9]+)$', views.DocumentNgram.as_view()), # actions on ngram from a document
url(r'^lists/(?P<list_id>[0-9]+)$', views.NgramList.as_view()), # actions on list filtered by document
url(r'^lists/(?P<list_id>[0-9]+)/ngrams(?:/(?P<ngram_id>[0-9]+))?$', views.Ngram.as_view()), # actions on ngram from a list optionally filtered by document
url(r'^corpus/(?P<corpus_id>[0-9]+)/document/(?P<doc_id>[0-9]+)$', views.NgramList.as_view()), # the list associated with an ngram
url(r'^lists/(?P<list_id>[0-9]+)/ngrams(?:/(?P<ngram_id>[0-9]+))?$', views.NgramEdit.as_view()), #
)
This diff is collapsed.
......@@ -562,6 +562,7 @@ class NodesList(APIView):
for node in query.all()
]})
class Nodes(APIView):
def get(self, request, node_id):
......@@ -652,39 +653,3 @@ class CorpusController:
)
else:
raise ValidationError('Unrecognized "format=%s", should be "csv" or "json"' % (format, ))
from ngram.lists import listIds, ngramList
class ListManagement(APIView):
    """Read the ids of a corpus' lists and add/remove ngrams to/from a list."""

    #authentication_classes = (SessionAuthentication, BasicAuthentication)
    # TODO: Be careful: this view needs authentication!

    def get(self, request, corpus_id):
        """Return the ids of the 'MiamList' and 'StopList' of the corpus.

        The lists are looked up with ``listIds`` for the user whose username
        matches ``request.user``.
        """
        user_id = session.query(User.id).filter(User.username==str(request.user)).first()[0]
        lists = dict()
        for list_type in ['MiamList', 'StopList']:
            list_id = listIds(user_id=user_id, corpus_id=int(corpus_id), typeList=list_type)
            # Only the first column of the first row returned is used.
            lists[list_type] = int(list_id[0][0])
            # lists[list_type]['id']['name'] = r[0][1]
        return JsonHttpResponse({
            'MiamList' : lists['MiamList'],
            'StopList' : lists['StopList']
        })

    def post(self, request, corpus_id):
        """Add the posted ``ngram_ids`` to the posted ``list_id``."""
        list_id = request.POST.get('list_id')
        ngram_ids = request.POST.get('ngram_ids')
        ngramList(do='add', ngram_ids=ngram_ids, list_id=list_id)
        # A view has to return a response; echo what was processed.
        return JsonHttpResponse({
            'action'    : 'add',
            'list_id'   : list_id,
            'ngram_ids' : ngram_ids,
        })

    def delete(self, request, corpus_id):
        """Remove the given ``ngram_ids`` from the given ``list_id``."""
        # NOTE(review): the parameters are read from request.POST although
        # this handles DELETE requests - confirm that clients send them in a
        # way that populates request.POST.
        list_id = request.POST.get('list_id')
        ngram_ids = request.POST.get('ngram_ids')
        ngramList(do='del', ngram_ids=ngram_ids, list_id=list_id)
        # A view has to return a response; echo what was processed.
        return JsonHttpResponse({
            'action'    : 'del',
            'list_id'   : list_id,
            'ngram_ids' : ngram_ids,
        })
......@@ -17,6 +17,7 @@ def apply_sum(x, y):
from parsing.corpustools import add_resource, parse_resources, extract_ngrams, compute_tfidf
from ngram.lists import ngrams2miam
from admin.utils import PrintException
......@@ -34,13 +35,14 @@ def apply_workflow(corpus_id):
update_processing(corpus, 1)
parse_resources(corpus)
update_processing(corpus, 2)
extract_ngrams(corpus, ['title', 'abstract'])
update_processing(corpus, 3)
compute_tfidf(corpus)
ngrams2miam(user_id=corpus.user_id, corpus_id=corpus_id)
update_processing(corpus, 0)
......
......@@ -55,7 +55,6 @@ MAINTENANCE = False
TEMPLATE_DEBUG = False
TEMPLATE_DIRS = (
# Put strings here, like "/home/html/django_templates" or "C:/www/django/templates".
# Always use forward slashes
......@@ -187,6 +186,7 @@ TEMPLATE_CONTEXT_PROCESSORS = (
"django.core.context_processors.static",
)
LOGIN_URL = '/auth/'
# grappelli custom
GRAPPELLI_ADMIN_TITLE = "Gargantext"
......
from celery import shared_task
from parsing.corpustools import add_resource, parse_resources, extract_ngrams, compute_tfidf
@shared_task
def apply_workflow(corpus):
    """Run the processing steps on ``corpus``, one after the other.

    The resources are parsed first, ngrams are then extracted from the
    'title' field only, and the tf-idf is computed last.
    """
    # 1. parse the resources
    parse_resources(corpus)
    # 2. extract the ngrams of the titles
    extract_ngrams(corpus, ['title'])
    # 3. compute the tf-idf
    compute_tfidf(corpus)
......@@ -5,6 +5,8 @@ from django.contrib.auth.views import login
from gargantext_web import views, views_optimized
from annotations import urls as annotations_urls
from annotations.views import main as annotations_main_view
import gargantext_web.api
import scrappers.scrap_pubmed.views as pubmedscrapper
......@@ -39,6 +41,11 @@ urlpatterns = patterns('',
# Corpus management
url(r'^project/(\d+)/corpus/(\d+)/$', views.corpus),
# annotations App
url(r'^project/(\d+)/corpus/(\d+)/document/(\d+)/$', annotations_main_view),
url(r'^annotations/', include(annotations_urls)),
#
url(r'^project/(\d+)/corpus/(\d+)/corpus.csv$', views.corpus_csv),
url(r'^project/(\d+)/corpus/(tests_mvc_listdocuments+)/corpus.tests_mvc_listdocuments$', views.corpus_csv),
......@@ -63,13 +70,8 @@ urlpatterns = patterns('',
url(r'^api/nodes/(\d+)/children/queries$', gargantext_web.api.NodesChildrenQueries.as_view()),
url(r'^api/nodes/(\d+)/children/duplicates$', gargantext_web.api.NodesChildrenDuplicates.as_view()),
# url(r'^api/nodes/(\d+)/children/duplicates/delete$', gargantext_web.api.NodesChildrenDuplicates.delete ),
url(r'^api/corpus/(\d+)/lists$', gargantext_web.api.ListManagement.as_view()),
url(r'^api/nodes/(\d+)/ngrams$', gargantext_web.api.CorpusController.ngrams),
url(r'^annotations/', include(annotations_urls)),
# Provisory tests
url(r'^ngrams$', views.ngrams), # to be removed
url(r'^nodeinfo/(\d+)$', views.nodeinfo), # to be removed ?
......@@ -100,12 +102,12 @@ if settings.DEBUG:
if settings.MAINTENANCE:
urlpatterns = patterns('',
url(r'^img/logo.svg$', views.logo),
url(r'^css/bootstrap.css$', views.css),
url(r'^img/logo.svg$', views.logo),
url(r'^css/bootstrap.css$', views.css),
url(r'^$', views.home_view),
url(r'^about/', views.get_about),
url(r'^admin/', include(admin.site.urls)),
url(r'^$', views.home_view),
url(r'^about/', views.get_about),
url(r'^admin/', include(admin.site.urls)),
url(r'^.*', views.get_maintenance),
url(r'^.*', views.get_maintenance),
)
......@@ -36,7 +36,7 @@ def project(request, project_id):
project_id = int(project_id)
except ValueError:
raise Http404()
# do we have a valid project?
project = (session
.query(Node)
......@@ -74,7 +74,7 @@ def project(request, project_id):
documents_count_by_resourcetype = defaultdict(int)
corpora_count = 0
corpusID_dict = {}
for corpus_id, corpus_name, document_count, processing in corpus_query:
#print(corpus_id, processing)
......@@ -84,7 +84,7 @@ def project(request, project_id):
.join(Node, Node.id == Node_Resource.node_id )
.filter(Node.id==corpus_id)
.first())[0]
if not corpus_id in corpusID_dict:
if resource_type_id is None:
resourcetype_name = '(no resource)'
......@@ -104,7 +104,7 @@ def project(request, project_id):
# do the donut
total_documents_count = sum(documents_count_by_resourcetype.values())
donut = [
{ 'source': re.sub(' \(.*$', '', key),
{ 'source': re.sub(' \(.*$', '', key),
'count': value,
'part' : round(value * 100 / total_documents_count) if total_documents_count else 0,
}
......@@ -116,12 +116,12 @@ def project(request, project_id):
# form validation
form = CustomForm(request.POST, request.FILES)
if form.is_valid():
# extract information from the form
name = form.cleaned_data['name']
thefile = form.cleaned_data['file']
resourcetype = cache.ResourceType[form.cleaned_data['type']]
# which default language shall be used?
if resourcetype.name == "Europress (French)":
language_id = cache.Language['fr'].id
......@@ -129,7 +129,7 @@ def project(request, project_id):
language_id = cache.Language['en'].id
else:
language_id = None
# corpus node instanciation as a Django model
corpus = Node(
name = name,
......@@ -141,10 +141,10 @@ def project(request, project_id):
)
session.add(corpus)
session.commit()
# If user is new, folder does not exist yet, create it then
ensure_dir(request.user)
# Save the uploaded file
filepath = '%s/corpora/%s/%s' % (MEDIA_ROOT, request.user.username, thefile._name)
f = open(filepath, 'wb')
......@@ -159,7 +159,7 @@ def project(request, project_id):
# let's start the workflow
try:
if DEBUG is False:
apply_workflow((corpus.id,),)
apply_workflow.apply_async((corpus.id,),)
else:
#apply_workflow(corpus)
thread = Thread(target=apply_workflow, args=(corpus.id, ), daemon=True)
......@@ -168,7 +168,7 @@ def project(request, project_id):
print('WORKFLOW ERROR')
print(error)
# redirect to the main project page
# TODO need to wait before response (need corpus update)
# TODO need to wait before response (need corpus update)
sleep(2)
return HttpResponseRedirect('/project/' + str(project_id))
else:
......@@ -226,5 +226,5 @@ def tfidf(request, corpus_id, ngram_ids):
nodes_list.append(node_dict)
# print("= = = = = = = = \n")
data = json.dumps(nodes_list)
data = json.dumps(nodes_list)
return JsonHttpResponse(data)
......@@ -39,9 +39,9 @@ for name_, type_ in hyperdata.items():
).first()
)
if hyperdata is None:
print('Hyper Data' + name + 'does not existe, creating it')
hyperdata = Hyperdata(name=name, type=type_name)
if data is None:
print('Hyper Data' + name_ + 'does not existe, creating it')
hyperdata = Hyperdata(name=name_, type=type_)
session.add(hyperdata)
session.commit()
......
This diff is collapsed.
......@@ -9,7 +9,6 @@ from gargantext_web.db import *
from .parsers_config import parsers as _parsers
class DebugTime:
def __init__(self, prefix):
......@@ -29,7 +28,6 @@ class DebugTime:
# keep all the parsers in a cache
class Parsers(defaultdict):
def __init__(self):
self._parsers = _parsers
......@@ -44,9 +42,7 @@ class Parsers(defaultdict):
parsers = Parsers()
# resources management
def add_resource(corpus, **kwargs):
# only for tests
session = Session()
......@@ -83,7 +79,6 @@ def add_resource(corpus, **kwargs):
# return result
return resource
def parse_resources(corpus, user=None, user_id=None):
dbg = DebugTime('Corpus #%d - parsing' % corpus.id)
session = Session()
......@@ -102,8 +97,7 @@ def parse_resources(corpus, user=None, user_id=None):
.filter(Node_Resource.parsed == False)
)
# make a new node for every parsed document of the corpus
print("HERE MOFOs")
print(resources_query)
# print(resources_query)
dbg.show('analyze documents')
nodes = list()
for resource, resourcetype in resources_query:
......@@ -147,13 +141,13 @@ def parse_resources(corpus, user=None, user_id=None):
hyperdata.name: hyperdata
for hyperdata in session.query(Hyperdata)
}
#print('hyperdata_types', hyperdata_types)
for node in nodes:
node_id = node.id
for hyperdata_key, hyperdata_value in node.hyperdata.items():
try:
hyperdata = hyperdata_types[hyperdata_key]
except KeyError:
# Why silent continue here ?
continue
if hyperdata.type == 'string':
hyperdata_value = hyperdata_value[:255]
......@@ -163,16 +157,17 @@ def parse_resources(corpus, user=None, user_id=None):
hyperdata_value,
))
#print('I am here', node_hyperdata_lists.items())
for key, values in node_hyperdata_lists.items():
#print('here', key, values)
bulk_insert(Node_Hyperdata, ['node_id', 'hyperdata_id', 'value_'+key], values)
# mark the corpus as parsed
corpus.parsed = True
# ngrams extraction
from .NgramsExtractors import EnglishNgramsExtractor, FrenchNgramsExtractor, NgramsExtractor
class NgramsExtractors(defaultdict):
def __init__(self):
......
......@@ -118,7 +118,11 @@ function Final_UpdateTable( action ) {
// Get all the duplicates using the Django-Garg API
var current_docs = {}
var BIS_dict = {}
var corpusid = window.location.href.split("corpus")[1].replace(/\//g, '')//replace all the slashes
var path = window.location.pathname.match(/\/project\/(.*)\/corpus\/(.*)\//);
var projectid = path[1]
var corpusid = path[2]
var theurl = "/api/nodes/"+corpusid+"/children/duplicates?keys=title&limit=9999"
// $.ajax({
// url: theurl,
......@@ -231,7 +235,7 @@ function ulWriter(rowIndex, record, columns, cellWriter) {
var orig_id = parseInt(data.records[i].id)
var arr_id = parseInt(i)
RecDict[orig_id] = arr_id;
data.records[i]["name"] = '<a target="_blank" href="/nodeinfo/'+orig_id+'">'+data.records[i]["name"]+'</a>'
data.records[i]["name"] = '<a target="_blank" href="/project/'+projectid+'/corpus/'+ corpusid + '/document/'+orig_id+'">'+data.records[i]["name"]+'</a>'
data.records[i]["del"] = false
var date = data.records[i]["date"];
......
......@@ -8,7 +8,7 @@ from ngram.lists import *
#from gargantext_web.views import empty_trash
#empty_trash()
#
#user = session.query(User).all()[0]
user = session.query(User).filter(User.username=='alexandre').first()
......@@ -36,6 +36,8 @@ if project is None:
corpus = session.query(Node).filter(Node.parent_id == project.id,
Node.type_id == cache.NodeType['Corpus'].id).first()
print('Corpus is', corpus)
if corpus is None:
corpus = Node(
parent_id = project.id,
......@@ -66,14 +68,14 @@ print('Working on corpus:', corpus.id, corpus.name)
stem_id = stem_corpus(corpus_id=corpus.id)
print('Stem Node.id is', stem_id)
for typeList in ['MiamList', 'StopList', 'MainList', 'Group']:
n = listIds(user_id=user.id,
corpus_id=corpus.id,
typeList=typeList)
#print(n[0][0])
print('Test having list_id')
print(n, listNgramIds(list_id=n[0][0])[:3])
#for typeList in ['MiamList', 'StopList', 'MainList', 'Group']:
# n = listIds(user_id=user.id,
# corpus_id=corpus.id,
# typeList=typeList)
# #print(n[0][0])
# print('Test having list_id')
# print(n, listNgramIds(list_id=n[0][0])[:3])
#
stop_list_id = listIds(user_id=user.id,
corpus_id=corpus.id,
......@@ -87,30 +89,53 @@ miam_list_id = listIds(user_id=user.id,
print('Stop List', stop_list_id)
print('Miam List', miam_list_id)
ngram_id = listNgramIds(list_id=miam_list_id)[0][0]
print('ngram_id', ngram_id)
ngramList(do='add', ngram_ids=[ngram_id,], list_id=stop_list_id)
ngrams2miam(user_id=user.id, corpus_id=corpus.id)
print(listNgramIds(list_id=miam_list_id, user_id=user.id, corpus_id=corpus.id))
#type_list='MiamList'
#try:
# d = doList(type_list=type_list, user_id = user.id, corpus_id = corpus.id, limit=150)
## print('Size of the ' + type_list + ' list:',
## session.query(NodeNgram).filter(NodeNgram.node_id == d).count()
## )
#except:
# PrintException()
##
#print(listNgramIds(list_id=miam_list_id, user_id=user.id, corpus_id=corpus.id))
#
#ngram_id = listNgramIds(list_id=miam_list_id, user_id=user.id, corpus_id=corpus.id)[0][0]
#print('ngram_id', ngram_id)
#
#ngramList(do='add', ngram_ids=[ngram_id,], list_id=stop_list_id)
# print('Test having typeList and corpus.id')
# print(n, listNgramIds(typeList=typeList, corpus_id=corpus.id, user_id=user.id)[:3])
##
# print('Test having typeList and corpus.id and doc_id')
# print(n, listNgramIds(typeList=typeList, corpus_id=corpus.id, doc_id=doc_id, user_id=user.id)[:3])
#
#
#type_list='miam'
#try:
# d = doList(type_list=type_list, user_id = user.id, corpus_id = corpus.id, stem_id=stem_id, limit=150)
# print('Size of the ' + type_list + ' list:',
# session.query(NodeNgram).filter(NodeNgram.node_id == d).count()
# )
#except:
# PrintException()
#
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment