Commit 46e0c379 authored by delanoe

Merge branch 'refactoring' into refactoring-alex

parents fe1902be 07bec393
# API
Be more careful about authorizations.
# Constants
Remove "magic numbers" (such as 4096, etc.) from the code and put them in
`constants.py`.
# Projects # Projects
## Overview of all projects ## Overview of all projects
......
...@@ -89,14 +89,30 @@ class Node(Base): ...@@ -89,14 +89,30 @@ class Node(Base):
{'type': type, 'path':path, 'url':url, 'extracted': False} {'type': type, 'path':path, 'url':url, 'extracted': False}
)) ))
def status(self, action=None, progress=None): def status(self, action=None, progress=None, complete=False):
if 'status' not in self.hyperdata: """Get the status of the given action
self['status'] = MutableDict( """
{'action': action, 'progress': progress} # if the hyperdata do not have data about status
) if 'statuses' not in self.hyperdata:
else: self['statuses'] = MutableList()
if action is not None: # if no action name is given, return the last appended status
self['status']['action'] = action if action is None:
if progress is not None: for status in self['statuses']:
self['status']['progress'] = progress if not status['complete']:
return self['status'] return status
if len(self['statuses']):
return self['statuses'][-1]
return None
# retrieve the status concerning by the given action name
for status in self['statuses']:
if status['action'] == action:
if progress is not None:
status['progress'] = progress
if complete:
status['complete'] = complete
return status
# if no status has been found for the action, append a new one
self['statuses'].append(MutableDict(
{'action': action, 'progress': progress, 'complete': complete}
))
return self['statuses'][-1]
...@@ -44,7 +44,7 @@ def extract_ngrams(corpus, rule='{<JJ.*>*<NN.*>+<JJ.*>*}', keys=('title', 'abstr ...@@ -44,7 +44,7 @@ def extract_ngrams(corpus, rule='{<JJ.*>*<NN.*>+<JJ.*>*}', keys=('title', 'abstr
resource_type_index = corpus.resources()[0]['type'] resource_type_index = corpus.resources()[0]['type']
resource_type = RESOURCETYPES[resource_type_index] resource_type = RESOURCETYPES[resource_type_index]
default_language_iso2 = resource_type['default_language'] default_language_iso2 = resource_type['default_language']
for document in corpus.children('DOCUMENT'): for documents_count, document in enumerate(corpus.children('DOCUMENT')):
# get ngrams extractor for the current document # get ngrams extractor for the current document
language_iso2 = document.hyperdata.get('language_iso2', default_language_iso2) language_iso2 = document.hyperdata.get('language_iso2', default_language_iso2)
try: try:
...@@ -68,5 +68,12 @@ def extract_ngrams(corpus, rule='{<JJ.*>*<NN.*>+<JJ.*>*}', keys=('title', 'abstr ...@@ -68,5 +68,12 @@ def extract_ngrams(corpus, rule='{<JJ.*>*<NN.*>+<JJ.*>*}', keys=('title', 'abstr
_integrate_associations(nodes_ngrams_count, ngrams_data, db, cursor) _integrate_associations(nodes_ngrams_count, ngrams_data, db, cursor)
nodes_ngrams_count.clear() nodes_ngrams_count.clear()
ngrams_data.clear() ngrams_data.clear()
if documents_count % 1024 == 0:
corpus.status('ngrams_extraction', progress=documents_count+1)
corpus.save_hyperdata()
session.commit()
# integrate ngrams and nodes-ngrams # integrate ngrams and nodes-ngrams
_integrate_associations(nodes_ngrams_count, ngrams_data, db, cursor) _integrate_associations(nodes_ngrams_count, ngrams_data, db, cursor)
corpus.status('ngrams_extraction', progress=documents_count+1, complete=True)
corpus.save_hyperdata()
session.commit()
...@@ -21,12 +21,13 @@ def parse(corpus): ...@@ -21,12 +21,13 @@ def parse(corpus):
) )
session.add(document) session.add(document)
if documents_count % 64 == 0: if documents_count % 64 == 0:
corpus.status(action='parsing', progress=documents_count) corpus.status('parsing', progress=documents_count)
corpus.save_hyperdata() corpus.save_hyperdata()
session.commit()
documents_count += 1 documents_count += 1
# update info about the resource # update info about the resource
resource['extracted'] = True resource['extracted'] = True
corpus.save_hyperdata()
# commit all changes # commit all changes
corpus.status(action='parsing', progress=documents_count) corpus.status('parsing', progress=documents_count, complete=True)
corpus.save_hyperdata()
session.commit() session.commit()
...@@ -63,6 +63,9 @@ class NodeListResource(APIView): ...@@ -63,6 +63,9 @@ class NodeListResource(APIView):
'records': [dict(zip(parameters['fields'], node)) for node in query] 'records': [dict(zip(parameters['fields'], node)) for node in query]
}) })
def post(self, request):
pass
def delete(self, request): def delete(self, request):
"""Removes the list of nodes corresponding to the query. """Removes the list of nodes corresponding to the query.
WARNING! THIS IS TOTALLY UNTESTED!!!!! WARNING! THIS IS TOTALLY UNTESTED!!!!!
...@@ -76,6 +79,7 @@ class NodeListResource(APIView): ...@@ -76,6 +79,7 @@ class NodeListResource(APIView):
'count': count, 'count': count,
}, 200) }, 200)
class NodeResource(APIView): class NodeResource(APIView):
def _query(self, request, node_id): def _query(self, request, node_id):
...@@ -88,7 +92,6 @@ class NodeResource(APIView): ...@@ -88,7 +92,6 @@ class NodeResource(APIView):
return user, node return user, node
def get(self, request, node_id): def get(self, request, node_id):
from sqlalchemy import delete
user, node = self._query(request, node_id) user, node = self._query(request, node_id)
return JsonHttpResponse({ return JsonHttpResponse({
'id': node.id, 'id': node.id,
...@@ -98,6 +101,7 @@ class NodeResource(APIView): ...@@ -98,6 +101,7 @@ class NodeResource(APIView):
}) })
def delete(self, request, node_id): def delete(self, request, node_id):
from sqlalchemy import delete
user, node = self._query(request, node_id) user, node = self._query(request, node_id)
result = session.execute( result = session.execute(
delete(Node).where(Node.id == node_id) delete(Node).where(Node.id == node_id)
......
...@@ -35,7 +35,7 @@ def corpus(request, project_id, corpus_id): ...@@ -35,7 +35,7 @@ def corpus(request, project_id, corpus_id):
'date': datetime.now(), 'date': datetime.now(),
'project': project, 'project': project,
'corpus': corpus, 'corpus': corpus,
# 'processing': processing, # 'processing': corpus['extracted'],
# 'number': number, # 'number': number,
'view': 'documents' 'view': 'documents'
}, },
......
...@@ -102,10 +102,20 @@ def project(request, project_id): ...@@ -102,10 +102,20 @@ def project(request, project_id):
sourcename2corpora = defaultdict(list) sourcename2corpora = defaultdict(list)
for corpus in corpora: for corpus in corpora:
# we only consider the first resource of the corpus to determine its type # we only consider the first resource of the corpus to determine its type
corpus.count = corpus.children('DOCUMENT').count()
resource = corpus.resources()[0] resource = corpus.resources()[0]
resource_type = RESOURCETYPES[resource['type']] resource_type_name = RESOURCETYPES[resource['type']]['name']
sourcename2corpora[resource_type['name']].append(corpus) # add some data for the viewer
corpus.count = corpus.children('DOCUMENT').count()
status = corpus.status()
if status is not None and not status['complete']:
corpus.status_message = '(in progress: %s, %d complete)' % (
status['action'].replace('_', ' '),
status['progress'],
)
else:
corpus.status_message = ''
# add
sourcename2corpora[resource_type_name].append(corpus)
# source & their respective counts # source & their respective counts
total_documentscount = 0 total_documentscount = 0
sourcename2documentscount = defaultdict(int) sourcename2documentscount = defaultdict(int)
......
// Polyfill: define Array.isArray on engines that do not provide it natively.
// An array is recognized by the internal class tag that
// Object.prototype.toString reports for it.
if (typeof Array.isArray !== 'function') {
    Array.isArray = function(candidate) {
        var classTag = Object.prototype.toString.call(candidate);
        return classTag === '[object Array]';
    };
}
// CSRF token management for Django.
// Look up a cookie by name in `document.cookie`.
// Returns the URI-decoded value of the first cookie whose name matches,
// or null when there is no cookie string or no such cookie.
var getCookie = function(name) {
    if (!(document.cookie && document.cookie != '')) {
        return null;
    }
    var prefix = name + '=';
    var entries = document.cookie.split(';');
    for (var index = 0; index < entries.length; index += 1) {
        // entries are separated by "; ", so strip the surrounding whitespace
        var entry = jQuery.trim(entries[index]);
        if (entry.substring(0, prefix.length) == prefix) {
            return decodeURIComponent(entry.substring(prefix.length));
        }
    }
    return null;
};
// Read Django's CSRF token once at load time and attach it to the jQuery
// AJAX requests that need it.
var csrftoken = getCookie('csrftoken');
$.ajaxSetup({
    beforeSend: function(xhr, settings) {
        // Django only verifies the token on unsafe methods, so safe ones
        // do not need the header.
        var isSafeMethod = /^(GET|HEAD|OPTIONS|TRACE)$/i.test(settings.type);
        // Never send the token to another origin (it would leak it), and
        // never send the literal string "null" when the cookie is missing.
        if (csrftoken !== null && !isSafeMethod && !settings.crossDomain) {
            xhr.setRequestHeader("X-CSRFToken", csrftoken);
        }
    }
});
// Resource class: a thin REST client bound to one collection URL.
//   url_path -- URL of the collection, without trailing slash
//               (e.g. '/api/nodes'); single items live at url_path + '/' + id.
// Every method issues one $.ajax request and hands the response to the
// optional `callback` (used as jQuery's `success` handler).
var Resource = function(url_path) {

    // Serialize a criteria object into a query string ('' when it is empty).
    // Array values are emitted as repeated `key[]=item` pairs.
    var buildQuery = function(criteria) {
        var pairs = [];
        $.each(criteria, function(key, value) {
            if (Array.isArray(value)) {
                $.each(value, function(i, item) {
                    pairs.push(encodeURIComponent(key) + '[]=' + encodeURIComponent(item));
                });
            } else {
                pairs.push(encodeURIComponent(key) + '=' + encodeURIComponent(value));
            }
        });
        return pairs.length ? '?' + pairs.join('&') : '';
    };

    // retrieve one or many items
    //   get(callback)            -- the whole list, without parameters
    //   get(criteria, callback)  -- the list, filtered by the criteria object
    //   get(id, callback)        -- one item, by numeric or string id
    this.get = function(criteria, callback) {
        var url = url_path;
        switch (typeof criteria) {
            // get the list, according to the criteria passed as parameters
            case 'object':
                url += buildQuery(criteria);
                break;
            // get the list, without parameters
            case 'function':
                callback = criteria;
                break;
            // get a single item
            case 'number':
            case 'string':
                url += '/' + criteria;
                break;
        }
        $.ajax({
            url: url,
            type: 'GET',
            success: callback
        });
    };

    // change an item: PATCH to the item URL, sending the item's fields as
    // the request data (previously the request carried no data at all)
    this.change = this.update = function(item, callback) {
        $.ajax({
            url: url_path + '/' + item.id,
            type: 'PATCH',
            data: item,
            success: callback
        });
    };

    // remove an item; accepts either an id or an object carrying an `id`
    this.delete = this.remove = function(id, callback) {
        if (id.id != undefined) {
            id = id.id;
        }
        $.ajax({
            url: url_path + '/' + id,
            type: 'DELETE',
            success: callback
        });
    };

    // add an item: POST the new value to the collection URL. A new item has
    // no id yet, so the URL cannot contain one (the previous code referenced
    // an undeclared `id` and threw a ReferenceError on every call).
    this.add = this.append = function(value, callback) {
        $.ajax({
            url: url_path,
            type: 'POST',
            data: value,
            success: callback
        });
    };

};
// REST entry point: for every name in `path_list`, expose a Resource bound
// to `base_path + name` as a property of the same name.
var GarganRest = function(base_path, path_list) {
    var api = this;
    $.each(path_list, function(index, name) {
        var endpoint = base_path + name;
        api[name] = new Resource(endpoint);
    });
};
// Shared client instance used by the pages (e.g. garganrest.nodes.get(...)).
// Declared with `var` like the other top-level names of this script, so it
// is no longer an implicit global (which throws in strict mode).
var garganrest = new GarganRest('/api/', ['nodes']);
// var log = function(result){console.log(result);};
// garganrest.nodes.get(log);
// garganrest.nodes.get(167, log);
// garganrest.nodes.delete(167, log);
// garganrest.nodes.get({
// pagination_offset: 0,
// pagination_limit: 10,
// type: ['DOCUMENT'],
// parent_id: 2,
// }, log);
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
{% load staticfiles %} {% load staticfiles %}
<link rel="stylesheet" href="{% static "css/bootstrap.css" %}"> <link rel="stylesheet" href="{% static "css/bootstrap.css" %}">
<script type="text/javascript" src="{% static "js/jquery/jquery.min.js" %}"></script> <script type="text/javascript" src="{% static "js/jquery/jquery.min.js" %}"></script>
<script type="text/javascript" src="{% static "js/gargantext/garganrest.js" %}"></script>
<link rel="stylesheet" href="http://code.jquery.com/ui/1.11.2/themes/smoothness/jquery-ui.css"> <link rel="stylesheet" href="http://code.jquery.com/ui/1.11.2/themes/smoothness/jquery-ui.css">
<script type="text/javascript" src="{% static "js/morris.min.js" %}"></script> <script type="text/javascript" src="{% static "js/morris.min.js" %}"></script>
...@@ -75,22 +76,27 @@ ...@@ -75,22 +76,27 @@
{{ key }} {{ key }}
<ul> <ul>
{% for corpus in corpora %} {% for corpus in corpora %}
<li> <li id="corpus_{{corpus.id}}">
{% ifequal corpus.processing 1 %} {% ifequal corpus.processing 1 %}
{{corpus.name}}: {{corpus.name}}:
<img width="20px" src="{% static "js/libs/img2/loading-bar.gif" %}"> <img width="20px" src="{% static "js/libs/img2/loading-bar.gif" %}">
Processing, drink a cup of tea, and refresh the page :) Processing, drink a cup of tea, and refresh the page :)
{% else %} {% else %}
<a href="/projects/{{project.id}}/corpora/{{corpus.id}}"> {{corpus.name}} , {{ corpus.count }} documents</a> <a href="/projects/{{project.id}}/corpora/{{corpus.id}}">
{{corpus.name}}, {{ corpus.count }} documents {{ corpus.status_message }}
</a>
{% endifequal %} {% endifequal %}
<button type="button" class="btn btn-xs btn-default" data-container="body" data-toggle="popover" data-placement="bottom" <button type="button" class="btn btn-xs btn-default" data-container="body" data-toggle="popover" data-placement="bottom"
data-content=' data-content="
<ul> <ul>
<li> Rename </li> <li>Rename</li>
<li> Add new documents </li> <li>Add new documents</li>
<li><a href="/delete/{{corpus.id}}">Delete</a></li> <li onclick=&quot;
garganrest.nodes.delete({{corpus.id}}, function(){$('#corpus_'+{{corpus.id}}).remove()});
$(this).parent().parent().remove();
&quot;><a href=&quot;#&quot;>Delete</a></li>
</ul> </ul>
'>Manage</button> ">Manage</button>
</li> </li>
{% endfor %} {% endfor %}
</ul> </ul>
...@@ -104,7 +110,9 @@ ...@@ -104,7 +110,9 @@
<h3> <h3>
<a href="/projects/{{project.id}}/corpora/{{corpus.id}}">{{corpus.name}}</a> <a href="/projects/{{project.id}}/corpora/{{corpus.id}}">{{corpus.name}}</a>
</h3> </h3>
<h4>{{ corpus.count }} Documents </h4> <h4>
{{ corpus.count }} documents
</h4>
<h5>Activity:</h5> <h5>Activity:</h5>
<div class="chart" data-percent="73">73%</div> <div class="chart" data-percent="73">73%</div>
</div> </div>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment