Commit d9d93ae0 authored by delanoe's avatar delanoe

Merge remote-tracking branch 'origin/romain-testing' into testing

parents 4ef20306 5f4a09a7
......@@ -15,7 +15,7 @@
{'docId': $rootScope.docId},
function(data, responseHeaders) {
$scope.authors = data.authors;
$scope.journal = data.journal;
$scope.source = data.source;
$scope.publication_date = data.publication_date;
//$scope.current_page_number = data.current_page_number;
//$scope.last_page_number = data.last_page_number;
......@@ -23,25 +23,34 @@
$rootScope.docId = data.id;
$rootScope.full_text = data.full_text;
$rootScope.abstract_text = data.abstract_text;
console.log("annotations.document.DocController.getannotations")
// GET the annotationss
NgramListHttpService.get(
{
'corpusId': $rootScope.corpusId,
'docId': $rootScope.docId
},
function(data) {
$rootScope.annotations = data[$rootScope.corpusId.toString()][$rootScope.docId.toString()];
// eg id => 'MAPLIST'
$rootScope.lists = data[$rootScope.corpusId.toString()].lists;
// inverted 'MAPLIST' => id
$rootScope.listIds = _.invert($rootScope.lists)
$scope.dataLoading = false ;
},
function(data) {
console.error("unable to get the list of ngrams");
}
);
$rootScope.workflow_finished = data.corpus_status['complete'] ;
console.log("workflow status", $rootScope.workflow_finished)
if ($scope.workflow_finished) {
console.log("annotations.document.DocController.getannotations")
// GET the annotations
NgramListHttpService.get(
{
'corpusId': $rootScope.corpusId,
'docId': $rootScope.docId
},
function(data) {
$rootScope.annotations = data[$rootScope.corpusId.toString()][$rootScope.docId.toString()];
// eg id => 'MAPLIST'
$rootScope.lists = data[$rootScope.corpusId.toString()].lists;
// inverted 'MAPLIST' => id
$rootScope.listIds = _.invert($rootScope.lists)
$scope.dataLoading = false ;
},
function(data) {
console.error("unable to get the list of ngrams");
}
);
}
else {
$scope.dataLoading = false ;
}
});
......
......@@ -24,7 +24,7 @@
<div id="annotationsApp" ng-cloak>
<div class="container-fluid">
<div class="row-fluid main-panel" ng-controller="NGramHighlightController">
<div class="col-md-4 col-xs-4 tabbable words-panel">
<div ng-if="workflow_finished" class="col-md-4 col-xs-4 tabbable words-panel">
<div class="list-selector">
<h5>Select highlighted list(s)
<select class="selectpicker" multiple ng-change="activeListsChange()" ng-model="lists" ng-controller="ActiveListsController">
......@@ -89,7 +89,7 @@
</div>
<div class="row-fluid">
<ul class="list-group clearfix">
<li class="list-group-item small"><span class="badge">journal</span>{[{journal}]}</li>
<li class="list-group-item small"><span class="badge">source</span>{[{source}]}</li>
<li class="list-group-item small"><span class="badge">authors</span>{[{authors}]}</li>
<li class="list-group-item small"><span class="badge">date</span>{[{publication_date}]}</li>
</ul>
......@@ -108,12 +108,14 @@
<span class="badge">abstract</span>
</div>
<p id="abstract-text" class="text-container">
{[{abstract_text}]}
<div ng-if="abstract_text == null" class="alert alert-info small" role="alert">Empty abstract text</div>
</p>
<div ng-if="full_text != null">
<span class="badge">full article</span>
</div>
<p id="full-text" class="text-container">
{[{full_text}]}
<div ng-if="full_text == null" class="alert alert-info small" role="alert">Empty full text</div>
</p>
</div>
......
......@@ -172,8 +172,9 @@ class Document(APIView):
def get(self, request, doc_id):
"""Document by ID"""
# implicit global session
node = session.query(Node).filter(Node.id == doc_id).first()
corpus = session.query(Node).filter(Node.id == node.parent_id).first()
corpus_workflow_status = corpus.hyperdata['statuses'][0]
if node is None:
raise APIException('This node does not exist', 404)
......@@ -185,9 +186,10 @@ class Document(APIView):
pub_date = node.hyperdata.get('publication_date')
data = {
'corpus_status': corpus_workflow_status,
'title': node.hyperdata.get('title'),
'authors': node.hyperdata.get('authors'),
'journal': node.hyperdata.get('journal'),
'source': node.hyperdata.get('source'),
'publication_date': pub_date,
'full_text': node.hyperdata.get('full_text'),
'abstract_text': node.hyperdata.get('abstract'),
......
......@@ -382,6 +382,9 @@ BATCH_NGRAMSEXTRACTION_SIZE = 3000 # how many new node-ngram relations before
QUERY_SIZE_N_MAX = 1000
QUERY_SIZE_N_DEFAULT = 1000
# Refresh corpora workflow status for project view's progressbar
PROJECT_VIEW_REFRESH_INTERVAL = 3000 # 1st refresh in ms (then increasing arithmetically)
PROJECT_VIEW_MAX_REFRESH_ATTEMPTS = 10 # how many times before we give up
# ------------------------------------------------------------------------------
# Graph <=> nodes API parameters
......
......@@ -46,6 +46,9 @@ class ModelCache(dict):
class Cache:
def __getattr__(self, key):
'''
lazy init of new modelcaches: self.Node, self.User...
'''
try:
model = getattr(models, key)
except AttributeError:
......@@ -54,4 +57,15 @@ class Cache:
setattr(self, key, modelcache)
return modelcache
def clean_all(self):
    '''
    re-init any existing modelcaches

    Every attribute currently stored on this Cache instance is replaced
    by a fresh, empty ModelCache built for the same model, so that no
    previously cached entries can be served again.

    NB: assumes every instance attribute is a ModelCache exposing
        `_model` (that is what the lazy init in __getattr__ stores)
        -- TODO confirm no other attribute is ever set on a Cache.
    '''
    # iterate over a snapshot: attributes get rebound inside the loop
    for modelname, old_modelcache in list(self.__dict__.items()):
        # operate on self (not on the module-level `cache` singleton)
        # so the method is correct for any Cache instance
        setattr(self, modelname, ModelCache(old_modelcache._model))
cache = Cache()
......@@ -32,6 +32,10 @@ def requires_auth(func):
from gargantext.util.db import session
session.rollback()
print("=== session rollback ok!")
# re init the global cache (it must still have detached instances)
from gargantext.util.db_cache import cache
cache.clean_all()
print("=== cache reinit ok!")
# and relogin for safety
url = '/auth/login/?next=%s' % urlencode(request.path)
return redirect(url)
......
......@@ -23,6 +23,8 @@ from gargantext.util.db import bulk_insert_ifnotexists # £TODO debug
from sqlalchemy import distinct
from re import findall, IGNORECASE
from gargantext.util.toolchain.main import t # timer
# TODO from gargantext.constants import LIST_OF_KEYS_TO_INDEX = title, abstract
def index_new_ngrams(ngram_ids, corpus, keys=('title', 'abstract', )):
......@@ -40,6 +42,8 @@ def index_new_ngrams(ngram_ids, corpus, keys=('title', 'abstract', )):
@param corpus: the CORPUS node
@param keys: the hyperdata fields to index
# FIXME too slow: index_new_ngrams should be faster via tsvector on DB
"""
# retrieve *all* the ngrams from our list
......@@ -56,7 +60,11 @@ def index_new_ngrams(ngram_ids, corpus, keys=('title', 'abstract', )):
node_ngram_to_write = {}
# loop through the docs and their text fields
for doc in corpus.children('DOCUMENT'):
for (i, doc) in enumerate(corpus.children('DOCUMENT')):
if (i % 100 == 0):
print('CORPUS #%d: [%s] ngrams_addition: doc %i' % (corpus.id, t(), i))
print()
# a new empty counting subdict
node_ngram_to_write[doc.id] = {}
......
......@@ -373,6 +373,10 @@ class ListChange(APIView):
def put(self, request):
"""
Adds one or more ngrams to a list.
NB: we assume ngram_ids don't contain subforms !!
(this assumption is not checked here because it would be
slow: if you want to add a subform, send the mainform's id)
"""
# union of items ----------------------------
new_list = self.base_list + self.change_list
......
......@@ -17,7 +17,11 @@ _node_available_fields = ['id', 'parent_id', 'name', 'typename', 'hyperdata', 'n
_node_default_fields = ['id', 'parent_id', 'name', 'typename']
_node_available_types = NODETYPES
#_hyperdata_available_fields = ['title', 'resourcetype']
_hyperdata_available_fields = ['title', 'source', 'abstract', 'statuses',
'language_name', 'language_iso3','language_iso2','language_id',
'publication_date',
'publication_year','publication_month', 'publication_day',
'publication_hour','publication_minute','publication_second']
#_node_available_formats = ['json', 'csv', 'bibex']
......@@ -32,24 +36,38 @@ def _query_nodes(request, node_id=None):
# parameters validation
# fixme: this validation does not allow custom keys in url (eg '?name=' for rename action)
parameters = get_parameters(request)
parameters = validate(parameters, {'type': dict, 'items': {
'formated': {'type': str, 'required' : False, 'default': 'json'},
# 'hyperdata': {'type': list, 'default' : _hyperdata_available_fields, 'items': {
# 'type': str, 'range' : _node_available_fields,
# }},
'pagination_limit': {'type': int, 'default': 10},
'pagination_offset': {'type': int, 'default': 0},
'fields': {'type': list, 'default': _node_default_fields, 'items': {
'type': str, 'range': _node_available_fields,
}},
# choice of hyperdata fields
'hyperdata_filter': {'type': list, 'required':False,
'items': {
'type': str, 'range': _hyperdata_available_fields,
}},
# optional filtering parameters
'types': {'type': list, 'required': False, 'items': {
'type': str, 'range': _node_available_types,
}},
'parent_id': {'type': int, 'required': False},
}})
# debug
# print('PARAMS', parameters)
# additional validation for hyperdata_filter
if (('hyperdata_filter' in parameters)
and (not ('hyperdata' in parameters['fields']))):
raise ValidationException("Using the hyperdata_filter filter requires fields[]=hyperdata")
# start the query
query = user.nodes()
# filter by id
if node_id is not None:
query = query.filter(Node.id == node_id)
......@@ -63,6 +81,7 @@ def _query_nodes(request, node_id=None):
count = query.count()
# order
query = query.order_by(Node.hyperdata['publication_date'], Node.id)
# paginate the query
if parameters['pagination_limit'] == -1:
query = query[parameters['pagination_offset']:]
......@@ -72,8 +91,54 @@ def _query_nodes(request, node_id=None):
parameters['pagination_limit']
]
# return the result!
# (the receiver function does the filtering of fields and hyperdata_filter)
return parameters, query, count
def _filter_node_fields(node, parameters):
"""
Filters the properties of a Node object before sending them to response
@parameters: a dict comming from get_parameters
that must only contain a 'fields' key
Usually the dict looks like this :
{'fields': ['parent_id', 'id', 'name', 'typename', 'hyperdata'],
'hyperdata_filter': ['title'], 'parent_id': '55054',
'types': ['DOCUMENT'], 'pagination_limit': '15'}
History:
1) this used to be single line:
res = {field: getattr(node, field) for field in parameters['fields']}
2) it was in both NodeResource.get() and NodeListResource.get()
3) it's now expanded to add support for parameters['hyperdata_filter']
- if absent, entire hyperdata is considered as one field
(as before)
- if present, the hyperdata subfields are picked
(new)
"""
# FIXME all this filtering
# could be done in rawsql
# (in _query_nodes)
result = {}
for field in parameters['fields']:
# normal field or entire hyperdata
if field != 'hyperdata' or (not 'hyperdata_filter' in parameters):
result[field] = getattr(node,field)
# hyperdata if needs to be filtered
else:
this_filtered_hyp = {}
for hfield in parameters['hyperdata_filter']:
if hfield in node.hyperdata:
this_filtered_hyp[hfield] = node.hyperdata[hfield]
result['hyperdata'] = this_filtered_hyp
return result
class Status(APIView):
'''API endpoint that represent the current status of the node'''
renderer_classes = (JSONRenderer, BrowsableAPIRenderer)
......@@ -84,17 +149,27 @@ class Status(APIView):
return HttpResponse('Unauthorized', status=401)
user = cache.User[request.user.id]
check_rights(request, node_id)
# check_rights(request, node_id)
# I commented check_rights because filter on user_id below does the job
node = session.query(Node).filter(Node.id == node_id, Node.user_id== user.id).first()
if node is None:
return Response({"detail":"Node not Found for this user"}, status=HTTP_404_NOT_FOUND)
else:
context = format_response(node, [n for n in node.children()])
# FIXME using the more generic strategy ---------------------------
# context = format_response(node, [n for n in node.children()])
# or perhaps ? context = format_response(None, [node])
# -----------------------------------------------------------------
# using a more direct strategy
context = {}
try:
context["status"] = node.hyperdata["statuses"]
context["statuses"] = node.hyperdata["statuses"]
except KeyError:
context["status"] = None
context["statuses"] = None
return Response(context)
def post(self, request, data):
'''create a new status for node'''
if not request.user.is_authenticated():
......@@ -102,17 +177,17 @@ class Status(APIView):
return HttpResponse('Unauthorized', status=401)
raise NotImplementedError
def put(self, request, data):
'''update status for node'''
if not request.user.is_authenticated():
# can't use @requires_auth because of positional 'self' within class
return HttpResponse('Unauthorized', status=401)
user = cache.User[request.user.id]
check_rights(request, node_id)
node = session.query(Node).filter(Node.id == node_id).first()
# check_rights(request, node_id)
node = session.query(Node).filter(Node.id == node_id, Node.user_id== user.id).first()
raise NotImplementedError
......@@ -126,8 +201,8 @@ class Status(APIView):
return HttpResponse('Unauthorized', status=401)
user = cache.User[request.user.id]
check_rights(request, node_id)
node = session.query(Node).filter(Node.id == node_id).first()
# check_rights(request, node_id)
node = session.query(Node).filter(Node.id == node_id, Node.user_id == user.id).first()
if node is None:
return Response({"detail":"Node not Found"}, status=HTTP_404_NOT_FOUND)
node.hyperdata["status"] = []
......@@ -150,16 +225,22 @@ class NodeListResource(APIView):
parameters, query, count = _query_nodes(request)
if parameters['formated'] == 'json':
records_array = []
add_record = records_array.append
# FIXME filter in rawsql in _query_nodes
for node in query:
add_record(_filter_node_fields(node, parameters))
return JsonHttpResponse({
'parameters': parameters,
'count': count,
'records': [
{ field: getattr(node, field) for field in parameters['fields'] }
for node in query
]
'records': records_array
})
elif parameters['formated'] == 'csv':
# TODO add support for fields and hyperdata_filter
response = HttpResponse(content_type='text/csv')
response['Content-Disposition'] = 'attachment; filename="Gargantext_Corpus.csv"'
......@@ -305,9 +386,8 @@ class NodeResource(APIView):
if not len(query):
raise Http404()
node = query[0]
return JsonHttpResponse({
field: getattr(node, field) for field in parameters['fields']
})
return JsonHttpResponse(_filter_node_fields(node, parameters))
# contains a check on user.id (within _query_nodes)
def delete(self, request, node_id):
......@@ -415,11 +495,11 @@ class CorpusFavorites(APIView):
(will test if docs 53 and 54 are among the favorites of corpus 2)
(returns the intersection of fav docs with [53,54])
"""
if not request.user.is_authenticated():
# can't use @requires_auth because of positional 'self' within class
return HttpResponse('Unauthorized', status=401)
fav_node = self._get_fav_node(corpus_id)
req_params = validate(
......
......@@ -54,6 +54,9 @@ def overview(request):
# projects owned by the user's contacts
'common_users': (contact for contact, projects in contacts_projects),
'common_projects': sum((projects for contact, projects in contacts_projects), []),
# status refreshing params (when active workflows)
'status_refresh_initial_interval': PROJECT_VIEW_REFRESH_INTERVAL,
'status_refresh_max_attempts': PROJECT_VIEW_MAX_REFRESH_ATTEMPTS,
},
)
......
......@@ -592,7 +592,7 @@ function Main_test(Data) {
// by default we always decide to search in the title
matchInTexts = [record.rawtitle]
// if box is checked we'll also search in the abstracts
// if box is checked we'll also search in the abstracts (todo: via ajax)
if (doAbstractsSearch) {
if (typeof record.hyperdata.abstract !== 'undefined') {
matchInTexts.push(record.hyperdata.abstract)
......@@ -630,9 +630,23 @@ function Main_test(Data) {
MyTable.data('dynatable').sorts.functions["signatureSort"] = makeAlphaSortFunctionOnProperty('signature')
MyTable.data('dynatable').sorts.functions["sourceSort"] = function sourceSort (rec1, rec2, attr, direction) {
    // like rawtitle but on a nested property (record.hyperdata.source)
    // records without hyperdata or without a source are tolerated
    var src1 = (rec1.hyperdata && rec1.hyperdata.source) || null
    var src2 = (rec2.hyperdata && rec2.hyperdata.source) || null

    // declared locally (an undeclared cmp would leak as a global)
    var cmp = 0

    if (src1 && src2) {
        // the alphabetic sort
        if (direction == 1) cmp = src1.localeCompare(src2)
        else                cmp = src2.localeCompare(src1)
    }
    else if (src1) {
        // only rec1 has a source
        cmp = direction
    }
    else if (src2) {
        // only rec2 has a source
        cmp = -direction
    }

    // second level sorting for ties, same rule as in
    // makeAlphaSortFunctionOnProperty
    // (RecDict is presumably record id => index in the records array,
    //  to be confirmed where RecDict is built)
    if (cmp == 0) cmp = RecDict[rec1.id] < RecDict[rec2.id] ? -1 : 1

    // a sort function must always return its comparison result
    return cmp
}
// hook on page change
MyTable.bind('dynatable:page:set', tidyAfterPageSet)
......@@ -736,9 +750,20 @@ function makeAlphaSortFunctionOnProperty(property) {
return function (rec1,rec2, attr, direction) {
var cmp = null
// the alphabetic sort
if (direction == 1) cmp = rec1[property].localeCompare(rec2[property])
else cmp = rec2[property].localeCompare(rec1[property])
if (rec1[property] && rec2[property]) {
// the alphabetic sort
if (direction == 1) cmp = rec1[property].localeCompare(rec2[property])
else cmp = rec2[property].localeCompare(rec1[property])
}
else if (rec1[property]) {
cmp = direction
}
else if (rec2[property]) {
cmp = -direction
}
else {
cmp = 0
}
// second level sorting on key = id in records array
// (this one voluntarily not reversible by direction
......@@ -768,7 +793,10 @@ function tidyAfterPageSet() {
$.ajax({
url: '/api/nodes?types[]=DOCUMENT&pagination_limit=-1&parent_id='
+ corpus_id
+'&fields[]=parent_id&fields[]=id&fields[]=name&fields[]=typename&fields[]=hyperdata',
+'&fields[]=parent_id&fields[]=id&fields[]=name&fields[]=typename&fields[]=hyperdata'
// +'&hyperdata_filter[]=title&hyperdata_filter[]=source&hyperdata_filter[]=language_iso2'
+'&hyperdata_filter[]=title&hyperdata_filter[]=source&hyperdata_filter[]=language_iso2&hyperdata_filter[]=abstract'
+'&hyperdata_filter[]=publication_year&hyperdata_filter[]=publication_month&hyperdata_filter[]=publication_day',
success: function(maindata){
// unfortunately favorites info is in a separate request (other nodes)
$.ajax({
......@@ -838,6 +866,10 @@ $.ajax({
rec.hyperdata.publication_day
)
// and a bool property for remote search results
// (will be updated by ajax)
rec.matched_remote_search = false // TODO use it
}
AjaxRecords = maindata.records; // backup-ing in global variable!
......
......@@ -131,7 +131,7 @@ em {
&nbsp;
<!-- save/import button -->
<button id="ImportListOrSaveAll" class="btn btn-warning" style="font-size:120%"
onclick="$('#csvimport').modal('show');">
onclick="$('#csvimport').modal('show'); document.getElementById('importsubmit').disabled = false ;">
<b>Import a Termlist</b>
</button>
</div>
......@@ -437,13 +437,22 @@ function listmergeCsvPost(theFile){
// reload after 3s
setTimeout("location.reload(true)", 3000);
},
error: function(result) {
my_html = '<h3 style="color:red">Error</h3>'
my_html += "<p class='note'>please correct your CSV file and retry</p>"
my_html += "<p>"+ result.responseJSON['err']+"</p>"
error: function(result, t) {
if (t != 'timeout') {
my_html = '<h3 style="color:red">Error</h3>'
my_html += "<p class='note'>please correct your CSV file and retry</p>"
my_html += "<p>"+ result.responseJSON['err']+"</p>"
}
else {
my_html = '<h3 style="color:red">Timeout</h3>'
my_html += "<p>The CSV import timed out.</p>"
my_html += "<p>(This bug is currently being fixed. <br/>The import and indexation are now continuing in background on our servers. Results will show in a few minutes.)</p>"
document.getElementById('importsubmit').disabled = true
}
$('#formanswer').html(my_html);
console.error(result);
},
timeout: 15000 // 15s
});
}
};
......
......@@ -21,12 +21,12 @@
<div class="col-md-3 col-md-offset-2">
<div id="monthly-move-chart">
<center>
Select a time range in the chart with blue bars to zoom in
<p align="center">
Select a time range in the chart with blue bars to zoom in
<p align="center">
<!--<a class="btn btn-xs btn-default" role="button" href="/chart/corpus/{{ corpus.id }}/data.csv">Save</a>-->
<a class="btn btn-xs btn-default" href="javascript:volumeChart.filterAll();dc.redrawAll();">Reset</a></p>
<div class="clearfix"></div>
<a class="btn btn-xs btn-default" href="javascript:volumeChart.filterAll();dc.redrawAll();">Reset</a>
</p>
<div class="clearfix"></div>
</center>
</div>
......@@ -68,10 +68,14 @@
<span style="font-size:70%;">
<span class="glyphicon glyphicon-filter" aria-hidden="true"></span>
<!-- Used by the #doubleSearch associated function -->
<input title="Search in Titles" id="searchTI" name="searchTI" type="checkbox" checked onclick="return false">TI&nbsp;
<span class="glyphicon glyphicon-filter" aria-hidden="true"></span>
<input title="Search in Abstracts" id="searchAB" name="searchAB" type="checkbox">AB
</span>&nbsp;&nbsp;
<input title="Search in Titles" id="searchTI" name="searchTI" type="checkbox" checked onclick="return false">
TI&nbsp;
</input>
<span class="glyphicon glyphicon-filter" aria-hidden="true"></span>
<input title="Search in Abstracts" id="searchAB" name="searchAB" type="checkbox">
AB&nbsp;&nbsp;
</input>
</span>
<span class="glyphicon glyphicon-filter" aria-hidden="true"></span>
<select id="docFilter" name="docFilter">
<option value="filter_all">All</option>
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment