Commit bfca4996 authored by delanoe

Merge branch 'romain-goodies' into unstable

parents fb649cfc d4b1b3a9
......@@ -616,6 +616,8 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
'map': UnweightedList,
'groupings': Translations }
if any of those lists is absent it is considered empty
@param onto_corpus: a corpus node to get the *old* lists
@param del_originals: an array of original wordlists to ignore
......@@ -694,17 +696,19 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
for list_set in [old_lists, new_lists]:
for lid, info in enumerate(linfos):
list_type = info['key']
# we use the fact that lids are ordered ints...
for ng_id in list_set[list_type].items:
if ng_id not in resolved_memberships:
resolved_memberships[ng_id] = lid
else:
# ...now resolving is simply taking the max
# stop < main < map
resolved_memberships[ng_id] = max(
lid,
resolved_memberships[ng_id]
)
# if you don't want to merge one list just don't put it in new_lists
if list_type in list_set:
# we use the fact that lids are ordered ints...
for ng_id in list_set[list_type].items:
if ng_id not in resolved_memberships:
resolved_memberships[ng_id] = lid
else:
# ...now resolving is simply taking the max
# stop < main < map
resolved_memberships[ng_id] = max(
lid,
resolved_memberships[ng_id]
)
# now each ngram is only in its most important list
# -------------------------------------------------
# NB temporarily map items are not in main anymore
......@@ -714,9 +718,6 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
# after we merge the groups
del old_lists
del new_lists['stop']
del new_lists['main']
del new_lists['map']
# ======== Merging old and new groups =========
# get the arcs already in the target DB (directed couples)
......
......@@ -30,9 +30,10 @@ class List(APIView):
class CSVLists(APIView):
"""
For CSV exports of all lists of a corpus
GET => CSV exports of all lists of a corpus
Or CSV import into existing lists as "patch"
POST => CSV import into existing lists as "post"
PATCH => internal import into existing lists (?POSSIBILITY put it in another class ?)
"""
def get(self, request):
params = get_parameters(request)
......@@ -47,23 +48,17 @@ class CSVLists(APIView):
export_ngramlists(corpus_node, fname=response, titles=True)
return response
def post(self,request):
"""
Merge the lists of a corpus with other lists from a CSV source
or from another corpus
params in request.GET:
corpus: the corpus whose lists are getting patched
onto_corpus: the corpus whose lists are getting patched
params in request.FILES:
csvsource: the csv file
or in get
dbsource: another corpus instead of the csvfile
(? this last option should perhaps not be in CSVLists ?)
NB: not using PATCH because we'll need POST file upload
csvfile: the csv file
/!\ We assume we checked the file size client-side before upload
"""
......@@ -72,7 +67,7 @@ class CSVLists(APIView):
res.status_code = 401
return res
# this time the corpus param is the one with the target lists to be patched
# the corpus with the target lists to be patched
params = get_parameters(request)
corpus_id = int(params.pop("onto_corpus"))
corpus_node = cache.Node[corpus_id]
......@@ -90,6 +85,8 @@ class CSVLists(APIView):
# import the csv
try:
new_lists = import_ngramlists(csv_file)
print("===============================!!!")
print(new_lists)
del csv_file
# merge the new_lists onto those of the target corpus
......@@ -103,6 +100,66 @@ class CSVLists(APIView):
'err': str(e),
}, 400)
def patch(self,request):
    """
    Merge the lists of another corpus (internal source) into the lists
    of a target corpus.

    A copy of POST (merging list) but with the source == just an internal corpus_id

    params in request.GET:
        onto_corpus:  the corpus whose lists are getting patched
        from_corpus:  the corpus from which we take the source lists to merge in
        todo:         comma-separated list types ("map", "main", "stop") to merge in
                      (the groupings of the source corpus are always merged too)

    Responses:
        200 + {'log': <merge log>}   when the merge succeeded
        400 + {'err': <message>}     on bad parameters or when the merge failed
        401 + "Unauthorized"         when the user is not logged in or does not
                                     own one of the two corpora
    """
    if not request.user.is_authenticated():
        res = HttpResponse("Unauthorized")
        res.status_code = 401
        return res

    params = get_parameters(request)

    # the corpus with the target lists to be patched
    corpus_id = int(params.pop("onto_corpus"))
    corpus_node = cache.Node[corpus_id]

    if request.user.id != corpus_node.user_id:
        res = HttpResponse("Unauthorized")
        res.status_code = 401
        return res

    list_types = {'map':'MAPLIST', 'main':'MAINLIST', 'stop':'STOPLIST'}

    # validate the source parameters up front so that a bad request gets
    # the JSON 'err' answer the client displays, instead of a server error
    try:
        source_corpus_id = int(params.pop("from_corpus"))
        # ex: "map,stop" => ['map', 'stop'] (empty entries are dropped,
        #     an empty todo means only the groupings are merged)
        todo_lists = [key for key in params.pop("todo").split(',') if key]
    except (KeyError, ValueError):
        return JsonHttpResponse({
            'err': "Expected parameters: from_corpus=<corpus id>&todo=<list types>",
            }, 400)

    unknown_types = [key for key in todo_lists if key not in list_types]
    if unknown_types:
        return JsonHttpResponse({
            'err': "Unknown list type(s): %s" % ", ".join(unknown_types),
            }, 400)

    # internal DB retrieve source_lists
    source_node = cache.Node[source_corpus_id]

    # the source lists must belong to the requesting user too: the import
    # form only offers the user's own corpora, so any other id is refused
    if request.user.id != source_node.user_id:
        res = HttpResponse("Unauthorized")
        res.status_code = 401
        return res

    # attempt to merge and send response
    try:
        source_lists = {}
        for key in todo_lists:
            # presumably .first() returns None when the corpus has no such
            # child node (query-style API) -- TODO confirm
            list_node = source_node.children(list_types[key]).first()
            if list_node is None:
                raise ValueError(
                    "Source corpus %i has no %s" % (source_corpus_id, list_types[key])
                )
            source_lists[key] = UnweightedList(list_node.id)

        # add the groupings too
        group_node = source_node.children("GROUPLIST").first()
        if group_node is None:
            raise ValueError("Source corpus %i has no GROUPLIST" % source_corpus_id)
        source_lists['groupings'] = Translations(group_node.id)

        # merge the source_lists onto those of the target corpus
        log_msg = merge_ngramlists(source_lists, onto_corpus=corpus_node)
        return JsonHttpResponse({
            'log': log_msg,
            }, 200)

    except Exception as e:
        return JsonHttpResponse({
            'err': str(e),
            }, 400)
class GroupChange(APIView):
......
......@@ -36,6 +36,7 @@ urlpatterns = [ url(r'^nodes$' , nodes.NodeListResource.as_view()
, url(r'^ngramlists/import$', ngramlists.CSVLists.as_view() )
# same handling class as export (CSVLists)
# but this route used only for POST + file
# or PATCH + other corpus id
, url(r'^ngramlists/change$', ngramlists.ListChange.as_view() )
# add or remove ngram from a list
......
......@@ -22,6 +22,16 @@ def ngramtable(request, project_id, corpus_id):
# and the project just for project.id in corpusBannerTop
project = cache.Node[project_id]
# retrieve all corpora of this user for list import option
# POSSIBILITY: could do same task in ajax "only if needed"
# (use api for that when merged)
corpora_infos_q = (session.query(Node.id, Node.name)
.filter(Node.typename == "CORPUS")
.filter(Node.user_id == project.user_id))
# .filter(Node.id != corpus_id)
corpora_infos = corpora_infos_q.all()
# rendered page : terms.html
return render(
template_name = 'pages/corpora/terms.html',
......@@ -36,6 +46,7 @@ def ngramtable(request, project_id, corpus_id):
'view': 'terms',
# for the CSV import modal
'csvimportroute': "/api/ngramlists/import?onto_corpus=%i"% corpus.id
'importroute': "/api/ngramlists/import?onto_corpus=%i"% corpus.id,
'corporainfos' : corpora_infos
},
)
......@@ -9,11 +9,25 @@
<style>
#formatinfos-announce {
#corpuschoose {
max-width: 75%;
}
input[type="checkbox"].importcheck {
margin-right: 1em;
}
.announce {
font-size: 12px ;
padding-top: .5em;
}
#importsubmit {
margin-left: 25%;
padding: .3em .5em;
font-size: 120% ;
}
#formatinfos {
background-color: #CCC ;
font-size: 12px ;
......@@ -47,7 +61,7 @@
font-size: 14px;
}
#formatinfos em {
em {
font-weight:bold ;
}
......@@ -122,7 +136,7 @@
<br/>
<br/>
<!-- import icon -->
<span class="needsaveicon glyphicon glyphicon-import"></span>
<span class="glyphicon glyphicon-import"></span>
&nbsp;
<button id="ImportList" class="btn btn-warning" style="font-size:120%"
onclick="$('#csvimport').modal('show');">
......@@ -176,20 +190,43 @@
<h3 id="myModalLabel">Import a Termlist</h3>
</div>
<div class="modal-body" id="uploadform">
<form id="csvimportform"
onsubmit="return postCSV(event)"
<form id="importform"
onsubmit="return submitImport(event, this)"
enctype="multipart/form-data"
method="post">
{% csrf_token %}
<label>From another corpus:</label>
<select id="corpuschoose" name="corpuschoose">
<option selected value> -- select a corpus -- </option>
<!-- lists all corpora of this user -->
{% for corpusinfo in corporainfos %}
<option value="{{corpusinfo.id}}">{{corpusinfo.name}}</option>
{% endfor %}
</select>
<br/>
<p class="announce" onclick="toggleAdvancedCheckboxes()">
<span
id="corpusadvanced-icon"
class="glyphicon glyphicon-triangle-right"></span>
Advanced options
</p>
<div id="corpusadvanced" style="display:none;" class="input-group">
<input type="checkbox" class="importcheck" name="listtypes" id="listtypes-map" value="map" checked>import map terms</input>
<br/>
<input type="checkbox" class="importcheck" name="listtypes" id="listtypes-main" value="main">import normal terms</input>
<br/>
<input type="checkbox" class="importcheck" name="listtypes" id="listtypes-stop" value="stop">import stoplist terms</input>
</div>
<br/>
<br/>
<label>From a CSV on your disk:</label>
<input type="file" id="csvfile" accept="text/csv">
<input type="file" id="csvfile" name="csvfile" accept="text/csv">
<p id="formatinfos-announce">
<p class="announce" onclick="toggleFormatInfos()">
<span
id="formatinfos-icon"
class="glyphicon glyphicon-triangle-right"
onclick="toggleFormatInfos()"></span>
More infos about CSV expected format
class="glyphicon glyphicon-triangle-right"></span>
More info about the expected CSV format
</p>
<div id="formatinfos" style="display:none;">
<h4>Example table</h4>
......@@ -210,12 +247,9 @@
</ul>
</div>
<br/>
<label>From another corpus:</label>
<p>TODO</p>
<br/>
<input type="submit" class="btn btn-xs btn-info" id="csvsubmit" value="Submit" />
<input type="submit" class="btn btn-xs btn-info" id="importsubmit" value="Import and merge with current table" />
</form>
</div>
<div class="modal-footer" id="formanswer"></div>
......@@ -227,8 +261,34 @@
<!-- custom-lib for dynatable.js and dc.js -->
<script type="text/javascript" src="{% static "lib/gargantext/NGrams_dyna_chart_and_table.js" %}"></script>
<!-- import modal controllers -->
<script type="text/javascript">
var formatInfosOpen = false;
var corpusAdvancedOpen = false;
// declared here to enable inspection
var myFormData ;
function toggleAdvancedCheckboxes() {
    // Show or hide the "Advanced options" checkboxes of the import form
    // and flip the triangle icon in front of the title accordingly.
    // The current state is kept in the page-level flag corpusAdvancedOpen.
    var willBeOpen = !corpusAdvancedOpen

    // 1) the div holding the checkboxes
    if (willBeOpen) {
        $('#corpusadvanced').show()
    }
    else {
        $('#corpusadvanced').hide()
    }

    // 2) the icon: triangle-right when closed, triangle-bottom when open
    var iconClasses = $('#corpusadvanced-icon')[0].classList
    iconClasses.remove(willBeOpen ? 'glyphicon-triangle-right' : 'glyphicon-triangle-bottom')
    iconClasses.add(willBeOpen ? 'glyphicon-triangle-bottom' : 'glyphicon-triangle-right')

    // 3) remember the new state
    corpusAdvancedOpen = willBeOpen;
}
function toggleFormatInfos() {
// when already open => we close
......@@ -252,29 +312,103 @@ function toggleFormatInfos() {
/* merci c24b !
* Uses csvimportroute variable from the django template
* Uses importroute variable from the django template
* Ex: /api/ngramlists/import?onto_corpus=corpus_id
*
* Uses input#csvfile as source data.
*/
function postCSV(e){
/**
 * Submit handler of the import form.
 *
 * The user has 2 possible sources (exactly one must be provided):
 *   - a corpus chosen in select#corpuschoose => listmergeUpdate()
 *   - a file chosen in input#csvfile         => listmergeCsvPost()
 *
 * A status message is shown in #formanswer.
 *
 * @param e        the submit event (its default page reload is cancelled)
 * @param formElt  the form element, parsed into the page-level myFormData
 */
function submitImport(e, formElt){
    // don't do page reload of usual submits
    e.preventDefault()

    // parse the form (allows iteration like so: for (kv of myFormData))
    // NB: myFormData is declared at page level to enable inspection
    myFormData = new FormData(formElt);

    // user had 2 possibilities
    var theCorpus = myFormData.get("corpuschoose")
    var theFile = myFormData.get("csvfile")

    // an untouched file input can yield null or a file with an empty name
    var hasFile = Boolean(theFile && theFile.name)

    var showMessage = function(msg) {
        $('#formanswer').html(
            '<p style="color:#777;font-style:italic">'+msg+'</p>'
        );
    }

    // NB: the "in progress" messages are shown *before* calling the handlers
    //     because a handler may itself write into #formanswer synchronously
    //     (ex: listmergeCsvPost reports a file that is too big) and that
    //     answer must not be overwritten afterwards.
    if (theCorpus && hasFile) {
        // can't select both!
        showMessage("Please select a source corpus <em>or</em> choose a source file (not both!).")
    }
    else if (hasFile) {
        showMessage("CSV import in progress...")
        listmergeCsvPost(theFile)
    }
    else if (theCorpus) {
        showMessage("Internal list import in progress...")
        listmergeUpdate(myFormData)
    }
    else {
        showMessage("Please provide an input source!")
        console.warn('Ignoring "submit": no provided input')
    }
}
/**
 * Merge the lists of another corpus into the current one (internal import).
 *
 * Sends a PATCH request to the import route, with all params in the url:
 *   - from_corpus: the source corpus id, read from the "corpuschoose" entry
 *   - todo:        the checked "listtypes" entries, comma-separated
 *
 * The answer is shown in #formanswer; on success the page reloads after 3s.
 *
 * Uses importroute variable from the django template
 * Ex: /api/ngramlists/import?onto_corpus=corpus_id
 *
 * @param aFormData  the FormData parsed from the import form
 */
function listmergeUpdate(aFormData){
    // get the selected source corpus
    // ex: "13308"
    var sourceCorpusId = aFormData.get("corpuschoose")

    // get checkbox entries into an array
    // ex: ["map", "stop"]
    var todoLists = aFormData.getAll("listtypes")

    // base url ex: /api/ngramlists/import?onto_corpus=123
    var theUrl = "{{importroute | safe}}"

    // all params are added in the url like a GET
    // (values are url-encoded, the comma separator of todo stays literal)
    theUrl += "&from_corpus="+encodeURIComponent(sourceCorpusId)
    theUrl += "&todo="+todoLists.map(encodeURIComponent).join(',')

    // result url looks like this : /api/ngramlists/import?onto_corpus=2&from_corpus=13308&todo=map,stop

    // server-provided text is escaped before being inserted as html
    var escapeHtml = function(txt) {
        return $('<div/>').text(String(txt)).html()
    }

    // Update request
    $.ajax({
        url: theUrl,
        type: 'PATCH',
        async: true,
        beforeSend: function(xhr) {
            xhr.setRequestHeader("X-CSRFToken", getCookie("csrftoken"));
        },
        success: function(response) {
            var logHtml = escapeHtml(response['log'] || '').replace(/\n/g, '<br/>')
            var my_html = '<h3 style="color:green">IMPORT OK</h3>'
            my_html += "<p class='note'>" + logHtml + "</p>"
            my_html += "<p>(this page will reload in 3s)</p>"
            $('#formanswer').html(my_html);
            console.log(response) ;

            // reload after 3s
            setTimeout(function() { location.reload(true) }, 3000);
        },
        error: function(result) {
            // the answer is not always JSON (ex: the plain text "Unauthorized"
            // response) so responseJSON can be undefined
            var errMsg = (result.responseJSON && result.responseJSON['err'])
                          || result.responseText
                          || result.statusText
                          || 'Unknown error'
            var my_html = '<h3 style="color:red">Error</h3>'
            my_html += "<p>"+ escapeHtml(errMsg) +"</p>"
            $('#formanswer').html(my_html);
            console.error(result);
        },
    });
};
function listmergeCsvPost(theFile){
// 2MB ≈ 70000 ngrams
var max_size = 2097152
// we take it straight from the input element
theFile = $('input#csvfile')[0].files[0]
// debug
// console.log(theFile.name, "size", theFile.size, theFile.lastModifiedDate)
if (! theFile) {
console.warn('Ignoring "submit": no provided file')
return false
}
else if (theFile.size > max_size) {
if (theFile.size > max_size) {
console.warn('Ignoring "submit": file is too big')
$('#formanswer').html(
'The import failed: your file is too big ('+max_size/1024+'kB max).'
......@@ -283,13 +417,13 @@ function postCSV(e){
}
// normal case
else {
// append into an empty form (or fixme: initialize it using form element)
// append into an empty form (all other infos from old form were not for us)
var myFileFormData = new FormData();
myFileFormData.append("csvfile", theFile)
//postCorpusFile
$.ajax({
url: "{{csvimportroute | safe}}",
url: "{{importroute | safe}}",
type: 'POST',
async: true,
contentType: false,
......@@ -315,7 +449,6 @@ function postCSV(e){
console.error(result);
},
});
$('#formanswer').html('CSV import in Progress');
}
};
</script>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment