Commit 0ca0bf13 authored by Romain Loth

[OK] Import dialog on terms page + merge function logging (TODO better CSS and...

[OK] Import dialog on terms page + merge function logging (TODO: better CSS and possibility to import directly from another corpus)
parent 0acab158
......@@ -313,7 +313,7 @@ def import_ngramlists(fname, delimiter='\t', group_delimiter='|'):
Output: 3 x UnweightedList + 1 x Translations
@param fname a filename
@param fname a local filename or a filehandle-like
@param delimiter a character used as separator in the CSV
@param group_delimiter a character used as grouped subforms separator
(in the last column)
......@@ -352,8 +352,26 @@ def import_ngramlists(fname, delimiter='\t', group_delimiter='|'):
ignored_oldids = []
# =============== READ CSV ===============
fh = open(fname, "r")
ngrams_csv_rows = reader(fh,
if isinstance(fname, str):
fh = open(fname, "r")
elif callable(getattr(fname, "read", None)):
fh = fname
else:
raise TypeError("IMPORT: fname argument has unknown type %s" % type(fh))
# reading all directly b/c csv.reader takes only lines or a real fh in bytes
# and we usually have a "false" fh (uploadedfile.InMemoryUploadedFile) in strings
# (but we checked its size before!)
contents = fh.read().decode("UTF-8").split("\n")
# end of CSV read
fh.close()
# <class 'django.core.files.uploadedfile.InMemoryUploadedFile'>
ngrams_csv_rows = reader(contents,
delimiter = delimiter,
quoting = QUOTE_MINIMAL
)
......@@ -369,6 +387,9 @@ def import_ngramlists(fname, delimiter='\t', group_delimiter='|'):
# fyi
n_read_lines +=1
# print("---------------READ LINE %i" % i)
if not len(csv_row):
continue
try:
this_ng_oldid = str(csv_row[0])
this_ng_term = str(csv_row[1])
......@@ -381,33 +402,35 @@ def import_ngramlists(fname, delimiter='\t', group_delimiter='|'):
except:
if i == 0:
print("WARN: (skip line) probable header line at CSV %s:l.0" % fname)
print("IMPORT WARN: (skip line) probable header line at CSV %s:l.0" % fname)
continue
else:
raise ValueError("Error on CSV read line %i" %n_read_lines)
# --- check format before any old ID retrieve
if not match("\d+$", this_ng_oldid):
print("WARN: (skip line) bad ID at CSV %s:l.%i" % (fname, i))
if not match(r"\d+$", this_ng_oldid):
print("IMPORT WARN: (skip line) bad ID at CSV %s:l.%i" % (fname, i))
continue
else:
this_ng_oldid = int(this_ng_oldid)
# --- term checking
if not len(this_ng_term) > 0:
print("WARN: (skip line) empty term at CSV %s:l.%i" % (fname, i))
print("IMPORT WARN: (skip line) empty term at CSV %s:l.%i" % (fname, i))
ignored_oldids.append(this_ng_oldid)
continue
# --- check if not a duplicate string
if this_ng_term in imported_ngrams_oldids:
ignored_oldids.append(this_ng_oldid)
print("WARN: (skip line) term appears more than once (previous id: %i) at CSV %s:l.%i"
print("IMPORT WARN: (skip line) term appears more than once (previous id: %i) at CSV %s:l.%i"
% (imported_ngrams_oldids[this_ng_term], fname, i))
continue
# --- check correct list type
if not this_list_type in ['stop','main','map']:
ignored_oldids.append(this_ng_oldid)
print("WARN: (skip line) wrong list type at CSV %s:l.%i" % (fname, i))
print("IMPORT WARN: (skip line) wrong list type at CSV %s:l.%i" % (fname, i))
continue
# ================= Store the data ====================
......@@ -428,9 +451,6 @@ def import_ngramlists(fname, delimiter='\t', group_delimiter='|'):
(this_ng_oldid,external_subform_id)
)
# end of CSV read
fh.close()
# ======== ngram save + id lookup =========
n_total_ng = len(imported_ngrams_dbdata)
......@@ -529,6 +549,9 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
but are never added to docs
"""
# log to send back to client-side (lines will be joined)
my_log = []
# the tgt node arg has to be a corpus here
if not hasattr(onto_corpus, "typename") or onto_corpus.typename != "CORPUS":
raise TypeError("IMPORT: 'onto_corpus' argument must be a Corpus Node")
......@@ -573,7 +596,9 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
# ...or use empty objects if replacing old list
# ----------------------------------------------
old_lists[list_type] = UnweightedList()
print("MERGE: ignoring old %s which will be overwritten" % linfo['name'])
msg = "MERGE: ignoring old %s which will be overwritten" % linfo['name']
print(msg)
my_log.append(msg)
# ======== Merging all involved ngrams =========
......@@ -641,8 +666,9 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
merged_group.save(old_group_id)
print("MERGE: groupings %i updated (links before/added/after: %i/%i/%i)"
% (old_group_id, n_links_previous, n_links_added, n_links_after))
msg = "MERGE: groupings %i updated (links before/added/after: %i/%i/%i)" % (old_group_id, n_links_previous, n_links_added, n_links_after)
my_log.append(msg)
print(msg)
# ======== Target list(s) append data =========
# if list 2 => write in both tgt_data_lists [1,2]
......@@ -664,7 +690,11 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
else:
mainform_id = merged_group.items[ng_id]
# inherited winner
target_lid = resolved_memberships[mainform_id]
try:
target_lid = resolved_memberships[mainform_id]
except KeyError:
target_lid = winner_lid
print("MERGE: WARN ng_id %i has incorrect mainform %i ?" % (ng_id, mainform_id))
## 2) map => map + main
if target_lid == 2:
......@@ -686,5 +716,9 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
result = merged_results[list_type]
result.save(tgt_id)
print("MERGE: %s %i updated (new size: %i)"
% (info['name'],tgt_id, len(merged_results[list_type].items)))
msg = "MERGE: %s %i updated (new size: %i)" % (info['name'],tgt_id, len(merged_results[list_type].items))
my_log.append(msg)
print(msg)
# return a log
return("\n".join(my_log))
......@@ -15,11 +15,12 @@ from sqlalchemy import tuple_
from gargantext.models import Ngram, NodeNgram, NodeNodeNgram, NodeNgramNgram
from gargantext.util.lists import UnweightedList, Translations
# subroutines that were previously in this module are now in util.XYZ_tools
from gargantext.util.ngramlists_tools import query_list, export_ngramlists
# useful subroutines
from gargantext.util.ngramlists_tools import query_list, export_ngramlists, \
import_ngramlists, merge_ngramlists
from gargantext.util.group_tools import query_grouped_ngrams
class List(APIView):
"""
see already available API query api/nodes/<list_id>?fields[]=ngrams
......@@ -30,6 +31,8 @@ class List(APIView):
class CSVLists(APIView):
"""
For CSV exports of all lists of a corpus
Or CSV import into existing lists as "patch"
"""
def get(self, request):
params = get_parameters(request)
......@@ -44,6 +47,49 @@ class CSVLists(APIView):
export_ngramlists(corpus_node, fname=response, titles=True)
return response
def post(self, request):
    """
    Merge the lists of a corpus with other lists read from an uploaded CSV.

    Parameters read from the request (via get_parameters):
        onto_corpus: id of the corpus whose lists are getting patched

    Uploaded data (request.data):
        csvfile: the CSV file to import

    Returns a JSON response:
        200 {'log': <merge log, lines joined by newlines>} on success
        400 {'err': <message>} if 'onto_corpus' or 'csvfile' is missing
            or if 'onto_corpus' is not an integer

    NB: importing from another corpus (a "dbsource" parameter instead of
        the CSV file) is not handled by this method yet; that option
        should perhaps not live in CSVLists at all.

    NB: not using PATCH because we need a POST file upload.

    WARNING: we assume the file size was checked client-side before
             upload; nothing here limits it.

    TODO check authentication and user.id
    """
    # this time the corpus param is the one with the target lists to be patched
    params = get_parameters(request)
    try:
        corpus_id = int(params.pop("onto_corpus"))
    except (KeyError, TypeError, ValueError):
        # report a client error instead of letting the exception become a 500
        return JsonHttpResponse({
            'err': "IMPORT: missing or invalid 'onto_corpus' parameter",
        }, 400)
    corpus_node = cache.Node[corpus_id]

    # the request also contains the file, typically as a
    # django.core.files.uploadedfile.InMemoryUploadedFile
    try:
        csv_file = request.data['csvfile']
    except KeyError:
        return JsonHttpResponse({
            'err': "IMPORT: no 'csvfile' found in the uploaded data",
        }, 400)

    # import the csv
    new_lists = import_ngramlists(csv_file)

    # merge the new_lists onto those of the target corpus
    log_msg = merge_ngramlists(new_lists, onto_corpus=corpus_node)

    return JsonHttpResponse({
        'log': log_msg,
    }, 200)
class GroupChange(APIView):
......
......@@ -33,6 +33,10 @@ urlpatterns = [ url(r'^nodes$' , nodes.NodeListResource.as_view()
# TODO : unify to a /api/ngrams?formatted=csv
# (similar to /api/nodes?formatted=csv)
, url(r'^ngramlists/import$', ngramlists.CSVLists.as_view() )
# same handling class as export (CSVLists)
# but this route used only for POST + file
, url(r'^ngramlists/change$', ngramlists.ListChange.as_view() )
# add or remove ngram from a list
# ex: add <=> PUT ngramlists/change?list=42&ngrams=1,2
......
......@@ -33,6 +33,9 @@ def ngramtable(request, project_id, corpus_id):
'project': project,
'corpus' : corpus,
'resourcename' : resourcename(corpus),
'view': 'terms'
'view': 'terms',
# for the CSV import modal
'csvimportroute': "/api/ngramlists/import?onto_corpus=%i"% corpus.id
},
)
......@@ -72,6 +72,15 @@
<button id="Save_All" class="btn btn-muted" disabled style="font-size:120%">
<b>Save all changes</b>
</button>
<br/>
<br/>
<!-- import icon -->
<span class="needsaveicon glyphicon glyphicon-import"></span>
&nbsp;
<button id="ImportList" class="btn btn-warning" style="font-size:120%"
onclick="$('#csvimport').modal('show');">
<b>Import a Termlist</b>
</button>
</div>
<!-- see in javascript function queries.functions['my_state_filter'] -->
<div class="pull-right" style="margin-top:2.1em;padding-left:1em;">
......@@ -107,25 +116,110 @@
</div> <!-- /div panel -->
</div> <!-- /jumbotron -->
<!--
<button id="ImportList" onclick="GetUserPortfolio();" class="btn btn-warning">
Import a Corpus-List
</button>
-->
<!--</div> This div is closed in the menu !-->
<!--</div> This div is closed in the menu !-->
<!--
# stub to import a list (aka orange button)
<button id="ImportList" onclick="GetUserPortfolio();" class="btn btn-warning">Import a Corpus-List</button>
-->
<div class="modal" aria-hidden="true" id="csvimport">
<div class="modal-dialog">
<div class="modal-content">
<div class="modal-header">
<button type="button" class="close" data-dismiss="modal" aria-hidden="true">×</button>
<h3 id="myModalLabel">Import a CSV term list</h3>
</div>
<div class="modal-body" id="uploadform">
<form id="csvimportform"
onsubmit="return postCSV(event)"
enctype="multipart/form-data"
method="post">
{% csrf_token %}
<label>From your disk:</label>
<input type="file" id="csvfile" accept="text/csv">
<br/>
<label>From another corpus:</label>
<p>TODO</p>
<br/>
<input type="submit" class="btn btn-xs btn-info" id="csvsubmit" value="Submit" />
</form>
</div>
<div class="modal-footer" id="formanswer"></div>
</div>
</div>
</div>
<script type="text/javascript" src="{% static "lib/jquery/dynatable/jquery.dynatable.js" %}"></script>
<!-- custom-lib for dynatable.js and dc.js -->
<script type="text/javascript" src="{% static "lib/gargantext/NGrams_dyna_chart_and_table.js" %}"></script>
<script type="text/javascript">
/* thanks c24b !
 *
 * Submit handler of form#csvimportform: uploads the selected CSV file
 * with an AJAX POST and reports the outcome in #formanswer.
 *
 * Uses csvimportroute variable from the django template
 *    Ex: /api/ngramlists/import?onto_corpus=corpus_id
 *
 * Uses input#csvfile as source data.
 *
 * @param e  the submit event (its default page reload is cancelled)
 * @return   false, so that the inline "onsubmit" never submits the form
 */
function postCSV(e){
    // don't do page reload of usual submits
    e.preventDefault();

    // 2MB ≈ 70000 ngrams
    var max_size = 2097152;

    // we take it straight from the input element
    // (declared with var: it used to leak as an implicit global)
    var theFile = $('input#csvfile')[0].files[0];

    if (! theFile) {
        console.warn('Ignoring "submit": no provided file');
        return false;
    }

    if (theFile.size > max_size) {
        console.warn('Ignoring "submit": file is too big');
        $('#formanswer').html(
            'The import failed: your file is too big ('+max_size/1024+'kB max).'
        );
        return false;
    }

    // normal case
    // append into an empty form (or fixme: initialize it using form element)
    var myFileFormData = new FormData();
    myFileFormData.append("csvfile", theFile);

    // show the progress message before the request is sent, so that it can
    // never overwrite the answer written by the callbacks below
    $('#formanswer').html('CSV import in Progress');

    $.ajax({
        url: "{{csvimportroute | safe}}",
        type: 'POST',
        async: true,
        contentType: false,
        processData: false,
        data: myFileFormData,
        beforeSend: function(xhr) {
            xhr.setRequestHeader("X-CSRFToken", getCookie("csrftoken"));
        },
        success: function(response) {
            // the log is plain text built server-side: escape it before
            // inserting it as HTML, then turn its newlines into <br/>
            var rawLog = String((response && response['log']) || '');
            var safeLog = $('<div>').text(rawLog).html().replace(/\n/g, '<br/>');

            var my_html = "<h2 style='color:green'>IMPORT OK ! </h2>";
            my_html += "<p class='note'>" + safeLog + "</p>";
            my_html += "<p>(this page will reload in 3s)</p>";
            $('#formanswer').html(my_html);
            console.log(response);

            // reload after 3s (function callback instead of an eval'd string)
            setTimeout(function() { location.reload(true); }, 3000);
        },
        error: function(result) {
            $('#formanswer').html('Erreur');
            console.error(result);
        }
    });

    return false;
}
</script>
{% endblock %}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment