[OK] Import dialog on terms page + merge function logging (TODO better CSS and...

[OK] Import dialog on terms page + merge function logging (TODO better CSS and possibility to import directly from other corpus

[OK] Import dialog on terms page + merge function logging (TODO better CSS and...
[OK] Import dialog on terms page + merge function logging (TODO better CSS and possibility to import directly from other corpus
0ca0bf13 · Romain Loth · 0acab158 · 0ca0bf13 · 0ca0bf13 · 0ca0bf13
Commit 0ca0bf13 authored Jun 17, 2016 by Romain Loth
5 changed files
--- a/gargantext/util/ngramlists_tools.py
+++ b/gargantext/util/ngramlists_tools.py
@@ -313,7 +313,7 @@ def import_ngramlists(fname, delimiter='\t', group_delimiter='|'):

    Output:  3 x UnweightedList + 1 x Translations

-    @param fname            a filename
+    @param fname            a local filename or a filehandle-like
    @param delimiter        a character used as separator in the CSV
    @param group_delimiter  a character used as grouped subforms separator
                            (in the last column)
@@ -352,8 +352,26 @@ def import_ngramlists(fname, delimiter='\t', group_delimiter='|'):
    ignored_oldids = []

    # =============== READ CSV ===============
-    fh = open(fname, "r")
-    ngrams_csv_rows = reader(fh,
+
+    if isinstance(fname, str):
+        fh = open(fname, "rb")
+    elif callable(getattr(fname, "read", None)):
+        fh = fname
+    else:
+        raise TypeError("IMPORT: fname argument has unknown type %s" % type(fname))
+
+    # read everything at once (size was checked before upload): csv.reader
+    # needs text lines but an InMemoryUploadedFile gives us raw bytes
+    try:
+        contents = fh.read()
+    finally:
+        fh.close()
+    # decode only raw bytes: a text-mode filehandle already yields str
+    if isinstance(contents, bytes):
+        contents = contents.decode("UTF-8")
+    contents = contents.split("\n")
+
+    ngrams_csv_rows = reader(contents,
                             delimiter = delimiter,
                             quoting   = QUOTE_MINIMAL
                             )
@@ -369,6 +387,9 @@ def import_ngramlists(fname, delimiter='\t', group_delimiter='|'):
        # fyi
        n_read_lines +=1
        # print("---------------READ LINE %i" % i)
+        if not len(csv_row):
+            continue
+
        try:
            this_ng_oldid        = str(csv_row[0])
            this_ng_term         = str(csv_row[1])
@@ -381,33 +402,35 @@ def import_ngramlists(fname, delimiter='\t', group_delimiter='|'):

        except:
            if i == 0:
-                print("WARN: (skip line) probable header line at CSV %s:l.0" % fname)
+                print("IMPORT WARN: (skip line) probable header line at CSV %s:l.0" % fname)
                continue
+            else:
+                raise ValueError("Error on CSV read line %i" %n_read_lines)

        # --- check format before any old ID retrieve
-        if not match("\d+$", this_ng_oldid):
-            print("WARN: (skip line) bad ID at CSV %s:l.%i" % (fname, i))
+        if not match(r"\d+$", this_ng_oldid):
+            print("IMPORT WARN: (skip line) bad ID at CSV %s:l.%i" % (fname, i))
            continue
        else:
            this_ng_oldid = int(this_ng_oldid)

        # --- term checking
        if not len(this_ng_term) > 0:
-            print("WARN: (skip line) empty term at CSV %s:l.%i" % (fname, i))
+            print("IMPORT WARN: (skip line) empty term at CSV %s:l.%i" % (fname, i))
            ignored_oldids.append(this_ng_oldid)
            continue

        # --- check if not a duplicate string
        if this_ng_term in imported_ngrams_oldids:
            ignored_oldids.append(this_ng_oldid)
-            print("WARN: (skip line) term appears more than once (previous id: %i) at CSV %s:l.%i"
+            print("IMPORT WARN: (skip line) term appears more than once (previous id: %i) at CSV %s:l.%i"
                    % (imported_ngrams_oldids[this_ng_term], fname, i))
            continue

        # --- check correct list type
        if not this_list_type in ['stop','main','map']:
            ignored_oldids.append(this_ng_oldid)
-            print("WARN: (skip line) wrong list type at CSV %s:l.%i" % (fname, i))
+            print("IMPORT WARN: (skip line) wrong list type at CSV %s:l.%i" % (fname, i))
            continue

        # ================= Store the data ====================
@@ -428,9 +451,6 @@ def import_ngramlists(fname, delimiter='\t', group_delimiter='|'):
                  (this_ng_oldid,external_subform_id)
                  )

-    # end of CSV read
-    fh.close()
-
    # ======== ngram save + id lookup =========
    n_total_ng = len(imported_ngrams_dbdata)

@@ -529,6 +549,9 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
           but are never added to docs
    """

+    # log to send back to client-side (lines will be joined)
+    my_log = []
+
    # the tgt node arg has to be a corpus here
    if not hasattr(onto_corpus, "typename") or onto_corpus.typename != "CORPUS":
        raise TypeError("IMPORT: 'onto_corpus' argument must be a Corpus Node")
@@ -573,7 +596,9 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
            # ...or use empty objects if replacing old list
            # ----------------------------------------------
            old_lists[list_type] = UnweightedList()
-            print("MERGE: ignoring old %s which will be overwritten" % linfo['name'])
+            msg = "MERGE: ignoring old %s which will be overwritten" % linfo['name']
+            print(msg)
+            my_log.append(msg)

    # ======== Merging all involved ngrams =========

@@ -641,8 +666,9 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):

    merged_group.save(old_group_id)

-    print("MERGE: groupings %i updated (links before/added/after: %i/%i/%i)"
-            % (old_group_id, n_links_previous, n_links_added, n_links_after))
+    msg = "MERGE: groupings %i updated (links before/added/after: %i/%i/%i)" % (old_group_id, n_links_previous, n_links_added, n_links_after)
+    my_log.append(msg)
+    print(msg)

    # ======== Target list(s) append data =========
    # if list 2 => write in both tgt_data_lists [1,2]
@@ -664,7 +690,11 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
        else:
            mainform_id = merged_group.items[ng_id]
            # inherited winner
-            target_lid = resolved_memberships[mainform_id]
+            try:
+                target_lid = resolved_memberships[mainform_id]
+            except KeyError:
+                target_lid = winner_lid
+                print("MERGE: WARN ng_id %i has incorrect mainform %i ?" % (ng_id, mainform_id))

        ## 2) map => map + main
        if target_lid == 2:
@@ -686,5 +716,9 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
        result = merged_results[list_type]
        result.save(tgt_id)

-        print("MERGE: %s %i updated (new size: %i)"
-              % (info['name'],tgt_id, len(merged_results[list_type].items)))
+        msg = "MERGE: %s %i updated (new size: %i)" % (info['name'],tgt_id, len(merged_results[list_type].items))
+        my_log.append(msg)
+        print(msg)
+
+    # return a log
+    return("\n".join(my_log))
--- a/gargantext/views/api/ngramlists.py
+++ b/gargantext/views/api/ngramlists.py
@@ -15,11 +15,12 @@ from sqlalchemy               import tuple_
 from gargantext.models        import Ngram, NodeNgram, NodeNodeNgram, NodeNgramNgram
 from gargantext.util.lists    import UnweightedList, Translations

-
-# subroutines that were previously in this module are now in util.XYZ_tools
-from gargantext.util.ngramlists_tools import query_list, export_ngramlists
+# useful subroutines
+from gargantext.util.ngramlists_tools import query_list, export_ngramlists, \
+                                             import_ngramlists, merge_ngramlists
 from gargantext.util.group_tools      import query_grouped_ngrams

+
 class List(APIView):
    """
    see already available API query api/nodes/<list_id>?fields[]=ngrams
@@ -30,6 +31,8 @@ class List(APIView):
 class CSVLists(APIView):
    """
    For CSV exports of all lists of a corpus
+
+    Or CSV import into existing lists as "patch"
    """
    def get(self, request):
        params = get_parameters(request)
@@ -44,6 +47,49 @@ class CSVLists(APIView):
        export_ngramlists(corpus_node, fname=response, titles=True)
        return response

+    def post(self,request):
+        """
+        Merge the lists of a corpus with other lists from a CSV source
+        (merging from another corpus is planned but not implemented here)
+
+        params read via get_parameters(request):
+            onto_corpus: id of the corpus whose lists are getting patched
+
+        params read via request.data (multipart body):
+            csvfile:     the uploaded csv file
+
+        not handled yet:
+            dbsource:    another corpus instead of the csvfile
+                         (? this last option should perhaps not be in CSVLists ?)
+
+        NB: not using PATCH because we'll need POST file upload
+        Returns a JSON response {'log': <merge log lines>} with status 200.
+
+        WARNING: we assume the file size was checked client-side before upload
+
+        TODO check authentication and user.id
+        """
+        # this time the corpus param is the one with the target lists to be patched
+        params = get_parameters(request)
+        corpus_id = int(params.pop("onto_corpus"))
+        corpus_node = cache.Node[corpus_id]
+
+        # request also contains the file
+        # csv_file has type django.core.files.uploadedfile.InMemoryUploadedFile
+        #                                                 ----------------------
+        csv_file = request.data['csvfile']
+
+        # import the csv
+        new_lists = import_ngramlists(csv_file)
+        del csv_file
+
+        # merge the new_lists onto those of the target corpus
+        log_msg = merge_ngramlists(new_lists, onto_corpus=corpus_node)
+
+        return JsonHttpResponse({
+            'log': log_msg,
+            }, 200)
+


 class GroupChange(APIView):

--- a/gargantext/views/api/urls.py
+++ b/gargantext/views/api/urls.py
@@ -33,6 +33,10 @@ urlpatterns = [ url(r'^nodes$'                , nodes.NodeListResource.as_view()
                #  TODO : unify to a /api/ngrams?formatted=csv
                #        (similar to /api/nodes?formatted=csv)

+              , url(r'^ngramlists/import$', ngramlists.CSVLists.as_view()            )
+                # same handling class as export (CSVLists)
+                # but this route used only for POST + file
+
              , url(r'^ngramlists/change$', ngramlists.ListChange.as_view()          )
                # add or remove ngram from a list
                #  ex: add <=> PUT ngramlists/change?list=42&ngrams=1,2

--- a/gargantext/views/pages/terms.py
+++ b/gargantext/views/pages/terms.py
@@ -33,6 +33,9 @@ def ngramtable(request, project_id, corpus_id):
            'project': project,
            'corpus' : corpus,
            'resourcename' : resourcename(corpus),
-            'view': 'terms'
+            'view': 'terms',
+
+            # for the CSV import modal
+            'csvimportroute': "/api/ngramlists/import?onto_corpus=%i"% corpus.id
        },
    )
--- a/templates/pages/corpora/terms.html
+++ b/templates/pages/corpora/terms.html
@@ -72,6 +72,15 @@
                          <button id="Save_All" class="btn btn-muted" disabled style="font-size:120%">
                              <b>Save all changes</b>
                          </button>
+                          <br/>
+                          <br/>
+                          <!-- import icon -->
+                          <span class="needsaveicon glyphicon glyphicon-import"></span>
+                          &nbsp;
+                          <button id="ImportList" class="btn btn-warning" style="font-size:120%"
+                                   onclick="$('#csvimport').modal('show');">
+                              <b>Import a Termlist</b>
+                          </button>
                      </div>
                        <!-- see in javascript function queries.functions['my_state_filter'] -->
                        <div class="pull-right" style="margin-top:2.1em;padding-left:1em;">
@@ -107,25 +116,110 @@
                </div> <!-- /div panel -->

        </div> <!-- /jumbotron -->
-    <!--
-    <button id="ImportList" onclick="GetUserPortfolio();" class="btn btn-warning">
-        Import a Corpus-List
-    </button>
-->
-<!--</div> This div is closed in the menu !-->
-

+<!--</div> This div is closed in the menu !-->


-    <!--
-    # stub to import a list (aka orange button)
-    <button id="ImportList" onclick="GetUserPortfolio();" class="btn btn-warning">Import a Corpus-List</button>
-    -->

+<div class="modal" aria-hidden="true" id="csvimport">
+    <div class="modal-dialog">
+    <div class="modal-content">
+    <div class="modal-header">
+     <button type="button" class="close" data-dismiss="modal" aria-hidden="true">×</button>
+     <h3 id="myModalLabel">Import a CSV term list</h3>
+    </div>
+    <div class="modal-body" id="uploadform">
+        <form id="csvimportform"
+              onsubmit="return postCSV(event)"
+              enctype="multipart/form-data"
+              method="post">
+            {% csrf_token %}
+            <label>From your disk:</label>
+            <input type="file" id="csvfile" accept="text/csv">
+            <br/>
+            <label>From another corpus:</label>
+            <p>TODO</p>
+            <br/>
+            <input type="submit" class="btn btn-xs btn-info" id="csvsubmit" value="Submit" />
+        </form>
+    </div>
+    <div class="modal-footer" id="formanswer"></div>
+    </div>
+    </div>
+</div>

 <script type="text/javascript" src="{% static "lib/jquery/dynatable/jquery.dynatable.js" %}"></script>
 <!-- custom-lib for dynatable.js and dc.js -->
 <script type="text/javascript" src="{% static "lib/gargantext/NGrams_dyna_chart_and_table.js" %}"></script>

+<script type="text/javascript">
+
+/*                                             merci c24b !
+ * Uses csvimportroute variable from the django template
+ * Ex: /api/ngramlists/import?onto_corpus=corpus_id
+ *
+ * Uses input#csvfile as source data.
+ */
+function postCSV(e){
+    // don't do page reload of usual submits
+    e.preventDefault()
+
+    // 2MB ≈ 70000 ngrams
+    var max_size = 2097152
+
+    // we take it straight from the input element
+    var theFile = $('input#csvfile')[0].files[0]
+
+    // escape server-provided text before inserting it as HTML
+    var escapeHtml = function(s) { return $('<div/>').text(String(s)).html() }
+
+    if (! theFile) {
+        console.warn('Ignoring "submit": no provided file')
+        return false
+    }
+    else if (theFile.size > max_size) {
+        console.warn('Ignoring "submit": file is too big')
+        $('#formanswer').html(
+            'The import failed: your file is too big ('+max_size/1024+'kB max).'
+        );
+        return false
+    }
+    // normal case
+    else {
+        // append into an empty form (or fixme: initialize it using form element)
+        var myFileFormData = new FormData();
+        myFileFormData.append("csvfile", theFile)
+
+        //postCorpusFile
+        $.ajax({
+             url: "{{csvimportroute | safe}}",
+             type: 'POST',
+             async: true,
+             contentType: false,
+             processData: false,
+             data: myFileFormData,
+             beforeSend: function(xhr) {
+               xhr.setRequestHeader("X-CSRFToken", getCookie("csrftoken"));
+             },
+             success: function(response) {
+                 var my_html = "<h2 style='color:green'>IMPORT OK ! </h2>"
+                 my_html += "<p class='note'>" + escapeHtml(response['log']).replace(/\n/g, '<br/>') + "</p>"
+                 my_html += "<p>(this page will reload in 3s)</p>"
+                 $('#formanswer').html(my_html);
+                 console.log(response) ;
+                 // reload after 3s (function form: no string evaluation)
+                 setTimeout(function() { location.reload(true) }, 3000);
+                 },
+              error: function(result) {
+                  $('#formanswer').html('Erreur');
+                  console.error(result);
+                 },
+            });
+        $('#formanswer').html('CSV import in Progress');
+    }
+};
+</script>
+
+

 {% endblock %}