Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
3a445e73
Commit
3a445e73
authored
Jun 18, 2016
by
delanoe
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'romain-goodies' into unstable
parents
ebe22cf3
0ca0bf13
Changes
11
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
1160 additions
and
119 deletions
+1160
-119
db.py
gargantext/util/db.py
+25
-2
group_tools.py
gargantext/util/group_tools.py
+187
-0
ngramlists_tools.py
gargantext/util/ngramlists_tools.py
+724
-0
ngrams_extraction.py
gargantext/util/toolchain/ngrams_extraction.py
+3
-0
ngramlists.py
gargantext/views/api/ngramlists.py
+65
-95
urls.py
gargantext/views/api/urls.py
+9
-0
terms.py
gargantext/views/pages/terms.py
+4
-1
requirements.txt
install/python/requirements.txt
+1
-0
menu.css
static/lib/gargantext/menu.css
+10
-0
terms.html
templates/pages/corpora/terms.html
+105
-11
menu.html
templates/pages/menu.html
+27
-10
No files found.
gargantext/util/db.py
View file @
3a445e73
...
...
@@ -86,7 +86,19 @@ class bulk_insert:
readline
=
read
def
bulk_insert_ifnotexists
(
model
,
uniquekey
,
fields
,
data
,
cursor
=
None
):
def
bulk_insert_ifnotexists
(
model
,
uniquekey
,
fields
,
data
,
cursor
=
None
,
do_stats
=
False
):
"""
Inserts bulk data with an intermediate check on a uniquekey
(ex: Ngram.terms) to see if the row existed before.
If the row already existed we just retrieve its id.
If it didn't exist we create it and retrieve the id.
Returns a dict {uniquekey => id}
Option:
do stats: also returns the number of those that had no previous id
"""
if
cursor
is
None
:
db
,
cursor
=
get_cursor
()
mustcommit
=
True
...
...
@@ -109,6 +121,7 @@ def bulk_insert_ifnotexists(model, uniquekey, fields, data, cursor=None):
sourcetable
=
model
.
__tablename__
,
uniquecolumn
=
uniquekey
,
))
# insert what has not been found to the real table
cursor
.
execute
(
'''
INSERT INTO {sourcetable} ({columns})
...
...
@@ -119,6 +132,11 @@ def bulk_insert_ifnotexists(model, uniquekey, fields, data, cursor=None):
sourcetable
=
model
.
__tablename__
,
columns
=
', '
.
join
(
fields
),
))
if
do_stats
:
# remember how many rows we inserted just now
n_new
=
cursor
.
rowcount
# retrieve dict associating unique key to id
cursor
.
execute
(
'''
SELECT source.id, source.{uniquecolumn}
...
...
@@ -130,10 +148,15 @@ def bulk_insert_ifnotexists(model, uniquekey, fields, data, cursor=None):
columns
=
', '
.
join
(
fields
),
))
result
=
{
# term : new_id
row
[
1
]:
row
[
0
]
for
row
in
cursor
.
fetchall
()
}
# this is the end!
cursor
.
execute
(
'DROP TABLE __tmp__'
)
if
mustcommit
:
db
.
commit
()
return
result
if
do_stats
:
return
result
,
n_new
else
:
return
result
gargantext/util/group_tools.py
0 → 100644
View file @
3a445e73
"""
Utilities for group management
- query_grouped_ngrams(group_id) to retrieve subforms
- group_union() to join two groupings lists
"""
from
gargantext.util.db
import
session
,
aliased
from
gargantext.models
import
Ngram
,
NodeNgramNgram
from
igraph
import
Graph
# for group_union
def query_groups(groupings_id, details=False):
    """
    Lists the (mainform, subform) couples of a grouping node,
    aka the (ngram1_id, ngram2_id) links.

    Parameter:
      - details: if False, just send the array of couples
                 if True, send quadruplets with (ngram1_id, term1, ngram2_id, term2)
    """
    if details:
        # detailed contents: the two ids plus both term strings
        # (two aliases because Ngram is joined twice, once per side)
        MainNgram = aliased(Ngram)
        SubNgram  = aliased(Ngram)
        query = (
            session.query(
                NodeNgramNgram.ngram1_id,
                MainNgram.terms,
                NodeNgramNgram.ngram2_id,
                SubNgram.terms,
            )
            .join(MainNgram, NodeNgramNgram.ngram1_id == MainNgram.id)
            .join(SubNgram,  NodeNgramNgram.ngram2_id == SubNgram.id)
        )
    else:
        # simple contents: just the id couples
        query = session.query(NodeNgramNgram.ngram1_id, NodeNgramNgram.ngram2_id)

    # main filter: restrict to the requested grouping node
    return query.filter(NodeNgramNgram.node_id == groupings_id)
def query_grouped_ngrams(groupings_id, details=False, scoring_metric_id=None):
    """
    Lists the "hidden" ngram_ids (subforms) recorded in a grouping node.
    Works only for grouplists.

    Parameter:
      - details: if False, send just the array of ngram_ids
                 if True, send couples with (ngram_id, term)
      - scoring_metric_id: deprecated
                 (was: id of a scoring metric node (TFIDF or OCCS)
                  for details and sorting;
                  no more OCCS counts of subforms)
    """
    if details:
        # detailed contents: subform ids + their terms
        # (the NodeNodeNgram score join is deliberately disabled)
        query = (
            session.query(
                NodeNgramNgram.ngram2_id,
                Ngram.terms,
                # NodeNodeNgram.score #
            )
            .join(Ngram, NodeNgramNgram.ngram2_id == Ngram.id)
            # .join(NodeNodeNgram, NodeNgramNgram.ngram2_id == NodeNodeNgram.ngram_id)
            # .filter(NodeNodeNgram.node1_id == scoring_metric_id)
            # .order_by(desc(NodeNodeNgram.score))
        )
    else:
        # simple contents: subform ids only
        query = session.query(NodeNgramNgram.ngram2_id)

    # main filter: restrict to the requested grouping node
    return query.filter(NodeNgramNgram.node_id == groupings_id)
def group_union(g_a_links, g_b_links):
    """
    Joins two synonym-link lists into one.

    Synonym groups are modelled by sets of couples in the DB.
    Input : 2 arrays of links (ngramx_id, ngramy_id)
    Output: 1 array of links (ngramx_id, ngramy_id)

    Synonymity is considered transitive so in effect the groups
    can form a set (defined by the connected component of couples).
    A requested feature is also that one node dominates others
    (aka "leader effect"; leader will be in the map, the others won't)

    Summary of major union effects in various cases:
      GROUP 1    Group 2    Group 1 ∪ 2
      A -> B     A -> C     A -> B      (simple union)
                            A -> C
      D -> E     E -> F     D -> E
                            D -> F      (D "leader effect")
      G -> H     G -> I     G -> H      ( transitivity +
      H -> J                G -> I        "leader effect")
                            G -> J

    rloth: this is some slightly amended code
           from Samuel's in rest_v1_0.ngrams.Group.get

    TODO use "most frequent" score if leader candidates are ex aequo by degree.
    """
    # output: list of links forming the new group
    merged_links = []

    # 1) create a directed graph with both link lists
    # -----------------------------------------------
    # the set of all our ngram_ids
    vertex_ids = set(ngid for pair in g_a_links + g_b_links for ngid in pair)

    # initialize the synonym graph with size
    syn_graph = Graph(len(vertex_ids), directed=True)
    # our IDs become the "name" attribute (special attribute good for edge creation)
    syn_graph.vs['name'] = [str(v) for v in vertex_ids]
    # add both edge lists as named couples
    syn_graph.add_edges([(str(a), str(b)) for (a, b) in g_a_links])
    syn_graph.add_edges([(str(a), str(b)) for (a, b) in g_b_links])

    # 2) list resulting components
    # ----------------------------
    # each undirected connected component is one synonym group
    # (there should be no singletons by construction)
    components = syn_graph.as_undirected().components()

    # all outdegrees, for "leader" detection
    # (leader = term most often marked as source by the users)
    out_degrees = syn_graph.outdegree()

    for comp in components:
        # map back to our integer ids, preserving order
        comp_ngram_ids = [int(name) for name in syn_graph.vs[comp]['name']]

        # 3) take main node and unnest into new links list
        # ------------------------------------------------
        # leader = first member with the highest outdegree
        # (max() keeps the first maximal element, like the original strict-> scan)
        leader_pos, _ = max(enumerate(comp),
                            key=lambda pos_vid: out_degrees[pos_vid[1]])
        leader = comp_ngram_ids.pop(leader_pos)

        # and unnest the others under the leader
        for other in comp_ngram_ids:
            merged_links.append((leader, other))

    return merged_links
gargantext/util/ngramlists_tools.py
0 → 100644
View file @
3a445e73
"""
Tools to work with ngramlists (MAINLIST, MAPLIST, STOPLIST)
- query_list(list_id) to retrieve ngrams
- export_ngramlists(corpus_node)
- import_ngramlists(corpus_node)
- merge_ngramlists(new_lists, onto_corpus = corpus_node)
"""
from
gargantext.util.group_tools
import
query_groups
,
group_union
from
gargantext.util.db
import
session
,
desc
,
func
,
\
bulk_insert_ifnotexists
from
gargantext.models
import
Ngram
,
NodeNgram
,
NodeNodeNgram
,
\
NodeNgramNgram
from
gargantext.util.lists
import
UnweightedList
,
Translations
# import will implement the same text cleaning procedures as toolchain
from
gargantext.util.toolchain.parsing
import
normalize_chars
from
gargantext.util.toolchain.ngrams_extraction
import
normalize_terms
from
sqlalchemy.sql
import
exists
from
os
import
path
from
csv
import
writer
,
reader
,
QUOTE_MINIMAL
from
collections
import
defaultdict
from
re
import
match
from
io
import
StringIO
# pseudo file to write CSV to memory
def query_list(list_id,
               pagination_limit=None, pagination_offset=None,
               details=False, scoring_metric_id=None, groupings_id=None
               ):
    """
    Paginated listing of ngram_ids in a NodeNgram lists.
    Works for a mainlist or stoplist or maplist (not grouplists!)

    Parameter:
      - pagination_limit, pagination_offset
      - details: if False, send just the array of ngram_ids
                 if True and no scoring, send couples with (ngram_id, term)
                 if True and a scoring_id, send triples with (ngram_id, term, scoring)
      - scoring_metric_id: id of a scoring metric node (TFIDF or OCCS)
                           (for details and sorting)
      - groupings_id: optional id of a list of grouping relations (synonyms)
                      (each synonym will be added to the list if not already in there)

    FIXME: subforms appended recently and not generalized enough
            => add a common part for all "if groupings_id"
            => provide the option also in combination with scoring
    """
    # simple contents
    if not details:
        query = session.query(NodeNgram.ngram_id).filter(NodeNgram.node_id == list_id)

        if groupings_id:
            subforms = (session.query(NodeNgramNgram.ngram2_id)
                        # subform ids...
                        .filter(NodeNgramNgram.node_id == groupings_id)
                        # .. that are connected to a mainform
                        .join(NodeNgram,
                              NodeNgram.ngram_id == NodeNgramNgram.ngram1_id)
                        # .. which is in the list
                        .filter(NodeNgram.node_id == list_id)
                        )
            # union with the main q
            query = query.union(subforms)

    # detailed contents (id + terms)
    elif not scoring_metric_id:
        query = (session.query(Ngram.id, Ngram.terms, Ngram.n)
                 .join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
                 .filter(NodeNgram.node_id == list_id)
                 )

        if groupings_id:
            subforms = (session.query(Ngram.id, Ngram.terms, Ngram.n)
                        .join(NodeNgramNgram,
                              NodeNgramNgram.ngram2_id == Ngram.id)
                        # subform ids...
                        .filter(NodeNgramNgram.node_id == groupings_id)
                        # .. that are connected to a mainform
                        .join(NodeNgram,
                              NodeNgram.ngram_id == NodeNgramNgram.ngram1_id)
                        # .. which is in the list
                        .filter(NodeNgram.node_id == list_id)
                        )
            # union with the main q
            query = query.union(subforms)

    # detailed contents (id + terms) + score
    else:
        # NB: score can be undefined (eg ex-subform that now became free)
        #     ==> we need outerjoin
        #     and the filter needs to have scoring_metric_id so we do it before
        ScoresTable = (session
                       .query(NodeNodeNgram.score, NodeNodeNgram.ngram_id)
                       .filter(NodeNodeNgram.node1_id == scoring_metric_id)
                       .subquery()
                       )

        query = (session.query(
                    NodeNgram.ngram_id,
                    Ngram.terms,
                    ScoresTable.c.score
                 )
                 .join(Ngram, NodeNgram.ngram_id == Ngram.id)
                 # main filter ----------------------
                 .filter(NodeNgram.node_id == list_id)
                 # scores if possible
                 .outerjoin(ScoresTable,
                            ScoresTable.c.ngram_id == NodeNgram.ngram_id)
                 .order_by(desc(ScoresTable.c.score))
                 )

    if pagination_limit:
        query = query.limit(pagination_limit)

    if pagination_offset:
        # BUGFIX: was query.offset(pagination_offsets) — an undefined name
        # (typo for the parameter) that raised NameError whenever an offset
        # was actually requested
        query = query.offset(pagination_offset)

    return query
# helper func for exports
def ngrams_to_csv_rows(ngram_objs, id_groupings=None, list_type=""):
    """
    Transcribes ngram objects into a basic info table, one row per ngram:
        (ng_id, term string, term size, list_type)
    with an optional 5th column of grouped subforms ex: "4|42"

    @param ngram_objs: an array of ngrams (eg: from a db query.all());
                       each object must expose .id, .terms and .n
    @param id_groupings: optional dict of sets
                         {mainform_id : {subform_idA, subform_idB, etc}}
                         (BUGFIX: default was a shared mutable {} literal;
                          now None, replaced by a fresh dict per call)
    @param list_type: a str 'map', 'main' or 'stop' to fill in col 4

    Returns a csv_rows matrix (as a list of lists)
        [
         [ligne1_colA, ligne1_colB..],
         [ligne2_colA, ligne2_colB..],
         ..
        ]
    (to be used for instance like: csv.writer.writerows(csv_rows))

    list_type ici:
        0 <=> stopList
        1 <=> miamList
        2 <=> mapList
    """
    if id_groupings is None:
        id_groupings = {}

    # transcrire les objets ngrammes en tableau (liste de listes)
    csv_rows = list()
    for ng_obj in ngram_objs:
        ng_id = ng_obj.id
        if ng_id in id_groupings.keys():
            # subform ids joined like "4|42"
            this_grouped = "|".join(str(gid) for gid in id_groupings[ng_id])
        else:
            this_grouped = ""

        # transcription : 5 columns
        # ID , terme , n , type_de_liste , grouped_id|grouped_id...
        csv_rows.append(
            [ng_id, ng_obj.terms, ng_obj.n, list_type, this_grouped]
        )

    return csv_rows
def export_ngramlists(node, fname=None, delimiter="\t", titles=False):
    """
    Export of the 3 lists under a corpus node (MAP, MAIN, STOP)
    with local combination of groups.

    @param node: the corpus node
    @param fname: optional filename (str) to write the CSV,
                  or a filehandle-like to write into
                  (if absent, returns a str with CSV contents)
    @param delimiter: optional column separator in the CSV
                      (if absent defaults to tabulation)
    @param titles: optional flag to print or not a first line with headers

      # ID , term              , nwords , list_type , grouped_id|grouped_id...
      1622   textile             1        main        1623|3397
      3397   textile production  2        main
      3410   possibility         1        stop

    TODO : REFACTOR split list logic from corpus logic
           => possibility to act on one list
    """
    # the node arg has to be a corpus here
    if not hasattr(node, "typename") or node.typename != "CORPUS":
        raise TypeError("EXPORT: node argument must be a Corpus Node")

    # les nodes couvrant les listes
    # -----------------------------
    stoplist_node = node.children("STOPLIST").first()
    mainlist_node = node.children("MAINLIST").first()
    maplist_node  = node.children("MAPLIST").first()
    # et les groupes de synonymes
    group_node = node.children("GROUPLIST").first()

    # listes de ngram_ids correspondantes
    # ------------------------------------
    # contenu: liste des objets ngrammes [(2562,"monterme",1),...]
    stop_ngrams = query_list(stoplist_node.id, details=True,
                             groupings_id=group_node.id).all()
    main_ngrams = query_list(mainlist_node.id, details=True,
                             groupings_id=group_node.id).all()
    map_ngrams  = query_list(maplist_node.id, details=True,
                             groupings_id=group_node.id).all()

    # pour debug ---------->8 --------------------
    #~ stop_ngrams = stop_ngrams[0:10]
    #~ main_ngrams = main_ngrams[0:10]
    #~ map_ngrams = map_ngrams[0:10]
    # --------------------->8 --------------------

    # pour la group_list on a des couples de ngram_ids
    # -------------------
    # ex: [(3544, 2353), (2787, 4032), ...]
    group_ngram_id_couples = query_groups(group_node.id).all()

    # k couples comme set
    # --------------------
    # [(x => y1), (x => y2)] >~~~~~~~> [x => {y1,y2}]
    grouped = defaultdict(set)
    for ngram in group_ngram_id_couples:
        grouped[ngram[0]].add(ngram[1])

    # on applique notre fonction ng_to_csv sur chaque liste
    # ------------------------------------------------------
    map_csv_rows  = ngrams_to_csv_rows(map_ngrams,  id_groupings=grouped,
                                       list_type="map")
    stop_csv_rows = ngrams_to_csv_rows(stop_ngrams, id_groupings=grouped,
                                       list_type="stop")
    # miam contient map donc il y a un préalable ici
    map_ngram_ids = {ng.id for ng in map_ngrams}
    main_without_map = [ng for ng in main_ngrams if ng.id not in map_ngram_ids]
    miam_csv_rows = ngrams_to_csv_rows(main_without_map, id_groupings=grouped,
                                       list_type="main")

    # all lists together now
    this_corpus_all_rows = map_csv_rows + miam_csv_rows + stop_csv_rows

    # choice of output: file or string
    # BUGFIX: straight_to_handle must be initialized before the branching,
    # otherwise the `elif straight_to_handle` below raised NameError
    # whenever fname was a str path
    straight_to_handle = False
    if fname == None:
        out_file = StringIO()
    elif type(fname) == str:
        out_file = open(fname, 'w')
    else:
        straight_to_handle = True
        out_file = fname

    # csv.writer()
    csv_wr = writer(out_file, delimiter=delimiter, quoting=QUOTE_MINIMAL)

    if titles:
        csv_wr.writerow(["oldid", "term", "nwords", "listtype", "subforms"])

    # write to outfile
    csv_wr.writerows(this_corpus_all_rows)

    if fname == None:
        # return output as a string
        print("EXPORT: wrote %i ngrams to CSV string"
              % len(this_corpus_all_rows))
        return out_file.getvalue()
    elif straight_to_handle:
        print("EXPORT: wrote %i ngrams to CSV response handle"
              % len(this_corpus_all_rows))
    else:
        # just close output file
        out_file.close()
        print("EXPORT: wrote %i ngrams to CSV file '%s'"
              % (len(this_corpus_all_rows), path.abspath(fname)))
def import_ngramlists(fname, delimiter='\t', group_delimiter='|'):
    '''
    This function reads a CSV of an ngrams table for a Corpus,
    then it converts old ngram_ids to those of the current DB
      (and adds to DB any unknown ngrams)
    then recreates an equivalent set of MAINLIST, MAPLIST, STOPLIST + GROUPS

    Input example:
       oldid | term          |nwords| ltype |group_oldids
      -------+---------------+------+--------+---------------
       3842   water table      2     map     3724
       3724   water tables     2     map
       4277   water supply     2     map     190362|13415
       13415  water supplies   2     map
       190362 water-supply     1     map
       20489  wastewater       1     map

    Output: 3 x UnweightedList + 1 x Translations

    @param fname            a local filename or a filehandle-like
    @param delimiter        a character used as separator in the CSV
    @param group_delimiter  a character used as grouped subforms separator
                            (in the last column)

    The conversion of old_id to ngram_id works in 2 steps:
        => look up each term str in the DB with bulk_insert_ifnotexists
           (creates absent ngrams if necessary)
        => use the new ids to map the relations involving the old ones

    NB: the creation of MAINLIST also adds all elements from the MAPLIST

    NB: To merge the imported lists into a corpus node's lists,
        chain this function with merge_ngramlists()
    '''
    # --------------
    # main storage for the ngrams by list
    import_nodes_ngrams = {'stop': [], 'main': [], 'map': []}

    # separate storage for the term's couples [(term str, nwords int),...]
    imported_ngrams_dbdata = []

    # and all the old ids, by term (for id lookup after dbdata bulk_insert)
    imported_ngrams_oldids = {}

    # and for the imported_grouping list of couples [(x1,y1),(x1,y2),(x2,y3),..]
    imported_groupings = []
    # /!\ imported_grouping contains only external ids (aka oldids)
    #     (ie imported ids.. that will have to be translated to target db ids)

    # skipped lines can (very rarely) be used in groups => mark as ignored
    ignored_oldids = []

    # =============== READ CSV ===============
    if isinstance(fname, str):
        # BUGFIX: open in binary mode — the .decode("UTF-8") below requires
        # bytes, and a text-mode handle would have raised AttributeError
        fh = open(fname, "rb")
    elif callable(getattr(fname, "read", None)):
        fh = fname
    else:
        # BUGFIX: error message referenced undefined name `fh`
        # (raised NameError instead of the intended TypeError)
        raise TypeError("IMPORT: fname argument has unknown type %s"
                        % type(fname))

    # reading all directly b/c csv.reader takes only lines or a real fh in bytes
    # and we usually have a "false" fh (uploadedfile.InMemoryUploadedFile) in strings
    # (but we checked its size before!)
    contents = fh.read().decode("UTF-8").split("\n")

    # end of CSV read
    fh.close()
    # <class 'django.core.files.uploadedfile.InMemoryUploadedFile'>

    ngrams_csv_rows = reader(contents,
                             delimiter=delimiter,
                             quoting=QUOTE_MINIMAL)

    # for stats
    n_read_lines = 0
    n_total_ng = 0
    n_added_ng = 0
    n_group_relations = 0

    # load CSV + initial checks
    for i, csv_row in enumerate(ngrams_csv_rows):
        # fyi
        n_read_lines += 1
        # print("---------------READ LINE %i" % i)
        if not len(csv_row):
            continue

        try:
            this_ng_oldid  = str(csv_row[0])
            this_ng_term   = str(csv_row[1])
            this_ng_nwords = int(csv_row[2])
            this_list_type = str(csv_row[3])
            this_ng_group  = str(csv_row[4])

            # string normalizations
            this_ng_term = normalize_terms(normalize_chars(this_ng_term))
        except:
            if i == 0:
                print("IMPORT WARN: (skip line) probable header line at CSV %s:l.0"
                      % fname)
                continue
            else:
                raise ValueError("Error on CSV read line %i" % n_read_lines)

        # --- check format before any old ID retrieve
        if not match(r"\d+$", this_ng_oldid):
            print("IMPORT WARN: (skip line) bad ID at CSV %s:l.%i"
                  % (fname, i))
            continue
        else:
            this_ng_oldid = int(this_ng_oldid)

        # --- term checking
        if not len(this_ng_term) > 0:
            print("IMPORT WARN: (skip line) empty term at CSV %s:l.%i"
                  % (fname, i))
            ignored_oldids.append(this_ng_oldid)
            continue

        # --- check if not a duplicate string
        if this_ng_term in imported_ngrams_oldids:
            ignored_oldids.append(this_ng_oldid)
            print("IMPORT WARN: (skip line) term appears more than once (previous id: %i) at CSV %s:l.%i"
                  % (imported_ngrams_oldids[this_ng_term], fname, i))
            continue

        # --- check correct list type
        if not this_list_type in ['stop', 'main', 'map']:
            ignored_oldids.append(this_ng_oldid)
            print("IMPORT WARN: (skip line) wrong list type at CSV %s:l.%i"
                  % (fname, i))
            continue

        # ================= Store the data ====================
        # the ngram data
        imported_ngrams_dbdata.append([this_ng_term, this_ng_nwords])
        imported_ngrams_oldids[this_ng_term] = this_ng_oldid

        # and the "list to ngram" relation
        import_nodes_ngrams[this_list_type].append(this_ng_oldid)

        # ====== Store synonyms from the import (if any) ======
        if len(this_ng_group) != 0:
            # BUGFIX: split on the group_delimiter parameter
            # (was hard-coded '|', silently ignoring the argument)
            group_as_external_ids = this_ng_group.split(group_delimiter)
            for external_subform_id in group_as_external_ids:
                external_subform_id = int(external_subform_id)
                imported_groupings.append(
                    (this_ng_oldid, external_subform_id))

    # ======== ngram save + id lookup =========
    n_total_ng = len(imported_ngrams_dbdata)

    # returns a dict {term => id} and a count of inserted ones
    (new_ngrams_ids, n_added_ng) = bulk_insert_ifnotexists(
        model=Ngram,
        uniquekey='terms',
        fields=('terms', 'n'),
        data=imported_ngrams_dbdata,
        do_stats=True,
    )
    del imported_ngrams_dbdata

    # loop on old ngrams and create direct mapping old_id => new_id
    old_to_new_id_map = {}
    for term, oldid in imported_ngrams_oldids.items():
        old_to_new_id_map[oldid] = new_ngrams_ids[term]

    del new_ngrams_ids
    del imported_ngrams_oldids

    # print(old_to_new_id_map)
    # print(import_nodes_ngrams)

    # ======== Import into lists =========
    # 3 x abstract lists + 1 translations
    result = {
        'map':       UnweightedList(),
        'main':      UnweightedList(),
        'stop':      UnweightedList(),
        'groupings': Translations(),
    }

    for list_type in import_nodes_ngrams:
        for old_id in import_nodes_ngrams[list_type]:
            new_id = old_to_new_id_map[old_id]
            # add to the abstract list
            result[list_type].items.add(new_id)

        # for main also add map elements
        if list_type == 'main':
            for old_id in import_nodes_ngrams['map']:
                new_id = old_to_new_id_map[old_id]
                result['main'].items.add(new_id)

    # ======== Synonyms =========
    for (x, y) in imported_groupings:
        if (x not in ignored_oldids) and (y not in ignored_oldids):
            new_mainform_id = old_to_new_id_map[x]
            new_subform_id  = old_to_new_id_map[y]
            # /!\ Translations use (subform => mainform) order
            result['groupings'].items[new_subform_id] = new_mainform_id
            n_group_relations += 1

    # ------------------------------------------------------------------
    print("IMPORT: read %i lines from the CSV" % n_read_lines)
    print("IMPORT: read %i terms (%i added and %i already existing)"
          % (n_total_ng, n_added_ng, n_total_ng - n_added_ng))
    print("IMPORT: read %i grouping relations" % n_group_relations)

    return result
def merge_ngramlists(new_lists=None, onto_corpus=None, del_originals=None):
    """
    Integrates an external terms table to the current one:
       - merges groups (using group_union() function)
       - resolves conflicts if terms belong in different lists
          > map wins over both other types
          > main wins over stop
          > stop never wins

    @param new_lists: a dict of *new* imported lists with format:
            {'stop':      UnweightedList,
             'main':      UnweightedList,
             'map':       UnweightedList,
             'groupings': Translations }
           /!\\ this dict is consumed (its keys are deleted) during the merge
           (BUGFIX: defaults were shared mutable {} / [] literals; now None)

    @param onto_corpus: a corpus node to get the *old* lists

    @param del_originals: an array of original wordlists to ignore
           and delete during the merge
           possible values : ['stop','main','map']

           par exemple
           del_originals = ['stop','main'] => effacera la stoplist
                                              et la mainlist
                                           mais pas la maplist qui sera fusionnée
                         (les éléments de la map list
                          seront remis dans la main à la fin)

    NB: Uses group_tools.group_union() to merge the synonym links.

    FIXME: new terms created at import_ngramlists() can now be added to lists
           but are never added to docs
    """
    if new_lists is None:
        new_lists = {}
    if del_originals is None:
        del_originals = []

    # log to send back to client-side (lines will be joined)
    my_log = []

    # the tgt node arg has to be a corpus here
    if not hasattr(onto_corpus, "typename") or onto_corpus.typename != "CORPUS":
        raise TypeError("IMPORT: 'onto_corpus' argument must be a Corpus Node")

    # for stats
    added_nd_ng = 0   # number of added list elements

    # our list shortcuts will be 0,1,2 (aka lid)
    # by order of precedence
    linfos = [
        {'key': 'stop', 'name': "STOPLIST"},    # lid = 0
        {'key': 'main', 'name': "MAINLIST"},    # lid = 1
        {'key': 'map',  'name': "MAPLIST"}      # lid = 2
    ]

    # ======== Get the old lists =========
    old_lists = {}

    # DB nodes stored with same indices 0,1,2 (resp. stop, miam and map)
    # find target ids of the list node objects
    tgt_nodeids = [
        onto_corpus.children("STOPLIST").first().id,
        onto_corpus.children("MAINLIST").first().id,
        onto_corpus.children("MAPLIST").first().id
    ]

    old_group_id = onto_corpus.children("GROUPLIST").first().id

    # retrieve old data into old_lists[list_type]...
    # ----------------------------------------------
    for lid, linfo in enumerate(linfos):
        list_type = linfo['key']
        if list_type not in del_originals:
            # NB can't use UnweightedList(tgt_nodeids[lid])
            #    because we need to include out-of-list subforms
            list_ngrams_q = query_list(tgt_nodeids[lid],
                                       groupings_id=old_group_id)
            old_lists[list_type] = UnweightedList(list_ngrams_q.all())
        else:
            # ...or use empty objects if replacing old list
            # ----------------------------------------------
            old_lists[list_type] = UnweightedList()
            msg = "MERGE: ignoring old %s which will be overwritten" % linfo['name']
            print(msg)
            my_log.append(msg)

    # ======== Merging all involved ngrams =========
    # all memberships with resolved conflicts of interfering memberships
    resolved_memberships = {}

    for list_set in [old_lists, new_lists]:
        for lid, info in enumerate(linfos):
            list_type = info['key']
            # we use the fact that lids are ordered ints...
            for ng_id in list_set[list_type].items:
                if ng_id not in resolved_memberships:
                    resolved_memberships[ng_id] = lid
                else:
                    # ...now resolving is simply taking the max
                    #    stop < main < map
                    resolved_memberships[ng_id] = max(
                        lid,
                        resolved_memberships[ng_id]
                    )
    # now each ngram is only in its most important list
    # -------------------------------------------------
    # NB temporarily map items are not in main anymore
    #    but we'll copy it at the end
    # NB temporarily all subforms were treated separately
    #    from mainforms but we'll force them into same list
    #    after we merge the groups
    del old_lists
    del new_lists['stop']
    del new_lists['main']
    del new_lists['map']

    # ======== Merging old and new groups =========
    # get the arcs already in the target DB (directed couples)
    previous_links = session.query(
        NodeNgramNgram.ngram1_id,
        NodeNgramNgram.ngram2_id
    ).filter(
        NodeNgramNgram.node_id == old_group_id
    ).all()

    n_links_previous = len(previous_links)

    # same format for the new arcs (Translations ~~~> array of couples)
    translated_imported_links = []
    add_link = translated_imported_links.append
    n_links_added = 0
    for (y, x) in new_lists['groupings'].items.items():
        add_link((x, y))
        n_links_added += 1
    del new_lists

    # group_union: joins 2 different synonym-links lists into 1 new list
    new_links = group_union(previous_links, translated_imported_links)
    del previous_links
    del translated_imported_links

    n_links_after = len(new_links)

    merged_group = Translations([(y, x) for (x, y) in new_links])
    del new_links

    # ======== Overwrite old data with new =========
    merged_group.save(old_group_id)

    msg = "MERGE: groupings %i updated (links before/added/after: %i/%i/%i)" % (
        old_group_id, n_links_previous, n_links_added, n_links_after)
    my_log.append(msg)
    print(msg)

    # ======== Target list(s) append data =========
    # if list 2 => write in both tgt_data_lists [1,2]
    # lists 0 or 1 => straightforward targets [0] or [1]
    merged_results = {
        'stop': UnweightedList(),
        'main': UnweightedList(),
        'map':  UnweightedList()
    }

    for (ng_id, winner_lid) in resolved_memberships.items():
        ## 1) using the new groups
        # normal case if not a subform
        if ng_id not in merged_group.items:
            target_lid = winner_lid
        # inherit case if is a subform
        else:
            mainform_id = merged_group.items[ng_id]
            # inherited winner
            try:
                target_lid = resolved_memberships[mainform_id]
            except KeyError:
                target_lid = winner_lid
                print("MERGE: WARN ng_id %i has incorrect mainform %i ?"
                      % (ng_id, mainform_id))

        ## 2) map => map + main
        if target_lid == 2:
            todo_lids = [1, 2]
        else:
            todo_lids = [target_lid]

        ## 3) storage
        for lid in todo_lids:
            list_type = linfos[lid]['key']
            merged_results[list_type].items.add(ng_id)

    # print("IMPORT: added %i elements in the lists indices" % added_nd_ng)

    # ======== Overwrite old data with new =========
    for lid, info in enumerate(linfos):
        tgt_id = tgt_nodeids[lid]
        list_type = info['key']
        result = merged_results[list_type]
        result.save(tgt_id)
        msg = "MERGE: %s %i updated (new size: %i)" % (
            info['name'], tgt_id, len(merged_results[list_type].items))
        my_log.append(msg)
        print(msg)

    # return a log
    return("\n".join(my_log))
gargantext/util/toolchain/ngrams_extraction.py
View file @
3a445e73
...
...
@@ -9,6 +9,9 @@ from re import sub
from
gargantext.util.scheduling
import
scheduled
def
_integrate_associations
(
nodes_ngrams_count
,
ngrams_data
,
db
,
cursor
):
"""
@param ngrams_data a set like {('single word', 2), ('apple', 1),...}
"""
print
(
'INTEGRATE'
)
# integrate ngrams
ngrams_ids
=
bulk_insert_ifnotexists
(
...
...
gargantext/views/api/ngramlists.py
View file @
3a445e73
...
...
@@ -8,118 +8,88 @@ API views for advanced operations on ngrams and ngramlists
"""
from
gargantext.util.http
import
APIView
,
get_parameters
,
JsonHttpResponse
,
\
ValidationException
,
Http404
from
gargantext.util.db
import
session
,
aliased
,
desc
,
bulk_insert
ValidationException
,
Http404
,
HttpResponse
from
gargantext.util.db
import
session
,
aliased
,
bulk_insert
from
gargantext.util.db_cache
import
cache
from
sqlalchemy
import
tuple_
from
gargantext.models
import
Ngram
,
NodeNgram
,
NodeNodeNgram
,
NodeNgramNgram
from
gargantext.util.lists
import
UnweightedList
,
Translations
# useful subroutines
from
gargantext.util.ngramlists_tools
import
query_list
,
export_ngramlists
,
\
import_ngramlists
,
merge_ngramlists
from
gargantext.util.group_tools
import
query_grouped_ngrams
def
_query_list
(
list_id
,
pagination_limit
=
None
,
pagination_offset
=
None
,
details
=
False
,
scoring_metric_id
=
None
):
class
List
(
APIView
):
"""
see already available API query api/nodes/<list_id>?fields[]=ngrams
"""
Paginated listing of ngram_ids in a NodeNgram lists.
pass
Works for a mainlist or stoplist or maplist (not grouplists!)
Parameter:
- pagination_limit, pagination_offset
- details: if False, send just the array of ngram_ids
if True, send triples with (ngram_id, term, scoring)
^^^^^^^
- scoring_metric_id: id of a scoring metric node (TFIDF or OCCS)
(for details and sorting)
class
CSVLists
(
APIView
):
"""
if
not
details
:
# simple contents
query
=
session
.
query
(
NodeNgram
.
ngram_id
)
.
filter
(
NodeNgram
.
node_id
==
list_id
)
else
:
# detailed contents (terms and some NodeNodeNgram for score)
# NB: score can be undefined (eg ex-subform that now became free)
# ==> we need outerjoin
# and the filter needs to have scoring_metric_id so we do it before
ScoresTable
=
(
session
.
query
(
NodeNodeNgram
.
score
,
NodeNodeNgram
.
ngram_id
)
.
filter
(
NodeNodeNgram
.
node1_id
==
scoring_metric_id
)
.
subquery
()
)
query
=
(
session
.
query
(
NodeNgram
.
ngram_id
,
Ngram
.
terms
,
ScoresTable
.
c
.
score
)
.
join
(
Ngram
,
NodeNgram
.
ngram_id
==
Ngram
.
id
)
# main filter ----------------------
.
filter
(
NodeNgram
.
node_id
==
list_id
)
# scores if possible
.
outerjoin
(
ScoresTable
,
ScoresTable
.
c
.
ngram_id
==
NodeNgram
.
ngram_id
)
.
order_by
(
desc
(
ScoresTable
.
c
.
score
))
)
For CSV exports of all lists of a corpus
if
pagination_limit
:
query
=
query
.
limit
(
pagination_limit
)
Or CSV import into existing lists as "patch"
"""
def
get
(
self
,
request
):
params
=
get_parameters
(
request
)
corpus_id
=
int
(
params
.
pop
(
"corpus"
))
corpus_node
=
cache
.
Node
[
corpus_id
]
if
pagination_offset
:
query
=
query
.
offset
(
pagination_offsets
)
# response is file-like + headers
response
=
HttpResponse
(
content_type
=
'text/csv'
)
response
[
'Content-Disposition'
]
=
'attachment; filename="corpus-
%
i_gargantext_term_list.csv"'
%
corpus_id
return
query
# fill the response with the data
export_ngramlists
(
corpus_node
,
fname
=
response
,
titles
=
True
)
return
response
def
post
(
self
,
request
):
"""
Merge the lists of a corpus with other lists from a CSV source
or from another corpus
params in request.GET:
corpus: the corpus whose lists are getting patched
params in request.FILES:
csvsource: the csv file
def
_query_grouped_ngrams
(
groupings_id
,
details
=
False
,
scoring_metric_id
=
None
):
"""
Listing of "hidden" ngram_ids from the groups
or in get
dbsource: another corpus instead of the csvfile
(? this last option should perhaps not be in CSVLists ?)
Works only for grouplists
NB: not using PATCH because we'll need POST file upload
Parameter:
- details: if False, send just the array of ngram_ids
if True, send triples with (ngram_id, term, scoring)
^^^^^^^
deprecated: scoring_metric_id: id of a scoring metric node (TFIDF or OCCS)
(for details and sorting)
(no more OCCS counts of subforms)
"""
if
not
details
:
# simple contents
query
=
session
.
query
(
NodeNgramNgram
.
ngram2_id
)
else
:
# detailed contents (terms and some NodeNodeNgram for score)
query
=
(
session
.
query
(
NodeNgramNgram
.
ngram2_id
,
Ngram
.
terms
,
# NodeNodeNgram.score #
)
.
join
(
Ngram
,
NodeNgramNgram
.
ngram2_id
==
Ngram
.
id
)
# .join(NodeNodeNgram, NodeNgramNgram.ngram2_id == NodeNodeNgram.ngram_id)
# .filter(NodeNodeNgram.node1_id == scoring_metric_id)
# .order_by(desc(NodeNodeNgram.score))
)
/!
\
We assume we checked the file size client-side before upload
# main filter
# -----------
query
=
query
.
filter
(
NodeNgramNgram
.
node_id
==
groupings_id
)
£TODO check authentication and user.id
"""
# this time the corpus param is the one with the target lists to be patched
params
=
get_parameters
(
request
)
corpus_id
=
int
(
params
.
pop
(
"onto_corpus"
))
corpus_node
=
cache
.
Node
[
corpus_id
]
return
query
# request also contains the file
# csv_file has type django.core.files.uploadedfile.InMemoryUploadedFile
# ----------------------
csv_file
=
request
.
data
[
'csvfile'
]
# import the csv
new_lists
=
import_ngramlists
(
csv_file
)
del
csv_file
# merge the new_lists onto those of the target corpus
log_msg
=
merge_ngramlists
(
new_lists
,
onto_corpus
=
corpus_node
)
return
JsonHttpResponse
({
'log'
:
log_msg
,
},
200
)
class
List
(
APIView
):
"""
see already available API query api/nodes/<list_id>?fields[]=ngrams
"""
pass
class
GroupChange
(
APIView
):
...
...
@@ -441,7 +411,7 @@ class MapListGlance(APIView):
listmembers
=
{
'maplist'
:[]}
# ngram ids sorted per list name
# infos for all ngrams from maplist
map_ngrams
=
_
query_list
(
maplist_id
,
details
=
True
,
map_ngrams
=
query_list
(
maplist_id
,
details
=
True
,
scoring_metric_id
=
scores_id
)
.
all
()
# ex: [(8805, 'mean age', 4.0),
...
...
@@ -566,25 +536,25 @@ class ListFamily(APIView):
if
"head"
in
parameters
:
# head <=> only mainlist AND only k top ngrams
glance_limit
=
int
(
parameters
[
'head'
])
mainlist_query
=
_
query_list
(
mainlist_id
,
details
=
True
,
mainlist_query
=
query_list
(
mainlist_id
,
details
=
True
,
pagination_limit
=
glance_limit
,
scoring_metric_id
=
scores_id
)
else
:
# infos for all ngrams from mainlist
mainlist_query
=
_
query_list
(
mainlist_id
,
details
=
True
,
mainlist_query
=
query_list
(
mainlist_id
,
details
=
True
,
scoring_metric_id
=
scores_id
)
# infos for grouped ngrams, absent from mainlist
hidden_ngrams_query
=
_
query_grouped_ngrams
(
groups_id
,
details
=
True
,
hidden_ngrams_query
=
query_grouped_ngrams
(
groups_id
,
details
=
True
,
scoring_metric_id
=
scores_id
)
# infos for stoplist terms, absent from mainlist
stop_ngrams_query
=
_
query_list
(
other_list_ids
[
'stoplist'
],
details
=
True
,
stop_ngrams_query
=
query_list
(
other_list_ids
[
'stoplist'
],
details
=
True
,
scoring_metric_id
=
scores_id
)
# and for the other lists (stop and map)
# no details needed here, just the member ids
for
li
in
other_list_ids
:
li_elts
=
_
query_list
(
other_list_ids
[
li
],
details
=
False
li_elts
=
query_list
(
other_list_ids
[
li
],
details
=
False
)
.
all
()
# simple array of ngram_ids
listmembers
[
li
]
=
[
ng
[
0
]
for
ng
in
li_elts
]
...
...
gargantext/views/api/urls.py
View file @
3a445e73
...
...
@@ -27,6 +27,15 @@ urlpatterns = [ url(r'^nodes$' , nodes.NodeListResource.as_view()
# \
# corpus id
,
url
(
r'^ngramlists/export$'
,
ngramlists
.
CSVLists
.
as_view
()
)
# get a CSV export of the ngramlists of a corpus
# ex: GET ngramlists/export?corpus=43
# TODO : unify to a /api/ngrams?formatted=csv
# (similar to /api/nodes?formatted=csv)
,
url
(
r'^ngramlists/import$'
,
ngramlists
.
CSVLists
.
as_view
()
)
# same handling class as export (CSVLists)
# but this route used only for POST + file
,
url
(
r'^ngramlists/change$'
,
ngramlists
.
ListChange
.
as_view
()
)
# add or remove ngram from a list
...
...
gargantext/views/pages/terms.py
View file @
3a445e73
...
...
@@ -33,6 +33,9 @@ def ngramtable(request, project_id, corpus_id):
'project'
:
project
,
'corpus'
:
corpus
,
'resourcename'
:
resourcename
(
corpus
),
'view'
:
'terms'
'view'
:
'terms'
,
# for the CSV import modal
'csvimportroute'
:
"/api/ngramlists/import?onto_corpus=
%
i"
%
corpus
.
id
},
)
install/python/requirements.txt
View file @
3a445e73
...
...
@@ -11,6 +11,7 @@ django-pgfields==1.4.4
django-pgjsonb==0.0.16
djangorestframework==3.3.2
html5lib==0.9999999
python-igraph>=0.7.1
jdatetime==1.7.2
kombu==3.0.33 # messaging
nltk==3.1
...
...
static/lib/gargantext/menu.css
View file @
3a445e73
...
...
@@ -19,3 +19,13 @@
line-height
:
.85
;
margin-bottom
:
-5px
;
}
.exportbtn
{
/* border: 1px solid #333 ; */
margin-top
:
17px
;
/* valigns with bootstrap h2 */
}
.btn
.glyphicon
{
/* glyphicons are always rendered too high within bootstrap buttons */
vertical-align
:
middle
}
templates/pages/corpora/terms.html
View file @
3a445e73
...
...
@@ -72,6 +72,15 @@
<button
id=
"Save_All"
class=
"btn btn-muted"
disabled
style=
"font-size:120%"
>
<b>
Save all changes
</b>
</button>
<br/>
<br/>
<!-- import icon -->
<span
class=
"needsaveicon glyphicon glyphicon-import"
></span>
<button
id=
"ImportList"
class=
"btn btn-warning"
style=
"font-size:120%"
onclick=
"$('#csvimport').modal('show');"
>
<b>
Import a Termlist
</b>
</button>
</div>
<!-- see in javascript function queries.functions['my_state_filter'] -->
<div
class=
"pull-right"
style=
"margin-top:2.1em;padding-left:1em;"
>
...
...
@@ -107,25 +116,110 @@
</div>
<!-- /div panel -->
</div>
<!-- /jumbotron -->
<!--
<button id="ImportList" onclick="GetUserPortfolio();" class="btn btn-warning">
Import a Corpus-List
</button>
-->
<!--</div> This div is closed in the menu !-->
<!--</div> This div is closed in the menu !-->
<!--
# stub to import a list (aka orange button)
<button id="ImportList" onclick="GetUserPortfolio();" class="btn btn-warning">Import a Corpus-List</button>
-->
<div
class=
"modal"
aria-hidden=
"true"
id=
"csvimport"
>
<div
class=
"modal-dialog"
>
<div
class=
"modal-content"
>
<div
class=
"modal-header"
>
<button
type=
"button"
class=
"close"
data-dismiss=
"modal"
aria-hidden=
"true"
>
×
</button>
<h3
id=
"myModalLabel"
>
Import a CSV term list
</h3>
</div>
<div
class=
"modal-body"
id=
"uploadform"
>
<form
id=
"csvimportform"
onsubmit=
"return postCSV(event)"
enctype=
"multipart/form-data"
method=
"post"
>
{% csrf_token %}
<label>
From your disk:
</label>
<input
type=
"file"
id=
"csvfile"
accept=
"text/csv"
>
<br/>
<label>
From another corpus:
</label>
<p>
TODO
</p>
<br/>
<input
type=
"submit"
class=
"btn btn-xs btn-info"
id=
"csvsubmit"
value=
"Submit"
/>
</form>
</div>
<div
class=
"modal-footer"
id=
"formanswer"
></div>
</div>
</div>
</div>
<script
type=
"text/javascript"
src=
"{% static "
lib
/
jquery
/
dynatable
/
jquery
.
dynatable
.
js
"
%}"
></script>
<!-- custom-lib for dynatable.js and dc.js -->
<script
type=
"text/javascript"
src=
"{% static "
lib
/
gargantext
/
NGrams_dyna_chart_and_table
.
js
"
%}"
></script>
<script
type=
"text/javascript"
>
/* merci c24b !
* Uses csvimportroute variable from the django template
* Ex: /api/ngramlists/import?onto_corpus=corpus_id
*
* Uses input#csvfile as source data.
*/
function
postCSV
(
e
){
// don't do page reload of usual submits
e
.
preventDefault
()
// 2MB ≈ 70000 ngrams
var
max_size
=
2097152
// we take it straight from the input element
theFile
=
$
(
'input#csvfile'
)[
0
].
files
[
0
]
// debug
// console.log(theFile.name, "size", theFile.size, theFile.lastModifiedDate)
if
(
!
theFile
)
{
console
.
warn
(
'Ignoring "submit": no provided file'
)
return
false
}
else
if
(
theFile
.
size
>
max_size
)
{
console
.
warn
(
'Ignoring "submit": file is too big'
)
$
(
'#formanswer'
).
html
(
'The import failed: your file is too big ('
+
max_size
/
1024
+
'kB max).'
);
return
false
}
// normal case
else
{
// append into an empty form (or fixme: initialize it using form element)
var
myFileFormData
=
new
FormData
();
myFileFormData
.
append
(
"csvfile"
,
theFile
)
//postCorpusFile
$
.
ajax
({
url
:
"{{csvimportroute | safe}}"
,
type
:
'POST'
,
async
:
true
,
contentType
:
false
,
processData
:
false
,
data
:
myFileFormData
,
beforeSend
:
function
(
xhr
)
{
xhr
.
setRequestHeader
(
"X-CSRFToken"
,
getCookie
(
"csrftoken"
));
},
success
:
function
(
response
)
{
my_html
=
"<h2 color='green'>IMPORT OK ! </h2>"
my_html
+=
"<p class='note'>"
+
response
[
'log'
].
replace
(
/
\n
/g
,
'<br/>'
)
+
"</p>"
my_html
+=
"<p'>(this page will reload in 3s)</p>"
$
(
'#formanswer'
).
html
(
my_html
);
console
.
log
(
response
)
;
// reload after 3s
setTimeout
(
"location.reload(true)"
,
3000
);
},
error
:
function
(
result
)
{
$
(
'#formanswer'
).
html
(
'Erreur'
);
console
.
error
(
result
);
},
});
$
(
'#formanswer'
).
html
(
'CSV import in Progress'
);
}
};
</script>
{% endblock %}
templates/pages/menu.html
View file @
3a445e73
...
...
@@ -41,7 +41,7 @@
{% if corpus %}
<li><a
href=
"/projects/{{project.id}}/corpora/{{corpus.id}}"
>
<span
class=
"glyphicon glyphicon-file"
aria-hidden=
"true"
></span>
{{corpus.name | truncatechars:
1
5}}
{{corpus.name | truncatechars:
2
5}}
</a>
</li>
{% endif %}
...
...
@@ -150,12 +150,32 @@
<br>
<br>
<div
class=
"row"
>
<h3>
<a
href=
"/projects/{{project.id}}"
>
<span
class=
"glyphicon glyphicon-book"
aria-hidden=
"true"
></span>
{{ project.name | truncatechars:50}}
<div
class=
"col-md-6"
>
<h3>
<a
href=
"/projects/{{project.id}}"
>
<span
class=
"glyphicon glyphicon-book"
aria-hidden=
"true"
></span>
{{ project.name | truncatechars:50}}
</a>
</h3>
</div>
<!-- export button -->
<div
class=
"col-md-6"
>
{% if view == 'terms' %}
<a
class=
"btn btn-primary exportbtn pull-right"
role=
"button"
href=
"/api/ngramlists/export?corpus={{corpus.id}}"
title=
"Export terms table in CSV"
>
Export terms table
<span
class=
"glyphicon glyphicon-download"
aria-hidden=
"true"
></span>
</a>
{% elif view == 'titles' %}
<a
class=
"btn btn-primary exportbtn pull-right"
role=
"button"
href=
"/api/nodes?parent_id={{corpus.id}}&types[]=DOCUMENT&pagination_limit=100000&formated=csv"
title=
"Export full corpus in CSV"
>
Export corpus
<span
class=
"glyphicon glyphicon-download"
aria-hidden=
"true"
></span>
</a>
</h3>
{% else %}
<!-- TODO export journal table -->
{% endif %}
</div>
</div>
<div
class=
"row"
>
<div
class=
"col-md-1"
>
...
...
@@ -167,10 +187,7 @@
</h3>
<h3>
<span
class=
"glyphicon glyphicon-file"
aria-hidden=
"true"
></span>
{{ corpus.name | truncatechars:20 }}
<a
class=
"btn btn-primary"
role=
"button"
href=
"/api/nodes?parent_id={{corpus.id}}&types[]=DOCUMENT&pagination_limit=100000&formated=csv"
>
<span
class=
"glyphicon glyphicon-download"
aria-hidden=
"true"
></span>
</a>
{{ corpus.name | truncatechars:30 }}
</h3>
</div>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment