Commit 3f2b219d authored by Romain Loth

[FIX] import termlist: was blocking on empty columns (BUG-7)

parent 2e4037c4
...@@ -462,6 +462,9 @@ def import_ngramlists(fname, delimiter=DEFAULT_CSV_DELIM, ...@@ -462,6 +462,9 @@ def import_ngramlists(fname, delimiter=DEFAULT_CSV_DELIM,
for j, colname in enumerate(csv_row): for j, colname in enumerate(csv_row):
if colname in ['label', 'status', 'forms']: if colname in ['label', 'status', 'forms']:
columns[colname] = j columns[colname] = j
# skip empty columns
elif match(r'^\s*$',colname):
pass
else: else:
raise ValueError('Wrong header "%s" on line %i (only possible headers are "label", "forms" and "status")' % (colname, n_read_lines)) raise ValueError('Wrong header "%s" on line %i (only possible headers are "label", "forms" and "status")' % (colname, n_read_lines))
if 'label' not in columns: if 'label' not in columns:
...@@ -548,7 +551,9 @@ def import_ngramlists(fname, delimiter=DEFAULT_CSV_DELIM, ...@@ -548,7 +551,9 @@ def import_ngramlists(fname, delimiter=DEFAULT_CSV_DELIM,
imported_ngrams_dbdata.append((ngram_str, n_words)) imported_ngrams_dbdata.append((ngram_str, n_words))
# returns a dict {term => id} and a count of inserted ones # returns a dict {term => id} and a count of inserted ones
# -------------------------
(new_ngrams_ids, n_added_ng) = bulk_insert_ifnotexists( (new_ngrams_ids, n_added_ng) = bulk_insert_ifnotexists(
# -------------------------
model = Ngram, model = Ngram,
uniquekey = 'terms', uniquekey = 'terms',
fields = ('terms', 'n'), fields = ('terms', 'n'),
...@@ -612,7 +617,7 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]): ...@@ -612,7 +617,7 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
- resolves conflicts if terms belong in different lists - resolves conflicts if terms belong in different lists
> map wins over both other types > map wins over both other types
> main wins over stop > main wins over stop
> stop never wins > stop never wins £TODO STOP wins over candidates from main
@param new_lists: a dict of *new* imported lists with format: @param new_lists: a dict of *new* imported lists with format:
{'stop': UnweightedList, {'stop': UnweightedList,
...@@ -667,7 +672,10 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]): ...@@ -667,7 +672,10 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
for ng_id in new_lists[list_type].items: for ng_id in new_lists[list_type].items:
collect(ng_id) collect(ng_id)
from gargantext.util.toolchain.main import t
print("MERGE DEBUG: starting index_new_ngrams", t())
n_added = index_new_ngrams(all_possibly_new_ngram_ids, onto_corpus) n_added = index_new_ngrams(all_possibly_new_ngram_ids, onto_corpus)
print("MERGE DEBUG: finished index_new_ngrams", t())
my_log.append("MERGE: added %i new ngram occurrences in docs" % n_added) my_log.append("MERGE: added %i new ngram occurrences in docs" % n_added)
...@@ -677,7 +685,7 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]): ...@@ -677,7 +685,7 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
# DB nodes stored with same indices 0,1,2 (resp. stop, miam and map) # DB nodes stored with same indices 0,1,2 (resp. stop, miam and map)
# find target ids of the list node objects # find target ids of the list node objects
tgt_nodeids = [ tgt_nodeids = [
onto_corpus.children("STOPLIST").first().id, onto_corpus.children("STOPLIST").first().id, # £todo via parent project?
onto_corpus.children("MAINLIST").first().id, onto_corpus.children("MAINLIST").first().id,
onto_corpus.children("MAPLIST").first().id onto_corpus.children("MAPLIST").first().id
] ]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment