Commit 6b6937af authored by Romain Loth

fix bulk_insert error on associations: new solution combining the first idea (integrating the remainder left over after the loop) with the new one (removing the extra 'for' loop over the languages)
parent 820df253
@@ -51,9 +51,10 @@ def extract_ngrams(corpus, keys=DEFAULT_INDEX_FIELDS, do_subngrams = DEFAULT_IND
     docs = [doc for doc in corpus.children('DOCUMENT') if doc.id not in corpus.skipped_docs]
     tagger_bots = {lang: load_tagger(lang)() for lang in corpus.languages if lang != "__skipped__"}
     #sort docs by lang?
-    for lang, tagger in tagger_bots.items():
+    # for lang, tagger in tagger_bots.items():
     for documents_count, document in enumerate(docs):
-        language_iso2 = document.hyperdata.get('language_iso2', lang)
+        language_iso2 = document.hyperdata.get('language_iso2')
+        tagger = tagger_bots[language_iso2]
         #print(language_iso2)
         for key in keys:
             try:
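
To make the first hunk's intent concrete, here is a minimal, self-contained sketch of the same idea; SimpleTagger, extract and the dict-shaped documents are illustrative stand-ins, not the project's real tagger or document objects. One tagger per language is still built up front, but each document now selects its tagger through its own language_iso2 field, so the extra outer loop over languages (which visited every document once per language) disappears.

# Illustrative sketch only (names are stand-ins, not the project's API):
# one tagger per language is built once, then each document selects its
# tagger by its own language code instead of being re-tagged per language.

class SimpleTagger:
    """Stand-in for the real POS taggers returned by load_tagger(lang)()."""
    def __init__(self, lang):
        self.lang = lang
    def tag_text(self, text):
        # placeholder tagging: every token gets a dummy 'NN' tag
        return [(token, 'NN') for token in text.split()]

def extract(docs, languages):
    # one tagger per corpus language, built up front (as in the diff)
    tagger_bots = {lang: SimpleTagger(lang) for lang in languages}
    for documents_count, document in enumerate(docs):
        # the document's own language picks the tagger: no outer language loop
        language_iso2 = document.get('language_iso2')
        tagger = tagger_bots[language_iso2]
        for token, pos in tagger.tag_text(document.get('abstract', '')):
            pass  # n-gram indexing would happen here

extract([{'language_iso2': 'en', 'abstract': 'graph based text mining'},
         {'language_iso2': 'fr', 'abstract': 'fouille de textes scientifiques'}],
        ['en', 'fr'])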
@@ -97,9 +98,13 @@ def extract_ngrams(corpus, keys=DEFAULT_INDEX_FIELDS, do_subngrams = DEFAULT_IND
             corpus.save_hyperdata()
             session.add(corpus)
             session.commit()
     else:
-        # integrate ngrams and nodes-ngrams
+        # integrate ngrams and nodes-ngrams (the rest)
+        if len(nodes_ngrams_count) > 0:
             _integrate_associations(nodes_ngrams_count, ngrams_data, db, cursor)
             nodes_ngrams_count.clear()
             ngrams_data.clear()
         corpus.status('Ngrams', progress=documents_count+1, complete=True)
         corpus.save_hyperdata()
         session.commit()
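
The second hunk follows a batch-and-remainder pattern: associations are bulk-inserted in batches inside the document loop, and the for/else clause integrates whatever is left once the loop ends, with a len() guard so the bulk insert is never called on an empty batch (presumably the source of the reported bulk_insert error). A minimal sketch follows, with flush_batch and BATCH_SIZE standing in for the real _integrate_associations call and batch constant.

# Hedged sketch of the batch + remainder pattern; flush_batch stands in for
# the real bulk insert done by _integrate_associations.
BATCH_SIZE = 1000

def flush_batch(nodes_ngrams_count, ngrams_data):
    # stand-in for bulk-inserting ngrams and node<->ngram associations
    print("flushing", len(nodes_ngrams_count), "associations")

def process(documents):
    nodes_ngrams_count = {}   # (doc_index, ngram) -> occurrence count
    ngrams_data = set()       # distinct ngram strings seen in this batch
    for documents_count, document in enumerate(documents):
        for ngram in document.split():
            key = (documents_count, ngram)
            nodes_ngrams_count[key] = nodes_ngrams_count.get(key, 0) + 1
            ngrams_data.add(ngram)
        if documents_count % BATCH_SIZE == 0:
            flush_batch(nodes_ngrams_count, ngrams_data)
            nodes_ngrams_count.clear()
            ngrams_data.clear()
    else:
        # loop finished: integrate the remainder, but only if there is one,
        # so the bulk insert never receives an empty list
        if len(nodes_ngrams_count) > 0:
            flush_batch(nodes_ngrams_count, ngrams_data)
            nodes_ngrams_count.clear()
            ngrams_data.clear()

process(["graph based text mining", "text mining on scientific corpora"])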