Commit cd9b6308 authored by Romain Loth

fix bulk_insert error on associations: new solution mixing first idea about...

fix bulk_insert error on associations: new solution combining the first idea about the remainder of the loop with the new idea about there being one 'for' loop too many over the languages
parent ab88ba32
...@@ -51,9 +51,10 @@ def extract_ngrams(corpus, keys=DEFAULT_INDEX_FIELDS, do_subngrams = DEFAULT_IND ...@@ -51,9 +51,10 @@ def extract_ngrams(corpus, keys=DEFAULT_INDEX_FIELDS, do_subngrams = DEFAULT_IND
docs = [doc for doc in corpus.children('DOCUMENT') if doc.id not in corpus.skipped_docs] docs = [doc for doc in corpus.children('DOCUMENT') if doc.id not in corpus.skipped_docs]
tagger_bots = {lang: load_tagger(lang)() for lang in corpus.languages if lang != "__skipped__"} tagger_bots = {lang: load_tagger(lang)() for lang in corpus.languages if lang != "__skipped__"}
#sort docs by lang? #sort docs by lang?
for lang, tagger in tagger_bots.items(): # for lang, tagger in tagger_bots.items():
for documents_count, document in enumerate(docs): for documents_count, document in enumerate(docs):
language_iso2 = document.hyperdata.get('language_iso2', lang) language_iso2 = document.hyperdata.get('language_iso2')
tagger = tagger_bots[language_iso2]
#print(language_iso2) #print(language_iso2)
for key in keys: for key in keys:
try: try:
...@@ -97,9 +98,13 @@ def extract_ngrams(corpus, keys=DEFAULT_INDEX_FIELDS, do_subngrams = DEFAULT_IND ...@@ -97,9 +98,13 @@ def extract_ngrams(corpus, keys=DEFAULT_INDEX_FIELDS, do_subngrams = DEFAULT_IND
corpus.save_hyperdata() corpus.save_hyperdata()
session.add(corpus) session.add(corpus)
session.commit() session.commit()
else:
# integrate ngrams and nodes-ngrams # integrate ngrams and nodes-ngrams (le reste)
if len(nodes_ngrams_count) > 0:
_integrate_associations(nodes_ngrams_count, ngrams_data, db, cursor) _integrate_associations(nodes_ngrams_count, ngrams_data, db, cursor)
nodes_ngrams_count.clear()
ngrams_data.clear()
corpus.status('Ngrams', progress=documents_count+1, complete=True) corpus.status('Ngrams', progress=documents_count+1, complete=True)
corpus.save_hyperdata() corpus.save_hyperdata()
session.commit() session.commit()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment