Commit 0f3ecfc8 authored by sim

[FIX] Quick fix: ngrams indexation at start of recount

parent 61f34558
......@@ -62,12 +62,12 @@ def parse_extract_indexhyperdata(corpus):
# apply actions
print('CORPUS #%d' % (corpus.id))
corpus.status('Docs', progress=1)
corpus.save_hyperdata()
session.commit()
parse(corpus)
docs = corpus.children("DOCUMENT").count()
print('CORPUS #%d: parsed %d' % (corpus.id, docs))
extract_ngrams(corpus)
......@@ -242,6 +242,19 @@ def recount(corpus_id):
corpus.save_hyperdata()
session.commit()
# START OF KLUDGE...
from gargantext.models import NodeNgram, DocumentNode
from .ngrams_addition import index_new_ngrams
maplist_id = corpus.children("MAPLIST").first().id
ngram_ids = session.query(NodeNgram.ngram_id.distinct())
indexed_ngrams = ngram_ids.join(DocumentNode).filter(DocumentNode.parent_id==corpus.id)
not_indexed_ngrams = ngram_ids.filter(NodeNgram.node_id==maplist_id,
~NodeNgram.ngram_id.in_(indexed_ngrams))
not_indexed_ngrams = [x[0] for x in not_indexed_ngrams]
added = index_new_ngrams(not_indexed_ngrams, corpus)
print('RECOUNT #%d: [%s] indexed %s ngrams' % (corpus.id, t(), added))
# ...END OF KLUDGE
# -> overwrite occurrences (=> NodeNodeNgram)
occ_id = compute_occs(corpus,
groupings_id = group_id,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment