Commit c5bd5c56 authored by Romain Loth

ngrams reindexing: add timer (diagnostic: my function is too slow, negative...

ngrams reindexing: add timer (diagnostic: my function is too slow, with a negative effect on list importing and on adding free annotations)
parent 9073a0ba
...@@ -23,6 +23,8 @@ from gargantext.util.db import bulk_insert_ifnotexists # £TODO debug ...@@ -23,6 +23,8 @@ from gargantext.util.db import bulk_insert_ifnotexists # £TODO debug
from sqlalchemy import distinct from sqlalchemy import distinct
from re import findall, IGNORECASE from re import findall, IGNORECASE
from gargantext.util.toolchain.main import t # timer
# TODO from gargantext.constants import LIST_OF_KEYS_TO_INDEX = title, abstract # TODO from gargantext.constants import LIST_OF_KEYS_TO_INDEX = title, abstract
def index_new_ngrams(ngram_ids, corpus, keys=('title', 'abstract', )): def index_new_ngrams(ngram_ids, corpus, keys=('title', 'abstract', )):
...@@ -40,6 +42,8 @@ def index_new_ngrams(ngram_ids, corpus, keys=('title', 'abstract', )): ...@@ -40,6 +42,8 @@ def index_new_ngrams(ngram_ids, corpus, keys=('title', 'abstract', )):
@param corpus: the CORPUS node @param corpus: the CORPUS node
@param keys: the hyperdata fields to index @param keys: the hyperdata fields to index
# FIXME too slow: index_new_ngrams should be faster via tsvector on DB
""" """
# retrieve *all* the ngrams from our list # retrieve *all* the ngrams from our list
...@@ -56,7 +60,11 @@ def index_new_ngrams(ngram_ids, corpus, keys=('title', 'abstract', )): ...@@ -56,7 +60,11 @@ def index_new_ngrams(ngram_ids, corpus, keys=('title', 'abstract', )):
node_ngram_to_write = {} node_ngram_to_write = {}
# loop through the docs and their text fields # loop through the docs and their text fields
for doc in corpus.children('DOCUMENT'): for (i, doc) in enumerate(corpus.children('DOCUMENT')):
if (i % 100 == 0):
print('CORPUS #%d: [%s] ngrams_addition: doc %i' % (corpus.id, t(), i))
print()
# a new empty counting subdict # a new empty counting subdict
node_ngram_to_write[doc.id] = {} node_ngram_to_write[doc.id] = {}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment