Commit 63ec1b5c authored by Romain Loth's avatar Romain Loth

filter terms that became empty after normalization + esthetics

parent a77ea0cf
......@@ -66,6 +66,7 @@ def compute_coocs(corpus,
"""
# - TODO add grouped element's values in grouping 'chief ngram'
# - TODO cvalue_id: allow a metric as additional input filter
# - TODO n_min, n_max : filter on Ngram.n (aka length of ngram)
# - TODO start, end : filter on document date
......@@ -159,9 +160,9 @@ def compute_coocs(corpus,
matrix = WeightedMatrix(coocs_query.all())
# fyi
# shape_0 = len({pair[0] for pair in matrix.items})
# shape_1 = len({pair[1] for pair in matrix.items})
# print("COOCS: NEW matrix shape [%ix%i]" % (shape_0, shape_1))
shape_0 = len({pair[0] for pair in matrix.items})
shape_1 = len({pair[1] for pair in matrix.items})
print("COOCS: NEW matrix shape [%ix%i]" % (shape_0, shape_1))
# 5) SAVE
# --------
......
......@@ -63,8 +63,9 @@ def extract_ngrams(corpus, keys=('title', 'abstract', )):
for ngram in ngramsextractor.extract(value):
tokens = tuple(token[0] for token in ngram)
terms = normalize_terms(' '.join(tokens))
nodes_ngrams_count[(document.id, terms)] += 1
ngrams_data.add((terms[:255], len(tokens), ))
if len(terms) > 1:
nodes_ngrams_count[(document.id, terms)] += 1
ngrams_data.add((terms[:255], len(tokens), ))
# integrate ngrams and nodes-ngrams
if len(nodes_ngrams_count) >= BATCH_NGRAMSEXTRACTION_SIZE:
_integrate_associations(nodes_ngrams_count, ngrams_data, db, cursor)
......
......@@ -11,7 +11,7 @@ def ngramtable(request, project_id, corpus_id):
=> maplist and mainlist terms in a table
with groupings as '+' nodes
=> uses API GET batch of lists
=> uses API PUT/DEL for list modifications (TODO)
=> uses API PUT/DEL for list modifications
=> uses frontend AJAX through Ngrams_dyna_charts_and_table.js
# TODO refactor Ngrams_dyna_charts_and_table.js
'''
......@@ -21,7 +21,7 @@ def ngramtable(request, project_id, corpus_id):
# and the project just for project.id in corpusBannerTop
project = cache.Node[project_id]
# rendered page : journals.html
# rendered page : terms.html
return render(
template_name = 'pages/corpora/terms.html',
request = request,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment