Filter out terms that became empty after normalization + cosmetic cleanups

63ec1b5c · Romain Loth · a77ea0cf · 63ec1b5c · 63ec1b5c · 63ec1b5c
Commit 63ec1b5c authored Mar 31, 2016 by Romain Loth
Showing 3 changed files with 9 additions and 7 deletions

ngram_coocs.py gargantext/util/toolchain/ngram_coocs.py +4 -3

ngrams_extraction.py gargantext/util/toolchain/ngrams_extraction.py +3 -2

terms.py gargantext/views/pages/terms.py +2 -2

No files found.
--- a/gargantext/util/toolchain/ngram_coocs.py
+++ b/gargantext/util/toolchain/ngram_coocs.py
@@ -66,6 +66,7 @@ def compute_coocs(corpus,

    """

+        #   - TODO add grouped element's values in grouping 'chief ngram'
        #   - TODO cvalue_id: allow a metric as additional  input filter
        #   - TODO n_min, n_max : filter on Ngram.n (aka length of ngram)
        #   - TODO start, end : filter on document date
@@ -159,9 +160,9 @@ def compute_coocs(corpus,
    matrix = WeightedMatrix(coocs_query.all())

    # fyi
-    # shape_0 = len({pair[0] for pair in matrix.items})
-    # shape_1 = len({pair[1] for pair in matrix.items})
-    # print("COOCS: NEW matrix shape [%ix%i]" % (shape_0, shape_1))
+    shape_0 = len({pair[0] for pair in matrix.items})
+    shape_1 = len({pair[1] for pair in matrix.items})
+    print("COOCS: NEW matrix shape [%ix%i]" % (shape_0, shape_1))

    # 5) SAVE
    # --------

--- a/gargantext/util/toolchain/ngrams_extraction.py
+++ b/gargantext/util/toolchain/ngrams_extraction.py
@@ -63,8 +63,9 @@ def extract_ngrams(corpus, keys=('title', 'abstract', )):
                for ngram in ngramsextractor.extract(value):
                    tokens = tuple(token[0] for token in ngram)
                    terms = normalize_terms(' '.join(tokens))
-                    nodes_ngrams_count[(document.id, terms)] += 1
-                    ngrams_data.add((terms[:255], len(tokens), ))
+                    if len(terms) > 1:
+                        nodes_ngrams_count[(document.id, terms)] += 1
+                        ngrams_data.add((terms[:255], len(tokens), ))
            # integrate ngrams and nodes-ngrams
            if len(nodes_ngrams_count) >= BATCH_NGRAMSEXTRACTION_SIZE:
                _integrate_associations(nodes_ngrams_count, ngrams_data, db, cursor)

--- a/gargantext/views/pages/terms.py
+++ b/gargantext/views/pages/terms.py
@@ -11,7 +11,7 @@ def ngramtable(request, project_id, corpus_id):
       => maplist and mainlist terms in a table
          with groupings as '+' nodes
       => uses API GET batch of lists
-       => uses API PUT/DEL for list modifications (TODO)
+       => uses API PUT/DEL for list modifications
       => uses frontend AJAX through Ngrams_dyna_charts_and_table.js
    # TODO refactor Ngrams_dyna_charts_and_table.js
    '''
@@ -21,7 +21,7 @@ def ngramtable(request, project_id, corpus_id):
    # and the project just for project.id in corpusBannerTop
    project = cache.Node[project_id]

-    # rendered page : journals.html
+    # rendered page : terms.html
    return render(
        template_name = 'pages/corpora/terms.html',
        request = request,