Merge remote-tracking branch 'origin/romain-goodies' into unstable

42cd1ab2 · delanoe · 85eb2e65 · 25d1941e · 42cd1ab2 · 42cd1ab2
Commit 42cd1ab2 authored Jul 01, 2016 by delanoe
Showing with 13 additions and 6 deletions

ngrams_extraction.py gargantext/util/toolchain/ngrams_extraction.py +5 -2

parsing.py gargantext/util/toolchain/parsing.py +6 -3

NGrams_dyna_chart_and_table.js static/lib/gargantext/NGrams_dyna_chart_and_table.js +2 -1

No files found.
--- a/gargantext/util/toolchain/ngrams_extraction.py
+++ b/gargantext/util/toolchain/ngrams_extraction.py
@@ -127,12 +127,15 @@ def normalize_terms(term_str, do_lowercase=DEFAULT_ALL_LOWERCASE_FLAG):

    (benefits from normalize_chars upstream so there's less cases to consider)
    """
-    term_str = sub(r'^[-",;/%(){}\\\[\]\.\']+', '', term_str)
-    term_str = sub(r'[-",;/%(){}\\\[\]\.\']+$', '', term_str)
+    # print('normalize_terms  IN: "%s"' % term_str)
+    term_str = sub(r'^[-",;/%(){}\\\[\]\.\' ]+', '', term_str)
+    term_str = sub(r'[-",;/%(){}\\\[\]\.\' ]+$', '', term_str)

    if do_lowercase:
        term_str = term_str.lower()

+    # print('normalize_terms OUT: "%s"' % term_str)
+
    return term_str



--- a/gargantext/util/toolchain/parsing.py
+++ b/gargantext/util/toolchain/parsing.py
@@ -8,7 +8,7 @@ from re          import sub
 def parse(corpus):
    try:
        documents_count = 0
-        
+
        corpus.status('Docs', progress=0)

        # will gather info about languages
@@ -86,6 +86,7 @@ def normalize_chars(my_str):
       ou passer en lowercase, seront à placer plutôt *après* le tagger,
            cf. toolchain.ngrams_extraction.normalize_terms)
    """
+    # print('normalize_chars  IN: "%s"' % my_str)
    # --------------
    # E S P A C E S
    # --------------
@@ -121,12 +122,12 @@ def normalize_chars(my_str):
    # Guillemets
    # ----------
    # la plupart des quotes simples --> ' APOSTROPHE
-    my_str = sub(r"‘’‚`‛", "'", my_str) # U+2018 U+2019 U+201a U+201b
+    my_str = sub(r"[‘’‚`‛]", "'", my_str) # U+2018 U+2019 U+201a U+201b
    my_str = sub(r'‹ ?',"'", my_str)    # U+2039 plus espace éventuel après
    my_str = sub(r' ?›',"'", my_str)    # U+203A plus espace éventuel avant

    # la plupart des quotes doubles --> " QUOTATION MARK
-    my_str = sub(r'“”„‟', '"', my_str)  # U+201C U+201D U+201E U+201F
+    my_str = sub(r'[“”„‟]', '"', my_str)  # U+201C U+201D U+201E U+201F
    my_str = sub(r'« ?', '"', my_str)   # U+00AB plus espace éventuel après
    my_str = sub(r' ?»', '"', my_str)   # U+00BB plus espace éventuel avant

@@ -178,4 +179,6 @@ def normalize_chars(my_str):
    my_str = sub(r'Ꜩ', 'Tz', my_str)
    my_str = sub(r'ꜩ', 'tz', my_str)

+    # print('normalize_chars OUT: "%s"' % my_str)
+
    return my_str
--- a/static/lib/gargantext/NGrams_dyna_chart_and_table.js
+++ b/static/lib/gargantext/NGrams_dyna_chart_and_table.js
@@ -1357,7 +1357,7 @@ function SelectPage(boxType, boxElem) {

    // console.log("data became:" + newColumnSelection)

-  $("tbody tr").each(function (i, row) {
+  $("table#my-ajax-table tbody tr").each(function (i, row) {
      var ngramId = $(row).attr("ngram-id") ;

      // a cache to restore previous states if unchecked
@@ -1374,6 +1374,7 @@ function SelectPage(boxType, boxElem) {
          AjaxRecords[ngramId]["state"] = AjaxRecords[ngramId]["state_buff"] ;
          AjaxRecords[ngramId]["state_buff"] = null ;
      }
+
  });

  // OK update this table page