pointers for listtype adjustments todo before release

92d5dfcd · Romain Loth · 1f1e23ce · 92d5dfcd · 92d5dfcd
Commit 92d5dfcd authored May 19, 2016 by Romain Loth
Hide whitespace changes
Inline Side-by-side

Showing with 17 additions and 11 deletions

constants.py gargantext/constants.py +6 -4

lists.py gargantext/util/lists.py +11 -7

No files found.
--- a/gargantext/constants.py
+++ b/gargantext/constants.py
@@ -8,18 +8,19 @@ import re
 LISTTYPES = {
    'DOCUMENT'     : WeightedList,
-    'GROUPLIST'    : Translations,
+    'GROUPLIST'    : Translations,   # todo remove "LIST" from name
    'STOPLIST'     : UnweightedList,
    'MAINLIST'     : UnweightedList,
    'MAPLIST'      : UnweightedList,
    'SPECIFICITY'  : WeightedList,
-    'OCCURRENCES'  : WeightedContextIndex,
+    'OCCURRENCES'  : WeightedIndex,   # todo replace by WeightedList
    'COOCCURRENCES': WeightedMatrix,
-    'TFIDF-CORPUS' : WeightedContextIndex,
+    'TFIDF-CORPUS' : WeightedIndex,   # todo split -> WeightedList for ti_rank and WeightedIndex for tfidf
-    'TFIDF-GLOBAL' : WeightedContextIndex,
+    'TFIDF-GLOBAL' : WeightedIndex,   # todo split -> WeightedList for ti_rank and WeightedIndex for tfidf
 }
 NODETYPES = [
+    # TODO use a separate id rather than the array index (read by models.node)
    None,
    # documents hierarchy
    'USER',                  # 1
@@ -40,6 +41,7 @@ NODETYPES = [
    'TFIDF-GLOBAL',          # 14
    # docs subset
    'FAVORITES'              # 15
+    # TODO add ti RANK
 ]
 INDEXED_HYPERDATA = {

--- a/gargantext/util/lists.py
+++ b/gargantext/util/lists.py
@@ -2,7 +2,7 @@
 """
-__all__ = ['Translations', 'WeightedMatrix', 'UnweightedList', 'WeightedList', 'WeightedContextIndex']
+__all__ = ['Translations', 'WeightedMatrix', 'UnweightedList', 'WeightedList', 'WeightedIndex']
 from gargantext.util.db import session, bulk_insert
@@ -165,15 +165,18 @@ class Translations(_BaseClass):
        )
-class WeightedContextIndex(_BaseClass):
+class WeightedIndex(_BaseClass):
    """
    associated model   : NodeNodeNgram
    associated columns : node1_id  |  node2_id  |  ngram_id  |  score (float)
+                           ^^^^
+                    reserved for this
+                       object's id
-    Tensor representing a contextual index or registry
+    Matrix representing a weighted word index across docs or small context nodes
-    (matrix of weighted ngrams *per* doc *per* context)
+                   (matrix of weighted ngrams  *per*  doc)
-    Exemple : tfidf by corpus
+    Example : tfidf within a corpus
    """
    def __init__(self, source=None):
        self.items = defaultdict(float)
@@ -182,7 +185,7 @@ class WeightedContextIndex(_BaseClass):
+# ?TODO rename WeightedWordmatrix
 class WeightedMatrix(_BaseClass):
    def __init__(self, source=None):
@@ -294,7 +297,7 @@ class WeightedMatrix(_BaseClass):
                result.items[key1, key2] = value / sqrt(other.items[key1] * other.items[key2])
        return result
+# ?TODO rename Wordlist
 class UnweightedList(_BaseClass):
    def __init__(self, source=None):
@@ -399,6 +402,7 @@ class UnweightedList(_BaseClass):
        )
+# ?TODO rename WeightedWordlist
 class WeightedList(_BaseClass):
    def __init__(self, source=None):