Commit 92d5dfcd authored by Romain Loth

pointers for listtype adjustments todo before release

parent 1f1e23ce
...@@ -8,18 +8,19 @@ import re ...@@ -8,18 +8,19 @@ import re
LISTTYPES = { LISTTYPES = {
'DOCUMENT' : WeightedList, 'DOCUMENT' : WeightedList,
'GROUPLIST' : Translations, 'GROUPLIST' : Translations, # todo remove "LIST" from name
'STOPLIST' : UnweightedList, 'STOPLIST' : UnweightedList,
'MAINLIST' : UnweightedList, 'MAINLIST' : UnweightedList,
'MAPLIST' : UnweightedList, 'MAPLIST' : UnweightedList,
'SPECIFICITY' : WeightedList, 'SPECIFICITY' : WeightedList,
'OCCURRENCES' : WeightedContextIndex, 'OCCURRENCES' : WeightedIndex, # todo replace by WeightedList
'COOCCURRENCES': WeightedMatrix, 'COOCCURRENCES': WeightedMatrix,
'TFIDF-CORPUS' : WeightedContextIndex, 'TFIDF-CORPUS' : WeightedIndex, # todo split -> WeightedList for ti_rank and WeightedIndex for tfidf
'TFIDF-GLOBAL' : WeightedContextIndex, 'TFIDF-GLOBAL' : WeightedIndex, # todo split -> WeightedList for ti_rank and WeightedIndex for tfidf
} }
NODETYPES = [ NODETYPES = [
# TODO separate id not array index, read by models.node
None, None,
# documents hierarchy # documents hierarchy
'USER', # 1 'USER', # 1
...@@ -40,6 +41,7 @@ NODETYPES = [ ...@@ -40,6 +41,7 @@ NODETYPES = [
'TFIDF-GLOBAL', # 14 'TFIDF-GLOBAL', # 14
# docs subset # docs subset
'FAVORITES' # 15 'FAVORITES' # 15
# TODO add ti RANK
] ]
INDEXED_HYPERDATA = { INDEXED_HYPERDATA = {
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
""" """
__all__ = ['Translations', 'WeightedMatrix', 'UnweightedList', 'WeightedList', 'WeightedContextIndex'] __all__ = ['Translations', 'WeightedMatrix', 'UnweightedList', 'WeightedList', 'WeightedIndex']
from gargantext.util.db import session, bulk_insert from gargantext.util.db import session, bulk_insert
...@@ -165,15 +165,18 @@ class Translations(_BaseClass): ...@@ -165,15 +165,18 @@ class Translations(_BaseClass):
) )
class WeightedContextIndex(_BaseClass): class WeightedIndex(_BaseClass):
""" """
associated model : NodeNodeNgram associated model : NodeNodeNgram
associated columns : node1_id | node2_id | ngram_id | score (float) associated columns : node1_id | node2_id | ngram_id | score (float)
^^^^
reserved for this
object's id
Tensor representing a contextual index or registry Matrix representing a weighted word index across docs or small context nodes
(matrix of weighted ngrams *per* doc *per* context) (matrix of weighted ngrams *per* doc)
Example : tfidf by corpus Example : tfidf within a corpus
""" """
def __init__(self, source=None): def __init__(self, source=None):
self.items = defaultdict(float) self.items = defaultdict(float)
...@@ -182,7 +185,7 @@ class WeightedContextIndex(_BaseClass): ...@@ -182,7 +185,7 @@ class WeightedContextIndex(_BaseClass):
# ?TODO rename WeightedWordmatrix
class WeightedMatrix(_BaseClass): class WeightedMatrix(_BaseClass):
def __init__(self, source=None): def __init__(self, source=None):
...@@ -294,7 +297,7 @@ class WeightedMatrix(_BaseClass): ...@@ -294,7 +297,7 @@ class WeightedMatrix(_BaseClass):
result.items[key1, key2] = value / sqrt(other.items[key1] * other.items[key2]) result.items[key1, key2] = value / sqrt(other.items[key1] * other.items[key2])
return result return result
# ?TODO rename Wordlist
class UnweightedList(_BaseClass): class UnweightedList(_BaseClass):
def __init__(self, source=None): def __init__(self, source=None):
...@@ -399,6 +402,7 @@ class UnweightedList(_BaseClass): ...@@ -399,6 +402,7 @@ class UnweightedList(_BaseClass):
) )
# ?TODO rename WeightedWordlist
class WeightedList(_BaseClass): class WeightedList(_BaseClass):
def __init__(self, source=None): def __init__(self, source=None):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment