Commit 4283baa5 authored by Romain Loth

FIX add tirank as a separate list type like OCCURRENCES

parent ab4965ee
......@@ -13,10 +13,12 @@ LISTTYPES = {
'MAINLIST' : UnweightedList,
'MAPLIST' : UnweightedList,
'SPECIFICITY' : WeightedList,
'OCCURRENCES' : WeightedIndex, # todo replace by WeightedList
'OCCURRENCES' : WeightedIndex, # could be WeightedList
'COOCCURRENCES': WeightedMatrix,
'TFIDF-CORPUS' : WeightedIndex, # todo split -> WeightedList for ti_rank and WeightedIndex for tfidf
'TFIDF-GLOBAL' : WeightedIndex, # todo split -> WeightedList for ti_rank and WeightedIndex for tfidf
'TFIDF-CORPUS' : WeightedIndex,
'TFIDF-GLOBAL' : WeightedIndex,
'TIRANK-LOCAL' : WeightedIndex, # could be WeightedList
'TIRANK-GLOBAL' : WeightedIndex # could be WeightedList
}
NODETYPES = [
......@@ -40,8 +42,11 @@ NODETYPES = [
'TFIDF-CORPUS', # 13
'TFIDF-GLOBAL', # 14
# docs subset
'FAVORITES' # 15
# TODO add ti RANK
'FAVORITES', # 15
# more scores (sorry!)
'TIRANK-LOCAL', # 16
'TIRANK-GLOBAL', # 17
]
INDEXED_HYPERDATA = {
......
......@@ -112,11 +112,11 @@ def parse_extract_indexhyperdata(corpus):
print('CORPUS #%d: [%s] new grouplist node #%i' % (corpus.id, t(), group_id))
# ------------
# -> write occurrences to Node and NodeNodeNgram # (todo: NodeNgram)
# -> write occurrences to Node and NodeNodeNgram
occ_id = compute_occs(corpus, groupings_id = group_id)
print('CORPUS #%d: [%s] new occs node #%i' % (corpus.id, t(), occ_id))
# -> write cumulated ti_ranking (tfidf ranking vector) to Node and NodeNodeNgram (todo: NodeNgram)
# -> write cumulated ti_ranking (tfidf ranking vector) to Node and NodeNodeNgram
tirank_id = compute_ti_ranking(corpus,
groupings_id = group_id,
count_scope="global")
......@@ -172,5 +172,6 @@ def parse_extract_indexhyperdata(corpus):
session.commit()
def t():
    """Return the current local time as a ``YYYY-MM-DD_HH:MM:SS`` string.

    Used as the timestamp embedded in the progress messages printed by the
    corpus workflow (e.g. ``'CORPUS #%d: [%s] ...'``).
    """
    # datetime.now() is called without a tz, so the value is naive local time.
    return datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
......@@ -345,11 +345,11 @@ def compute_ti_ranking(corpus,
# create the new TFIDF-XXXX node to get an id
tir_nd = corpus.add_child()
if count_scope == "local":
tir_nd.typename = "TFIDF-CORPUS"
tir_nd.typename = "TIRANK-CORPUS"
tir_nd.name = "ti rank (%i ngforms in corpus:%s)" % (
total_ngramforms, corpus_id)
elif count_scope == "global":
tir_nd.typename = "TFIDF-GLOBAL"
tir_nd.typename = "TIRANK-GLOBAL"
tir_nd.name = "ti rank (%i ngforms %s in corpora of sourcetype:%s)" % (
total_ngramforms,
("from corpus %i" % corpus_id) if (termset_scope == "local") else "" ,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment