Commit a0890660 authored by delanoe

[FIX] Group factorized, needs feedback.

parent ec79d809
...@@ -94,42 +94,36 @@ def compute_groups(corpus, limit_inf=None, limit_sup=None, how='Stem'): ...@@ -94,42 +94,36 @@ def compute_groups(corpus, limit_inf=None, limit_sup=None, how='Stem'):
.all() .all()
) )
ngrams = [n for n in ngrams if n not in stops] ngrams = [n + (str(stemIt(n[1])),) for n in ngrams if n not in stops]
#print(ngrams) #print(ngrams)
#group = defaultdict(lambda : defaultdict()) group = dict()
ids_dict = dict() group_to_insert = list()
mainform_dict = dict() miam_to_insert = set()
count_dict = dict()
for n in ngrams: for n in ngrams:
stem = str(stemIt(n[1])) # n has form (id, terms, freq, stem)
# first if stem is not None:
if stem is not None : if n[3] is not None:
ids_dict[stem] = ids_dict.get(stem, []) + [n[0]] # Adding the id to the forms
group[n[3]] = group.get(n[3], dict())
count = count_dict.get(stem, 0) group[n[3]]['forms'] = group[n[3]].get('forms', set())
group[n[3]]['forms'].add(n[0])
if n[2] > count:
mainform_dict[stem] = n[0] # Take current max
count_dict[stem] = n[2] group[n[3]]['freq'] = group[n[3]].get('freq', 0)
# Update max and mainForm if present ngram (n) has more freq
for key in mainform_dict.keys(): if n[2] > group[n[3]]['freq'] :
miam_to_insert.add((miam_node.id, mainform_dict[key], 1)) group[n[3]]['freq'] = n[2]
group[n[3]]['mainForm'] = n[0]
try:
ids = ids_dict[key] # Adding all ngrams in the list
for key in group.keys():
if ids is not None and len(ids) > 1: for form in list(group[key]['forms']):
for ngram_id in ids : group_to_insert.append((node_group.id, group[key]['mainForm'], form, 1))
if ngram_id != mainform_dict[key]: miam_to_insert.add((miam_node.id, group[key]['mainForm'], 1))
group_to_insert.add((node_group.id, mainform_dict[key], ngram_id, 1))
except exception as e:
print(e)
print(group[stem])
#print(ids_dict[stem])
# # Deleting previous groups # # Deleting previous groups
session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id == node_group.id).delete() session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id == node_group.id).delete()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment