Commit 5caa3954 authored by Administrator

[FEAT] Graph with cleaner parameters.

parent 02bbc918
...@@ -21,7 +21,7 @@ def create_synonymes(user, corpus): ...@@ -21,7 +21,7 @@ def create_synonymes(user, corpus):
size = 1000 size = 1000
def create_whitelist(user, corpus_id, size=size): def create_whitelist(user, corpus_id, size=size, count_min=2):
cursor = connection.cursor() cursor = connection.cursor()
whitelist_type_id = cache.NodeType['WhiteList'].id whitelist_type_id = cache.NodeType['WhiteList'].id
...@@ -66,13 +66,13 @@ def create_whitelist(user, corpus_id, size=size): ...@@ -66,13 +66,13 @@ def create_whitelist(user, corpus_id, size=size):
GROUP BY GROUP BY
ngX.id ngX.id
Having Having
COUNT(*) >= 3 COUNT(*) >= %d
ORDER BY ORDER BY
occurrences DESC occurrences DESC
LIMIT LIMIT
%d %d
; ;
""" % (white_list.id, int(corpus_id), int(type_document_id), size) """ % (white_list.id, int(corpus_id), int(type_document_id), count_min, size)
# print("PRINTING QYERY OF WHITELIST:") # print("PRINTING QYERY OF WHITELIST:")
# print(query_whitelist) # print(query_whitelist)
cursor.execute(query_whitelist) cursor.execute(query_whitelist)
...@@ -205,23 +205,30 @@ def get_cooc(request=None, corpus_id=None, cooc_id=None, type='node_link', size= ...@@ -205,23 +205,30 @@ def get_cooc(request=None, corpus_id=None, cooc_id=None, type='node_link', size=
ys = x.sum(axis=0) - x ys = x.sum(axis=0) - x
# top inclus ou exclus # top inclus ou exclus
#n = ( xs + ys) / (2 * (x.shape[0] -1)) n = ( xs + ys) / (2 * (x.shape[0] -1))
# top generic or specific # top generic or specific
m = ( xs - ys) / (2 * (x.shape[0] -1)) m = ( xs - ys) / (2 * (x.shape[0] -1))
#m = pd.DataFrame.abs(m)
#n = n.sort(inplace=False) n = n.sort(inplace=False)
m = m.sort(inplace=False) m = m.sort(inplace=False)
matrix_size = int(round(size/5,0)) print(n)
# TODO user the generic score for the node size print(m)
#n_index = pd.Index.intersection(x.index, n.index[-matrix_size:])
nodes_included = int(round(size/20,0))
#nodes_excluded = int(round(size/10,0))
nodes_specific = int(round(size/2,0))
#nodes_generic = int(round(size/10,0))
# TODO user the included score for the node size
n_index = pd.Index.intersection(x.index, n.index[:nodes_included])
# Generic: # Generic:
#m_index = pd.Index.intersection(x.index, m.index[:matrix_size]) #m_index = pd.Index.intersection(x.index, m.index[:nodes_generic])
# Specific: # Specific:
m_index = pd.Index.intersection(x.index, m.index[-matrix_size:]) m_index = pd.Index.intersection(x.index, m.index[-nodes_specific:])
x_index = m_index# pd.Index.union(n_index, m_index) x_index = pd.Index.union(n_index, m_index)
xx = x[list(x_index)].T[list(x_index)] xx = x[list(x_index)].T[list(x_index)]
# import pprint # import pprint
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment