Commit db229347 authored by delanoe's avatar delanoe

[CLEAN] removing old raw SQL queries.

parent 72238540
...@@ -29,145 +29,9 @@ from ngram.lists import listIds ...@@ -29,145 +29,9 @@ from ngram.lists import listIds
def diag_null(x): def diag_null(x):
return x - x * scipy.eye(x.shape[0]) return x - x * scipy.eye(x.shape[0])
def create_blacklist(user, corpus):
pass
def create_synonymes(user, corpus):
pass
size = 1000 size = 1000
def create_whitelist(user, corpus_id, size=size, count_min=2, miam_id=None):
if miam_id is None:
PrintException()
cursor = connection.cursor()
whitelist_type_id = cache.NodeType['WhiteList'].id
blacklist_type_id = cache.NodeType['BlackList'].id
type_document_id = cache.NodeType['Document'].id
white_list = Node(name='WhiteList Corpus ' + str(corpus_id), user_id=user.id, parent_id=corpus_id, type_id=whitelist_type_id)
black_list = Node(name='BlackList Corpus ' + str(corpus_id), user_id=user.id, parent_id=corpus_id, type_id=blacklist_type_id)
session.add(white_list)
session.add(black_list)
session.commit()
# delete avant pour éviter les doublons
# try:
# Node_Ngram.objects.filter(node=white_list).all().delete()
# except:
# print('First time we compute cooc')
#
query_whitelist = """
INSERT INTO node_node_ngram (node_id, ngram_id, weight)
SELECT
%d,
ngX.id,
COUNT(*) AS occurrences
FROM
node_node AS n
INNER JOIN
node_node_ngram AS nngX ON nngX.node_id = n.id
INNER JOIN
node_ngram AS ngX ON ngX.id = nngX.ngram_id
INNER JOIN
node_node_ngram AS miam ON ngX.id = miam.ngram_id
WHERE
n.parent_id = %d
AND
n.type_id = %d
AND
miam.node_id = %d
AND
ngX.n >= 2
AND
ngX.n <= 3
GROUP BY
ngX.id
Having
COUNT(*) >= %d
ORDER BY
occurrences DESC
LIMIT
%d
;
""" % (white_list.id, int(corpus_id), int(type_document_id), int(miam_id), count_min, size)
# print("PRINTING QYERY OF WHITELIST:")
# print(query_whitelist)
cursor.execute(query_whitelist)
return white_list
#def create_cooc(user, corpus, whitelist, blacklist, synonymes):
def create_cooc(user=None, corpus_id=None, whitelist=None, size=size, year_start=None, year_end=None):
cursor = connection.cursor()
cooc_type_id = cache.NodeType['Cooccurrence'].id
# pour les tests on supprime les cooc
#session.Node.objects.filter(type=cooc_type, parent=corpus).delete()
cooc = Node(user_id=user.id,\
parent_id=corpus_id,\
type_id=cooc_type_id,\
name="Cooccurrences corpus " + str(corpus_id))
session.add(cooc)
session.commit()
query_cooc = """
INSERT INTO node_nodengramngram (node_id, "ngramx_id", "ngramy_id", score)
SELECT
%d as node_id,
ngX.id,
ngY.id,
COUNT(*) AS score
FROM
node_node AS n -- the nodes who are direct children of the corpus
INNER JOIN
node_node_ngram AS nngX ON nngX.node_id = n.id -- list of ngrams contained in the node
INNER JOIN
node_node_ngram AS whitelistX ON whitelistX.ngram_id = nngX.ngram_id -- list of ngrams contained in the whitelist and in the node
INNER JOIN
node_ngram AS ngX ON ngX.id = whitelistX.ngram_id -- ngrams which are in both
INNER JOIN
node_node_ngram AS nngY ON nngY.node_id = n.id
INNER JOIN
node_node_ngram AS whitelistY ON whitelistY.ngram_id = nngY.ngram_id
INNER JOIN
node_ngram AS ngY ON ngY.id = whitelistY.ngram_id
WHERE
n.parent_id = %s
AND
whitelistX.node_id = %s
AND
whitelistY.node_id = %s
AND
nngX.ngram_id < nngY.ngram_id -- so we only get distinct pairs of ngrams
GROUP BY
ngX.id,
ngX.terms,
ngY.id,
ngY.terms
ORDER BY
score DESC
LIMIT
%d
""" % (cooc.id, corpus_id, whitelist.id, whitelist.id, size)
# print(query_cooc)
cursor.execute(query_cooc)
return cooc.id
def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=size): def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=size):
''' '''
...@@ -255,7 +119,6 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz ...@@ -255,7 +119,6 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz
matrix_filtered = np.where(xxx >= threshold, xxx, 0) matrix_filtered = np.where(xxx >= threshold, xxx, 0)
#matrix_filtered = matrix_filtered.resize((90,90)) #matrix_filtered = matrix_filtered.resize((90,90))
try:
G = nx.from_numpy_matrix(np.matrix(matrix_filtered)) G = nx.from_numpy_matrix(np.matrix(matrix_filtered))
#G = nx.from_numpy_matrix(matrix_filtered, create_using=nx.MultiDiGraph()) #G = nx.from_numpy_matrix(matrix_filtered, create_using=nx.MultiDiGraph())
...@@ -270,9 +133,6 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz ...@@ -270,9 +133,6 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz
partition = best_partition(uG) partition = best_partition(uG)
print(partition) print(partition)
print("Density of the graph:", nx.density(G)) print("Density of the graph:", nx.density(G))
except:
print("-" * 30)
PrintException()
if type == "node_link": if type == "node_link":
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment