Commit aaaa31b1 authored by Mathieu Rodic

[BUGFIX] Improved all CorpusController methods: now only children are used

parent 3765084c
...@@ -11,6 +11,26 @@ from django.db import connection ...@@ -11,6 +11,26 @@ from django.db import connection
# from node.models import Node, NodeType, Node_Resource, Project, Corpus # from node.models import Node, NodeType, Node_Resource, Project, Corpus
# from node.admin import CorpusForm, ProjectForm, ResourceForm # from node.admin import CorpusForm, ProjectForm, ResourceForm
# Shared SQL prefix: a recursive common table expression named "cte" that
# starts from the root rows of the node table ("parent_id" IS NULL) and
# repeatedly joins in the rows whose "parent_id" matches an already-visited id.
# Each cte row exposes:
#   - "depth":    1 for a root row, the parent's depth + 1 otherwise
#   - "path":     array of ids from the root down to the row itself
#   - "ordering": array built the same way as "path" in this definition
#   - "id":       the row's own id
# Callers append their own SELECT to this prefix; the ones in this file keep
# only strict descendants of a node X with
#   (NOT cte.id = X) AND (X = ANY(cte."path")).
# Both %s placeholders are filled with the Node model's table name.
_sql_cte = '''
WITH RECURSIVE cte ("depth", "path", "ordering", "id") AS (
SELECT 1 AS depth,
array[T."id"] AS path,
array[T."id"] AS ordering,
T."id"
FROM %s T
WHERE T."parent_id" IS NULL
UNION ALL
SELECT cte.depth + 1 AS depth,
cte.path || T."id",
cte.ordering || array[T."id"],
T."id"
FROM %s T
JOIN cte ON T."parent_id" = cte."id"
)
''' % (Node._meta.db_table, Node._meta.db_table, )
def DebugHttpResponse(data): def DebugHttpResponse(data):
return HttpResponse('<html><body style="background:#000;color:#FFF"><pre>%s</pre></body></html>' % (str(data), )) return HttpResponse('<html><body style="background:#000;color:#FFF"><pre>%s</pre></body></html>' % (str(data), ))
...@@ -58,19 +78,28 @@ class CorpusController: ...@@ -58,19 +78,28 @@ class CorpusController:
raise ValidationError('The order parameter should take one of the following values: ' + ', '.join(_ngrams_order_columns), 400) raise ValidationError('The order parameter should take one of the following values: ' + ', '.join(_ngrams_order_columns), 400)
order_column = _ngrams_order_columns[order] order_column = _ngrams_order_columns[order]
# query building # query building
ngramsQuery = Ngram.objects.filter( cursor = connection.cursor()
nodes__parent = corpus, cursor.execute(_sql_cte + '''
terms__startswith = request.GET.get('startswith', '') SELECT ngram.terms
).annotate(count=Count('id')) FROM cte
# how should we order this? INNER JOIN %s AS node ON node.id = cte.id
orderColumn = { INNER JOIN %s AS nodetype ON nodetype.id = node.type_id
"frequency" : "-count", INNER JOIN %s AS node_ngram ON node_ngram.node_id = node.id
"alphabetical" : "terms" INNER JOIN %s AS ngram ON ngram.id = node_ngram.ngram_id
}.get(request.GET.get('order', 'frequency'), '-count') WHERE (NOT cte.id = \'%d\') AND (\'%d\' = ANY(cte."path"))
ngramsQuery = ngramsQuery.order_by(orderColumn) AND nodetype.name = 'Document'
AND ngram.terms LIKE '%s%%'
GROUP BY ngram.terms
ORDER BY SUM(node_ngram.weight) DESC
''' % (Node._meta.db_table, NodeType._meta.db_table, Node_Ngram._meta.db_table, Ngram._meta.db_table, corpus.id, corpus.id, request.GET.get('startwith', '').replace("'", "\\'"), ))
# # how should we order this?
# orderColumn = {
# "frequency" : "-count",
# "alphabetical" : "terms"
# }.get(request.GET.get('order', 'frequency'), '-count')
# response building # response building
return JsonHttpResponse({ return JsonHttpResponse({
"list" : [ngram.terms for ngram in ngramsQuery], "list" : [row[0] for row in cursor.fetchall()],
}) })
@classmethod @classmethod
...@@ -79,19 +108,17 @@ class CorpusController: ...@@ -79,19 +108,17 @@ class CorpusController:
corpus = cls.get(corpus_id) corpus = cls.get(corpus_id)
# query building # query building
cursor = connection.cursor() cursor = connection.cursor()
cursor.execute( cursor.execute(_sql_cte + '''
''' SELECT SELECT key
key,
COUNT(*) AS count
FROM ( FROM (
SELECT skeys(metadata) AS key SELECT skeys(metadata) AS key
FROM %s FROM cte
INNER JOIN %s AS node ON node.id = cte.id
WHERE (NOT cte.id = \'%d\') AND (\'%d\' = ANY(cte."path"))
) AS keys ) AS keys
GROUP BY GROUP BY key
key ORDER BY COUNT(*) DESC
ORDER BY ''' % (Node._meta.db_table, corpus.id, corpus.id, ))
count DESC
''' % (Node._meta.db_table, ))
# response building # response building
return JsonHttpResponse({ return JsonHttpResponse({
"list" : [row[0] for row in cursor.fetchall()], "list" : [row[0] for row in cursor.fetchall()],
...@@ -145,30 +172,7 @@ class CorpusController: ...@@ -145,30 +172,7 @@ class CorpusController:
else: else:
raise ValidationError('Unrecognized "filter[]=%s"' % (filter, )) raise ValidationError('Unrecognized "filter[]=%s"' % (filter, ))
# query building: initializing SQL # query building: initializing SQL
sql = str(corpus.descendants().query) sql_0 = _sql_cte
sql_array_select = sql.split('SELECT')
sql_array_from = sql_array_select[-1].split('FROM')
sql_array_where = sql_array_from[-1].split('WHERE')
sql_array_order = sql_array_where[-1].split('ORDER')
sql_0 = ''' WITH RECURSIVE cte (
"depth", "path", "ordering", "id") AS (
SELECT 1 AS depth,
array[T."id"] AS path,
array[T."id"] AS ordering,
T."id"
FROM node_node T
WHERE T."parent_id" IS NULL
UNION ALL
SELECT cte.depth + 1 AS depth,
cte.path || T."id",
cte.ordering || array[T."id"],
T."id"
FROM node_node T
JOIN cte ON T."parent_id" = cte."id")
'''
sql_1 = '\nSELECT ' sql_1 = '\nSELECT '
sql_2 = '\nFROM %s\nINNER JOIN cte ON cte."id" = %s.id' % (Node._meta.db_table, Node._meta.db_table, ) sql_2 = '\nFROM %s\nINNER JOIN cte ON cte."id" = %s.id' % (Node._meta.db_table, Node._meta.db_table, )
sql_3 = '\nWHERE ((NOT cte.id = \'%d\') AND (\'%d\' = ANY(cte."path")))' % (corpus.id, corpus.id, ) sql_3 = '\nWHERE ((NOT cte.id = \'%d\') AND (\'%d\' = ANY(cte."path")))' % (corpus.id, corpus.id, )
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment