Commit 5fc7c2d7 authored by Mathieu Rodic's avatar Mathieu Rodic

[CODE] The API is now wrapped in a controller.

NgramsCache: ngrams are now stored in lowercase
parent 281e712a
......@@ -27,135 +27,135 @@ _ngrams_order_columns = {
}
def corpus_ngrams(request, corpus_id):
# parameters retrieval and control
corpusQuery = Node.objects.filter(id = corpus_id)
if not corpusQuery:
raise Http404("No such corpus.")
corpus = corpusQuery.first()
if corpus.type.name != 'Corpus':
raise Http404("No such corpus.")
order = request.GET.get('order', 'frequency')
if order not in _ngrams_order_columns:
raise ValidationError('The order parameter should take one of the following values: ' + ', '.join(_ngrams_order_columns), 400)
order_column = _ngrams_order_columns[order]
# query building
ngramsQuery = Ngram.objects.filter(
nodes__parent = corpus,
terms__startswith = request.GET.get('startswith', '')
).annotate(count=Count('id'))
# how should we order this?
orderColumn = {
"frequency" : "-count",
"alphabetical" : "terms"
}.get(request.GET.get('order', 'frequency'), '-count')
ngramsQuery = ngramsQuery.order_by(orderColumn)
# response building
return JsonHttpResponse({
"list" : [ngram.terms for ngram in ngramsQuery],
})
class CorpusController:
def corpus_metadata(request, corpus_id):
# parameters retrieval and control
corpusQuery = Node.objects.filter(id = corpus_id)
if not corpusQuery:
raise Http404("No such corpus.")
corpus = corpusQuery.first()
if corpus.type.name != 'Corpus':
raise Http404("No such corpus.")
# query building
cursor = connection.cursor()
cursor.execute(
''' SELECT
key,
COUNT(*) AS count
FROM (
SELECT skeys(metadata) AS key
FROM %s
) AS keys
GROUP BY
key
ORDER BY
count DESC
''' % (Node._meta.db_table, ))
# response building
return JsonHttpResponse({
"list" : [row[0] for row in cursor.fetchall()],
})
@staticmethod
def ngrams(request, corpus_id):
# parameters retrieval and control
corpusQuery = Node.objects.filter(id = corpus_id)
if not corpusQuery:
raise Http404("No such corpus.")
corpus = corpusQuery.first()
if corpus.type.name != 'Corpus':
raise Http404("No such corpus.")
order = request.GET.get('order', 'frequency')
if order not in _ngrams_order_columns:
raise ValidationError('The order parameter should take one of the following values: ' + ', '.join(_ngrams_order_columns), 400)
order_column = _ngrams_order_columns[order]
# query building
ngramsQuery = Ngram.objects.filter(
nodes__parent = corpus,
terms__startswith = request.GET.get('startswith', '')
).annotate(count=Count('id'))
# how should we order this?
orderColumn = {
"frequency" : "-count",
"alphabetical" : "terms"
}.get(request.GET.get('order', 'frequency'), '-count')
ngramsQuery = ngramsQuery.order_by(orderColumn)
# response building
return JsonHttpResponse({
"list" : [ngram.terms for ngram in ngramsQuery],
})
# ?mesured=documents.count
# &parameters[]=metadata.publication_year
# &filter[]=ngrams.in.test,ht,grf
def corpus_data(request, corpus_id):
# parameters retrieval and control
corpusQuery = Node.objects.filter(id = corpus_id)
if not corpusQuery:
raise Http404("No such corpus.")
corpus = corpusQuery.first()
if corpus.type.name != 'Corpus':
raise Http404("No such corpus.")
# query building: initialization
columns = []
conditions = []
group = []
order = []
join_ngrams = False
# query building: parameters
for parameter in request.GET.getlist('parameters[]'):
@staticmethod
def metadata(request, corpus_id):
# parameters retrieval and control
corpusQuery = Node.objects.filter(id = corpus_id)
if not corpusQuery:
raise Http404("No such corpus.")
corpus = corpusQuery.first()
if corpus.type.name != 'Corpus':
raise Http404("No such corpus.")
# query building
cursor = connection.cursor()
cursor.execute(
''' SELECT
key,
COUNT(*) AS count
FROM (
SELECT skeys(metadata) AS key
FROM %s
) AS keys
GROUP BY
key
ORDER BY
count DESC
''' % (Node._meta.db_table, ))
# response building
return JsonHttpResponse({
"list" : [row[0] for row in cursor.fetchall()],
})
@staticmethod
def data(request, corpus_id):
# parameters retrieval and control
corpusQuery = Node.objects.filter(id = corpus_id)
if not corpusQuery:
raise Http404("No such corpus.")
corpus = corpusQuery.first()
if corpus.type.name != 'Corpus':
raise Http404("No such corpus.")
# query building: initialization
columns = []
conditions = []
group = []
order = []
join_ngrams = False
# query building: parameters
for parameter in request.GET.getlist('parameters[]'):
c = len(columns)
parameter_array = parameter.split('.')
if len(parameter_array) != 2:
raise ValidationError('Unrecognized "parameter[]=%s"' % (parameter, ))
origin = parameter_array[0]
key = parameter_array[1]
if origin == "metadata":
key = key.replace('\'', '\\\'')
columns.append("node.metadata->'%s' AS c%d" % (key, c, ))
conditions.append("node.metadata ? '%s'" % (key, ))
group.append("c%d" % (c, ))
order.append("c%d" % (c, ))
else:
raise ValidationError('Unrecognized type "%s" in "parameter[]=%s"' % (origin, parameter, ))
# query building: mesured value
mesured = request.GET.get('mesured', '')
c = len(columns)
parameter_array = parameter.split('.')
if len(parameter_array) != 2:
raise ValidationError('Unrecognized "parameter[]=%s"' % (parameter, ))
origin = parameter_array[0]
key = parameter_array[1]
if origin == "metadata":
key = key.replace('\'', '\\\'')
columns.append("node.metadata->'%s' AS c%d" % (key, c, ))
conditions.append("node.metadata ? '%s'" % (key, ))
group.append("c%d" % (c, ))
order.append("c%d" % (c, ))
else:
raise ValidationError('Unrecognized type "%s" in "parameter[]=%s"' % (origin, parameter, ))
# query building: mesured value
mesured = request.GET.get('mesured', '')
c = len(columns)
if mesured == "documents.count":
columns.append("COUNT(node.id) AS c%d " % (c, ))
elif mesured == "ngrams.count":
columns.append("COUNT(ngram.id) AS c%d " % (c, ))
join_ngrams = True
else:
raise ValidationError('The "mesured" parameter should take one of the following values: "documents.count", "ngrams.count"')
# query building: filters
for filter in request.GET.getlist('filters[]', ''):
if '|' in filter:
filter_array = filter.split("|")
key = filter_array[0]
values = filter_array[1].replace("'", "\\'").split(",")
if key == 'ngram.terms':
conditions.append("ngram.terms IN ('%s')" % ("', '".join(values), ))
join_ngrams = True
if mesured == "documents.count":
columns.append("COUNT(node.id) AS c%d " % (c, ))
elif mesured == "ngrams.count":
columns.append("COUNT(ngram.id) AS c%d " % (c, ))
join_ngrams = True
else:
raise ValidationError('Unrecognized "filter[]=%s"' % (filter, ))
# query building: assembling
sql = "SELECT %s FROM %s AS node" % (', '.join(columns), Node._meta.db_table, )
if join_ngrams:
sql += " INNER JOIN %s AS node_ngram ON node_ngram.node_id = node.id" % (Node_Ngram._meta.db_table, )
sql += " INNER JOIN %s AS ngram ON ngram.id = node_ngram.ngram_id" % (Ngram._meta.db_table, )
if conditions:
sql += " WHERE %s" % (" AND ".join(conditions), )
if group:
sql += " GROUP BY %s" % (", ".join(group), )
if order:
sql += " ORDER BY %s" % (", ".join(order), )
# query execution
# return HttpResponse(sql)
cursor = connection.cursor()
cursor.execute(sql)
# response building
return JsonHttpResponse({
"list": [row for row in cursor.fetchall()],
})
raise ValidationError('The "mesured" parameter should take one of the following values: "documents.count", "ngrams.count"')
# query building: filters
for filter in request.GET.getlist('filters[]', ''):
if '|' in filter:
filter_array = filter.split("|")
key = filter_array[0]
values = filter_array[1].replace("'", "\\'").split(",")
if key == 'ngram.terms':
conditions.append("ngram.terms IN ('%s')" % ("', '".join(values), ))
join_ngrams = True
else:
raise ValidationError('Unrecognized "filter[]=%s"' % (filter, ))
# query building: assembling
sql = "SELECT %s FROM %s AS node" % (', '.join(columns), Node._meta.db_table, )
if join_ngrams:
sql += " INNER JOIN %s AS node_ngram ON node_ngram.node_id = node.id" % (Node_Ngram._meta.db_table, )
sql += " INNER JOIN %s AS ngram ON ngram.id = node_ngram.ngram_id" % (Ngram._meta.db_table, )
if conditions:
sql += " WHERE %s" % (" AND ".join(conditions), )
if group:
sql += " GROUP BY %s" % (", ".join(group), )
if order:
sql += " ORDER BY %s" % (", ".join(order), )
# query execution
# return HttpResponse(sql)
cursor = connection.cursor()
cursor.execute(sql)
# response building
return JsonHttpResponse({
"list": [row for row in cursor.fetchall()],
})
......@@ -40,9 +40,9 @@ urlpatterns = patterns('',
url(r'^chart/corpus/(\d+)/data.csv$', send_csv),
url(r'^graph.json$', send_graph),
url(r'^api/corpus/(\d+)/ngrams$', gargantext_web.api.corpus_ngrams),
url(r'^api/corpus/(\d+)/metadata$', gargantext_web.api.corpus_metadata),
url(r'^api/corpus/(\d+)/data$', gargantext_web.api.corpus_data),
url(r'^api/corpus/(\d+)/ngrams$', gargantext_web.api.CorpusController.ngrams),
url(r'^api/corpus/(\d+)/metadata$', gargantext_web.api.CorpusController.metadata),
url(r'^api/corpus/(\d+)/data$', gargantext_web.api.CorpusController.data),
)
from django.conf import settings
......
......@@ -16,6 +16,7 @@ class NgramsCache(defaultdict):
def __missing__(self, terms):
"""If the terms are not yet present in the dictionary,
retrieve it from the database or insert it."""
terms = terms.strip().lower()
try:
ngram = node.models.Ngram.get(terms=terms, language=self.language)
except:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment