Commit 5fc7c2d7 authored by Mathieu Rodic

[CODE] The API is now wrapped in a controller.

NgramsCache: ngrams are now stripped of surrounding whitespace and stored in lowercase
parent 281e712a
...@@ -27,135 +27,135 @@ _ngrams_order_columns = { ...@@ -27,135 +27,135 @@ _ngrams_order_columns = {
} }
def corpus_ngrams(request, corpus_id): class CorpusController:
# parameters retrieval and control
corpusQuery = Node.objects.filter(id = corpus_id)
if not corpusQuery:
raise Http404("No such corpus.")
corpus = corpusQuery.first()
if corpus.type.name != 'Corpus':
raise Http404("No such corpus.")
order = request.GET.get('order', 'frequency')
if order not in _ngrams_order_columns:
raise ValidationError('The order parameter should take one of the following values: ' + ', '.join(_ngrams_order_columns), 400)
order_column = _ngrams_order_columns[order]
# query building
ngramsQuery = Ngram.objects.filter(
nodes__parent = corpus,
terms__startswith = request.GET.get('startswith', '')
).annotate(count=Count('id'))
# how should we order this?
orderColumn = {
"frequency" : "-count",
"alphabetical" : "terms"
}.get(request.GET.get('order', 'frequency'), '-count')
ngramsQuery = ngramsQuery.order_by(orderColumn)
# response building
return JsonHttpResponse({
"list" : [ngram.terms for ngram in ngramsQuery],
})
def corpus_metadata(request, corpus_id): @staticmethod
# parameters retrieval and control def ngrams(request, corpus_id):
corpusQuery = Node.objects.filter(id = corpus_id) # parameters retrieval and control
if not corpusQuery: corpusQuery = Node.objects.filter(id = corpus_id)
raise Http404("No such corpus.") if not corpusQuery:
corpus = corpusQuery.first() raise Http404("No such corpus.")
if corpus.type.name != 'Corpus': corpus = corpusQuery.first()
raise Http404("No such corpus.") if corpus.type.name != 'Corpus':
# query building raise Http404("No such corpus.")
cursor = connection.cursor() order = request.GET.get('order', 'frequency')
cursor.execute( if order not in _ngrams_order_columns:
''' SELECT raise ValidationError('The order parameter should take one of the following values: ' + ', '.join(_ngrams_order_columns), 400)
key, order_column = _ngrams_order_columns[order]
COUNT(*) AS count # query building
FROM ( ngramsQuery = Ngram.objects.filter(
SELECT skeys(metadata) AS key nodes__parent = corpus,
FROM %s terms__startswith = request.GET.get('startswith', '')
) AS keys ).annotate(count=Count('id'))
GROUP BY # how should we order this?
key orderColumn = {
ORDER BY "frequency" : "-count",
count DESC "alphabetical" : "terms"
''' % (Node._meta.db_table, )) }.get(request.GET.get('order', 'frequency'), '-count')
# response building ngramsQuery = ngramsQuery.order_by(orderColumn)
return JsonHttpResponse({ # response building
"list" : [row[0] for row in cursor.fetchall()], return JsonHttpResponse({
}) "list" : [ngram.terms for ngram in ngramsQuery],
})
# ?mesured=documents.count @staticmethod
# &parameters[]=metadata.publication_year def metadata(request, corpus_id):
# &filters[]=ngram.terms|test,ht,grf # parameters retrieval and control
corpusQuery = Node.objects.filter(id = corpus_id)
def corpus_data(request, corpus_id): if not corpusQuery:
# parameters retrieval and control raise Http404("No such corpus.")
corpusQuery = Node.objects.filter(id = corpus_id) corpus = corpusQuery.first()
if not corpusQuery: if corpus.type.name != 'Corpus':
raise Http404("No such corpus.") raise Http404("No such corpus.")
corpus = corpusQuery.first() # query building
if corpus.type.name != 'Corpus': cursor = connection.cursor()
raise Http404("No such corpus.") cursor.execute(
# query building: initialization ''' SELECT
columns = [] key,
conditions = [] COUNT(*) AS count
group = [] FROM (
order = [] SELECT skeys(metadata) AS key
join_ngrams = False FROM %s
# query building: parameters ) AS keys
for parameter in request.GET.getlist('parameters[]'): GROUP BY
key
ORDER BY
count DESC
''' % (Node._meta.db_table, ))
# response building
return JsonHttpResponse({
"list" : [row[0] for row in cursor.fetchall()],
})
@staticmethod
def data(request, corpus_id):
# parameters retrieval and control
corpusQuery = Node.objects.filter(id = corpus_id)
if not corpusQuery:
raise Http404("No such corpus.")
corpus = corpusQuery.first()
if corpus.type.name != 'Corpus':
raise Http404("No such corpus.")
# query building: initialization
columns = []
conditions = []
group = []
order = []
join_ngrams = False
# query building: parameters
for parameter in request.GET.getlist('parameters[]'):
c = len(columns)
parameter_array = parameter.split('.')
if len(parameter_array) != 2:
raise ValidationError('Unrecognized "parameter[]=%s"' % (parameter, ))
origin = parameter_array[0]
key = parameter_array[1]
if origin == "metadata":
key = key.replace('\'', '\\\'')
columns.append("node.metadata->'%s' AS c%d" % (key, c, ))
conditions.append("node.metadata ? '%s'" % (key, ))
group.append("c%d" % (c, ))
order.append("c%d" % (c, ))
else:
raise ValidationError('Unrecognized type "%s" in "parameter[]=%s"' % (origin, parameter, ))
# query building: mesured value
mesured = request.GET.get('mesured', '')
c = len(columns) c = len(columns)
parameter_array = parameter.split('.') if mesured == "documents.count":
if len(parameter_array) != 2: columns.append("COUNT(node.id) AS c%d " % (c, ))
raise ValidationError('Unrecognized "parameter[]=%s"' % (parameter, )) elif mesured == "ngrams.count":
origin = parameter_array[0] columns.append("COUNT(ngram.id) AS c%d " % (c, ))
key = parameter_array[1] join_ngrams = True
if origin == "metadata":
key = key.replace('\'', '\\\'')
columns.append("node.metadata->'%s' AS c%d" % (key, c, ))
conditions.append("node.metadata ? '%s'" % (key, ))
group.append("c%d" % (c, ))
order.append("c%d" % (c, ))
else:
raise ValidationError('Unrecognized type "%s" in "parameter[]=%s"' % (origin, parameter, ))
# query building: mesured value
mesured = request.GET.get('mesured', '')
c = len(columns)
if mesured == "documents.count":
columns.append("COUNT(node.id) AS c%d " % (c, ))
elif mesured == "ngrams.count":
columns.append("COUNT(ngram.id) AS c%d " % (c, ))
join_ngrams = True
else:
raise ValidationError('The "mesured" parameter should take one of the following values: "documents.count", "ngrams.count"')
# query building: filters
for filter in request.GET.getlist('filters[]', ''):
if '|' in filter:
filter_array = filter.split("|")
key = filter_array[0]
values = filter_array[1].replace("'", "\\'").split(",")
if key == 'ngram.terms':
conditions.append("ngram.terms IN ('%s')" % ("', '".join(values), ))
join_ngrams = True
else: else:
raise ValidationError('Unrecognized "filter[]=%s"' % (filter, )) raise ValidationError('The "mesured" parameter should take one of the following values: "documents.count", "ngrams.count"')
# query building: assembling # query building: filters
sql = "SELECT %s FROM %s AS node" % (', '.join(columns), Node._meta.db_table, ) for filter in request.GET.getlist('filters[]', ''):
if join_ngrams: if '|' in filter:
sql += " INNER JOIN %s AS node_ngram ON node_ngram.node_id = node.id" % (Node_Ngram._meta.db_table, ) filter_array = filter.split("|")
sql += " INNER JOIN %s AS ngram ON ngram.id = node_ngram.ngram_id" % (Ngram._meta.db_table, ) key = filter_array[0]
if conditions: values = filter_array[1].replace("'", "\\'").split(",")
sql += " WHERE %s" % (" AND ".join(conditions), ) if key == 'ngram.terms':
if group: conditions.append("ngram.terms IN ('%s')" % ("', '".join(values), ))
sql += " GROUP BY %s" % (", ".join(group), ) join_ngrams = True
if order: else:
sql += " ORDER BY %s" % (", ".join(order), ) raise ValidationError('Unrecognized "filter[]=%s"' % (filter, ))
# query execution # query building: assembling
# return HttpResponse(sql) sql = "SELECT %s FROM %s AS node" % (', '.join(columns), Node._meta.db_table, )
cursor = connection.cursor() if join_ngrams:
cursor.execute(sql) sql += " INNER JOIN %s AS node_ngram ON node_ngram.node_id = node.id" % (Node_Ngram._meta.db_table, )
# response building sql += " INNER JOIN %s AS ngram ON ngram.id = node_ngram.ngram_id" % (Ngram._meta.db_table, )
return JsonHttpResponse({ if conditions:
"list": [row for row in cursor.fetchall()], sql += " WHERE %s" % (" AND ".join(conditions), )
}) if group:
sql += " GROUP BY %s" % (", ".join(group), )
if order:
sql += " ORDER BY %s" % (", ".join(order), )
# query execution
# return HttpResponse(sql)
cursor = connection.cursor()
cursor.execute(sql)
# response building
return JsonHttpResponse({
"list": [row for row in cursor.fetchall()],
})
...@@ -40,9 +40,9 @@ urlpatterns = patterns('', ...@@ -40,9 +40,9 @@ urlpatterns = patterns('',
url(r'^chart/corpus/(\d+)/data.csv$', send_csv), url(r'^chart/corpus/(\d+)/data.csv$', send_csv),
url(r'^graph.json$', send_graph), url(r'^graph.json$', send_graph),
url(r'^api/corpus/(\d+)/ngrams$', gargantext_web.api.corpus_ngrams), url(r'^api/corpus/(\d+)/ngrams$', gargantext_web.api.CorpusController.ngrams),
url(r'^api/corpus/(\d+)/metadata$', gargantext_web.api.corpus_metadata), url(r'^api/corpus/(\d+)/metadata$', gargantext_web.api.CorpusController.metadata),
url(r'^api/corpus/(\d+)/data$', gargantext_web.api.corpus_data), url(r'^api/corpus/(\d+)/data$', gargantext_web.api.CorpusController.data),
) )
from django.conf import settings from django.conf import settings
......
...@@ -16,6 +16,7 @@ class NgramsCache(defaultdict): ...@@ -16,6 +16,7 @@ class NgramsCache(defaultdict):
def __missing__(self, terms): def __missing__(self, terms):
"""If the terms are not yet present in the dictionary, """If the terms are not yet present in the dictionary,
retrieve it from the database or insert it.""" retrieve it from the database or insert it."""
terms = terms.strip().lower()
try: try:
ngram = node.models.Ngram.get(terms=terms, language=self.language) ngram = node.models.Ngram.get(terms=terms, language=self.language)
except: except:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment