Commit 586e12d3 authored by delanoe

Merge branch 'unstable-migration' into unstable

parents 7e426b8a 4a7114e6
from gargantext.util.db import *
from gargantext.util.files import upload
from gargantext.constants import *
from datetime import datetime
from .users import User
#__all__ = ['Node', 'NodeType', 'Language']
class NodeType_v2(Base):
__table_args__ = {'extend_existing': True}
__tablename__ = 'node_nodetype'
id = Column(Integer, primary_key=True)
name = Column(String(255))
class Language_v2(Base):
__table_args__ = {'extend_existing': True}
__tablename__ = 'node_language'
id = Column(Integer, primary_key=True)
iso2 = Column(String(2))
iso3 = Column(String(3))
fullname = Column(String(255))
implemented = Column(Boolean)
class Node_v2(Base):
__table_args__ = {'extend_existing': True}
__tablename__ = 'node_node'
id = Column(Integer, primary_key=True)
parent_id = Column(Integer, ForeignKey('node_node.id'))
user_id = Column(Integer, ForeignKey(User.id))
type_id = Column(ForeignKey(NodeType_v2.id))
name = Column(String(255))
language_id = Column(Integer, ForeignKey(Language_v2.id))
date = Column(DateTime(), default=datetime.now)
hyperdata = Column(JSONB, default=dict)
def nodes_list(user_id, nodetype, parent_id=None, count=False):
    """
    nodes_list :: Int -> String -> Maybe Int -> Maybe Bool -> [(Int, String)]
    """
    nodes = (session.query(Node_v2.id, Node_v2.name)
                    .join(NodeType_v2, NodeType_v2.id == Node_v2.type_id)
                    # restrict to the given user (the parameter was previously unused)
                    .filter(Node_v2.user_id == user_id)
                    .filter(NodeType_v2.name == nodetype)
            )
    if parent_id is not None:
        nodes = nodes.filter(Node_v2.parent_id == parent_id)
    if count:
        return nodes.count()
    else:
        return nodes.all()
def nodes_tree(user_id):
"""
nodes_tree :: Int -> Tree Nodes
"""
for project_id, project_name in nodes_list(user_id, 'Project'):
print("* Project (%d, %s)" % (project_id, project_name))
for corpus_id, corpus_name in nodes_list(user_id, 'Corpus', parent_id=project_id):
count = nodes_list( user_id
, 'Document'
, parent_id=corpus_id
, count=True
)
if count > 1:
print("|- %d %s" % ( corpus_id, corpus_name ))
print(" |- %s docs" % count)
from django.http import HttpResponse, Http404
from django.core.exceptions import PermissionDenied, SuspiciousOperation
from django.core.exceptions import ValidationError
from django.core.urlresolvers import reverse
from sqlalchemy import text, distinct, or_,not_
from sqlalchemy.sql import func, desc
from sqlalchemy.orm import aliased
import datetime
import copy
from gargantext_web.views import move_to_trash
from gargantext_web.db import cache, Node, NodeNgram, NodeNgramNgram, NodeNodeNgram, Ngram, Hyperdata, Node_Ngram\
, NodeType, Node_Hyperdata
from gargantext_web.views import session
from gargantext_web.validation import validate, ValidationException
from node import models
def DebugHttpResponse(data):
return HttpResponse('<html><body style="background:#000;color:#FFF"><pre>%s</pre></body></html>' % (str(data), ))
import json
class JSONEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, datetime.datetime):
return obj.isoformat()[:19] + 'Z'
else:
            return super(JSONEncoder, self).default(obj)
json_encoder = JSONEncoder(indent=4)
def JsonHttpResponse(data, status=200):
return HttpResponse(
content = json_encoder.encode(data),
content_type = 'application/json; charset=utf-8',
status = status
)
Http400 = SuspiciousOperation
Http403 = PermissionDenied
import csv
def CsvHttpResponse(data, headers=None, status=200):
response = HttpResponse(
content_type = "text/csv",
status = status
)
writer = csv.writer(response, delimiter=',')
if headers:
writer.writerow(headers)
for row in data:
writer.writerow(row)
return response
_ngrams_order_columns = {
"frequency" : "-count",
"alphabetical" : "terms"
}
from rest_framework.authentication import SessionAuthentication, BasicAuthentication
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework.exceptions import APIException as _APIException
class APIException(_APIException):
def __init__(self, message, code=500):
self.status_code = code
self.detail = message
_operators_dict = {
"=": lambda field, value: (field == value),
"!=": lambda field, value: (field != value),
"<": lambda field, value: (field < value),
">": lambda field, value: (field > value),
"<=": lambda field, value: (field <= value),
">=": lambda field, value: (field >= value),
"in": lambda field, value: (or_(*tuple(field == x for x in value))),
"contains": lambda field, value: (field.contains(value)),
"doesnotcontain": lambda field, value: (not_(field.contains(value))),
"startswith": lambda field, value: (field.startswith(value)),
}
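# Example: how one JSON filter becomes a SQLAlchemy clause through the
# dictionary above (field and value are illustrative):
#
#     clause = _operators_dict['>='](Node.id, 1000)               # (Node.id >= 1000)
#     bees   = _operators_dict['in'](Ngram.terms, ['bee', 'bees'])
#     query  = session.query(Node).filter(clause)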
_hyperdata_list = [
hyperdata
for hyperdata in session.query(Hyperdata).order_by(Hyperdata.name)
]
_hyperdata_dict = {
hyperdata.name: hyperdata
for hyperdata in _hyperdata_list
}
from rest_framework.decorators import api_view
@api_view(('GET',))
def Root(request, format=None):
return Response({
'users': reverse('user-list', request=request, format=format),
'snippets': reverse('snippet-list', request=request, format=format)
})
class NodesChildrenNgrams(APIView):
def get(self, request, node_id):
# query ngrams
ParentNode = aliased(Node)
ngrams_query = (session
.query(Ngram.terms, func.sum(Node_Ngram.weight).label('count'))
.join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
.join(Node, Node.id == Node_Ngram.node_id)
.filter(Node.parent_id == node_id)
.group_by(Ngram.terms)
# .group_by(Ngram)
.order_by(func.sum(Node_Ngram.weight).desc(), Ngram.terms)
)
# filters
if 'startwith' in request.GET:
ngrams_query = ngrams_query.filter(Ngram.terms.startswith(request.GET['startwith']))
if 'contain' in request.GET:
ngrams_query = ngrams_query.filter(Ngram.terms.contains(request.GET['contain']))
#if 'doesnotcontain' in request.GET:
# ngrams_query = ngrams_query.filter(not_(Ngram.terms.contains(request.GET['doesnotcontain'])))
# pagination
offset = int(request.GET.get('offset', 0))
limit = int(request.GET.get('limit', 20))
total = ngrams_query.count()
# return formatted result
return JsonHttpResponse({
'pagination': {
'offset': offset,
'limit': limit,
'total': total,
},
'data': [
{
# 'id': ngram.id,
'terms': ngram.terms,
'count': ngram.count,
}
for ngram in ngrams_query[offset : offset+limit]
],
})
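# Example response for GET /api/nodes/<id>/children/ngrams?limit=2
# (values are illustrative):
#
#     {
#         "pagination": {"offset": 0, "limit": 2, "total": 1842},
#         "data": [{"terms": "bee", "count": 120},
#                  {"terms": "honey bee", "count": 57}]
#     }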
class NodesChildrenNgramsIds(APIView):
def get(self, request, node_id):
# query ngrams
ParentNode = aliased(Node)
ngrams_query = (session
.query(Node.id, func.sum(Node_Ngram.weight).label('count'))
.join(Node_Ngram, Node_Ngram.node_id == Node.id)
.join(Ngram, Ngram.id == Node_Ngram.ngram_id)
.filter(Node.parent_id == node_id)
.filter(Node.type_id == cache.NodeType['Document'].id)
.group_by(Node.id)
# .group_by(Ngram)
.order_by(func.sum(Node_Ngram.weight).desc())
)
# filters
if 'startwith' in request.GET:
ngrams_query = ngrams_query.filter(Ngram.terms.startswith(request.GET['startwith']))
if 'contain' in request.GET:
ngrams_query = ngrams_query.filter(Ngram.terms.contains(request.GET['contain']))
#if 'doesnotcontain' in request.GET:
# ngrams_query = ngrams_query.filter(not_(Ngram.terms.contains(request.GET['doesnotcontain'])))
# pagination
offset = int(request.GET.get('offset', 0))
limit = int(request.GET.get('limit', 20))
total = ngrams_query.count()
# return formatted result
return JsonHttpResponse({
'pagination': {
'offset': offset,
'limit': limit,
'total': total,
},
'data': [
{
'id': node,
'count': count
}
for node, count in ngrams_query[offset : offset+limit]
],
})
from gargantext_web.db import get_or_create_node
class Ngrams(APIView):
def get(self, request, node_id):
# query ngrams
ParentNode = aliased(Node)
corpus = session.query(Node).filter(Node.id==node_id).first()
group_by = []
results = ['id', 'terms']
ngrams_query = (session
.query(Ngram.id, Ngram.terms)
.join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
.join(Node, Node.id == Node_Ngram.node_id)
)
        # get the scores (tolerate a missing ?score= parameter)
        the_score = request.GET.get('score', '')
        if 'tfidf' in the_score:
Tfidf = aliased(NodeNodeNgram)
tfidf_id = get_or_create_node(nodetype='Tfidf (global)', corpus=corpus).id
ngrams_query = (ngrams_query.add_column(Tfidf.score.label('tfidf'))
.join(Tfidf, Tfidf.ngram_id == Ngram.id)
.filter(Tfidf.nodex_id == tfidf_id)
)
group_by.append(Tfidf.score)
results.append('tfidf')
        if 'cvalue' in the_score:
Cvalue = aliased(NodeNodeNgram)
cvalue_id = get_or_create_node(nodetype='Cvalue', corpus=corpus).id
ngrams_query = (ngrams_query.add_column(Cvalue.score.label('cvalue'))
.join(Cvalue, Cvalue.ngram_id == Ngram.id)
.filter(Cvalue.nodex_id == cvalue_id)
)
group_by.append(Cvalue.score)
results.append('cvalue')
        if 'specificity' in the_score:
Spec = aliased(NodeNodeNgram)
spec_id = get_or_create_node(nodetype='Specificity', corpus=corpus).id
ngrams_query = (ngrams_query.add_column(Spec.score.label('specificity'))
.join(Spec, Spec.ngram_id == Ngram.id)
.filter(Spec.nodex_id == spec_id)
)
group_by.append(Spec.score)
results.append('specificity')
        # ordering is only possible on a score that was actually requested,
        # otherwise the corresponding alias is undefined
        order = request.GET.get('order', False)
        if order == 'cvalue' and 'cvalue' in results:
            ngrams_query = ngrams_query.order_by(desc(Cvalue.score))
        elif order == 'tfidf' and 'tfidf' in results:
            ngrams_query = ngrams_query.order_by(desc(Tfidf.score))
        elif order == 'specificity' and 'specificity' in results:
            ngrams_query = ngrams_query.order_by(desc(Spec.score))
offset = int(request.GET.get('offset', 0))
limit = int(request.GET.get('limit', 20))
ngrams_query = (ngrams_query.filter(Node.parent_id == node_id)
.group_by(Ngram.id, Ngram.terms, *group_by)
)
if request.GET.get('ngram_id', False) != False:
ngram_id = int(request.GET['ngram_id'])
Group = aliased(NodeNgramNgram)
group_id = get_or_create_node(nodetype='Group', corpus=corpus).id
ngrams_query = (ngrams_query.join(Group, Group.ngramx_id == ngram_id )
.filter(Group.node_id == group_id)
.filter(Group.ngramx_id == ngram_id)
)
# filters by list type (soon list_id to factorize it in javascript)
list_query = request.GET.get('list', 'miam')
if list_query == 'miam':
Miam = aliased(NodeNgram)
miam_id = get_or_create_node(nodetype='MiamList', corpus=corpus).id
ngrams_query = (ngrams_query.join(Miam, Miam.ngram_id == Ngram.id )
.filter(Miam.node_id == miam_id)
)
elif list_query == 'stop':
Stop = aliased(NodeNgram)
stop_id = get_or_create_node(nodetype='StopList', corpus=corpus).id
ngrams_query = (ngrams_query.join(Stop, Stop.ngram_id == Ngram.id )
.filter(Stop.node_id == stop_id)
)
elif list_query == 'map':
# ngram could be in ngramx_id or ngramy_id
CoocX = aliased(NodeNgramNgram)
CoocY = aliased(NodeNgramNgram)
cooc_id = get_or_create_node(nodetype='Cooccurrence', corpus=corpus).id
ngrams_query = (ngrams_query.join(CoocX, CoocX.ngramx_id == Ngram.id )
.join(CoocY, CoocY.ngramy_id == Ngram.id)
.filter(CoocX.node_id == cooc_id)
.filter(CoocY.node_id == cooc_id)
)
total = ngrams_query.count()
# return formatted result
return JsonHttpResponse({
'pagination': {
'offset': offset,
'limit': limit,
'total': total,
},
            'data': [
                {
                    'id'    : ngram.id
                    , 'terms' : ngram.terms
                    # the score columns exist only when requested via ?score=
                    , 'tfidf' : getattr(ngram, 'tfidf', None)
                    , 'cvalue': getattr(ngram, 'cvalue', None)
                } for ngram in ngrams_query[offset : offset+limit]
                # TODO: dict comprehension in list comprehension, e.g.
                # { x: getattr(ngram, x) for x in results }
            ],
})
class NodesChildrenDuplicates(APIView):
def _fetch_duplicates(self, request, node_id, extra_columns=None, min_count=1):
# input validation
if extra_columns is None:
extra_columns = []
if 'keys' not in request.GET:
raise APIException('Missing GET parameter: "keys"', 400)
keys = request.GET['keys'].split(',')
# hyperdata retrieval
hyperdata_query = (session
.query(Hyperdata)
.filter(Hyperdata.name.in_(keys))
)
# build query elements
columns = []
aliases = []
for hyperdata in hyperdata_query:
# aliases
_Hyperdata = aliased(Hyperdata)
_Node_Hyperdata = aliased(Node_Hyperdata)
aliases.append(_Node_Hyperdata)
# what shall we retrieve?
columns.append(
getattr(_Node_Hyperdata, 'value_' + hyperdata.type)
)
# build the query
groups = list(columns)
duplicates_query = (session
.query(*(extra_columns + [func.count()] + columns))
.select_from(Node)
)
for _Node_Hyperdata, hyperdata in zip(aliases, hyperdata_query):
duplicates_query = duplicates_query.outerjoin(_Node_Hyperdata, _Node_Hyperdata.node_id == Node.id)
duplicates_query = duplicates_query.filter(_Node_Hyperdata.hyperdata_id == hyperdata.id)
duplicates_query = duplicates_query.filter(Node.parent_id == node_id)
duplicates_query = duplicates_query.group_by(*columns)
duplicates_query = duplicates_query.order_by(func.count().desc())
duplicates_query = duplicates_query.having(func.count() > min_count)
# and now, return it
return duplicates_query
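    # For keys=title,publication_date the query built above is roughly
    # equivalent to this SQL sketch (table and column names depend on the
    # hyperdata types and are illustrative):
    #
    #     SELECT COUNT(*), h1.value_string, h2.value_datetime
    #     FROM node_node AS n
    #     LEFT OUTER JOIN node_hyperdata AS h1 ON h1.node_id = n.id
    #     LEFT OUTER JOIN node_hyperdata AS h2 ON h2.node_id = n.id
    #     WHERE n.parent_id = :node_id
    #       AND h1.hyperdata_id = :title_id
    #       AND h2.hyperdata_id = :date_id
    #     GROUP BY h1.value_string, h2.value_datetime
    #     HAVING COUNT(*) > :min_count
    #     ORDER BY COUNT(*) DESC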
def get(self, request, node_id):
# data to be returned
duplicates = self._fetch_duplicates(request, node_id)
# pagination
offset = int(request.GET.get('offset', 0))
limit = int(request.GET.get('limit', 10))
total = duplicates.count()
# response building
return JsonHttpResponse({
'pagination': {
'offset': offset,
'limit': limit,
'total': total,
},
'data': [
{
'count': duplicate[0],
'values': duplicate[1:],
}
for duplicate in duplicates[offset : offset+limit]
]
})
def delete(self, request, node_id):
# get the minimum ID for each of the nodes sharing the same hyperdata
kept_node_ids_query = self._fetch_duplicates(request, node_id, [func.min(Node.id).label('id')], 0)
kept_node_ids = [kept_node.id for kept_node in kept_node_ids_query]
# TODO with new orm
duplicate_nodes = models.Node.objects.filter( parent_id=node_id ).exclude(id__in=kept_node_ids)
# # delete the stuff
# delete_query = (session
# .query(Node)
# .filter(Node.parent_id == node_id)
# .filter(~Node.id.in_(kept_node_ids))
# )
count = len(duplicate_nodes)
for node in duplicate_nodes:
print("deleting node ",node.id)
move_to_trash(node.id)
# print(delete_query)
# # delete_query.delete(synchronize_session=True)
# session.flush()
return JsonHttpResponse({
'deleted': count
})
# retrieve metadata from a given list of parent nodes
def get_metadata(corpus_id_list):
# query hyperdata keys
ParentNode = aliased(Node)
hyperdata_query = (session
.query(Hyperdata)
.join(Node_Hyperdata, Node_Hyperdata.hyperdata_id == Hyperdata.id)
.join(Node, Node.id == Node_Hyperdata.node_id)
.filter(Node.parent_id.in_(corpus_id_list))
.group_by(Hyperdata)
)
# build a collection with the hyperdata keys
collection = []
    for hyperdata in hyperdata_query:
        values = None
# count values and determine their span
values_count = None
values_from = None
values_to = None
if hyperdata.type != 'text':
value_column = getattr(Node_Hyperdata, 'value_' + hyperdata.type)
node_hyperdata_query = (session
.query(value_column)
.join(Node, Node.id == Node_Hyperdata.node_id)
.filter(Node.parent_id.in_(corpus_id_list))
.filter(Node_Hyperdata.hyperdata_id == hyperdata.id)
.group_by(value_column)
.order_by(value_column)
)
values_count = node_hyperdata_query.count()
# values_count, values_from, values_to = node_hyperdata_query.first()
            # if there are no more than 48 values, retrieve them
if isinstance(values_count, int) and values_count <= 48:
if hyperdata.type == 'datetime':
values = [row[0].isoformat() for row in node_hyperdata_query.all()]
else:
values = [row[0] for row in node_hyperdata_query.all()]
# adding this hyperdata to the collection
collection.append({
'key': hyperdata.name,
'type': hyperdata.type,
'values': values,
'valuesFrom': values_from,
'valuesTo': values_to,
'valuesCount': values_count,
})
# give the result back
return collection
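# Each entry of the returned collection looks like this (values illustrative):
#
#     {'key': 'publication_date', 'type': 'datetime',
#      'values': ['2014-12-03T10:00:00', ...],
#      'valuesFrom': None, 'valuesTo': None, 'valuesCount': 12}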
class ApiHyperdata(APIView):
def get(self, request):
corpus_id_list = list(map(int, request.GET['corpus_id'].split(',')))
return JsonHttpResponse({
'data': get_metadata(corpus_id_list),
})
# retrieve ngrams from a given list of parent nodes
def get_ngrams(corpus_id_list):
pass
class ApiNgrams(APIView):
def get(self, request):
# parameters retrieval and validation
startwith = request.GET.get('startwith', '').replace("'", "\\'")
# query ngrams
ParentNode = aliased(Node)
ngrams_query = (session
.query(Ngram.terms, func.sum(Node_Ngram.weight).label('count'))
.join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
.join(Node, Node.id == Node_Ngram.node_id)
.group_by(Ngram.terms)
# .group_by(Ngram)
.order_by(func.sum(Node_Ngram.weight).desc(), Ngram.terms)
)
# filters
if 'startwith' in request.GET:
ngrams_query = ngrams_query.filter(Ngram.terms.startswith(request.GET['startwith']))
if 'contain' in request.GET:
ngrams_query = ngrams_query.filter(Ngram.terms.contains(request.GET['contain']))
if 'corpus_id' in request.GET:
corpus_id_list = list(map(int, request.GET.get('corpus_id', '').split(',')))
if corpus_id_list and corpus_id_list[0]:
ngrams_query = ngrams_query.filter(Node.parent_id.in_(corpus_id_list))
# pagination
offset = int(request.GET.get('offset', 0))
limit = int(request.GET.get('limit', 20))
total = ngrams_query.count()
# return formatted result
return JsonHttpResponse({
'pagination': {
'offset': offset,
'limit': limit,
'total': total,
},
'data': [
{
'terms': ngram.terms,
'count': ngram.count,
}
for ngram in ngrams_query[offset : offset+limit]
],
})
class NodesChildrenQueries(APIView):
def _sql(self, input, node_id):
fields = dict()
        tables = {'nodes'}  # note: set('nodes') would yield a set of characters
hyperdata_aliases = dict()
# retrieve all unique fields names
fields_names = input['retrieve']['fields'].copy()
fields_names += [filter['field'] for filter in input['filters']]
fields_names += input['sort']
fields_names = set(fields_names)
# relate fields to their respective ORM counterparts
for field_name in fields_names:
field_name_parts = field_name.split('.')
field = None
if len(field_name_parts) == 1:
field = getattr(Node, field_name)
elif field_name_parts[1] == 'count':
if field_name_parts[0] == 'nodes':
field = func.count(Node.id)
elif field_name_parts[0] == 'ngrams':
field = func.count(Ngram.id)
tables.add('ngrams')
elif field_name_parts[0] == 'ngrams':
field = getattr(Ngram, field_name_parts[1])
tables.add('ngrams')
elif field_name_parts[0] == 'hyperdata':
hyperdata = _hyperdata_dict[field_name_parts[1]]
if hyperdata not in hyperdata_aliases:
hyperdata_aliases[hyperdata] = aliased(Node_Hyperdata)
hyperdata_alias = hyperdata_aliases[hyperdata]
field = getattr(hyperdata_alias, 'value_%s' % hyperdata.type)
if len(field_name_parts) == 3:
field = func.date_trunc(field_name_parts[2], field)
fields[field_name] = field
# build query: selected fields
query = (session
.query(*(fields[field_name] for field_name in input['retrieve']['fields']))
)
# build query: selected tables
query = query.select_from(Node)
if 'ngrams' in tables:
query = (query
.join(Node_Ngram, Node_Ngram.node_id == Node.id)
.join(Ngram, Ngram.id == Node_Ngram.ngram_id)
)
for hyperdata, hyperdata_alias in hyperdata_aliases.items():
query = (query
.join(hyperdata_alias, hyperdata_alias.node_id == Node.id)
.filter(hyperdata_alias.hyperdata_id == hyperdata.id)
)
# build query: filtering
query = (query
.filter(Node.parent_id == node_id)
)
for filter in input['filters']:
query = (query
.filter(_operators_dict[filter['operator']](
fields[filter['field']],
filter['value']
))
)
# build query: aggregations
if input['retrieve']['aggregate']:
for field_name in input['retrieve']['fields']:
if not field_name.endswith('.count'):
query = query.group_by(fields[field_name])
# build query: sorting
for field_name in input['sort']:
last = field_name[-1:]
if last in ('+', '-'):
field_name = field_name[:-1]
if last == '-':
query = query.order_by(fields[field_name].desc())
else:
query = query.order_by(fields[field_name])
# build and return result
output = copy.deepcopy(input)
output['pagination']['total'] = query.count()
output['results'] = list(
query[input['pagination']['offset']:input['pagination']['offset']+input['pagination']['limit']]
if input['pagination']['limit']
else query[input['pagination']['offset']:]
)
return output
def _haskell(self, input, node_id):
output = copy.deepcopy(input)
output['pagination']['total'] = 0
output['results'] = list()
return output
def post(self, request, node_id):
""" Query the children of the given node.
Example #1
----------
Input:
{
"pagination": {
"offset": 0,
"limit": 10
},
"retrieve": {
"type": "fields",
"list": ["name", "hyperdata.publication_date"]
},
"filters": [
{"field": "hyperdata.publication_date", "operator": ">", "value": "2010-01-01 00:00:00"},
{"field": "ngrams.terms", "operator": "in", "value": ["bee", "bees"]}
],
"sort": ["name"]
}
Output:
{
"pagination": {
"offset": 0,
"limit": 10
},
"retrieve": {
"type": "fields",
"list": ["name", "hyperdata.publication_date"]
},
"results": [
{"id": 12, "name": "A document about bees", "publication_date": "2014-12-03 10:00:00"},
...,
]
}
"""
# authorized field names
sql_fields = set({
'id', 'name',
'nodes.count',
'nodes.countnorm',
'ngrams.count',
'ngrams.terms', 'ngrams.n',
})
for hyperdata in _hyperdata_list:
sql_fields.add('hyperdata.' + hyperdata.name)
if hyperdata.type == 'datetime':
for part in ['year', 'month', 'day', 'hour', 'minute']:
sql_fields.add('hyperdata.' + hyperdata.name + '.' + part)
# authorized field names: Haskell
haskell_fields = set({
'haskell.test',
})
# authorized field names: all of them
authorized_fields = sql_fields | haskell_fields
# input validation
input = validate(request.DATA, {'type': dict, 'items': {
'pagination': {'type': dict, 'items': {
'limit': {'type': int, 'default': 0},
'offset': {'type': int, 'default': 0},
}, 'default': {'limit': 0, 'offset': 0}},
'filters': {'type': list, 'items': {'type': dict, 'items': {
'field': {'type': str, 'required': True, 'range': authorized_fields},
'operator': {'type': str, 'required': True, 'range': list(_operators_dict.keys())},
'value': {'required': True},
}}, 'default': list()},
'retrieve': {'type': dict, 'required': True, 'items': {
'aggregate': {'type': bool, 'default': False},
'fields': {'type': list, 'items': {'type': str, 'range': authorized_fields}, 'range': (1, )},
}},
'sort': {'type': list, 'items': {'type': str}, 'default': list()},
}})
# return result, depending on the queried fields
if set(input['retrieve']['fields']) <= sql_fields:
method = self._sql
elif set(input['retrieve']['fields']) <= haskell_fields:
method = self._haskell
else:
raise ValidationException('queried fields are mixing incompatible types of fields')
return JsonHttpResponse(method(input, node_id), 201)
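# Example call (sketch: host, port and node id are illustrative):
#
#     curl -X POST http://localhost:8000/api/nodes/12/children/queries \
#          -H 'Content-Type: application/json' \
#          -d '{"retrieve": {"aggregate": false, "fields": ["name"]},
#               "sort": ["name"]}'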
class NodesList(APIView):
authentication_classes = (SessionAuthentication, BasicAuthentication)
def get(self, request):
print("user id : " + str(request.user))
query = (session
.query(Node.id, Node.name, NodeType.name.label('type'))
.filter(Node.user_id == int(request.user.id))
.join(NodeType)
)
if 'type' in request.GET:
query = query.filter(NodeType.name == request.GET['type'])
if 'parent' in request.GET:
query = query.filter(Node.parent_id == int(request.GET['parent']))
return JsonHttpResponse({'data': [
node._asdict()
for node in query.all()
]})
class Nodes(APIView):
def get(self, request, node_id):
node = session.query(Node).filter(Node.id == node_id).first()
if node is None:
raise APIException('This node does not exist', 404)
return JsonHttpResponse({
'id': node.id,
'name': node.name,
'parent_id': node.parent_id,
'type': cache.NodeType[node.type_id].name,
# 'type': node.type__name,
#'hyperdata': dict(node.hyperdata),
'hyperdata': node.hyperdata,
})
    # deleting a node by id
    # currently very dangerous:
    # it should take the subnodes into account as well,
    # for better consistency...
    def delete(self, request, node_id):
        user = request.user
        node = session.query(Node).filter(Node.id == node_id).first()
        msgres = str()
        try:
            move_to_trash(node_id)
            msgres = str(node_id) + " moved to Trash"
        except Exception as error:
            msgres = "error deleting %s: %s" % (node_id, error)
        # the view previously returned nothing, which is an error for a DRF view
        return JsonHttpResponse({'deleted': msgres})
class CorpusController:
@classmethod
def get(cls, corpus_id):
try:
corpus_id = int(corpus_id)
except:
raise ValidationError('Corpora are identified by an integer.', 400)
        # .first() was previously called twice, once on the query and once on the node
        corpus = session.query(Node).filter(Node.id == corpus_id).first()
        # print(str(corpus))
        # raise Http404("404 error.")
        if corpus is None:
            raise Http404("No such corpus: %d" % (corpus_id, ))
        if corpus.type.name != 'Corpus':
            raise Http404("No such corpus: %d" % (corpus_id, ))
# if corpus.user != request.user:
# raise Http403("Unauthorized access.")
return corpus
@classmethod
def ngrams(cls, request, node_id):
# parameters retrieval and validation
startwith = request.GET.get('startwith', '').replace("'", "\\'")
# build query
ParentNode = aliased(Node)
query = (session
.query(Ngram.terms, func.count('*'))
.join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
.join(Node, Node.id == Node_Ngram.node_id)
.join(ParentNode, ParentNode.id == Node.parent_id)
.filter(ParentNode.id == node_id)
.filter(Ngram.terms.like('%s%%' % (startwith, )))
.group_by(Ngram.terms)
.order_by(func.count('*').desc())
)
# response building
format = request.GET.get('format', 'json')
if format == 'json':
return JsonHttpResponse({
"data": [{
'terms': row[0],
'occurrences': row[1]
} for row in query.all()],
})
elif format == 'csv':
return CsvHttpResponse(
                [['terms', 'occurrences']] + [row for row in query.all()]
)
else:
raise ValidationError('Unrecognized "format=%s", should be "csv" or "json"' % (format, ))
from django.http import HttpResponse, Http404
from django.core.exceptions import PermissionDenied, SuspiciousOperation
from django.core.exceptions import ValidationError
from django.core.urlresolvers import reverse
from django.contrib.auth.decorators import login_required
from sqlalchemy import text, distinct, or_,not_
from sqlalchemy.sql import func, desc
from sqlalchemy.orm import aliased
import datetime
import copy
import json
from gargantext_web.db import cache
from gargantext_web.validation import validate, ValidationException
from gargantext_web.db import session,get_session, Node, NodeNgram, NodeNgramNgram, NodeNodeNgram, Ngram, Hyperdata, Node_Ngram, get_or_create_node
def DebugHttpResponse(data):
return HttpResponse('<html><body style="background:#000;color:#FFF"><pre>%s</pre></body></html>' % (str(data), ))
import time
import json
class JSONEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, datetime.datetime):
return obj.isoformat()[:19] + 'Z'
else:
            return super(JSONEncoder, self).default(obj)
json_encoder = JSONEncoder(indent=4)
def JsonHttpResponse(data, status=200):
return HttpResponse(
content = json_encoder.encode(data),
content_type = 'application/json; charset=utf-8',
status = status
)
Http400 = SuspiciousOperation
Http403 = PermissionDenied
import csv
def CsvHttpResponse(data, headers=None, status=200):
response = HttpResponse(
content_type = "text/csv",
status = status
)
writer = csv.writer(response, delimiter=',')
if headers:
writer.writerow(headers)
for row in data:
writer.writerow(row)
return response
_ngrams_order_columns = {
"frequency" : "-count",
"alphabetical" : "terms"
}
from rest_framework.authentication import SessionAuthentication, BasicAuthentication
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework.renderers import JSONRenderer
from rest_framework.exceptions import APIException as _APIException
class APIException(_APIException):
def __init__(self, message, code=500):
self.status_code = code
self.detail = message
from rest_framework.decorators import api_view
#@login_required
# TODO how to secure REST ?
class List(APIView):
def get_metadata ( self , ngram_ids , parent_id ):
# implicit global session
start_ = time.time()
nodes_ngrams = session.query(Ngram.id , Ngram.terms).filter( Ngram.id.in_( list(ngram_ids.keys()))).all()
for node in nodes_ngrams:
if node.id in ngram_ids:
ngram_ids[node.id] = {
"id": node.id,
"name": node.terms,
"scores": {
"tfidf": 0,
"occs":0
}
}
# occ_list = get_or_create_node(nodetype='Occurrences', corpus_id=parent_id).id
# print( occ_list )
try:
tfidf_list = get_or_create_node(nodetype='Tfidf (global)', corpus_id=parent_id).id
ngram_tfidf = session.query(NodeNodeNgram.ngram_id,NodeNodeNgram.score).filter( NodeNodeNgram.nodex_id==tfidf_list , NodeNodeNgram.ngram_id.in_( list(ngram_ids.keys()) )).all()
for n in ngram_tfidf:
if n.ngram_id in ngram_ids:
ngram_ids[n.ngram_id]["scores"]["tfidf"] += n.score
except:
pass
try:
occ_list = get_or_create_node(nodetype='Occurrences', corpus_id=parent_id).id
ngram_occs = session.query(NodeNodeNgram.ngram_id,NodeNodeNgram.score).filter( NodeNodeNgram.nodex_id==occ_list , NodeNodeNgram.ngram_id.in_( list(ngram_ids.keys()) )).all()
for n in ngram_occs:
if n.ngram_id in ngram_ids:
ngram_ids[n.ngram_id]["scores"]["occs"] += round(n.score)
except:
pass
end_ = time.time()
return { "data":ngram_ids , "secs":(end_ - start_) }
def get(self, request, corpus_id , list_name ):
if not request.user.is_authenticated():
return JsonHttpResponse( {"request" : "forbidden"} )
# implicit global session
corpus = session.query(Node).filter( Node.id==corpus_id ).first()
# if corpus==None:
# return JsonHttpResponse( {"request" : "forbidden"} )
start_ = time.time()
list_name = list_name.title()+"List"
node_list = get_or_create_node(nodetype=list_name, corpus=corpus )
nodes_ngrams = session.query(NodeNgram.ngram_id).filter(NodeNgram.node_id==node_list.id ).all()
ngram_ids = {}
for node in nodes_ngrams:
ngram_ids[node.ngram_id] = True
end_ = time.time()
measurements = {
"get_ngram_ids" : {
"s":(end_ - start_),
"n": len(ngram_ids.keys())
}
}
if request.GET.get('custom', False) != False:
ngrams_meta = self.get_metadata( ngram_ids , corpus_id )
ngram_ids = ngrams_meta["data"]
measurements["tfidf"] = { "s" : ngrams_meta["secs"], "n": len(ngrams_meta["data"].keys()) }
return JsonHttpResponse( {"data":ngram_ids , "time":measurements } )
class Ngrams(APIView):
'''
REST application to manage ngrams
Example :
http://localhost:8000/api/node/1444485/ngrams?format=json&score=tfidf,occs
'''
def get(self, request, node_id):
if not request.user.is_authenticated():
return JsonHttpResponse( {"request" : "forbidden"} )
# implicit global session
corpus = session.query(Node).filter( Node.id==node_id).first()
# if corpus==None:
# return JsonHttpResponse( {"request" : "forbidden"} )
start_ = time.time()
ParentNode = aliased(Node)
group_by = []
results = ['id', 'terms']
ngrams_query = (session
.query(Ngram.id, Ngram.terms)
.join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
.join(Node, Node.id == Node_Ngram.node_id)
)
the_score = "tfidf"
if request.GET.get('score', False) != False:
the_score = request.GET['score']
if 'occs' in the_score:
Occs = NodeNodeNgram
occs_id = get_or_create_node(nodetype='Occurrences', corpus=corpus).id
ngrams_query = (ngrams_query.add_column(Occs.score.label('occs'))
.join(Occs, Occs.ngram_id == Ngram.id)
.filter(Occs.nodex_id==occs_id)
)
group_by.append(Occs.score)
results.append('occs')
if 'tfidf' in the_score:
Tfidf = aliased(NodeNodeNgram)
tfidf_id = get_or_create_node(nodetype='Tfidf (global)', corpus=corpus).id
ngrams_query = (ngrams_query.add_column(Tfidf.score.label('tfidf'))
.join(Tfidf, Tfidf.ngram_id == Ngram.id)
.filter(Tfidf.nodex_id == tfidf_id)
)
group_by.append(Tfidf.score)
results.append('tfidf')
if 'cvalue' in the_score:
Cvalue = aliased(NodeNodeNgram)
cvalue_id = get_or_create_node(nodetype='Cvalue', corpus=corpus).id
ngrams_query = (ngrams_query.add_column(Cvalue.score.label('cvalue'))
.join(Cvalue, Cvalue.ngram_id == Ngram.id)
.filter(Cvalue.nodex_id == cvalue_id)
)
group_by.append(Cvalue.score)
results.append('cvalue')
if 'specificity' in the_score:
Spec = aliased(NodeNodeNgram)
spec_id = get_or_create_node(nodetype='Specificity', corpus=corpus).id
ngrams_query = (ngrams_query.add_column(Spec.score.label('specificity'))
.join(Spec, Spec.ngram_id == Ngram.id)
.filter(Spec.nodex_id == spec_id)
)
group_by.append(Spec.score)
results.append('specificity')
        # ordering is only possible on a score that was actually requested,
        # otherwise the corresponding alias is undefined
        order_query = request.GET.get('order', False)
        if order_query == 'occs' and 'occs' in results:
            ngrams_query = ngrams_query.order_by(desc(Occs.score))
        elif order_query == 'cvalue' and 'cvalue' in results:
            ngrams_query = ngrams_query.order_by(desc(Cvalue.score))
        elif order_query == 'tfidf' and 'tfidf' in results:
            ngrams_query = ngrams_query.order_by(desc(Tfidf.score))
        elif order_query == 'specificity' and 'specificity' in results:
            ngrams_query = ngrams_query.order_by(desc(Spec.score))
offset = int(request.GET.get('offset', 0))
limit = int(request.GET.get('limit', 20))
ngrams_query = (ngrams_query.filter(Node.parent_id == node_id)
.group_by(Ngram.id, Ngram.terms, *group_by)
)
if request.GET.get('ngram_id', False) != False:
ngram_id = int(request.GET['ngram_id'])
Group = aliased(NodeNgramNgram)
group_id = get_or_create_node(nodetype='Group', corpus=corpus).id
ngrams_query = (ngrams_query.join(Group, Group.ngramx_id == ngram_id )
.filter(Group.node_id == group_id)
.filter(Group.ngramx_id == ngram_id)
)
list_query = request.GET.get('list', 'miam')
list_id = request.GET.get('list_id', False)
if list_query == 'miam':
Miam = aliased(NodeNgram)
miam_id = get_or_create_node(nodetype='MiamList', corpus=corpus).id
ngrams_query = (ngrams_query.join(Miam, Miam.ngram_id == Ngram.id )
.filter(Miam.node_id == miam_id)
)
elif list_query == 'stop':
Stop = aliased(NodeNgram)
stop_id = get_or_create_node(nodetype='StopList', corpus=corpus).id
ngrams_query = (ngrams_query.join(Stop, Stop.ngram_id == Ngram.id )
.filter(Stop.node_id == stop_id)
)
elif list_query == 'map':
CoocX = aliased(NodeNgramNgram)
CoocY = aliased(NodeNgramNgram)
cooc_id = get_or_create_node(nodetype='Cooccurrence', corpus=corpus).id
ngrams_query = (ngrams_query.join(CoocX, CoocX.ngramx_id == Ngram.id )
.join(CoocY, CoocY.ngramy_id == Ngram.id)
.filter(CoocX.node_id == cooc_id)
.filter(CoocY.node_id == cooc_id)
)
elif list_id != False:
            list_id = int(list_id)
            # look up the list node itself (not the corpus) so its type can be checked
            node = session.query(Node).filter(Node.id == list_id).first()
if node.type_id == cache.NodeType['StopList'].id or node.type_id == cache.NodeType['MiamList'].id:
List = aliased(NodeNgram)
ngrams_query = (ngrams_query.join(List, List.ngram_id == Ngram.id )
.filter(List.node_id == node.id)
)
elif node.type_id == cache.NodeType['Cooccurrence'].id:
CoocX = aliased(NodeNgramNgram)
CoocY = aliased(NodeNgramNgram)
ngrams_query = (ngrams_query.join(CoocX, CoocX.ngramx_id == Ngram.id )
.join(CoocY, CoocY.ngramy_id == Ngram.id)
.filter(CoocX.node_id == node.id)
.filter(CoocY.node_id == node.id)
)
output = []
for ngram in ngrams_query[offset : offset+limit]:
info = { "scores" : {} }
try: info["id"] = ngram.id
except: pass
try: info["name"] = ngram.terms
except: pass
try: info["scores"]["occs"] = ngram.occs
except: pass
try: info["scores"]["tfidf"] = ngram.tfidf
except: pass
try: info["scores"]["cvalue"] = ngram.cvalue
except: pass
try: info["scores"]["specificity"] = ngram.specificity
except: pass
output.append( info )
end_ = time.time()
measurements = {
"s":(end_ - start_),
"n": len(output)
}
# return formatted result
return JsonHttpResponse({
'pagination': {
'offset': offset,
'limit': limit,
'total': len(output),
},
'data': output,
"time" : measurements
})
def post(self , request , node_id ):
return JsonHttpResponse(["POST","ok"])
def put (self, request, corpus_id):
"""
Add ngrams to Miam list
"""
group_rawreq = dict(request.data)
print( "group_rawreq:" )
print( group_rawreq )
return JsonHttpResponse(["PUT","ok"])
class Group(APIView):
    '''
    REST API to manage groups of ngrams.
    Groups can be synonyms, a category, or ngrams grouped by stems or lemmas.
    '''
def get_group_id(self , node_id , user_id):
node_id = int(node_id)
# implicit global session
corpus = session.query(Node).filter( Node.id==node_id).first()
if corpus==None: return None
group = get_or_create_node(corpus=corpus, nodetype='Group')
return(group.id)
def get(self, request, corpus_id):
if not request.user.is_authenticated():
return JsonHttpResponse( {"request" : "forbidden"} )
group_id = self.get_group_id(corpus_id , request.user.id)
if group_id==None:
return JsonHttpResponse( {"request" : "forbidden"} )
#api/node/$corpus_id/ngrams?ngram_id=12
# ngram_id = 1 #request.GET.get('ngram_id', False)
# ngram_id = int(node_id)
# #api/node/$corpus_id/ngrams?all=True
# all_option = request.GET.get('all', False)
# all_option = 1 #int(all_option)
# IMPORTANT: Algorithm for getting the groups:
# 1. pairs_list <- Get all pairs from get_group_id()
# 2. G <- Do a non-directed graph of pairs_list
# 3. DG <- Do a directed graph of pairs_list
# 4. cliques_list <- find_cliques of G
# 5. groups <- Iterate in sinonims_cliques and set the mainNode per each clique: take the highest max_outdegree-node of each clique, using DG
import networkx as nx
G = nx.Graph()
DG = nx.DiGraph()
# implicit global session
ngrams_ngrams = (session
.query(NodeNgramNgram)
.filter(NodeNgramNgram.node_id==group_id)
)
# ngramy_id=476996, score=1.0, node_id=75081, id=1282846, ngramx_id=493431
for ng in ngrams_ngrams:
# n_x = ( session.query(Ngram).filter(Ngram.id==ng.ngramx_id) ).first()
# n_y = ( session.query(Ngram).filter(Ngram.id==ng.ngramy_id) ).first()
G.add_edge( ng.ngramx_id , ng.ngramy_id )
DG.add_edge( ng.ngramx_id , ng.ngramy_id )
# group = dict(list())
sinonims_cliques = nx.find_cliques( G )
# for nn in ngrams_ngrams.all():
# group[nn.ngramx_id] = group.get(nn.ngramx_id, []) + [nn.ngramy_id]
groups = { "nodes": {} , "links": {} }
for clique in sinonims_cliques:
max_deg = -1
mainNode = -1
mainNode_sinonims = []
if len(clique) > 1:
for node in clique:
# just a lookup hash with *both*
# the mainNode and the subnodes
groups["nodes"][node] = False
# choosing mainNode
node_outdeg = DG.out_degree(node)
if node_outdeg>max_deg:
max_deg = node_outdeg
mainNode = node
# the links themselves main => [subs]
for node in clique:
if mainNode!=node:
mainNode_sinonims.append( node )
groups["links"][ mainNode ] = mainNode_sinonims
# for i in groups["nodes"]:
# print(i)
ngrams = [int(i) for i in list(groups["nodes"].keys())]
# groups["nodes"] = get_occtfidf( ngrams , request.user.id , corpus_id , "Group")
return JsonHttpResponse( { "data" : groups } )
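    # Example payload returned above (ngram ids illustrative): every clique
    # member appears in "nodes", and "links" maps each mainNode to its synonyms:
    #
    #     { "data": { "nodes": { 101: false, 102: false, 103: false },
    #                 "links": { 101: [102, 103] } } }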
def post(self, request, node_id):
return JsonHttpResponse( ["hola" , "mundo"] )
    def delete(self, request, corpus_id):
        # input validation
        # implicit global session
        input = validate(request.DATA, {'data': {'source': int, 'target': list}})
        group_id = self.get_group_id(corpus_id, request.user.id)
        for data in input['data']:
            if data['source'] > 0 and len(data['target']) > 0:
                for target_id in data['target']:
                    (session.query(NodeNgramNgram)
                        .filter(NodeNgramNgram.node_id == group_id)
                        .filter(NodeNgramNgram.ngramx_id == data['source'])
                        # delete only the link to this target
                        # (the loop previously ignored target_id)
                        .filter(NodeNgramNgram.ngramy_id == target_id)
                        .delete()
                    )
            else:
                raise APIException('Missing parameter: {"data": [{"source": Int, "target": [Int]}]}', 400)
        session.commit()
        # returning inside the loop would have stopped after the first item
        return JsonHttpResponse(True, 201)
def put(self , request , corpus_id ):
# implicit global session
group_rawreq = dict(request.data)
GDict = []
group_new = {}
for g in group_rawreq:
gdict = []
mainform = int(g.replace("[]",""))
gdict.append(mainform)
group_new[mainform] = list(map(int, group_rawreq[g]))
for subform in group_new[mainform]:
gdict.append(subform)
GDict.append( gdict )
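        # After this loop, a raw request like {'101[]': ['102', '103']} (ids
        # illustrative) yields GDict == [[101, 102, 103]], mainform first.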
existing_group_id = self.get_group_id(corpus_id , request.user.id)
# implicit global session
grouped_ngrams = (session
.query(NodeNgramNgram)
.filter(NodeNgramNgram.node_id==existing_group_id)
)
# [ - - - new group = old clique + new clique - - - ] #
NewGroups = {}
Rels_2_delete = {}
for ng in grouped_ngrams:
for i in range(len(GDict)):
clique_i = GDict[i]
neighbours = {}
for node in clique_i:
if node==ng.ngramx_id:
neighbours[ng.ngramy_id] = True
if node==ng.ngramy_id:
neighbours[ng.ngramx_id] = True
if len(list(neighbours.keys()))>0:
voisinage = {}
for node_ in clique_i:
voisinage[node_] = True
for node_ in neighbours:
voisinage[node_] = True
clique_i = list(voisinage.keys())
Rels_2_delete[ng.id] = True
if i not in NewGroups:
NewGroups[i] = {}
for node in clique_i:
NewGroups[i][node] = True
for i in NewGroups:
NewGroups[i] = list(NewGroups[i].keys())
# [ - - - / new group = old clique + new clique - - - ] #
# [ - - - considering main form of the query - - - ] #
        for i in range(len(GDict)):
            # a clique with no overlap in the DB still forms a (new) group
            if i not in NewGroups:
                NewGroups[i] = list(GDict[i])
            ordered = []
            for j in range(len(NewGroups[i])):
                if NewGroups[i][j] != GDict[i][0]:
                    ordered.append(NewGroups[i][j])
            NewGroups[i] = [GDict[i][0]] + ordered
# [ - - - / considering main form of the query - - - ] #
# [ - - - deleting old clique - - - ] #
for rel_id in Rels_2_delete:
session.query(NodeNgramNgram).filter(NodeNgramNgram.id==rel_id ).delete()
session.commit()
# [ - - - / deleting old clique - - - ] #
# [ - - - doing links of new clique and adding to DB - - - ] #
from itertools import combinations
for i in NewGroups:
edges = combinations(NewGroups[i], 2)
for n in edges:
n1=n[0]
n2=n[1]
nodengramngram = NodeNgramNgram(node_id=existing_group_id, ngramx_id=n1 , ngramy_id=n2, score=1.0)
session.add(nodengramngram)
session.commit()
# [ - - - / doing links of new clique and adding to DB - - - ] #
# import networkx as nx
# G = nx.Graph()
# DG = nx.DiGraph()
# for ng in grouped_ngrams:
# n_x = ( session.query(Ngram).filter(Ngram.id==ng.ngramx_id) ).first()
# n_y = ( session.query(Ngram).filter(Ngram.id==ng.ngramy_id) ).first()
# G.add_edge( str(ng.ngramx_id)+" "+n_x.terms , str(ng.ngramy_id)+" "+n_y.terms )
# DG.add_edge( str(ng.ngramx_id)+" "+n_x.terms , str(ng.ngramy_id)+" "+n_y.terms )
# # group = dict(list())
# sinonims_cliques = nx.find_cliques( G )
# # for nn in ngrams_ngrams.all():
# # group[nn.ngramx_id] = group.get(nn.ngramx_id, []) + [nn.ngramy_id]
# groups = { "nodes": {} , "links": {} }
# for clique in sinonims_cliques:
# max_deg = -1
# mainNode = -1
# mainNode_sinonims = []
# for node in clique:
# groups["nodes"][node] = False
# node_outdeg = DG.out_degree(node)
# if node_outdeg>max_deg:
# max_deg = node_outdeg
# mainNode = node
# for node in clique:
# if mainNode!=node:
# mainNode_sinonims.append( node )
# groups["links"][ mainNode ] = mainNode_sinonims
# import pprint
# print("GDict:")
# pprint.pprint( GDict )
# print("")
# print("NewGroups:")
# pprint.pprint( NewGroups )
# print("")
# print("Ids to delete:")
# pprint.pprint( Rels_2_delete )
# print("")
# print('groups["links"]:')
# pprint.pprint( groups["links"] )
# print("")
return JsonHttpResponse(True, 201)
class Keep(APIView):
"""
Actions on one existing Ngram in one list
"""
renderer_classes = (JSONRenderer,)
authentication_classes = (SessionAuthentication, BasicAuthentication)
def get (self, request, corpus_id):
# implicit global session
# list_id = session.query(Node).filter(Node.id==list_id).first()
corpus = session.query(Node).filter( Node.id==corpus_id ).first()
node_mapList = get_or_create_node(nodetype='MapList', corpus=corpus )
nodes_in_map = session.query(NodeNgram).filter(NodeNgram.node_id==node_mapList.id ).all()
results = {}
for node in nodes_in_map:
results[node.ngram_id] = True
return JsonHttpResponse(results)
def put (self, request, corpus_id):
"""
Add ngrams to map list
"""
# implicit global session
group_rawreq = dict(request.data)
ngram_2add = [int(i) for i in list(group_rawreq.keys())]
corpus = session.query(Node).filter( Node.id==corpus_id ).first()
node_mapList = get_or_create_node(nodetype='MapList', corpus=corpus )
for ngram_id in ngram_2add:
map_node = Node_Ngram( weight=1.0, ngram_id=ngram_id , node_id=node_mapList.id)
session.add(map_node)
session.commit()
return JsonHttpResponse(True, 201)
def delete (self, request, corpus_id):
"""
Delete ngrams from the map list
"""
# implicit global session
group_rawreq = dict(request.data)
# print("group_rawreq:")
# print(group_rawreq)
ngram_2del = [int(i) for i in list(group_rawreq.keys())]
corpus = session.query(Node).filter( Node.id==corpus_id ).first()
node_mapList = get_or_create_node(nodetype='MapList', corpus=corpus )
ngram_2del = session.query(NodeNgram).filter(NodeNgram.node_id==node_mapList.id , NodeNgram.ngram_id.in_(ngram_2del) ).all()
for map_node in ngram_2del:
try:
session.delete(map_node)
session.commit()
except:
pass
return JsonHttpResponse(True, 201)
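# Example: adding ngrams 12 and 13 to the map list of corpus 7 (sketch:
# ids, host and port are illustrative):
#
#     curl -X PUT http://localhost:8000/api/node/7/ngrams/keep -d '12=1&13=1'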
from django.conf.urls import patterns, url
from gargantext_web import views_optimized
from rest_v1_0 import api, ngrams, graph
from annotations import views
import tests.ngramstable.views as samtest
urlpatterns = patterns('',
# REST URLS
# What is REST ?
# https://en.wikipedia.org/wiki/Representational_state_transfer
#url(r'^api$', rest_v1_0.api.Root), # = ?
url(r'nodes$', api.NodesList.as_view()),
url(r'nodes/(\d+)$', api.Nodes.as_view()),
url(r'nodes/(\d+)/children/ngrams$', api.NodesChildrenNgrams.as_view()), # => repeated children ?
url(r'nodes/(\d+)/children/ids$', api.NodesChildrenNgramsIds.as_view()), # => repeated children ?
# NGRAMS table & annotations
url(r'node/(\d+)/ngrams$' , ngrams.Ngrams.as_view()),
url(r'node/(\d+)/ngrams/group$', ngrams.Group.as_view()),
url(r'node/(\d+)/ngrams/keep$', ngrams.Keep.as_view()),
# url(r'node/(?P<list_id>[0-9]+)/ngrams/keep/(?P<ngram_ids>[0-9,\+]+)+$' , ngrams.Keep.as_view()),
url(r'node/(?P<list_id>[0-9]+)/ngrams/(?P<ngram_ids>[0-9,\+]+)+$', views.NgramEdit.as_view()),
url(r'node/(?P<corpus_id>[0-9]+)/ngrams/list/(?P<list_name>\w+)$' , ngrams.List.as_view()),
url(r'node/corpus/(?P<node_ids>[0-9,\+]+)+$' , samtest.get_corpuses),
#url(r'nodes/(\d+)/children/hyperdata$', api.NodesChildrenMetatadata.as_view()),
#url(r'nodes/(\d+)/children/hyperdata$', api.NodesChildrenMetatadata.as_view()),
url(r'nodes/(\d+)/children/queries$', api.NodesChildrenQueries.as_view()),
url(r'nodes/(\d+)/children/duplicates$', api.NodesChildrenDuplicates.as_view()),
# url(r'^api/nodes/(\d+)/children/duplicates/delete$', api.NodesChildrenDuplicates.delete ),
url(r'nodes/(\d+)/ngrams$', api.CorpusController.ngrams),
url(r'nodes/(\d+)/graph$', graph.Graph.as_view()),
url(r'corpus/(\d+)/graph$', graph.Graph.as_view()),
url(r'hyperdata$', api.ApiHyperdata.as_view()),
url(r'ngrams$', api.ApiNgrams.as_view()),
url(r'tfidf/(\d+)/(\w+)$', views_optimized.tfidf),
)