Commit 4362b85b authored by delanoe

[FEAT ANALYTICS HISTORIES] Update to database change + debug + CSS.

parent c5594508
@@ -67,7 +67,6 @@ INDEXED_HYPERDATA = {
, 'convert_from_db': str
},
'authors':
{ 'id' : 4
, 'type' : str
......
from gargantext.util.http import ValidationException, APIView \
, get_parameters, JsonHttpResponse, Http404\
, HttpResponse
from gargantext.util.db import session, delete, func, bulk_insert
from gargantext.models import Node, Ngram, NodeNgram, NodeNodeNgram, NodeNode, NodeHyperdata, HyperdataKey
from gargantext.constants import INDEXED_HYPERDATA
from django.core.exceptions import PermissionDenied, SuspiciousOperation
from sqlalchemy import or_, not_
from sqlalchemy.sql import func
from sqlalchemy.orm import aliased
import datetime
import collections
from gargantext.util.db import *
from gargantext.util.validation import validate
from rest_framework.authentication import SessionAuthentication, BasicAuthentication
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework.exceptions import APIException as _APIException
def DebugHttpResponse(data):
return HttpResponse('<html><body style="background:#000;color:#FFF"><pre>%s</pre></body></html>' % (str(data), ))
import json
class JSONEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, datetime.datetime):
return obj.isoformat()[:19] + 'Z'
else:
return super(self.__class__, self).default(obj)
json_encoder = JSONEncoder(indent=4)
def JsonHttpResponse(data, status=200):
return HttpResponse(
content = json_encoder.encode(data),
content_type = 'application/json; charset=utf-8',
status = status
)
Http400 = SuspiciousOperation
Http403 = PermissionDenied
import csv
def CsvHttpResponse(data, headers=None, status=200):
response = HttpResponse(
content_type = "text/csv",
status = status
)
writer = csv.writer(response, delimiter=',')
if headers:
writer.writerow(headers)
for row in data:
writer.writerow(row)
return response
class APIException(_APIException):
def __init__(self, message, code=500):
self.status_code = code
self.detail = message
class NodeNgramsQueries(APIView):
_resolutions = {
'second': lambda d: d + datetime.timedelta(seconds=1),
'minute': lambda d: d + datetime.timedelta(minutes=1),
'hour': lambda d: d + datetime.timedelta(hours=1),
'day': lambda d: d + datetime.timedelta(days=1),
'week': lambda d: d + datetime.timedelta(days=7),
'month': lambda d: (d + datetime.timedelta(days=32)).replace(day=1),
'year': lambda d: (d + datetime.timedelta(days=367)).replace(day=1, month=1),
'decade': lambda d: (d + datetime.timedelta(days=3660)).replace(day=1, month=1),
'century': lambda d: (d + datetime.timedelta(days=36600)).replace(day=1, month=1),
}
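# Note on the coarse steps above ('month' and up): they overshoot past the
# period boundary and snap back with .replace(), which sidesteps month-length
# arithmetic. This is safe because the dates fed in are already truncated by
# date_trunc() to the start of the period. A standalone check of the idea
# (illustrative, not part of this commit):
#
#     import datetime
#     next_month = lambda d: (d + datetime.timedelta(days=32)).replace(day=1)
#     next_month(datetime.datetime(2016, 1, 1))   # -> 2016-02-01 00:00:00
#     next_month(datetime.datetime(2016, 12, 1))  # -> 2017-01-01 00:00:00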
_operators = {
'=': lambda field, value: (field == value),
'!=': lambda field, value: (field != value),
'<': lambda field, value: (field < value),
'>': lambda field, value: (field > value),
'<=': lambda field, value: (field <= value),
'>=': lambda field, value: (field >= value),
'in': lambda field, value: (or_(*tuple(field == x for x in value))),
'contains': lambda field, value: (field.contains(value)),
'doesnotcontain': lambda field, value: (not_(field.contains(value))),
'startswith': lambda field, value: (field.startswith(value)),
'endswith': lambda field, value: (field.endswith(value)),
}
_converters = {
'float': float,
'int': int,
# pad a partial date to a full timestamp, e.g. '1995' -> '1995-01-01 00:00:00Z'
'datetime': lambda x: x + '2000-01-01 00:00:00Z'[len(x):],
'text': str,
'string': str,
}
def post(self, request, project_id):
# example only
input = request.data or {
'x': {
'with_empty': True,
'resolution': 'decade',
'value': 'publication_date',
},
'y': {
# 'divided_by': 'total_ngrams_count',
# 'divided_by': 'total_documents_count',
},
'filter': {
# 'ngrams': ['bees', 'bee', 'honeybee', 'honeybees', 'honey bee', 'honey bees'],
# 'ngrams': ['insecticide', 'pesticide'],
# 'corpora': [52633],
# 'date': {'min': '1995-12-31'}
},
# 'format': 'csv',
}
# input validation
input = validate(input, {'type': dict, 'default': {}, 'items': {
'x': {'type': dict, 'default': {}, 'items': {
# which hyperdata to choose for the date
'value': {'type': str, 'default': 'publication_date', 'range': {'publication_date', }},
# time resolution
'resolution': {'type': str, 'range': self._resolutions.keys(), 'default': 'month'},
# should we add zeroes for empty values?
'with_empty': {'type': bool, 'default': False},
}},
'y': {'type': dict, 'default': {}, 'items': {
# measured value
'value': {'type': str, 'default': 'ngrams_count', 'range': {'ngrams_count', 'documents_count', 'ngrams_tfidf'}},
# value by which we should normalize
'divided_by': {'type': str, 'range': {'total_documents_count', 'documents_count', 'total_ngrams_count'}},
}},
# filtering
'filter': {'type': dict, 'default': {}, 'items': {
# filter by metadata
'hyperdata': {'type': list, 'default': [], 'items': {'type': dict, 'items': {
'key': {'type': str, 'range': INDEXED_HYPERDATA.keys()},
'operator': {'type': str, 'range': self._operators.keys()},
'value': {'type': str},
}}},
# filter by date
'date': {'type': dict, 'items': {
'min': {'type': datetime.datetime},
'max': {'type': datetime.datetime},
}, 'default': {}},
# filter by corpora
'corpora' : {'type': list, 'default': [], 'items': {'type': int}},
# filter by ngrams
'ngrams' : {'type': list, 'default': [], 'items': {'type': str}},
}},
# output format
'format': {'type': str, 'default': 'json', 'range': {'json', 'csv'}},
}})
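# The validate() helper comes from gargantext.util.validation; as used above,
# a schema node may declare 'type', 'default', 'required', 'range' (allowed
# values) and 'items' (sub-schemas). A minimal sketch of that contract,
# inferred from its usage here (the real implementation may differ):
#
#     def validate_sketch(value, schema):
#         if 'type' in schema and not isinstance(value, schema['type']):
#             raise ValueError('expected %s' % schema['type'].__name__)
#         if 'range' in schema and value not in schema['range']:
#             raise ValueError('%r not in allowed range' % (value, ))
#         if isinstance(value, dict) and 'items' in schema:
#             out = {}
#             for key, sub in schema['items'].items():
#                 if key in value:
#                     out[key] = validate_sketch(value[key], sub)
#                 elif 'default' in sub:
#                     out[key] = sub['default']
#                 elif sub.get('required'):
#                     raise ValueError('missing key: %s' % key)
#             return out
#         if isinstance(value, list) and 'items' in schema:
#             return [validate_sketch(v, schema['items']) for v in value]
#         return value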
# build query: prepare columns
X = aliased(NodeHyperdata)
column_x = func.date_trunc(input['x']['resolution'], X.value_utc)
column_y = {
'documents_count': func.count(Node.id.distinct()),
'ngrams_count': func.sum(NodeNgram.weight),
# 'ngrams_tfidf': func.sum(NodeNodeNgram.weight),
}[input['y']['value']]
# build query: base
print(input)
query_base = (session
.query(column_x)
.select_from(Node)
.join(NodeNgram , NodeNgram.node_id == Node.id)
.join(X , X.node_id == NodeNgram.node_id)
#.filter(X.key == input['x']['value'])
.group_by(column_x)
.order_by(column_x)
)
# build query: base, filter by corpora or project
if 'corpora' in input['filter'] and input['filter']['corpora']:
query_base = (query_base
.filter(Node.parent_id.in_(input['filter']['corpora']))
)
else:
ParentNode = aliased(Node)
query_base = (query_base
.join(ParentNode, ParentNode.id == Node.parent_id)
.filter(ParentNode.parent_id == project_id)
)
# build query: base, filter by date
if 'date' in input['filter']:
if 'min' in input['filter']['date']:
query_base = query_base.filter(X.value >= input['filter']['date']['min'])
if 'max' in input['filter']['date']:
query_base = query_base.filter(X.value <= input['filter']['date']['max'])
# build query: filter by ngrams
query_result = query_base.add_columns(column_y)
if 'ngrams' in input['filter'] and input['filter']['ngrams']:
query_result = (query_result
.join(Ngram, Ngram.id == NodeNgram.ngram_id)
.filter(Ngram.terms.in_(input['filter']['ngrams']))
)
# build query: filter by metadata
if 'hyperdata' in input['filter']:
for h, hyperdata in enumerate(input['filter']['hyperdata']):
print(h,hyperdata)
# get hyperdata in database
#if hyperdata_model is None:
# continue
#hyperdata_id, hyperdata_type = hyperdata_model
# create alias and query it
operator = self._operators[hyperdata['operator']]
type_string = type2string(INDEXED_HYPERDATA[hyperdata['key']]['type'])
value = self._converters[type_string](hyperdata['value'])
query_result = (query_result
.join(NodeHyperdata , NodeHyperdata.node_id == NodeNgram.node_id)
.filter(NodeHyperdata.key == hyperdata['key'])
.filter(operator(NodeHyperdata.value, value))
)
# build result: prepare data
date_value_list = query_result.all()
#print(date_value_list)
if date_value_list:
date_min = date_value_list[0][0].replace(tzinfo=None)
# the last row of the result is skipped here and in the loops below
# (hence the [-2] index and the [0:-1] slices)
date_max = date_value_list[-2][0].replace(tzinfo=None)
# build result: prepare interval
result = collections.OrderedDict()
if input['x']['with_empty'] and date_value_list:
compute_next_date = self._resolutions[input['x']['resolution']]
date = date_min
while date <= date_max:
result[date] = 0.0
date = compute_next_date(date)
# build result: integrate
for date, value in date_value_list[0:-1]:
result[date.replace(tzinfo=None)] = value
# build result: normalize
query_normalize = None
if date_value_list and 'divided_by' in input['y'] and input['y']['divided_by']:
if input['y']['divided_by'] == 'total_documents_count':
query_normalize = query_base.add_column(func.count(Node.id.distinct()))
elif input['y']['divided_by'] == 'total_ngrams_count':
query_normalize = query_base.add_column(func.sum(NodeNgram.weight))
if query_normalize is not None:
for date, value in query_normalize[0:-1]:
date = date.replace(tzinfo=None)
if date in result:
result[date] /= value
# return result with proper formatting
if input['format'] == 'json':
return JsonHttpResponse({
'query': input,
'result': sorted(result.items()),
}, 201)
elif input['format'] == 'csv':
return CsvHttpResponse(sorted(result.items()), ('date', 'value'), 201)
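# Usage sketch for the route wired to this view in urls.py below
# (r'^nodes/(\d+)/histories$'); the host and the project id 1234 are
# placeholders, not values from this commit:
#
#     import requests
#     r = requests.post('http://localhost:8000/api/nodes/1234/histories',
#                       json={'x': {'resolution': 'year', 'with_empty': True},
#                             'y': {'value': 'documents_count'},
#                             'filter': {'ngrams': ['bee', 'bees']},
#                             'format': 'json'})
#     r.json()['result']   # sorted [date, value] pairs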
class ApiNgrams(APIView):
def get(self, request):
# parameters retrieval and validation
startwith = request.GET.get('startwith', '').replace("'", "\\'")
# query ngrams
ParentNode = aliased(Node)
ngrams_query = (session
.query(Ngram.terms, func.sum(NodeNgram.weight).label('count'))
.join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
.join(Node, Node.id == NodeNgram.node_id)
.group_by(Ngram.terms)
# .group_by(Ngram)
.order_by(func.sum(NodeNgram.weight).desc(), Ngram.terms)
)
# filters
if 'startwith' in request.GET:
ngrams_query = ngrams_query.filter(Ngram.terms.startswith(request.GET['startwith']))
if 'contain' in request.GET:
ngrams_query = ngrams_query.filter(Ngram.terms.contains(request.GET['contain']))
if 'corpus_id' in request.GET:
corpus_id_list = list(map(int, request.GET.get('corpus_id', '').split(',')))
if corpus_id_list and corpus_id_list[0]:
ngrams_query = ngrams_query.filter(Node.parent_id.in_(corpus_id_list))
# pagination
offset = int(request.GET.get('offset', 0))
limit = int(request.GET.get('limit', 20))
total = ngrams_query.count()
# return formatted result
return JsonHttpResponse({
'pagination': {
'offset': offset,
'limit': limit,
'total': total,
},
'data': [
{
'terms': ngram.terms,
'count': ngram.count,
}
for ngram in ngrams_query[offset : offset+limit]
],
})
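# Usage sketch for the r'^ngrams/$' route (host and corpus id are placeholders):
#
#     import requests
#     r = requests.get('http://localhost:8000/api/ngrams/',
#                      params={'corpus_id': '1234', 'startwith': 'bee', 'limit': 5})
#     for row in r.json()['data']:
#         print(row['terms'], row['count'])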
_operators_dict = {
"=": lambda field, value: (field == value),
"!=": lambda field, value: (field != value),
"<": lambda field, value: (field < value),
">": lambda field, value: (field > value),
"<=": lambda field, value: (field <= value),
">=": lambda field, value: (field >= value),
"in": lambda field, value: (or_(*tuple(field == x for x in value))),
"contains": lambda field, value: (field.contains(value)),
"doesnotcontain": lambda field, value: (not_(field.contains(value))),
"startswith": lambda field, value: (field.startswith(value)),
}
od = collections.OrderedDict(sorted(INDEXED_HYPERDATA.items()))
_hyperdata_list = [ { key : value }
for key, value in od.items()
if key != 'abstract'
]
def type2string(given_type):
if given_type == int:
return "integer"
elif given_type == str:
return "string"
elif given_type == datetime.datetime:
return "datetime"
def get_metadata(corpus_id_list):
# query hyperdata keys
ParentNode = aliased(Node)
hyperdata_query = (session
.query(NodeHyperdata.key)
.join(Node, Node.id == NodeHyperdata.node_id)
.filter(Node.parent_id.in_(corpus_id_list))
.group_by(NodeHyperdata.key)
)
# build a collection with the hyperdata keys
collection = []
for hyperdata in INDEXED_HYPERDATA.keys():
valuesCount = 0
values = None
# count values and determine their span
values_count = None
values_from = None
values_to = None
if hyperdata != 'text':
node_hyperdata_query = (session
.query(NodeHyperdata.key)
.join(Node, Node.id == NodeHyperdata.node_id)
.filter(Node.parent_id.in_(corpus_id_list))
.filter(NodeHyperdata.key == hyperdata)
.group_by(NodeHyperdata.key)
.order_by(NodeHyperdata.key)
)
values_count = node_hyperdata_query.count()
# values_count, values_from, values_to = node_hyperdata_query.first()
# if there are no more than 48 values, retrieve them
values = None
if isinstance(values_count, int) and values_count <= 48:
# the declared type lives in INDEXED_HYPERDATA; the query rows are 1-tuples
if INDEXED_HYPERDATA[hyperdata]['type'] == datetime.datetime:
values = [row[0].isoformat() for row in node_hyperdata_query.all()]
else:
values = [row[0] for row in node_hyperdata_query.all()]
# adding this hyperdata to the collection
collection.append({
'key': str(hyperdata),
'type': type2string(INDEXED_HYPERDATA[hyperdata]['type']),
'values': values,
'valuesFrom': values_from,
'valuesTo': values_to,
'valuesCount': values_count,
})
# give the result back
return collection
class ApiHyperdata(APIView):
def get(self, request):
corpus_id_list = list(map(int, request.GET['corpus_id'].split(',')))
return JsonHttpResponse({
'data': get_metadata(corpus_id_list),
})
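# Usage sketch for the r'hyperdata$' route (host and corpus id are placeholders):
#
#     import requests
#     meta = requests.get('http://localhost:8000/api/hyperdata',
#                         params={'corpus_id': '1234'}).json()['data']
#     [m['key'] for m in meta]   # e.g. ['authors', 'publication_date', ...]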
@@ -2,28 +2,34 @@ from django.conf.urls import url
from . import nodes
from . import ngramlists
from . import analytics
urlpatterns = [ url(r'^nodes$' , nodes.NodeListResource.as_view())
, url(r'^nodes/(\d+)$' , nodes.NodeResource.as_view() )
, url(r'^nodes/(\d+)/having$' , nodes.NodeListHaving.as_view() )
urlpatterns = [ url(r'^nodes$' , nodes.NodeListResource.as_view() )
, url(r'^nodes/(\d+)$' , nodes.NodeResource.as_view() )
, url(r'^nodes/(\d+)/having$' , nodes.NodeListHaving.as_view() )
# Analytics
, url(r'^nodes/(\d+)/histories$', analytics.NodeNgramsQueries.as_view())
, url(r'^ngrams/$' , analytics.ApiNgrams.as_view() )
, url(r'hyperdata$' , analytics.ApiHyperdata.as_view() )
# get a list of ngram_ids or ngram_infos by list_id
# url(r'^ngramlists/(\d+)$', ngramlists.List.as_view()),
, url(r'^nodes/(\d+)/facets$' , nodes.CorpusFacet.as_view() )
, url(r'^nodes/(\d+)/favorites$', nodes.CorpusFavorites.as_view() )
, url(r'^nodes/(\d+)/facets$' , nodes.CorpusFacet.as_view() )
, url(r'^nodes/(\d+)/favorites$', nodes.CorpusFavorites.as_view() )
# in these two routes the node is supposed to be a *corpus* node
, url(r'^ngramlists/change$', ngramlists.ListChange.as_view() )
, url(r'^ngramlists/change$', ngramlists.ListChange.as_view() )
# add or remove ngram from a list
# ex: add <=> PUT ngramlists/change?list=42&ngrams=1,2
# rm <=> DEL ngramlists/change?list=42&ngrams=1,2
, url(r'^ngramlists/groups$', ngramlists.GroupChange.as_view())
, url(r'^ngramlists/groups$', ngramlists.GroupChange.as_view() )
# modify grouping couples of a group node
# ex: POST ngramlists/groups?node=43
# post data looks like : {"767":[209,640],"779":[436,265,385]}"
, url(r'^ngramlists/family$' , ngramlists.ListFamily.as_view())
, url(r'^ngramlists/family$' , ngramlists.ListFamily.as_view() )
# entire combination of lists from a corpus, dedicated to termtable
# (or any combination of lists that go together :
# - a mainlist
@@ -31,6 +37,6 @@ urlpatterns = [ url(r'^nodes$' , nodes.NodeListResource.as_view()
# - an optional maplist
# - an optional grouplist
, url(r'^ngramlists/maplist$' , ngramlists.MapListGlance.as_view())
, url(r'^ngramlists/maplist$' , ngramlists.MapListGlance.as_view() )
# fast access to maplist, similarly formatted for termtable
]
@@ -43,12 +43,6 @@ def docs_by_titles(request, project_id, corpus_id):
},
)
@requires_auth
def chart(request, project_id, corpus_id):
authorized, user, project, corpus = _get_user_project_corpus(request, project_id, corpus_id)
@requires_auth
def docs_by_journals(request, project_id, corpus_id):
'''
@@ -76,3 +70,25 @@ def docs_by_journals(request, project_id, corpus_id):
},
)
@requires_auth
def analytics(request, project_id, corpus_id):
authorized, user, project, corpus = _get_user_project_corpus(request, project_id, corpus_id)
if not authorized:
return HttpResponseForbidden()
# response!
return render(
template_name = 'pages/analytics/histories.html',
request = request,
context = {
'debug': DEBUG,
'date': datetime.now(),
'project': project,
'corpus': corpus,
'resourcename' : resourcename(corpus),
'view': 'analytics',
'user': request.user
},
)
@@ -22,12 +22,13 @@ urlpatterns = [
# corpora
url(r'^projects/(\d+)/corpora/(\d+)/?$', corpora.docs_by_titles),
url(r'^projects/(\d+)/corpora/(\d+)/chart/?$', corpora.chart),
# corpus by journals
url(r'^projects/(\d+)/corpora/(\d+)/journals/?$', corpora.docs_by_journals),
# terms table for the corpus
url(r'^projects/(\d+)/corpora/(\d+)/terms/?$', terms.ngramtable),
# Analytics
url(r'^projects/(\d+)/corpora/(\d+)/analytics/?$', corpora.analytics),
]
from django.http import HttpResponse, Http404
from django.core.exceptions import PermissionDenied, SuspiciousOperation
from django.core.exceptions import ValidationError
from django.core.urlresolvers import reverse
from sqlalchemy import text, distinct, or_,not_
from sqlalchemy.sql import func, desc
from sqlalchemy.orm import aliased
import datetime
import copy
from gargantext_web.views import move_to_trash
from gargantext_web.db import cache, Node, NodeNgram, NodeNgramNgram, NodeNodeNgram, Ngram, Hyperdata, Node_Ngram\
, NodeType, Node_Hyperdata
from gargantext_web.views import session
from gargantext_web.validation import validate, ValidationException
from node import models
def DebugHttpResponse(data):
return HttpResponse('<html><body style="background:#000;color:#FFF"><pre>%s</pre></body></html>' % (str(data), ))
import json
class JSONEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, datetime.datetime):
return obj.isoformat()[:19] + 'Z'
else:
return super(self.__class__, self).default(obj)
json_encoder = JSONEncoder(indent=4)
def JsonHttpResponse(data, status=200):
return HttpResponse(
content = json_encoder.encode(data),
content_type = 'application/json; charset=utf-8',
status = status
)
Http400 = SuspiciousOperation
Http403 = PermissionDenied
import csv
def CsvHttpResponse(data, headers=None, status=200):
response = HttpResponse(
content_type = "text/csv",
status = status
)
writer = csv.writer(response, delimiter=',')
if headers:
writer.writerow(headers)
for row in data:
writer.writerow(row)
return response
_ngrams_order_columns = {
"frequency" : "-count",
"alphabetical" : "terms"
}
from rest_framework.authentication import SessionAuthentication, BasicAuthentication
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework.exceptions import APIException as _APIException
class APIException(_APIException):
def __init__(self, message, code=500):
self.status_code = code
self.detail = message
_operators_dict = {
"=": lambda field, value: (field == value),
"!=": lambda field, value: (field != value),
"<": lambda field, value: (field < value),
">": lambda field, value: (field > value),
"<=": lambda field, value: (field <= value),
">=": lambda field, value: (field >= value),
"in": lambda field, value: (or_(*tuple(field == x for x in value))),
"contains": lambda field, value: (field.contains(value)),
"doesnotcontain": lambda field, value: (not_(field.contains(value))),
"startswith": lambda field, value: (field.startswith(value)),
}
_hyperdata_list = [
hyperdata
for hyperdata in session.query(Hyperdata).order_by(Hyperdata.name)
]
_hyperdata_dict = {
hyperdata.name: hyperdata
for hyperdata in _hyperdata_list
}
from rest_framework.decorators import api_view
@api_view(('GET',))
def Root(request, format=None):
return Response({
'users': reverse('user-list', request=request, format=format),
'snippets': reverse('snippet-list', request=request, format=format)
})
class NodesChildrenNgrams(APIView):
def get(self, request, node_id):
# query ngrams
ParentNode = aliased(Node)
ngrams_query = (session
.query(Ngram.terms, func.sum(Node_Ngram.weight).label('count'))
.join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
.join(Node, Node.id == Node_Ngram.node_id)
.filter(Node.parent_id == node_id)
.group_by(Ngram.terms)
# .group_by(Ngram)
.order_by(func.sum(Node_Ngram.weight).desc(), Ngram.terms)
)
# filters
if 'startwith' in request.GET:
ngrams_query = ngrams_query.filter(Ngram.terms.startswith(request.GET['startwith']))
if 'contain' in request.GET:
ngrams_query = ngrams_query.filter(Ngram.terms.contains(request.GET['contain']))
#if 'doesnotcontain' in request.GET:
# ngrams_query = ngrams_query.filter(not_(Ngram.terms.contains(request.GET['doesnotcontain'])))
# pagination
offset = int(request.GET.get('offset', 0))
limit = int(request.GET.get('limit', 20))
total = ngrams_query.count()
# return formatted result
return JsonHttpResponse({
'pagination': {
'offset': offset,
'limit': limit,
'total': total,
},
'data': [
{
# 'id': ngram.id,
'terms': ngram.terms,
'count': ngram.count,
}
for ngram in ngrams_query[offset : offset+limit]
],
})
class NodesChildrenNgramsIds(APIView):
def get(self, request, node_id):
# query ngrams
ParentNode = aliased(Node)
ngrams_query = (session
.query(Node.id, func.sum(Node_Ngram.weight).label('count'))
.join(Node_Ngram, Node_Ngram.node_id == Node.id)
.join(Ngram, Ngram.id == Node_Ngram.ngram_id)
.filter(Node.parent_id == node_id)
.filter(Node.type_id == cache.NodeType['Document'].id)
.group_by(Node.id)
# .group_by(Ngram)
.order_by(func.sum(Node_Ngram.weight).desc())
)
# filters
if 'startwith' in request.GET:
ngrams_query = ngrams_query.filter(Ngram.terms.startswith(request.GET['startwith']))
if 'contain' in request.GET:
ngrams_query = ngrams_query.filter(Ngram.terms.contains(request.GET['contain']))
#if 'doesnotcontain' in request.GET:
# ngrams_query = ngrams_query.filter(not_(Ngram.terms.contains(request.GET['doesnotcontain'])))
# pagination
offset = int(request.GET.get('offset', 0))
limit = int(request.GET.get('limit', 20))
total = ngrams_query.count()
# return formatted result
return JsonHttpResponse({
'pagination': {
'offset': offset,
'limit': limit,
'total': total,
},
'data': [
{
'id': node,
'count': count
}
for node, count in ngrams_query[offset : offset+limit]
],
})
from gargantext_web.db import get_or_create_node
class Ngrams(APIView):
def get(self, request, node_id):
# query ngrams
ParentNode = aliased(Node)
corpus = session.query(Node).filter(Node.id==node_id).first()
group_by = []
results = ['id', 'terms']
ngrams_query = (session
.query(Ngram.id, Ngram.terms)
.join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
.join(Node, Node.id == Node_Ngram.node_id)
)
# get the scores
if 'tfidf' in request.GET['score']:
Tfidf = aliased(NodeNodeNgram)
tfidf_id = get_or_create_node(nodetype='Tfidf (global)', corpus=corpus).id
ngrams_query = (ngrams_query.add_column(Tfidf.score.label('tfidf'))
.join(Tfidf, Tfidf.ngram_id == Ngram.id)
.filter(Tfidf.nodex_id == tfidf_id)
)
group_by.append(Tfidf.score)
results.append('tfidf')
if 'cvalue' in request.GET['score']:
Cvalue = aliased(NodeNodeNgram)
cvalue_id = get_or_create_node(nodetype='Cvalue', corpus=corpus).id
ngrams_query = (ngrams_query.add_column(Cvalue.score.label('cvalue'))
.join(Cvalue, Cvalue.ngram_id == Ngram.id)
.filter(Cvalue.nodex_id == cvalue_id)
)
group_by.append(Cvalue.score)
results.append('cvalue')
if 'specificity' in request.GET['score']:
Spec = aliased(NodeNodeNgram)
spec_id = get_or_create_node(nodetype='Specificity', corpus=corpus).id
ngrams_query = (ngrams_query.add_column(Spec.score.label('specificity'))
.join(Spec, Spec.ngram_id == Ngram.id)
.filter(Spec.nodex_id == spec_id)
)
group_by.append(Spec.score)
results.append('specificity')
if request.GET.get('order', False) == 'cvalue':
ngrams_query = ngrams_query.order_by(desc(Cvalue.score))
elif request.GET.get('order', False) == 'tfidf':
ngrams_query = ngrams_query.order_by(desc(Tfidf.score))
elif request.GET.get('order', False) == 'specificity':
ngrams_query = ngrams_query.order_by(desc(Spec.score))
offset = int(request.GET.get('offset', 0))
limit = int(request.GET.get('limit', 20))
ngrams_query = (ngrams_query.filter(Node.parent_id == node_id)
.group_by(Ngram.id, Ngram.terms, *group_by)
)
if request.GET.get('ngram_id', False) != False:
ngram_id = int(request.GET['ngram_id'])
Group = aliased(NodeNgramNgram)
group_id = get_or_create_node(nodetype='Group', corpus=corpus).id
ngrams_query = (ngrams_query.join(Group, Group.ngramx_id == ngram_id )
.filter(Group.node_id == group_id)
.filter(Group.ngramx_id == ngram_id)
)
# filters by list type (soon list_id to factorize it in javascript)
list_query = request.GET.get('list', 'miam')
if list_query == 'miam':
Miam = aliased(NodeNgram)
miam_id = get_or_create_node(nodetype='MiamList', corpus=corpus).id
ngrams_query = (ngrams_query.join(Miam, Miam.ngram_id == Ngram.id )
.filter(Miam.node_id == miam_id)
)
elif list_query == 'stop':
Stop = aliased(NodeNgram)
stop_id = get_or_create_node(nodetype='StopList', corpus=corpus).id
ngrams_query = (ngrams_query.join(Stop, Stop.ngram_id == Ngram.id )
.filter(Stop.node_id == stop_id)
)
elif list_query == 'map':
# ngram could be in ngramx_id or ngramy_id
CoocX = aliased(NodeNgramNgram)
CoocY = aliased(NodeNgramNgram)
cooc_id = get_or_create_node(nodetype='Cooccurrence', corpus=corpus).id
ngrams_query = (ngrams_query.join(CoocX, CoocX.ngramx_id == Ngram.id )
.join(CoocY, CoocY.ngramy_id == Ngram.id)
.filter(CoocX.node_id == cooc_id)
.filter(CoocY.node_id == cooc_id)
)
total = ngrams_query.count()
# return formatted result
return JsonHttpResponse({
'pagination': {
'offset': offset,
'limit': limit,
'total': total,
},
'data': [
{
'id' : ngram.id
, 'terms' : ngram.terms
, 'tfidf' : ngram.tfidf
, 'cvalue': ngram.cvalue
} for ngram in ngrams_query[offset : offset+limit]
# TODO : dict comprehension in list comprehension :
# { x : eval('ngram.' + x) for x in results
# } for ngram in ngrams_query[offset : offset+limit]
],
})
class NodesChildrenDuplicates(APIView):
def _fetch_duplicates(self, request, node_id, extra_columns=None, min_count=1):
# input validation
if extra_columns is None:
extra_columns = []
if 'keys' not in request.GET:
raise APIException('Missing GET parameter: "keys"', 400)
keys = request.GET['keys'].split(',')
# hyperdata retrieval
hyperdata_query = (session
.query(Hyperdata)
.filter(Hyperdata.name.in_(keys))
)
# build query elements
columns = []
aliases = []
for hyperdata in hyperdata_query:
# aliases
_Hyperdata = aliased(Hyperdata)
_Node_Hyperdata = aliased(Node_Hyperdata)
aliases.append(_Node_Hyperdata)
# what shall we retrieve?
columns.append(
getattr(_Node_Hyperdata, 'value_' + hyperdata.type)
)
# build the query
groups = list(columns)
duplicates_query = (session
.query(*(extra_columns + [func.count()] + columns))
.select_from(Node)
)
for _Node_Hyperdata, hyperdata in zip(aliases, hyperdata_query):
duplicates_query = duplicates_query.outerjoin(_Node_Hyperdata, _Node_Hyperdata.node_id == Node.id)
duplicates_query = duplicates_query.filter(_Node_Hyperdata.hyperdata_id == hyperdata.id)
duplicates_query = duplicates_query.filter(Node.parent_id == node_id)
duplicates_query = duplicates_query.group_by(*columns)
duplicates_query = duplicates_query.order_by(func.count().desc())
duplicates_query = duplicates_query.having(func.count() > min_count)
# and now, return it
return duplicates_query
def get(self, request, node_id):
# data to be returned
duplicates = self._fetch_duplicates(request, node_id)
# pagination
offset = int(request.GET.get('offset', 0))
limit = int(request.GET.get('limit', 10))
total = duplicates.count()
# response building
return JsonHttpResponse({
'pagination': {
'offset': offset,
'limit': limit,
'total': total,
},
'data': [
{
'count': duplicate[0],
'values': duplicate[1:],
}
for duplicate in duplicates[offset : offset+limit]
]
})
def delete(self, request, node_id):
# get the minimum ID for each of the nodes sharing the same hyperdata
kept_node_ids_query = self._fetch_duplicates(request, node_id, [func.min(Node.id).label('id')], 0)
kept_node_ids = [kept_node.id for kept_node in kept_node_ids_query]
# TODO with new orm
duplicate_nodes = models.Node.objects.filter( parent_id=node_id ).exclude(id__in=kept_node_ids)
# # delete the stuff
# delete_query = (session
# .query(Node)
# .filter(Node.parent_id == node_id)
# .filter(~Node.id.in_(kept_node_ids))
# )
count = len(duplicate_nodes)
for node in duplicate_nodes:
print("deleting node ",node.id)
move_to_trash(node.id)
# print(delete_query)
# # delete_query.delete(synchronize_session=True)
# session.flush()
return JsonHttpResponse({
'deleted': count
})
# retrieve metadata from a given list of parent nodes
def get_metadata(corpus_id_list):
# query hyperdata keys
ParentNode = aliased(Node)
hyperdata_query = (session
.query(Hyperdata)
.join(Node_Hyperdata, Node_Hyperdata.hyperdata_id == Hyperdata.id)
.join(Node, Node.id == Node_Hyperdata.node_id)
.filter(Node.parent_id.in_(corpus_id_list))
.group_by(Hyperdata)
)
# build a collection with the hyperdata keys
collection = []
for hyperdata in hyperdata_query:
valuesCount = 0
values = None
# count values and determine their span
values_count = None
values_from = None
values_to = None
if hyperdata.type != 'text':
value_column = getattr(Node_Hyperdata, 'value_' + hyperdata.type)
node_hyperdata_query = (session
.query(value_column)
.join(Node, Node.id == Node_Hyperdata.node_id)
.filter(Node.parent_id.in_(corpus_id_list))
.filter(Node_Hyperdata.hyperdata_id == hyperdata.id)
.group_by(value_column)
.order_by(value_column)
)
values_count = node_hyperdata_query.count()
# values_count, values_from, values_to = node_hyperdata_query.first()
# if there are no more than 48 values, retrieve them
values = None
if isinstance(values_count, int) and values_count <= 48:
if hyperdata.type == 'datetime':
values = [row[0].isoformat() for row in node_hyperdata_query.all()]
else:
values = [row[0] for row in node_hyperdata_query.all()]
# adding this hyperdata to the collection
collection.append({
'key': hyperdata.name,
'type': hyperdata.type,
'values': values,
'valuesFrom': values_from,
'valuesTo': values_to,
'valuesCount': values_count,
})
# give the result back
return collection
class ApiHyperdata(APIView):
def get(self, request):
corpus_id_list = list(map(int, request.GET['corpus_id'].split(',')))
return JsonHttpResponse({
'data': get_metadata(corpus_id_list),
})
# retrieve ngrams from a given list of parent nodes
def get_ngrams(corpus_id_list):
pass
class ApiNgrams(APIView):
def get(self, request):
# parameters retrieval and validation
startwith = request.GET.get('startwith', '').replace("'", "\\'")
# query ngrams
ParentNode = aliased(Node)
ngrams_query = (session
.query(Ngram.terms, func.sum(Node_Ngram.weight).label('count'))
.join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
.join(Node, Node.id == Node_Ngram.node_id)
.group_by(Ngram.terms)
# .group_by(Ngram)
.order_by(func.sum(Node_Ngram.weight).desc(), Ngram.terms)
)
# filters
if 'startwith' in request.GET:
ngrams_query = ngrams_query.filter(Ngram.terms.startswith(request.GET['startwith']))
if 'contain' in request.GET:
ngrams_query = ngrams_query.filter(Ngram.terms.contains(request.GET['contain']))
if 'corpus_id' in request.GET:
corpus_id_list = list(map(int, request.GET.get('corpus_id', '').split(',')))
if corpus_id_list and corpus_id_list[0]:
ngrams_query = ngrams_query.filter(Node.parent_id.in_(corpus_id_list))
# pagination
offset = int(request.GET.get('offset', 0))
limit = int(request.GET.get('limit', 20))
total = ngrams_query.count()
# return formatted result
return JsonHttpResponse({
'pagination': {
'offset': offset,
'limit': limit,
'total': total,
},
'data': [
{
'terms': ngram.terms,
'count': ngram.count,
}
for ngram in ngrams_query[offset : offset+limit]
],
})
class NodesChildrenQueries(APIView):
def _sql(self, input, node_id):
fields = dict()
tables = set('nodes')
hyperdata_aliases = dict()
# retrieve all unique fields names
fields_names = input['retrieve']['fields'].copy()
fields_names += [filter['field'] for filter in input['filters']]
fields_names += input['sort']
fields_names = set(fields_names)
# relate fields to their respective ORM counterparts
for field_name in fields_names:
field_name_parts = field_name.split('.')
field = None
if len(field_name_parts) == 1:
field = getattr(Node, field_name)
elif field_name_parts[1] == 'count':
if field_name_parts[0] == 'nodes':
field = func.count(Node.id)
elif field_name_parts[0] == 'ngrams':
field = func.count(Ngram.id)
tables.add('ngrams')
elif field_name_parts[0] == 'ngrams':
field = getattr(Ngram, field_name_parts[1])
tables.add('ngrams')
elif field_name_parts[0] == 'hyperdata':
hyperdata = _hyperdata_dict[field_name_parts[1]]
if hyperdata not in hyperdata_aliases:
hyperdata_aliases[hyperdata] = aliased(Node_Hyperdata)
hyperdata_alias = hyperdata_aliases[hyperdata]
field = getattr(hyperdata_alias, 'value_%s' % hyperdata.type)
if len(field_name_parts) == 3:
field = func.date_trunc(field_name_parts[2], field)
fields[field_name] = field
# build query: selected fields
query = (session
.query(*(fields[field_name] for field_name in input['retrieve']['fields']))
)
# build query: selected tables
query = query.select_from(Node)
if 'ngrams' in tables:
query = (query
.join(Node_Ngram, Node_Ngram.node_id == Node.id)
.join(Ngram, Ngram.id == Node_Ngram.ngram_id)
)
for hyperdata, hyperdata_alias in hyperdata_aliases.items():
query = (query
.join(hyperdata_alias, hyperdata_alias.node_id == Node.id)
.filter(hyperdata_alias.hyperdata_id == hyperdata.id)
)
# build query: filtering
query = (query
.filter(Node.parent_id == node_id)
)
for filter in input['filters']:
query = (query
.filter(_operators_dict[filter['operator']](
fields[filter['field']],
filter['value']
))
)
# build query: aggregations
if input['retrieve']['aggregate']:
for field_name in input['retrieve']['fields']:
if not field_name.endswith('.count'):
query = query.group_by(fields[field_name])
# build query: sorting
for field_name in input['sort']:
last = field_name[-1:]
if last in ('+', '-'):
field_name = field_name[:-1]
if last == '-':
query = query.order_by(fields[field_name].desc())
else:
query = query.order_by(fields[field_name])
# build and return result
output = copy.deepcopy(input)
output['pagination']['total'] = query.count()
output['results'] = list(
query[input['pagination']['offset']:input['pagination']['offset']+input['pagination']['limit']]
if input['pagination']['limit']
else query[input['pagination']['offset']:]
)
return output
def _haskell(self, input, node_id):
output = copy.deepcopy(input)
output['pagination']['total'] = 0
output['results'] = list()
return output
def post(self, request, node_id):
""" Query the children of the given node.
Example #1
----------
Input:
{
"pagination": {
"offset": 0,
"limit": 10
},
"retrieve": {
"type": "fields",
"list": ["name", "hyperdata.publication_date"]
},
"filters": [
{"field": "hyperdata.publication_date", "operator": ">", "value": "2010-01-01 00:00:00"},
{"field": "ngrams.terms", "operator": "in", "value": ["bee", "bees"]}
],
"sort": ["name"]
}
Output:
{
"pagination": {
"offset": 0,
"limit": 10
},
"retrieve": {
"type": "fields",
"list": ["name", "hyperdata.publication_date"]
},
"results": [
{"id": 12, "name": "A document about bees", "publication_date": "2014-12-03 10:00:00"},
...,
]
}
"""
# authorized field names
sql_fields = set({
'id', 'name',
'nodes.count',
'nodes.countnorm',
'ngrams.count',
'ngrams.terms', 'ngrams.n',
})
for hyperdata in _hyperdata_list:
sql_fields.add('hyperdata.' + hyperdata.name)
if hyperdata.type == 'datetime':
for part in ['year', 'month', 'day', 'hour', 'minute']:
sql_fields.add('hyperdata.' + hyperdata.name + '.' + part)
# authorized field names: Haskell
haskell_fields = set({
'haskell.test',
})
# authorized field names: all of them
authorized_fields = sql_fields | haskell_fields
# input validation
input = validate(request.DATA, {'type': dict, 'items': {
'pagination': {'type': dict, 'items': {
'limit': {'type': int, 'default': 0},
'offset': {'type': int, 'default': 0},
}, 'default': {'limit': 0, 'offset': 0}},
'filters': {'type': list, 'items': {'type': dict, 'items': {
'field': {'type': str, 'required': True, 'range': authorized_fields},
'operator': {'type': str, 'required': True, 'range': list(_operators_dict.keys())},
'value': {'required': True},
}}, 'default': list()},
'retrieve': {'type': dict, 'required': True, 'items': {
'aggregate': {'type': bool, 'default': False},
'fields': {'type': list, 'items': {'type': str, 'range': authorized_fields}, 'range': (1, )},
}},
'sort': {'type': list, 'items': {'type': str}, 'default': list()},
}})
# return result, depending on the queried fields
if set(input['retrieve']['fields']) <= sql_fields:
method = self._sql
elif set(input['retrieve']['fields']) <= haskell_fields:
method = self._haskell
else:
raise ValidationException('queried fields are mixing incompatible types of fields')
return JsonHttpResponse(method(input, node_id), 201)
class NodesList(APIView):
authentication_classes = (SessionAuthentication, BasicAuthentication)
def get(self, request):
print("user id : " + str(request.user))
query = (session
.query(Node.id, Node.name, NodeType.name.label('type'))
.filter(Node.user_id == int(request.user.id))
.join(NodeType)
)
if 'type' in request.GET:
query = query.filter(NodeType.name == request.GET['type'])
if 'parent' in request.GET:
query = query.filter(Node.parent_id == int(request.GET['parent']))
return JsonHttpResponse({'data': [
node._asdict()
for node in query.all()
]})
class Nodes(APIView):
def get(self, request, node_id):
node = session.query(Node).filter(Node.id == node_id).first()
if node is None:
raise APIException('This node does not exist', 404)
return JsonHttpResponse({
'id': node.id,
'name': node.name,
'parent_id': node.parent_id,
'type': cache.NodeType[node.type_id].name,
# 'type': node.type__name,
#'hyperdata': dict(node.hyperdata),
'hyperdata': node.hyperdata,
})
# deleting node by id
# currently, very dangerous.
# it should take the subnodes into account as well,
# for better consistency...
def delete(self, request, node_id):
user = request.user
node = session.query(Node).filter(Node.id == node_id).first()
msgres = str()
try:
move_to_trash(node_id)
msgres = node_id + " moved to Trash"
except Exception as error:
msgres = "error deleting " + node_id + ": " + str(error)
return JsonHttpResponse(msgres)
class CorpusController:
@classmethod
def get(cls, corpus_id):
try:
corpus_id = int(corpus_id)
except:
raise ValidationError('Corpora are identified by an integer.', 400)
corpusQuery = session.query(Node).filter(Node.id == corpus_id).first()
# print(str(corpusQuery))
# raise Http404("404 error.")
if not corpusQuery:
raise Http404("No such corpus: %d" % (corpus_id, ))
corpus = corpusQuery
if corpus.type.name != 'Corpus':
raise Http404("No such corpus: %d" % (corpus_id, ))
# if corpus.user != request.user:
# raise Http403("Unauthorized access.")
return corpus
@classmethod
def ngrams(cls, request, node_id):
# parameters retrieval and validation
startwith = request.GET.get('startwith', '').replace("'", "\\'")
# build query
ParentNode = aliased(Node)
query = (session
.query(Ngram.terms, func.count('*'))
.join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
.join(Node, Node.id == Node_Ngram.node_id)
.join(ParentNode, ParentNode.id == Node.parent_id)
.filter(ParentNode.id == node_id)
.filter(Ngram.terms.like('%s%%' % (startwith, )))
.group_by(Ngram.terms)
.order_by(func.count('*').desc())
)
# response building
format = request.GET.get('format', 'json')
if format == 'json':
return JsonHttpResponse({
"data": [{
'terms': row[0],
'occurrences': row[1]
} for row in query.all()],
})
elif format == 'csv':
return CsvHttpResponse(
[['terms', 'occurences']] + [row for row in query.all()]
)
else:
raise ValidationError('Unrecognized "format=%s", should be "csv" or "json"' % (format, ))
from django.http import HttpResponse, Http404
from django.core.exceptions import PermissionDenied, SuspiciousOperation
from django.core.exceptions import ValidationError
from django.core.urlresolvers import reverse
from django.contrib.auth.decorators import login_required
from sqlalchemy import text, distinct, or_,not_
from sqlalchemy.sql import func, desc
from sqlalchemy.orm import aliased
import datetime
import copy
import json
from gargantext_web.db import cache
from gargantext_web.validation import validate, ValidationException
from gargantext_web.db import session,get_session, Node, NodeNgram, NodeNgramNgram, NodeNodeNgram, Ngram, Hyperdata, Node_Ngram, get_or_create_node
def DebugHttpResponse(data):
return HttpResponse('<html><body style="background:#000;color:#FFF"><pre>%s</pre></body></html>' % (str(data), ))
import time
import json
class JSONEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, datetime.datetime):
return obj.isoformat()[:19] + 'Z'
else:
return super(self.__class__, self).default(obj)
json_encoder = JSONEncoder(indent=4)
def JsonHttpResponse(data, status=200):
return HttpResponse(
content = json_encoder.encode(data),
content_type = 'application/json; charset=utf-8',
status = status
)
Http400 = SuspiciousOperation
Http403 = PermissionDenied
import csv
def CsvHttpResponse(data, headers=None, status=200):
response = HttpResponse(
content_type = "text/csv",
status = status
)
writer = csv.writer(response, delimiter=',')
if headers:
writer.writerow(headers)
for row in data:
writer.writerow(row)
return response
_ngrams_order_columns = {
"frequency" : "-count",
"alphabetical" : "terms"
}
from rest_framework.authentication import SessionAuthentication, BasicAuthentication
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework.renderers import JSONRenderer
from rest_framework.exceptions import APIException as _APIException
class APIException(_APIException):
def __init__(self, message, code=500):
self.status_code = code
self.detail = message
from rest_framework.decorators import api_view
#@login_required
# TODO how to secure REST ?
class List(APIView):
def get_metadata ( self , ngram_ids , parent_id ):
# implicit global session
start_ = time.time()
nodes_ngrams = session.query(Ngram.id , Ngram.terms).filter( Ngram.id.in_( list(ngram_ids.keys()))).all()
for node in nodes_ngrams:
if node.id in ngram_ids:
ngram_ids[node.id] = {
"id": node.id,
"name": node.terms,
"scores": {
"tfidf": 0,
"occs":0
}
}
# occ_list = get_or_create_node(nodetype='Occurrences', corpus_id=parent_id).id
# print( occ_list )
try:
tfidf_list = get_or_create_node(nodetype='Tfidf (global)', corpus_id=parent_id).id
ngram_tfidf = session.query(NodeNodeNgram.ngram_id,NodeNodeNgram.score).filter( NodeNodeNgram.nodex_id==tfidf_list , NodeNodeNgram.ngram_id.in_( list(ngram_ids.keys()) )).all()
for n in ngram_tfidf:
if n.ngram_id in ngram_ids:
ngram_ids[n.ngram_id]["scores"]["tfidf"] += n.score
except:
pass
try:
occ_list = get_or_create_node(nodetype='Occurrences', corpus_id=parent_id).id
ngram_occs = session.query(NodeNodeNgram.ngram_id,NodeNodeNgram.score).filter( NodeNodeNgram.nodex_id==occ_list , NodeNodeNgram.ngram_id.in_( list(ngram_ids.keys()) )).all()
for n in ngram_occs:
if n.ngram_id in ngram_ids:
ngram_ids[n.ngram_id]["scores"]["occs"] += round(n.score)
except:
pass
end_ = time.time()
return { "data":ngram_ids , "secs":(end_ - start_) }
def get(self, request, corpus_id , list_name ):
if not request.user.is_authenticated():
return JsonHttpResponse( {"request" : "forbidden"} )
# implicit global session
corpus = session.query(Node).filter( Node.id==corpus_id ).first()
# if corpus==None:
# return JsonHttpResponse( {"request" : "forbidden"} )
start_ = time.time()
list_name = list_name.title()+"List"
node_list = get_or_create_node(nodetype=list_name, corpus=corpus )
nodes_ngrams = session.query(NodeNgram.ngram_id).filter(NodeNgram.node_id==node_list.id ).all()
ngram_ids = {}
for node in nodes_ngrams:
ngram_ids[node.ngram_id] = True
end_ = time.time()
measurements = {
"get_ngram_ids" : {
"s":(end_ - start_),
"n": len(ngram_ids.keys())
}
}
if request.GET.get('custom', False) != False:
ngrams_meta = self.get_metadata( ngram_ids , corpus_id )
ngram_ids = ngrams_meta["data"]
measurements["tfidf"] = { "s" : ngrams_meta["secs"], "n": len(ngrams_meta["data"].keys()) }
return JsonHttpResponse( {"data":ngram_ids , "time":measurements } )
class Ngrams(APIView):
'''
REST application to manage ngrams
Example :
http://localhost:8000/api/node/1444485/ngrams?format=json&score=tfidf,occs
'''
def get(self, request, node_id):
if not request.user.is_authenticated():
return JsonHttpResponse( {"request" : "forbidden"} )
# implicit global session
corpus = session.query(Node).filter( Node.id==node_id).first()
# if corpus==None:
# return JsonHttpResponse( {"request" : "forbidden"} )
start_ = time.time()
ParentNode = aliased(Node)
group_by = []
results = ['id', 'terms']
ngrams_query = (session
.query(Ngram.id, Ngram.terms)
.join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
.join(Node, Node.id == Node_Ngram.node_id)
)
the_score = "tfidf"
if request.GET.get('score', False) != False:
the_score = request.GET['score']
if 'occs' in the_score:
Occs = NodeNodeNgram
occs_id = get_or_create_node(nodetype='Occurrences', corpus=corpus).id
ngrams_query = (ngrams_query.add_column(Occs.score.label('occs'))
.join(Occs, Occs.ngram_id == Ngram.id)
.filter(Occs.nodex_id==occs_id)
)
group_by.append(Occs.score)
results.append('occs')
if 'tfidf' in the_score:
Tfidf = aliased(NodeNodeNgram)
tfidf_id = get_or_create_node(nodetype='Tfidf (global)', corpus=corpus).id
ngrams_query = (ngrams_query.add_column(Tfidf.score.label('tfidf'))
.join(Tfidf, Tfidf.ngram_id == Ngram.id)
.filter(Tfidf.nodex_id == tfidf_id)
)
group_by.append(Tfidf.score)
results.append('tfidf')
if 'cvalue' in the_score:
Cvalue = aliased(NodeNodeNgram)
cvalue_id = get_or_create_node(nodetype='Cvalue', corpus=corpus).id
ngrams_query = (ngrams_query.add_column(Cvalue.score.label('cvalue'))
.join(Cvalue, Cvalue.ngram_id == Ngram.id)
.filter(Cvalue.nodex_id == cvalue_id)
)
group_by.append(Cvalue.score)
results.append('cvalue')
if 'specificity' in the_score:
Spec = aliased(NodeNodeNgram)
spec_id = get_or_create_node(nodetype='Specificity', corpus=corpus).id
ngrams_query = (ngrams_query.add_column(Spec.score.label('specificity'))
.join(Spec, Spec.ngram_id == Ngram.id)
.filter(Spec.nodex_id == spec_id)
)
group_by.append(Spec.score)
results.append('specificity')
order_query = request.GET.get('order', False)
if order_query == 'occs':
ngrams_query = ngrams_query.order_by(desc(Occs.score))
elif order_query == 'cvalue':
ngrams_query = ngrams_query.order_by(desc(Cvalue.score))
elif order_query == 'tfidf':
ngrams_query = ngrams_query.order_by(desc(Tfidf.score))
elif order_query == 'specificity':
ngrams_query = ngrams_query.order_by(desc(Spec.score))
offset = int(request.GET.get('offset', 0))
limit = int(request.GET.get('limit', 20))
ngrams_query = (ngrams_query.filter(Node.parent_id == node_id)
.group_by(Ngram.id, Ngram.terms, *group_by)
)
if request.GET.get('ngram_id', False) != False:
ngram_id = int(request.GET['ngram_id'])
Group = aliased(NodeNgramNgram)
group_id = get_or_create_node(nodetype='Group', corpus=corpus).id
ngrams_query = (ngrams_query.join(Group, Group.ngramx_id == ngram_id )
.filter(Group.node_id == group_id)
.filter(Group.ngramx_id == ngram_id)
)
list_query = request.GET.get('list', 'miam')
list_id = request.GET.get('list_id', False)
if list_query == 'miam':
Miam = aliased(NodeNgram)
miam_id = get_or_create_node(nodetype='MiamList', corpus=corpus).id
ngrams_query = (ngrams_query.join(Miam, Miam.ngram_id == Ngram.id )
.filter(Miam.node_id == miam_id)
)
elif list_query == 'stop':
Stop = aliased(NodeNgram)
stop_id = get_or_create_node(nodetype='StopList', corpus=corpus).id
ngrams_query = (ngrams_query.join(Stop, Stop.ngram_id == Ngram.id )
.filter(Stop.node_id == stop_id)
)
elif list_query == 'map':
CoocX = aliased(NodeNgramNgram)
CoocY = aliased(NodeNgramNgram)
cooc_id = get_or_create_node(nodetype='Cooccurrence', corpus=corpus).id
ngrams_query = (ngrams_query.join(CoocX, CoocX.ngramx_id == Ngram.id )
.join(CoocY, CoocY.ngramy_id == Ngram.id)
.filter(CoocX.node_id == cooc_id)
.filter(CoocY.node_id == cooc_id)
)
elif list_id != False:
list_id = int(list_id)
node = session.query(Node).filter(Node.id==node_id).first()
if node.type_id == cache.NodeType['StopList'].id or node.type_id == cache.NodeType['MiamList'].id:
List = aliased(NodeNgram)
ngrams_query = (ngrams_query.join(List, List.ngram_id == Ngram.id )
.filter(List.node_id == node.id)
)
elif node.type_id == cache.NodeType['Cooccurrence'].id:
CoocX = aliased(NodeNgramNgram)
CoocY = aliased(NodeNgramNgram)
ngrams_query = (ngrams_query.join(CoocX, CoocX.ngramx_id == Ngram.id )
.join(CoocY, CoocY.ngramy_id == Ngram.id)
.filter(CoocX.node_id == node.id)
.filter(CoocY.node_id == node.id)
)
output = []
for ngram in ngrams_query[offset : offset+limit]:
info = { "scores" : {} }
try: info["id"] = ngram.id
except: pass
try: info["name"] = ngram.terms
except: pass
try: info["scores"]["occs"] = ngram.occs
except: pass
try: info["scores"]["tfidf"] = ngram.tfidf
except: pass
try: info["scores"]["cvalue"] = ngram.cvalue
except: pass
try: info["scores"]["specificity"] = ngram.specificity
except: pass
output.append( info )
end_ = time.time()
measurements = {
"s":(end_ - start_),
"n": len(output)
}
# return formatted result
return JsonHttpResponse({
'pagination': {
'offset': offset,
'limit': limit,
'total': len(output),
},
'data': output,
"time" : measurements
})
def post(self , request , node_id ):
return JsonHttpResponse(["POST","ok"])
def put (self, request, corpus_id):
"""
Add ngrams to Miam list
"""
group_rawreq = dict(request.data)
print( "group_rawreq:" )
print( group_rawreq )
return JsonHttpResponse(["PUT","ok"])
class Group(APIView):
'''
REST API to manage groups of Ngrams
Groups can be synonyms, a category, or ngram groups built with stems or lemmas.
'''
def get_group_id(self , node_id , user_id):
node_id = int(node_id)
# implicit global session
corpus = session.query(Node).filter( Node.id==node_id).first()
if corpus==None: return None
group = get_or_create_node(corpus=corpus, nodetype='Group')
return(group.id)
def get(self, request, corpus_id):
if not request.user.is_authenticated():
return JsonHttpResponse( {"request" : "forbidden"} )
group_id = self.get_group_id(corpus_id , request.user.id)
if group_id==None:
return JsonHttpResponse( {"request" : "forbidden"} )
#api/node/$corpus_id/ngrams?ngram_id=12
# ngram_id = 1 #request.GET.get('ngram_id', False)
# ngram_id = int(node_id)
# #api/node/$corpus_id/ngrams?all=True
# all_option = request.GET.get('all', False)
# all_option = 1 #int(all_option)
# IMPORTANT: Algorithm for getting the groups:
# 1. pairs_list <- Get all pairs from get_group_id()
# 2. G <- Do a non-directed graph of pairs_list
# 3. DG <- Do a directed graph of pairs_list
# 4. cliques_list <- find_cliques of G
# 5. groups <- iterate over sinonims_cliques and set the mainNode for each clique: pick the node with the highest out-degree in DG
import networkx as nx
G = nx.Graph()
DG = nx.DiGraph()
# implicit global session
ngrams_ngrams = (session
.query(NodeNgramNgram)
.filter(NodeNgramNgram.node_id==group_id)
)
# ngramy_id=476996, score=1.0, node_id=75081, id=1282846, ngramx_id=493431
for ng in ngrams_ngrams:
# n_x = ( session.query(Ngram).filter(Ngram.id==ng.ngramx_id) ).first()
# n_y = ( session.query(Ngram).filter(Ngram.id==ng.ngramy_id) ).first()
G.add_edge( ng.ngramx_id , ng.ngramy_id )
DG.add_edge( ng.ngramx_id , ng.ngramy_id )
# group = dict(list())
sinonims_cliques = nx.find_cliques( G )
# for nn in ngrams_ngrams.all():
# group[nn.ngramx_id] = group.get(nn.ngramx_id, []) + [nn.ngramy_id]
groups = { "nodes": {} , "links": {} }
for clique in sinonims_cliques:
max_deg = -1
mainNode = -1
mainNode_sinonims = []
if len(clique) > 1:
for node in clique:
# just a lookup hash with *both*
# the mainNode and the subnodes
groups["nodes"][node] = False
# choosing mainNode
node_outdeg = DG.out_degree(node)
if node_outdeg>max_deg:
max_deg = node_outdeg
mainNode = node
# the links themselves main => [subs]
for node in clique:
if mainNode!=node:
mainNode_sinonims.append( node )
groups["links"][ mainNode ] = mainNode_sinonims
# for i in groups["nodes"]:
# print(i)
ngrams = [int(i) for i in list(groups["nodes"].keys())]
# groups["nodes"] = get_occtfidf( ngrams , request.user.id , corpus_id , "Group")
return JsonHttpResponse( { "data" : groups } )
def post(self, request, node_id):
return JsonHttpResponse( ["hola" , "mundo"] )
def delete(self, request, corpus_id):
# input validation
# implicit global session
input = validate(request.DATA, {'data' : {'source': int, 'target': list}})
group_id = self.get_group_id(corpus_id, request.user.id)
for data in input['data']:
if data['source'] > 0 and len(data['target']) > 0:
for target_id in data['target']:
(session.query(NodeNgramNgram)
.filter(NodeNgramNgram.node_id==group_id)
.filter(NodeNgramNgram.ngramx_id==data['source'])
.filter(NodeNgramNgram.ngramy_id==target_id)
.delete()
)
else:
raise APIException('Missing parameter: "{\'data\' : [\'source\': Int, \'target\': [Int]}"', 400)
return JsonHttpResponse(True, 201)
def put(self , request , corpus_id ):
# implicit global session
group_rawreq = dict(request.data)
GDict = []
group_new = {}
for g in group_rawreq:
gdict = []
mainform = int(g.replace("[]",""))
gdict.append(mainform)
group_new[mainform] = list(map(int, group_rawreq[g]))
for subform in group_new[mainform]:
gdict.append(subform)
GDict.append( gdict )
existing_group_id = self.get_group_id(corpus_id , request.user.id)
# implicit global session
grouped_ngrams = (session
.query(NodeNgramNgram)
.filter(NodeNgramNgram.node_id==existing_group_id)
)
# [ - - - new group = old clique + new clique - - - ] #
NewGroups = {}
Rels_2_delete = {}
for ng in grouped_ngrams:
for i in range(len(GDict)):
clique_i = GDict[i]
neighbours = {}
for node in clique_i:
if node==ng.ngramx_id:
neighbours[ng.ngramy_id] = True
if node==ng.ngramy_id:
neighbours[ng.ngramx_id] = True
if len(list(neighbours.keys()))>0:
voisinage = {}
for node_ in clique_i:
voisinage[node_] = True
for node_ in neighbours:
voisinage[node_] = True
clique_i = list(voisinage.keys())
Rels_2_delete[ng.id] = True
if i not in NewGroups:
NewGroups[i] = {}
for node in clique_i:
NewGroups[i][node] = True
for i in NewGroups:
NewGroups[i] = list(NewGroups[i].keys())
# [ - - - / new group = old clique + new clique - - - ] #
# [ - - - considering main form of the query - - - ] #
for i in range(len(GDict)):
ordered = []
for j in range(len(NewGroups[i])):
if NewGroups[i][j]!=GDict[i][0]:
ordered.append( NewGroups[i][j] )
NewGroups[i] = [ GDict[i][0] ] + ordered
# [ - - - / considering main form of the query - - - ] #
# [ - - - deleting old clique - - - ] #
for rel_id in Rels_2_delete:
session.query(NodeNgramNgram).filter(NodeNgramNgram.id==rel_id ).delete()
session.commit()
# [ - - - / deleting old clique - - - ] #
# [ - - - doing links of new clique and adding to DB - - - ] #
from itertools import combinations
for i in NewGroups:
edges = combinations(NewGroups[i], 2)
for n in edges:
n1=n[0]
n2=n[1]
nodengramngram = NodeNgramNgram(node_id=existing_group_id, ngramx_id=n1 , ngramy_id=n2, score=1.0)
session.add(nodengramngram)
session.commit()
# [ - - - / doing links of new clique and adding to DB - - - ] #
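# For reference, combinations() is what expands each merged group into a full
# clique of edges, e.g.:
#
#     from itertools import combinations
#     list(combinations([10, 20, 30], 2))  # [(10, 20), (10, 30), (20, 30)]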
# import networkx as nx
# G = nx.Graph()
# DG = nx.DiGraph()
# for ng in grouped_ngrams:
# n_x = ( session.query(Ngram).filter(Ngram.id==ng.ngramx_id) ).first()
# n_y = ( session.query(Ngram).filter(Ngram.id==ng.ngramy_id) ).first()
# G.add_edge( str(ng.ngramx_id)+" "+n_x.terms , str(ng.ngramy_id)+" "+n_y.terms )
# DG.add_edge( str(ng.ngramx_id)+" "+n_x.terms , str(ng.ngramy_id)+" "+n_y.terms )
# # group = dict(list())
# sinonims_cliques = nx.find_cliques( G )
# # for nn in ngrams_ngrams.all():
# # group[nn.ngramx_id] = group.get(nn.ngramx_id, []) + [nn.ngramy_id]
# groups = { "nodes": {} , "links": {} }
# for clique in sinonims_cliques:
# max_deg = -1
# mainNode = -1
# mainNode_sinonims = []
# for node in clique:
# groups["nodes"][node] = False
# node_outdeg = DG.out_degree(node)
# if node_outdeg>max_deg:
# max_deg = node_outdeg
# mainNode = node
# for node in clique:
# if mainNode!=node:
# mainNode_sinonims.append( node )
# groups["links"][ mainNode ] = mainNode_sinonims
# import pprint
# print("GDict:")
# pprint.pprint( GDict )
# print("")
# print("NewGroups:")
# pprint.pprint( NewGroups )
# print("")
# print("Ids to delete:")
# pprint.pprint( Rels_2_delete )
# print("")
# print('groups["links"]:')
# pprint.pprint( groups["links"] )
# print("")
return JsonHttpResponse(True, 201)
class Keep(APIView):
    """
    Actions on one existing Ngram in one list
    """
    renderer_classes = (JSONRenderer,)
    authentication_classes = (SessionAuthentication, BasicAuthentication)

    def get(self, request, corpus_id):
        # implicit global session
        # list_id = session.query(Node).filter(Node.id==list_id).first()
        corpus = session.query(Node).filter(Node.id == corpus_id).first()
        node_mapList = get_or_create_node(nodetype='MapList', corpus=corpus)
        nodes_in_map = session.query(NodeNgram).filter(NodeNgram.node_id == node_mapList.id).all()
        # response maps each map-listed ngram id to true
        results = {}
        for node in nodes_in_map:
            results[node.ngram_id] = True
        return JsonHttpResponse(results)

    def put(self, request, corpus_id):
        """
        Add ngrams to map list
        """
        # implicit global session
        group_rawreq = dict(request.data)
        ngram_2add = [int(i) for i in list(group_rawreq.keys())]
        corpus = session.query(Node).filter(Node.id == corpus_id).first()
        node_mapList = get_or_create_node(nodetype='MapList', corpus=corpus)
        for ngram_id in ngram_2add:
            map_node = NodeNgram(weight=1.0, ngram_id=ngram_id, node_id=node_mapList.id)
            session.add(map_node)
        session.commit()
        return JsonHttpResponse(True, 201)

    def delete(self, request, corpus_id):
        """
        Delete ngrams from the map list
        """
        # implicit global session
        group_rawreq = dict(request.data)
        # print("group_rawreq:")
        # print(group_rawreq)
        ngram_ids_2del = [int(i) for i in list(group_rawreq.keys())]
        corpus = session.query(Node).filter(Node.id == corpus_id).first()
        node_mapList = get_or_create_node(nodetype='MapList', corpus=corpus)
        map_nodes_2del = (session.query(NodeNgram)
                          .filter(NodeNgram.node_id == node_mapList.id,
                                  NodeNgram.ngram_id.in_(ngram_ids_2del))
                          .all())
        for map_node in map_nodes_2del:
            session.delete(map_node)
        session.commit()
        return JsonHttpResponse(True, 201)
from django.conf.urls import patterns, url
from gargantext_web import views_optimized
from rest_v1_0 import api, ngrams, graph
from annotations import views
import tests.ngramstable.views as samtest

urlpatterns = patterns('',
    # REST URLS
    # What is REST?
    # https://en.wikipedia.org/wiki/Representational_state_transfer
    #url(r'^api$', rest_v1_0.api.Root), # = ?
    url(r'nodes$', api.NodesList.as_view()),
    url(r'nodes/(\d+)$', api.Nodes.as_view()),
    url(r'nodes/(\d+)/children/ngrams$', api.NodesChildrenNgrams.as_view()), # => repeated children ?
    url(r'nodes/(\d+)/children/ids$', api.NodesChildrenNgramsIds.as_view()), # => repeated children ?

    # NGRAMS table & annotations
    url(r'node/(\d+)/ngrams$', ngrams.Ngrams.as_view()),
    url(r'node/(\d+)/ngrams/group$', ngrams.Group.as_view()),
    url(r'node/(\d+)/ngrams/keep$', ngrams.Keep.as_view()),
    # url(r'node/(?P<list_id>[0-9]+)/ngrams/keep/(?P<ngram_ids>[0-9,\+]+)+$', ngrams.Keep.as_view()),
    url(r'node/(?P<list_id>[0-9]+)/ngrams/(?P<ngram_ids>[0-9,\+]+)+$', views.NgramEdit.as_view()),
    url(r'node/(?P<corpus_id>[0-9]+)/ngrams/list/(?P<list_name>\w+)$', ngrams.List.as_view()),
    url(r'node/corpus/(?P<node_ids>[0-9,\+]+)+$', samtest.get_corpuses),

    #url(r'nodes/(\d+)/children/hyperdata$', api.NodesChildrenMetatadata.as_view()),
    url(r'nodes/(\d+)/children/queries$', api.NodesChildrenQueries.as_view()),
    url(r'nodes/(\d+)/children/duplicates$', api.NodesChildrenDuplicates.as_view()),
    # url(r'^api/nodes/(\d+)/children/duplicates/delete$', api.NodesChildrenDuplicates.delete ),
    url(r'nodes/(\d+)/ngrams$', api.CorpusController.ngrams),
    url(r'nodes/(\d+)/graph$', graph.Graph.as_view()),
    url(r'corpus/(\d+)/graph$', graph.Graph.as_view()),
    url(r'hyperdata$', api.ApiHyperdata.as_view()),
    url(r'ngrams$', api.ApiNgrams.as_view()),
    url(r'tfidf/(\d+)/(\w+)$', views_optimized.tfidf),
)
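
# Illustrative mounting of this urlconf (hypothetical project-level urls.py):
# the patterns above are relative and only become /api/... once included, e.g.
#   url(r'^api/', include('rest_v1_0.urls')),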
// Documentation:
// n3-charts/line-chart

// define operators (for hyperdata filtering, according to the considered type)
var operators = {
    'text': [
        {'label': 'contains', 'key': 'contains'},
        {'label': 'does not contain', 'key': 'doesnotcontain'},
    ],
    'string': [
        {'label': 'starts with', 'key': 'startswith'},
        {'label': 'contains', 'key': 'contains'},
        {'label': 'does not contain', 'key': 'doesnotcontain'},
        {'label': 'ends with', 'key': 'endswith'},
        {'label': 'is', 'key': '='},
        {'label': 'is before', 'key': '<'},
        {'label': 'is after', 'key': '>'}
    ],
    'integer': [
        {'label': 'is', 'key': '='},
        {'label': 'is lower than', 'key': '<'},
        {'label': 'is higher than', 'key': '>'}
    ],
    'float': [
        {'label': 'is', 'key': '='},
        {'label': 'is lower than', 'key': '<'},
        {'label': 'is higher than', 'key': '>'}
    ],
    'datetime': [
        {'label': 'is', 'key': '='},
        {'label': 'is before', 'key': '<'},
        {'label': 'is after', 'key': '>'}
    ],
};
$.each(operators, function(type, type_operators) {
    type_operators.unshift({});
});
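// The operator keys ('contains', '=', '<', ...) are presumably the identifiers the
// filtering API expects server-side; the empty item unshifted above renders as the
// blank "no operator selected" default in each dropdown.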
// define available periods of time
var periods = ['second', 'minute', 'hour', 'day', 'week', 'month', 'year', 'decade', 'century'];

var strDate = function(date) {
    return date.getFullYear() + '-' +
        ('00' + (date.getMonth() + 1)).slice(-2) + '-' +
        ('00' + date.getDate()).slice(-2) + 'T' +
        ('00' + date.getHours()).slice(-2) + ':' +
        ('00' + date.getMinutes()).slice(-2) + ':' +
        ('00' + date.getSeconds()).slice(-2) + 'Z';
};
var addZero = function(x) {
    return (x < 10) ? ('0' + x) : x;
};
var addZeros = function(x, n) {
    x = x.toString();
    return '0000'.substr(0, n - x.length) + x;
};
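// Illustrative expectations for the helpers above:
//   addZero(7)      -> '07'
//   addZeros(42, 4) -> '0042'
// strDate() builds the string from *local* date components yet appends 'Z',
// so the suffix is a formatting convention here rather than a true UTC marker.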
var groupings = {
    datetime: {
        century: {
            representation: function(x) {return x.toISOString().substr(0, 2) + 'th century';},
            truncate: function(x) {return x.substr(0, 2) + '00-01-01T00:00:00Z';},
            next: function(x) {
                x = new Date(x);
                x.setFullYear(x.getFullYear() + 100);
                x.setHours(0);
                return strDate(x);
            },
        },
        decade: {
            representation: function(x) {return x.toISOString().substr(0, 3) + '0s';},
            truncate: function(x) {return x.substr(0, 3) + '0-01-01T00:00:00Z';},
            next: function(x) {
                x = new Date(x);
                x.setFullYear(x.getFullYear() + 10);
                x.setHours(0);
                return strDate(x);
            },
        },
        year: {
            representation: function(x) {return x.toISOString().substr(0, 4);},
            truncate: function(x) {return x.substr(0, 4) + '-01-01T00:00:00Z';},
            next: function(x) {
                var y = parseInt(x.substr(0, 4));
                return addZeros(y + 1, 4) + x.substr(4);
            },
        },
        month: {
            representation: function(x) {return x.toISOString().substr(0, 7);},
            truncate: function(x) {return x.substr(0, 7) + '-01T00:00:00Z';},
            next: function(x) {
                var m = parseInt(x.substr(5, 2));
                if (m == 12) {
                    var y = parseInt(x.substr(0, 4));
                    return addZeros(y + 1, 4) + '-01' + x.substr(7);
                } else {
                    return x.substr(0, 5) + addZero(m + 1) + x.substr(7);
                }
            },
        },
        day: {
            representation: function(x) {return x.toISOString().substr(0, 10);},
            truncate: function(x) {return x.substr(0, 10) + 'T00:00:00Z';},
            next: function(x) {
                x = new Date(x);
                x.setDate(x.getDate() + 1);
                x.setHours(0);
                return strDate(x);
            },
        },
    },
    numeric: {
        unit: {
            representation: function(x) {return x.toString();},
            truncate: function(x) {return Math.round(x);},
            next: function(x) {return x + 1;},
        },
    },
};
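// Illustrative expectations for the grouping helpers above:
//   groupings.datetime.year.truncate('2015-06-17T08:30:00Z') -> '2015-01-01T00:00:00Z'
//   groupings.datetime.year.next('2015-01-01T00:00:00Z')     -> '2016-01-01T00:00:00Z'
//   groupings.datetime.month.next('2015-12-01T00:00:00Z')    -> '2016-01-01T00:00:00Z'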
// Define the application
var gargantext = angular.module('Gargantext', ['n3-charts.linechart', 'ngCookies', 'ngTagsInput']);

// Customize the application's scope
angular.module('Gargantext').run(function($rootScope, $http, $cookies){
    // Access Math library from anywhere in the scope of the application
    $rootScope.Math = Math;
    // Access to an HSB to RGB converter
    $rootScope.getColor = function(i, n){
        var h = .3 + (i / n) % 1;
        var s = .7;
        var v = .8;
        var sector = Math.floor(h * 6);
        var f = h * 6 - sector;
        var p = v * (1 - s);
        var q = v * (1 - f * s);
        var t = v * (1 - (1 - f) * s);
        var r, g, b;
        switch (sector % 6) {
            case 0: r = v; g = t; b = p; break;
            case 1: r = q; g = v; b = p; break;
            case 2: r = p; g = v; b = t; break;
            case 3: r = p; g = q; b = v; break;
            case 4: r = t; g = p; b = v; break;
            case 5: r = v; g = p; b = q; break;
        }
        r = Math.round(255 * r);
        g = Math.round(255 * g);
        b = Math.round(255 * b);
        var color = 'rgb(' + r + ',' + g + ',' + b + ')';
        return color;
    };
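    // Illustrative use: getColor(0, 3), getColor(1, 3) and getColor(2, 3) yield three
    // evenly spaced hues at fixed saturation/value, one colour per plotted dataset.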
    // Access to a range function, very similar to the one available in Python
    $rootScope.range = function(min, max, step){
        if (max == undefined){
            max = min;
            min = 0;
        }
        step = step || 1;
        var output = [];
        for (var i = min; i < max; i += step){
            output.push(i);
        }
        return output;
    };
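    // Illustrative use: range(4) -> [0, 1, 2, 3]; range(2, 10, 3) -> [2, 5, 8]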
    // Pre-defined stuff
    $rootScope.operators = operators;
    $rootScope.periods = periods;
    // For CSRF token compatibility with Django
    $http.defaults.headers.post['X-CSRFToken'] = $cookies['csrftoken'];
});
// Controller for datasets
gargantext.controller('DatasetController', function($scope, $http) {
    // are we loading data from the server right now?
    $scope.is_loading = false;
    // initial parameters for the y-axis of the query
    $scope.query_y = {
        'value': 'documents_count',
        'is_relative': false,
        'divided_by': 'total_ngrams_count',
    };
    // filters: corpora retrieval
    $scope.corpora = [];
    if (/^\/projects\/\d+\/corpora\/\d+/.test(location.pathname)) {
        $scope.project_id = parseInt(location.pathname.split('/')[2]);
    } else {
        console.error('The id of the project has to be set.');
    }
    $scope.updateCorpora = function() {
        $http.get('/api/nodes?types[]=CORPUS&parent_id=' + $scope.project_id, {cache: true}).success(function(response){
            $scope.corpora = response.records;
            // Initially set to what is indicated in the URL
            if (/^\/projects\/\d+\/corpora\/\d+/.test(location.pathname)) {
                var corpus_id = parseInt(location.pathname.split('/')[4]);
                $.each($scope.corpora, function(c, corpus) {
                    corpus.is_selected = (corpus.id == corpus_id);
                });
                $scope.updateHyperdataList();
                $scope.updateDataset();
            }
        });
    };
    var getSelectedCorporaIdList = function() {
        var corpus_id_list = [];
        $.each($scope.corpora, function(c, corpus) {
            if (corpus.is_selected) {
                corpus_id_list.push(corpus.id);
            }
        });
        return corpus_id_list;
    };
    $scope.updateCorpora();
    // filters: ngrams
    $scope.getNgrams = function(query) {
        var url = '/api/ngrams?limit=10';
        // encodeURIComponent so that '&', '+', etc. in the query do not break the URL
        url += '&contain=' + encodeURIComponent(query);
        url += '&corpus_id=' + getSelectedCorporaIdList().join(',');
        var appendTransform = function(defaults, transform) {
            defaults = angular.isArray(defaults) ? defaults : [defaults];
            return defaults.concat(transform);
        };
        return $http.get(url, {
            cache: true,
            transformResponse: appendTransform($http.defaults.transformResponse, function(value) {
                return value.data;
            })
        });
    };
    // filters: corpora
    $scope.corporaSelectNone = function() {
        $.each($scope.corpora, function(c, corpus){
            corpus.is_selected = false;
        });
        $scope._updateHyperdataList(function() {
            $scope.updateDataset();
        });
    };
    $scope.corporaSelectAll = function() {
        $.each($scope.corpora, function(c, corpus){
            corpus.is_selected = true;
        });
        $scope._updateHyperdataList(function() {
            $scope.updateDataset();
        });
    };
    // filters: hyperdata, according to the considered corpora
    $scope.hyperdataList = [];
    $scope.updateHyperdataTimer = null;
    $scope.setHyperdataList = function(hyperdataList) {
        // add an empty item for each value
        $.each(hyperdataList, function(h, hyperdata) {
            if (hyperdata.values) {
                hyperdata.values.unshift(undefined);
            }
        });
        // do not keep the ones we are not interested in
        var rejectedHyperdata = ['doi', 'volume', 'page'];
        $scope.hyperdataList = [];
        $.each(hyperdataList, function(h, hyperdata) {
            if (rejectedHyperdata.indexOf(hyperdata.key) == -1) {
                hyperdata.name = hyperdata.key.split('_')[0];
                $scope.hyperdataList.push(hyperdata);
            }
        });
    };
    $scope.updateHyperdataList = function() {
        if ($scope.updateHyperdataTimer) {
            clearTimeout($scope.updateHyperdataTimer);
        }
        $scope.updateHyperdataTimer = setTimeout($scope._updateHyperdataList, 500);
    };
    $scope._updateHyperdataList = function(callback) {
        var corpus_id_list = getSelectedCorporaIdList();
        if (corpus_id_list && corpus_id_list.length) {
            var url = '/api/hyperdata?corpus_id=';
            url += corpus_id_list.join(',');
            $scope.is_loading = true;
            $http.get(url, {cache: true}).success(function(response){
                $scope.is_loading = false;
                $scope.setHyperdataList(response.data);
                if (callback) {
                    callback();
                }
            });
        } else {
            $scope.hyperdataList = [];
        }
    };
    // update the dataset, according to the various filters applied to it
    $scope.updateDatasetTimer = null;
    $scope.updateDataset = function() {
        if ($scope.updateDatasetTimer) {
            clearTimeout($scope.updateDatasetTimer);
        }
        $scope.updateDatasetTimer = setTimeout($scope._updateDataset, 500);
    };
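    // Both updateDataset and updateHyperdataList debounce with a 500 ms timer:
    // rapid successive UI changes cancel the pending call so only one request fires.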
    $scope._updateDataset = function() {
        // parameters
        var parameters = {
            'x': {
                'with_empty': true,
                'resolution': $scope.query_x.resolution,
                'value': 'publication_date',
            },
            'y': {
                'value': $scope.query_y.value,
            },
            'filter': {
            },
            'format': 'json',
        };
        // y: normalization
        if ($scope.query_y.is_relative) {
            parameters.y.divided_by = $scope.query_y.divided_by;
        }
        // filter: ngrams
        if ($scope.query_y.ngrams && $scope.query_y.ngrams.length) {
            parameters.filter.ngrams = [];
            $.each($scope.query_y.ngrams, function(n, ngram) {
                parameters.filter.ngrams.push(ngram.terms);
            });
            console.log($scope.query_y.ngrams);
        }
        // filter: corpora
        parameters.filter.corpora = [];
        parameters.filter.corpora.push(getSelectedCorporaIdList().join(','));
        // filter: hyperdata
        parameters.filter.hyperdata = [];
        $.each($scope.hyperdataList, function(h, hyperdata) {
            if ((hyperdata.values || hyperdata.operator) && hyperdata.value) {
                if (hyperdata.values) {
                    hyperdata.operator = '=';
                }
                parameters.filter.hyperdata.push({
                    'key': hyperdata.key,
                    'operator': hyperdata.operator,
                    'value': hyperdata.value
                });
            }
        });
        // retrieve data
        var url = '/api/nodes/' + $scope.project_id + '/histories';
        $scope.is_loading = true;
        $http.post(url, parameters, {cache: true}).success(function(response){
            $scope.is_loading = false;
            // event firing to parent
            $scope.$emit('updateDatasets', {
                response: response,
                dataset_index: $scope.$index,
            });
        });
    };
    $scope.$on('updateDataset', function(e, data) {
        $scope.updateDataset();
    });
});
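// Parent/child wiring: each DatasetController $emit()s 'updateDatasets' up to the
// GraphController below, which redraws; the GraphController $broadcast()s
// 'updateDataset' back down when every dataset must be refetched.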
// Controller for graphs
gargantext.controller('GraphController', function($scope, $http, $element) {
    // initial values
    $scope.query_x = {
        'resolution': 'year'
    };
    // initialization
    $scope.datasets = [{}];
    $scope.options = {
        stacking: false
    };
    $scope.seriesOptions = {
        thicknessNumber: 3,
        thickness: '3px',
        type: 'column',
        striped: false
    };
    $scope.graph = {
        data: [],
        options: {
            axes: {
                x: {key: 'x', type: 'date'},
                y: {key: 'y', type: 'linear'},
            },
            tension: 1.0,
            lineMode: 'linear',
            // tooltip: {mode: 'scrubber', formatter: function(x, y, series) {
            //     var grouping = groupings.datetime[$scope.groupingKey];
            //     return grouping.representation(x) + ' → ' + y;
            // }},
            drawLegend: true,
            drawDots: true,
            columnsHGap: 5
        }
    };
    // add a dataset
    $scope.addDataset = function() {
        $scope.datasets.push({});
    };
    // remove a dataset
    $scope.removeDataset = function(datasetIndex) {
        if ($scope.datasets.length > 1) {
            $scope.datasets.splice(datasetIndex, 1);
            dataset_results.splice(datasetIndex, 1);
            $scope.updateDatasets();
        } else {
            alert('You cannot remove the last dataset.');
        }
    };
    // update the datasets (catches the event thrown by children dataset controllers)
    $scope.updateDatasets = function(must_refresh) {
        // refresh all data
        if (must_refresh) {
            $scope.$broadcast('updateDataset');
        }
        // create temporary representation for the result
        var values = {};
        var n = dataset_results.length;
        for (var i = 0; i < n; i++) {
            var result = dataset_results[i];
            var key = 'y' + i;
            for (var j = 0, m = result.length; j < m; j++) {
                var date = result[j][0];
                var value = result[j][1];
                if (!values[date]) {
                    values[date] = {};
                }
                values[date][key] = value;
            }
        }
        // put that in an array
        var data = [];
        $.each(values, function(date, keys_values) {
            var row = {x: new Date(date)};
            for (var i = 0; i < n; i++) {
                var key = 'y' + i;
                row[key] = keys_values[key] || 0;
            }
            data.push(row);
        });
        // sort the array
        data.sort(function(a, b) {
            return (new Date(a.x)).getTime() - (new Date(b.x)).getTime();
        });
        // show time!
        $scope.graph.data = data;
        // update series names
        var series = [];
        for (var i = 0; i < n; i++) {
            var seriesElement = {
                id: 'series_' + i,
                y: 'y' + i,
                axis: 'y',
                color: $scope.getColor(i, n),
                label: 'Project, corpus, docs|ngrams, terms'
            };
            angular.forEach($scope.seriesOptions, function(value, key) {
                seriesElement[key] = value;
            });
            series.push(seriesElement);
        }
        $scope.graph.options.series = series;
    };
    var dataset_results = [];
    $scope.$on('updateDatasets', function(e, data) {
        // data extraction
        var dataset_index = data.dataset_index;
        var result = data.response.result;
        // update full results array
        while (dataset_results.length < $scope.datasets.length) {
            dataset_results.push([]);
        }
        while (dataset_results.length > $scope.datasets.length) {
            dataset_results.splice(-1, 1);
        }
        dataset_results[dataset_index] = result;
        $scope.updateDatasets();
    });
});
:after,:before,tags-input *{-moz-box-sizing:border-box;-webkit-box-sizing:border-box;box-sizing:border-box}tags-input .host{position:relative;margin-top:5px;margin-bottom:5px}tags-input .host:active{outline:0}tags-input .tags{-moz-appearance:textfield;-webkit-appearance:textfield;padding:1px;overflow:hidden;word-wrap:break-word;cursor:text;background-color:#fff;border:1px solid #a9a9a9;box-shadow:1px 1px 1px 0 #d3d3d3 inset}tags-input .tags.focused{outline:0;-webkit-box-shadow:0 0 3px 1px rgba(5,139,242,.6);-moz-box-shadow:0 0 3px 1px rgba(5,139,242,.6);box-shadow:0 0 3px 1px rgba(5,139,242,.6)}tags-input .tags .tag-list{margin:0;padding:0;list-style-type:none}tags-input .tags .tag-item{margin:2px;padding:0 5px;display:inline-block;float:left;font:14px "Helvetica Neue",Helvetica,Arial,sans-serif;height:26px;line-height:25px;border:1px solid #acacac;border-radius:3px;background:-webkit-linear-gradient(top,#f0f9ff 0,#cbebff 47%,#a1dbff 100%);background:linear-gradient(to bottom,#f0f9ff 0,#cbebff 47%,#a1dbff 100%)}tags-input .tags .tag-item.selected{background:-webkit-linear-gradient(top,#febbbb 0,#fe9090 45%,#ff5c5c 100%);background:linear-gradient(to bottom,#febbbb 0,#fe9090 45%,#ff5c5c 100%)}tags-input .tags .tag-item .remove-button{margin:0 0 0 5px;padding:0;border:none;background:0 0;cursor:pointer;vertical-align:middle;font:700 16px Arial,sans-serif;color:#585858}tags-input .tags .tag-item .remove-button:active{color:red}tags-input .tags .input{border:0;outline:0;margin:2px;padding:0;padding-left:5px;float:left;height:26px;font:14px "Helvetica Neue",Helvetica,Arial,sans-serif}tags-input .tags .input.invalid-tag{color:red}tags-input .tags .input::-ms-clear{display:none}tags-input.ng-invalid .tags{-webkit-box-shadow:0 0 3px 1px rgba(255,0,0,.6);-moz-box-shadow:0 0 3px 1px rgba(255,0,0,.6);box-shadow:0 0 3px 1px rgba(255,0,0,.6)}tags-input .autocomplete{margin-top:5px;position:absolute;padding:5px 0;z-index:999;width:100%;background-color:#fff;border:1px solid rgba(0,0,0,.2);-webkit-box-shadow:0 5px 10px rgba(0,0,0,.2);-moz-box-shadow:0 5px 10px rgba(0,0,0,.2);box-shadow:0 5px 10px rgba(0,0,0,.2)}tags-input .autocomplete .suggestion-list{margin:0;padding:0;list-style-type:none}tags-input .autocomplete .suggestion-item{padding:5px 10px;cursor:pointer;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;font:16px "Helvetica Neue",Helvetica,Arial,sans-serif;color:#000;background-color:#fff}tags-input .autocomplete .suggestion-item.selected,tags-input .autocomplete .suggestion-item.selected em{color:#fff;background-color:#0097cf}tags-input .autocomplete .suggestion-item em{font:normal bold 16px "Helvetica Neue",Helvetica,Arial,sans-serif;color:#000;background-color:#fff}
\ No newline at end of file
/*! ngTagsInput v2.1.1 License: MIT */!function(){"use strict";function a(){var a={};return{on:function(b,c){return b.split(" ").forEach(function(b){a[b]||(a[b]=[]),a[b].push(c)}),this},trigger:function(b,c){return angular.forEach(a[b],function(a){a.call(null,c)}),this}}}function b(a,b){return a=a||[],a.length>0&&!angular.isObject(a[0])&&a.forEach(function(c,d){a[d]={},a[d][b]=c}),a}function c(a,b,c){for(var d=null,f=0;f<a.length;f++)if(e(a[f][c]).toLowerCase()===e(b[c]).toLowerCase()){d=a[f];break}return d}function d(a,b,c){if(!b)return a;var d=b.replace(/([.?*+^$[\]\\(){}|-])/g,"\\$1");return a.replace(new RegExp(d,"gi"),c)}function e(a){return angular.isUndefined(a)||null==a?"":a.toString().trim()}function f(a){return a.replace(/&/g,"&amp;").replace(/</g,"&lt;").replace(/>/g,"&gt;")}var g={backspace:8,tab:9,enter:13,escape:27,space:32,up:38,down:40,comma:188},h=9007199254740991,i=["text","email","url"],j=angular.module("ngTagsInput",[]);j.directive("tagsInput",["$timeout","$document","tagsInputConfig",function(d,f,j){function k(a,b){var d,f,g,h={};return d=function(b){return e(b[a.displayProperty])},f=function(b,c){b[a.displayProperty]=c},g=function(b){var e=d(b);return e&&e.length>=a.minLength&&e.length<=a.maxLength&&a.allowedTagsPattern.test(e)&&!c(h.items,b,a.displayProperty)},h.items=[],h.addText=function(a){var b={};return f(b,a),h.add(b)},h.add=function(c){var e=d(c);return a.replaceSpacesWithDashes&&(e=e.replace(/\s/g,"-")),f(c,e),g(c)?(h.items.push(c),b.trigger("tag-added",{$tag:c})):e&&b.trigger("invalid-tag",{$tag:c}),c},h.remove=function(a){var c=h.items.splice(a,1)[0];return b.trigger("tag-removed",{$tag:c}),c},h.removeLast=function(){var b,c=h.items.length-1;return a.enableEditingLastTag||h.selected?(h.selected=null,b=h.remove(c)):h.selected||(h.selected=h.items[c]),b},h}function l(a){return-1!==i.indexOf(a)}return{restrict:"E",require:"ngModel",scope:{tags:"=ngModel",onTagAdded:"&",onTagRemoved:"&"},replace:!1,transclude:!0,templateUrl:"ngTagsInput/tags-input.html",controller:["$scope","$attrs","$element",function(b,c,d){b.events=new a,j.load("tagsInput",b,c,{type:[String,"text",l],placeholder:[String,"Add a tag"],tabindex:[Number,null],removeTagSymbol:[String,String.fromCharCode(215)],replaceSpacesWithDashes:[Boolean,!0],minLength:[Number,3],maxLength:[Number,h],addOnEnter:[Boolean,!0],addOnSpace:[Boolean,!1],addOnComma:[Boolean,!0],addOnBlur:[Boolean,!0],allowedTagsPattern:[RegExp,/.+/],enableEditingLastTag:[Boolean,!1],minTags:[Number,0],maxTags:[Number,h],displayProperty:[String,"text"],allowLeftoverText:[Boolean,!1],addFromAutocompleteOnly:[Boolean,!1]}),b.tagList=new k(b.options,b.events),this.registerAutocomplete=function(){var a=d.find("input");return a.on("keydown",function(a){b.events.trigger("input-keydown",a)}),{addTag:function(a){return b.tagList.add(a)},focusInput:function(){a[0].focus()},getTags:function(){return b.tags},getCurrentTagText:function(){return b.newTag.text},getOptions:function(){return b.options},on:function(a,c){return b.events.on(a,c),this}}}}],link:function(a,c,h,i){var j,k=[g.enter,g.comma,g.space,g.backspace],l=a.tagList,m=a.events,n=a.options,o=c.find("input"),p=["minTags","maxTags","allowLeftoverText"];j=function(){i.$setValidity("maxTags",a.tags.length<=n.maxTags),i.$setValidity("minTags",a.tags.length>=n.minTags),i.$setValidity("leftoverText",n.allowLeftoverText?!0:!a.newTag.text)},m.on("tag-added",a.onTagAdded).on("tag-removed",a.onTagRemoved).on("tag-added",function(){a.newTag.text=""}).on("tag-added 
tag-removed",function(){i.$setViewValue(a.tags)}).on("invalid-tag",function(){a.newTag.invalid=!0}).on("input-change",function(){l.selected=null,a.newTag.invalid=null}).on("input-focus",function(){i.$setValidity("leftoverText",!0)}).on("input-blur",function(){n.addFromAutocompleteOnly||(n.addOnBlur&&l.addText(a.newTag.text),j())}).on("option-change",function(a){-1!==p.indexOf(a.name)&&j()}),a.newTag={text:"",invalid:null},a.getDisplayText=function(a){return e(a[n.displayProperty])},a.track=function(a){return a[n.displayProperty]},a.newTagChange=function(){m.trigger("input-change",a.newTag.text)},a.$watch("tags",function(c){a.tags=b(c,n.displayProperty),l.items=a.tags}),a.$watch("tags.length",function(){j()}),o.on("keydown",function(b){if(!b.isImmediatePropagationStopped||!b.isImmediatePropagationStopped()){var c,d,e=b.keyCode,f=b.shiftKey||b.altKey||b.ctrlKey||b.metaKey,h={};if(!f&&-1!==k.indexOf(e))if(h[g.enter]=n.addOnEnter,h[g.comma]=n.addOnComma,h[g.space]=n.addOnSpace,c=!n.addFromAutocompleteOnly&&h[e],d=!c&&e===g.backspace&&0===a.newTag.text.length,c)l.addText(a.newTag.text),a.$apply(),b.preventDefault();else if(d){var i=l.removeLast();i&&n.enableEditingLastTag&&(a.newTag.text=i[n.displayProperty]),a.$apply(),b.preventDefault()}}}).on("focus",function(){a.hasFocus||(a.hasFocus=!0,m.trigger("input-focus"),a.$apply())}).on("blur",function(){d(function(){var b=f.prop("activeElement"),d=b===o[0],e=c[0].contains(b);(d||!e)&&(a.hasFocus=!1,m.trigger("input-blur"))})}),c.find("div").on("click",function(){o[0].focus()})}}}]),j.directive("autoComplete",["$document","$timeout","$sce","tagsInputConfig",function(a,h,i,j){function k(a,d){var e,f,g,i={};return f=function(a,b){return a.filter(function(a){return!c(b,a,d.tagsInput.displayProperty)})},i.reset=function(){g=null,i.items=[],i.visible=!1,i.index=-1,i.selected=null,i.query=null,h.cancel(e)},i.show=function(){i.selected=null,i.visible=!0},i.load=function(c,j){h.cancel(e),e=h(function(){i.query=c;var e=a({$query:c});g=e,e.then(function(a){e===g&&(a=b(a.data||a,d.tagsInput.displayProperty),a=f(a,j),i.items=a.slice(0,d.maxResultsToShow),i.items.length>0?i.show():i.reset())})},d.debounceDelay,!1)},i.selectNext=function(){i.select(++i.index)},i.selectPrior=function(){i.select(--i.index)},i.select=function(a){0>a?a=i.items.length-1:a>=i.items.length&&(a=0),i.index=a,i.selected=i.items[a]},i.reset(),i}return{restrict:"E",require:"^tagsInput",scope:{source:"&"},templateUrl:"ngTagsInput/auto-complete.html",link:function(a,b,c,h){var l,m,n,o,p,q,r=[g.enter,g.tab,g.escape,g.up,g.down];j.load("autoComplete",a,c,{debounceDelay:[Number,100],minLength:[Number,3],highlightMatchedText:[Boolean,!0],maxResultsToShow:[Number,10],loadOnDownArrow:[Boolean,!1],loadOnEmpty:[Boolean,!1],loadOnFocus:[Boolean,!1]}),n=a.options,m=h.registerAutocomplete(),n.tagsInput=m.getOptions(),l=new k(a.source,n),o=function(a){return a[n.tagsInput.displayProperty]},p=function(a){return e(o(a))},q=function(a){return a&&a.length>=n.minLength||!a&&n.loadOnEmpty},a.suggestionList=l,a.addSuggestionByIndex=function(b){l.select(b),a.addSuggestion()},a.addSuggestion=function(){var a=!1;return l.selected&&(m.addTag(l.selected),l.reset(),m.focusInput(),a=!0),a},a.highlight=function(a){var b=p(a);return b=f(b),n.highlightMatchedText&&(b=d(b,f(l.query),"<em>$&</em>")),i.trustAsHtml(b)},a.track=function(a){return o(a)},m.on("tag-added tag-removed invalid-tag input-blur",function(){l.reset()}).on("input-change",function(a){q(a)?l.load(a,m.getTags()):l.reset()}).on("input-focus",function(){var 
a=m.getCurrentTagText();n.loadOnFocus&&q(a)&&l.load(a,m.getTags())}).on("input-keydown",function(b){var c=!1;b.stopImmediatePropagation=function(){c=!0,b.stopPropagation()},b.isImmediatePropagationStopped=function(){return c};var d=b.keyCode,e=!1;-1!==r.indexOf(d)&&(l.visible?d===g.down?(l.selectNext(),e=!0):d===g.up?(l.selectPrior(),e=!0):d===g.escape?(l.reset(),e=!0):(d===g.enter||d===g.tab)&&(e=a.addSuggestion()):d===g.down&&a.options.loadOnDownArrow&&(l.load(m.getCurrentTagText(),m.getTags()),e=!0),e&&(b.preventDefault(),b.stopImmediatePropagation(),a.$apply()))})}}}]),j.directive("tiTranscludeAppend",function(){return function(a,b,c,d,e){e(function(a){b.append(a)})}}),j.directive("tiAutosize",["tagsInputConfig",function(a){return{restrict:"A",require:"ngModel",link:function(b,c,d,e){var f,g,h=a.getTextAutosizeThreshold();f=angular.element('<span class="input"></span>'),f.css("display","none").css("visibility","hidden").css("width","auto").css("white-space","pre"),c.parent().append(f),g=function(a){var b,e=a;return angular.isString(e)&&0===e.length&&(e=d.placeholder),e&&(f.text(e),f.css("display",""),b=f.prop("offsetWidth"),f.css("display","none")),c.css("width",b?b+h+"px":""),a},e.$parsers.unshift(g),e.$formatters.unshift(g),d.$observe("placeholder",function(a){e.$modelValue||g(a)})}}}]),j.directive("tiBindAttrs",function(){return function(a,b,c){a.$watch(c.tiBindAttrs,function(a){angular.forEach(a,function(a,b){c.$set(b,a)})},!0)}}),j.provider("tagsInputConfig",function(){var a={},b={},c=3;this.setDefaults=function(b,c){return a[b]=c,this},this.setActiveInterpolation=function(a,c){return b[a]=c,this},this.setTextAutosizeThreshold=function(a){return c=a,this},this.$get=["$interpolate",function(d){var e={};return e[String]=function(a){return a},e[Number]=function(a){return parseInt(a,10)},e[Boolean]=function(a){return"true"===a.toLowerCase()},e[RegExp]=function(a){return new RegExp(a)},{load:function(c,f,g,h){var i=function(){return!0};f.options={},angular.forEach(h,function(h,j){var k,l,m,n,o,p;k=h[0],l=h[1],m=h[2]||i,n=e[k],o=function(){var b=a[c]&&a[c][j];return angular.isDefined(b)?b:l},p=function(a){f.options[j]=a&&m(a)?n(a):o()},b[c]&&b[c][j]?g.$observe(j,function(a){p(a),f.events.trigger("option-change",{name:j,newValue:a})}):p(g[j]&&d(g[j])(f.$parent))})},getTextAutosizeThreshold:function(){return c}}}]}),j.run(["$templateCache",function(a){a.put("ngTagsInput/tags-input.html",'<div class="host" tabindex="-1" ti-transclude-append=""><div class="tags" ng-class="{focused: hasFocus}"><ul class="tag-list"><li class="tag-item" ng-repeat="tag in tagList.items track by track(tag)" ng-class="{ selected: tag == tagList.selected }"><span ng-bind="getDisplayText(tag)"></span> <a class="remove-button" ng-click="tagList.remove($index)" ng-bind="options.removeTagSymbol"></a></li></ul><input class="input" ng-model="newTag.text" ng-change="newTagChange()" ng-trim="false" ng-class="{\'invalid-tag\': newTag.invalid}" ti-bind-attrs="{type: options.type, placeholder: options.placeholder, tabindex: options.tabindex}" ti-autosize=""></div></div>'),a.put("ngTagsInput/auto-complete.html",'<div class="autocomplete" ng-show="suggestionList.visible"><ul class="suggestion-list"><li class="suggestion-item" ng-repeat="item in suggestionList.items track by track(item)" ng-class="{selected: item == suggestionList.selected}" ng-click="addSuggestionByIndex($index)" ng-mouseenter="suggestionList.select($index)" ng-bind-html="highlight(item)"></li></ul></div>')}])}();
\ No newline at end of file
......@@ -111,7 +111,7 @@
</a>
<i class="caret"></i>
<ul class="dropdown-menu">
{% if view != "graph" %}
{% if view == "titles" %}
<li>
<a tabindex="-1"
data-url="/projects/{{project.id}}/corpora/{{ corpus.id }}/explorer?field1=ngrams&amp;field2=ngrams&amp;distance=conditional&amp;bridgeness=5" onclick='gotoexplorer(this)' >With conditional distance </a>
......@@ -137,6 +137,15 @@
{% endif %}
{% endfor %}
<li>
<a type="button" class="btn btn-default {% if view == 'analytics' %} active {% endif %}"
onclick="javascript:location.href='/projects/{{project.id}}/corpora/{{ corpus.id }}/analytics'"
data-target='#' href='#'>Analytics
</a>
</li>
</ul>
</div>
{% endif %}
......