Commit a664c5d2 authored by delanoe's avatar delanoe

Merge remote-tracking branch 'origin/c24b-testing' into testing

parents b697ed58 b6877db9
from rest_framework.status import *
from rest_framework.exceptions import APIException
from rest_framework.response import Response
from rest_framework.renderers import JSONRenderer, BrowsableAPIRenderer
from rest_framework.views import APIView
from rest_framework.authentication import SessionAuthentication, BasicAuthentication
from rest_framework.permissions import IsAuthenticated
from gargantext.models import Node, Ngram, NodeNgram, NodeNodeNgram, NodeNode
from gargantext.constants import RESOURCETYPES, NODETYPES, get_resource
from gargantext.util.db import session, delete, func, bulk_insert
from gargantext.util.db_cache import cache, or_
from gargantext.util.validation import validate
from gargantext.models import Node, Ngram, NodeNgram, NodeNodeNgram, NodeNode
from gargantext.constants import RESOURCETYPES, NODETYPES, get_resource
from gargantext.util.http import ValidationException, APIView, JsonHttpResponse, get_parameters
from gargantext.util.files import upload
from gargantext.util.db import session, delete, func, bulk_insert
from gargantext.util.scheduling import scheduled
#import
# NODES format: attribute names copied from a node (or user) object into the
# JSON payload by format_parent() / format_records().
# Fields exported for a user object.
_user_default_fields =["is_staff","is_superuser","is_active", "username", "email", "first_name", "last_name", "id"]
# Fields exported for any node type that has no dedicated field list.
_api_default_fields = ['id', 'parent_id', 'name', 'typename', 'date']
# Fields exported for DOCUMENT and CORPUS nodes (the defaults plus hyperdata).
_doc_default_fields = ['id', 'parent_id', 'name', 'typename', 'date', "hyperdata"]
# Field lists kept for reference, currently disabled:
#_resource_default_fields = [['id', 'parent_id', 'name', 'typename', "hyperdata.method"]
#_corpus_default_fields = ['id', 'parent_id', 'name', 'typename', 'date', "hyperdata","resource"]
def _corpus_status_message(node):
    '''Return the human-readable status string of a corpus node.

    Uses the same wording as format_records(): an empty string when the node
    has no status or its status is complete, otherwise an "in progress" or
    "aborted" message built from the status dict returned by node.status().
    '''
    status = node.status()
    if status is None or status['complete']:
        return ''
    if status['error']:
        # only the last recorded error is reported
        return '(aborted: "%s" after %i docs)' % (
            status['error'][-1],
            status['progress'],
        )
    return '(in progress: %s, %d complete)' % (
        status['action'].replace('_', ' '),
        status['progress'],
    )


def format_parent(node):
    '''Format the parent object of a response as a plain dict.

    :param node: a user object (anything exposing ``username``) or a node
        object exposing ``typename``.
    :returns: a dict of the user fields for a user; the document fields for
        a DOCUMENT; the document fields plus ``status_msg`` for a CORPUS; the
        default API fields for any other node type. Returns None for an
        object whose ``username`` is the empty string (behaviour kept from
        the previous implementation).
    '''
    try:
        username = node.username
    except AttributeError:
        # not a user object: dispatch on the node type below
        pass
    else:
        # USER
        if username != "":
            return {field: getattr(node, field) for field in _user_default_fields}
        return None

    # DOC
    if node.typename == "DOCUMENT":
        return {field: getattr(node, field) for field in _doc_default_fields}
    # CORPUS
    if node.typename == "CORPUS":
        parent = {field: getattr(node, field) for field in _doc_default_fields}
        # the status message used to be read from an undefined name
        # (NameError); it is now computed from the node itself
        parent["status_msg"] = _corpus_status_message(node)
        return parent
    # PROJECT, RESOURCES?
    return {field: getattr(node, field) for field in _api_default_fields}
def format_records(node_list):
    '''Turn a list of nodes into a list of plain dicts.

    The type of the first node decides how the whole list is formatted:
    USER and DOCUMENT lists export their dedicated field sets, CORPUS lists
    additionally carry the ids of their RESOURCE and DOCUMENT children and a
    status message, and every other type exports the default API fields.
    An empty list yields an empty list.
    '''
    if len(node_list) < 1:
        return []

    kind = node_list[0].typename

    if kind != "CORPUS":
        # USER / DOCUMENT have their own field set, anything else the default
        per_type_fields = {
            "USER": _user_default_fields,
            "DOCUMENT": _doc_default_fields,
        }
        wanted = per_type_fields.get(kind, _api_default_fields)
        return [{name: getattr(item, name) for name in wanted} for item in node_list]

    # CORPUS (the projects view should not be so detailed)
    formatted = []
    for corpus in node_list:
        entry = {name: getattr(corpus, name) for name in _doc_default_fields}
        entry["resources"] = [child.id for child in corpus.children("RESOURCE")]
        entry["documents"] = [child.id for child in corpus.children("DOCUMENT")]

        state = corpus.status()
        if state is None or state['complete']:
            message = ''
        elif state['error']:
            message = '(aborted: "%s" after %i docs)' % (
                state['error'][-1],
                state['progress']
            )
        else:
            message = '(in progress: %s, %d complete)' % (
                state['action'].replace('_', ' '),
                state['progress'],
            )
        entry["status"] = message
        formatted.append(entry)
    return formatted
def check_rights(request, node_id):
    '''Check that the node belongs to the requesting user and return it.

    :param request: the current request; ``request.user.id`` is compared
        with the node's ``user_id``.
    :param node_id: id of the node to look up.
    :returns: the Node when it exists and is owned by the user.
    :raises PermissionDenied: when the node does not exist or belongs to
        someone else. PermissionDenied is a subclass of APIException, so
        callers catching APIException keep working, but the client now
        receives an HTTP 403 instead of the HTTP 500 a bare APIException
        is rendered as.
    '''
    # local import: only this function needs the exception class
    from rest_framework.exceptions import PermissionDenied

    node = session.query(Node).filter(Node.id == node_id).first()
    # A missing node is reported exactly like a foreign one, as before.
    if node is None or node.user_id != request.user.id:
        raise PermissionDenied("403 Unauthorized")
    return node
def format_response(parent, records):
    '''Assemble the standard payload: formatted parent, formatted records
    and the number of records.'''
    payload = {}
    payload["parent"] = format_parent(parent)
    payload["records"] = format_records(records)
    payload["count"] = len(records)
    return payload
from django.core.exceptions import *
from .api import * #APIView, APIException entre autres
from gargantext.util.db import session
from gargantext.models import Node
from gargantext.util.http import *
class CorpusView(APIView):
    '''API endpoint that represents a corpus inside a project.

    Every method first resolves the PROJECT node, checks that it belongs to
    the requesting user (check_rights) and then resolves the CORPUS node.
    A missing project or corpus is answered with an HTTP 404 response.
    '''

    # Values accepted for the ``view`` query parameter of GET.
    # NOTE(review): "RESSOURCE" looks like a misspelling of the "RESOURCE"
    # typename used elsewhere in the code base -- confirm before renaming.
    _VIEWS = ("DOCUMENT", "JOURNAL", "TITLE", "ANALYTICS", "RESSOURCE")

    @staticmethod
    def _not_found(typename, node_id):
        '''Build the 404 response for a missing node of the given type.'''
        return Response({'detail' : "%s Node #%s not found" % (typename, node_id) },
                        status=HTTP_404_NOT_FOUND)

    def _load(self, request, project_id, corpus_id):
        '''Resolve the corpus after validating the enclosing project.

        :returns: ``(corpus, None)`` on success, ``(None, response)`` when
            the project or the corpus does not exist.
        :raises: whatever check_rights raises when the project is not owned
            by the requesting user.
        '''
        project = (session.query(Node)
                   .filter(Node.id == project_id, Node.typename == "PROJECT")
                   .first())
        # the existence test must come first: project.id on None would raise
        if project is None:
            return None, self._not_found("PROJECT", project_id)
        check_rights(request, project.id)

        corpus = (session.query(Node)
                  .filter(Node.id == corpus_id, Node.typename == "CORPUS")
                  .first())
        if corpus is None:
            return None, self._not_found("CORPUS", corpus_id)
        return corpus, None

    def get(self, request, project_id, corpus_id, view = "DOCUMENT"):
        '''GET corpus detail.

        By default the children of type DOCUMENT are listed; the ``view``
        query parameter selects another child type among ``_VIEWS``.
        Unknown values are ignored and the default is kept.
        '''
        params = get_parameters(request)
        if "view" in params:
            requested_view = params["view"].upper()
            # validate what the client asked for, not the default value
            if requested_view in self._VIEWS:
                view = requested_view

        corpus, error = self._load(request, project_id, corpus_id)
        if error is not None:
            return error

        documents = (session.query(Node)
                     .filter(Node.parent_id == corpus_id, Node.typename == view)
                     .all())
        return Response(format_response(corpus, documents))

    def delete(self, request, project_id, corpus_id):
        '''DELETE the corpus together with all of its direct children.'''
        # ownership is checked like in the other methods; the check had been
        # commented out, leaving the deletion open to any caller
        corpus, error = self._load(request, project_id, corpus_id)
        if error is not None:
            return error

        children = session.query(Node).filter(Node.parent_id == corpus_id).all()
        # Session.delete() takes one mapped instance, not a list
        for child in children:
            session.delete(child)
        session.delete(corpus)
        session.commit()
        return Response({"detail": "Deleted corpus #%s" % str(corpus_id)},
                        status=HTTP_204_NO_CONTENT)

    def put(self, request, project_id, corpus_id, view="DOCUMENT"):
        '''UPDATE the corpus.

        Only ``name`` and ``date`` are written. ``username`` (change of
        owner) and ``hyperdata`` are recognised but not implemented yet, and
        any other key is ignored.
        '''
        corpus, error = self._load(request, project_id, corpus_id)
        if error is not None:
            return error

        for key, val in request.data.items():
            if key == "username":
                # change of owner: not implemented yet
                continue
            if key == "hyperdata":
                # updating contextual values of the corpus: not implemented yet
                continue
            if key in ("name", "date"):
                setattr(corpus, key, val)
        session.add(corpus)
        session.commit()
        # updating children???
        return Response({"detail": "Updated corpus #%s" % str(corpus.id)},
                        status=HTTP_202_ACCEPTED)

    def post(self, request, project_id, corpus_id):
        '''ADD a new RESOURCE to CORPUS (not implemented yet).

        The project and corpus are validated, then HTTP 501 is returned:
        the previous code fell off the end of the method and returned None,
        which is not a valid view result.
        '''
        corpus, error = self._load(request, project_id, corpus_id)
        if error is not None:
            return error
        return Response({"detail": "Adding a resource to corpus #%s is not implemented" % str(corpus.id)},
                        status=HTTP_501_NOT_IMPLEMENTED)
from gargantext.models import Node, Ngram, NodeNgram, NodeNodeNgram, NodeNode from gargantext.models import Node, Ngram, NodeNgram, NodeNodeNgram, NodeNode
from gargantext.constants import NODETYPES, DEFAULT_N_DOCS_HAVING_NGRAM from gargantext.constants import NODETYPES
from gargantext.util.db import session, delete, func, bulk_insert from gargantext.util.db import session, delete, func, bulk_insert
from gargantext.util.db_cache import cache, or_ from gargantext.util.db_cache import cache, or_
from gargantext.util.validation import validate from gargantext.util.validation import validate
...@@ -8,7 +8,7 @@ from gargantext.util.http import ValidationException, APIView \ ...@@ -8,7 +8,7 @@ from gargantext.util.http import ValidationException, APIView \
, get_parameters, JsonHttpResponse, Http404\ , get_parameters, JsonHttpResponse, Http404\
, HttpResponse , HttpResponse
from .api import *
from collections import defaultdict from collections import defaultdict
import csv import csv
...@@ -67,6 +67,48 @@ def _query_nodes(request, node_id=None): ...@@ -67,6 +67,48 @@ def _query_nodes(request, node_id=None):
# return the result! # return the result!
return parameters, query, count return parameters, query, count
class Status(APIView):
    '''API endpoint that represents the current status of a node.

    The URL pattern captures the node id as a single positional group, so
    each method receives it as its first argument after ``request``.
    '''
    renderer_classes = (JSONRenderer, BrowsableAPIRenderer)

    def get(self, request, node_id):
        '''Return the formatted node, its children and its statuses.

        The ``status`` entry is None when the node's hyperdata has no
        ``statuses`` key.
        '''
        user = cache.User[request.user.id]
        check_rights(request, node_id)
        node = (session.query(Node)
                .filter(Node.id == node_id, Node.user_id == user.id)
                .first())
        if node is None:
            return Response({"detail":"Node not Found for this user"}, status=HTTP_404_NOT_FOUND)

        context = format_response(node, list(node.children()))
        try:
            context["status"] = node.hyperdata["statuses"]
        except KeyError:
            context["status"] = None
        return Response(context)

    def post(self, request, data):
        '''Create a new status for the node (not implemented).'''
        raise NotImplementedError

    def put(self, request, data):
        '''Update the status of the node (not implemented).

        ``data`` receives the node id captured by the URL pattern; the
        ownership check is performed before NotImplementedError is raised.
        '''
        # the body used to read an undefined ``node_id`` (NameError)
        node_id = data
        user = cache.User[request.user.id]
        check_rights(request, node_id)
        node = session.query(Node).filter(Node.id == node_id).first()
        raise NotImplementedError
        #return Response({"detail":"Udpated status for NODE #%i " %node.id}, status=HTTP_202_ACCEPTED)

    def delete(self, request, node_id=None):
        '''Reset the statuses of the node to an empty list.

        ``node_id`` was missing from the signature although the URL pattern
        passes it and the body needs it; it defaults to None so that
        existing calls without it still bind.
        '''
        user = cache.User[request.user.id]
        check_rights(request, node_id)
        node = session.query(Node).filter(Node.id == node_id).first()
        if node is None:
            return Response({"detail":"Node not Found"}, status=HTTP_404_NOT_FOUND)
        # write the same key that get() reads ("statuses", not "status")
        node.hyperdata["statuses"] = []
        session.add(node)
        session.commit()
        return Response({"detail":"Deleted status for NODE #%i " %node.id}, status=HTTP_204_NO_CONTENT)
class NodeListResource(APIView): class NodeListResource(APIView):
...@@ -143,8 +185,6 @@ class NodeListHaving(APIView): ...@@ -143,8 +185,6 @@ class NodeListHaving(APIView):
Simple implementation: Simple implementation:
Takes IDs of corpus and ngram and returns list of relevent documents in json format Takes IDs of corpus and ngram and returns list of relevent documents in json format
according to TFIDF score (order is decreasing). according to TFIDF score (order is decreasing).
2016-09: add total counts to output json
''' '''
def get(self, request, corpus_id): def get(self, request, corpus_id):
parameters = get_parameters(request) parameters = get_parameters(request)
...@@ -155,7 +195,7 @@ class NodeListHaving(APIView): ...@@ -155,7 +195,7 @@ class NodeListHaving(APIView):
except : except :
raise ValidationException('"ngram_ids" needs integers separated by comma.') raise ValidationException('"ngram_ids" needs integers separated by comma.')
limit = DEFAULT_N_DOCS_HAVING_NGRAM limit=5
nodes_list = [] nodes_list = []
corpus = session.query(Node).filter(Node.id==corpus_id).first() corpus = session.query(Node).filter(Node.id==corpus_id).first()
...@@ -178,18 +218,26 @@ class NodeListHaving(APIView): ...@@ -178,18 +218,26 @@ class NodeListHaving(APIView):
.filter(Node.typename == 'DOCUMENT', Node.parent_id== corpus.id) .filter(Node.typename == 'DOCUMENT', Node.parent_id== corpus.id)
.filter(or_(*[NodeNodeNgram.ngram_id==ngram_id for ngram_id in ngram_ids])) .filter(or_(*[NodeNodeNgram.ngram_id==ngram_id for ngram_id in ngram_ids]))
.group_by(Node) .group_by(Node)
.order_by(func.sum(NodeNodeNgram.score).desc())
.limit(limit)
) )
# print("\n")
# get the total count before applying limit # print("in TFIDF:")
nodes_count = nodes_query.count() # print("\tcorpus_id:",corpus_id)
# convert query result to a list of dicts
# now the query with the limit # if nodes_query is None:
nodes_results_query = (nodes_query # print("TFIDF error, juste take sums")
.order_by(func.sum(NodeNodeNgram.score).desc()) # nodes_query = (session
.limit(limit) # .query(Node, func.sum(NodeNgram.weight))
) # .join(NodeNgram, NodeNgram.node_id == Node.id)
# .filter(Node.parent_id == corpus_id)
for node, score in nodes_results_query: # .filter(Node.typename == 'DOCUMENT')
# .filter(or_(*[NodeNgram.ngram_id==ngram_id for ngram_id in ngram_ids]))
# .group_by(Node)
# .order_by(func.sum(NodeNgram.weight).desc())
# .limit(limit)
# )
for node, score in nodes_query:
print(node,score) print(node,score)
print("\t corpus:",corpus_id,"\t",node.name) print("\t corpus:",corpus_id,"\t",node.name)
node_dict = { node_dict = {
...@@ -201,10 +249,7 @@ class NodeListHaving(APIView): ...@@ -201,10 +249,7 @@ class NodeListHaving(APIView):
node_dict[key] = node.hyperdata[key] node_dict[key] = node.hyperdata[key]
nodes_list.append(node_dict) nodes_list.append(node_dict)
return JsonHttpResponse({ return JsonHttpResponse(nodes_list)
'count': nodes_count,
'records': nodes_list
})
...@@ -438,8 +483,7 @@ class CorpusFacet(APIView): ...@@ -438,8 +483,7 @@ class CorpusFacet(APIView):
# check that the hyperfield parameter makes sense # check that the hyperfield parameter makes sense
_facet_available_subfields = [ _facet_available_subfields = [
'journal', 'publication_year', 'rubrique', 'journal', 'publication_year', 'rubrique',
'language_iso2', 'language_iso3', 'language_name', 'language_iso2', 'language_iso3', 'language_name'
'authors'
] ]
parameters = get_parameters(request) parameters = get_parameters(request)
......
from django.conf.urls import url from django.conf.urls import url
from . import nodes from . import nodes
from . import projects
from . import corpora
from . import ngrams from . import ngrams
from . import metrics from . import metrics
from . import ngramlists from . import ngramlists
...@@ -10,7 +12,33 @@ from graph.rest import Graph ...@@ -10,7 +12,33 @@ from graph.rest import Graph
urlpatterns = [ url(r'^nodes$' , nodes.NodeListResource.as_view() ) urlpatterns = [ url(r'^nodes$' , nodes.NodeListResource.as_view() )
, url(r'^nodes/(\d+)$' , nodes.NodeResource.as_view() ) , url(r'^nodes/(\d+)$' , nodes.NodeResource.as_view() )
, url(r'^nodes/(\d+)/having$' , nodes.NodeListHaving.as_view() ) , url(r'^nodes/(\d+)/having$' , nodes.NodeListHaving.as_view() )
, url(r'^nodes/(\d+)/status$' , nodes.Status.as_view() )
#Projects
, url(r'^projects$' , projects.ProjectList.as_view() )
, url(r'^projects/(\d+)$' , projects.ProjectView.as_view() )
#?view=resource
#?view=docs
#Corpora
, url(r'^projects/(\d+)/corpora/(\d+)$' , corpora.CorpusView.as_view() )
#?view=journal
#?view=title
#?view=analytics
#Sources
#, url(r'^projects/(\d+)/corpora/(\d+)/sources$' , corpora.CorpusSources.as_view() )
#, url(r'^projects/(\d+)/corpora/(\d+)/sources/(\d+)$' , corpora.CorpusSourceView.as_view() )
#Facets
, url(r'^projects/(\d+)/corpora/(\d+)/facets$' , nodes.CorpusFacet.as_view() )
#Favorites
, url(r'^projects/(\d+)/corpora/(\d+)/favorites$', nodes.CorpusFavorites.as_view() )
#Metrics
, url(r'^projects/(\d+)/corpora/(\d+)/metrics$', metrics.CorpusMetrics.as_view() )
#GraphExplorer
, url(r'^projects/(\d+)/corpora/(\d+)/explorer$' , Graph.as_view())
# data for graph explorer (json)
# GET /api/projects/43198/corpora/111107/explorer?
# Corresponding view is : /projects/43198/corpora/111107/explorer?
# Parameters (example):
# explorer?field1=ngrams&field2=ngrams&distance=conditional&bridgeness=5&start=1996-6-1&end=2002-10-5
# Ngrams # Ngrams
, url(r'^ngrams/?$' , ngrams.ApiNgrams.as_view() ) , url(r'^ngrams/?$' , ngrams.ApiNgrams.as_view() )
...@@ -63,10 +91,5 @@ urlpatterns = [ url(r'^nodes$' , nodes.NodeListResource.as_view() ...@@ -63,10 +91,5 @@ urlpatterns = [ url(r'^nodes$' , nodes.NodeListResource.as_view()
, url(r'^ngramlists/maplist$' , ngramlists.MapListGlance.as_view() ) , url(r'^ngramlists/maplist$' , ngramlists.MapListGlance.as_view() )
# fast access to maplist, similarly formatted for termtable # fast access to maplist, similarly formatted for termtable
, url(r'^projects/(\d+)/corpora/(\d+)/explorer$' , Graph.as_view())
# data for graph explorer (json)
# GET /api/projects/43198/corpora/111107/explorer?
# Corresponding view is : /projects/43198/corpora/111107/explorer?
# Parameters (example):
# explorer?field1=ngrams&field2=ngrams&distance=conditional&bridgeness=5&start=1996-6-1&end=2002-10-5
] ]
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment