Commit b7f451ba authored by Mathieu Rodic

[FEATURE] Creating a route to GET duplicate documents

https://forge.iscpif.fr/issues/1360
parent 403a670d
......@@ -136,9 +136,8 @@ _ngrams_order_columns = {
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework.exceptions import APIException
from rest_framework.exceptions import APIException as _APIException
_APIException = APIException
class APIException(_APIException):
def __init__(self, message, code=500):
self.status_code = code
......@@ -167,6 +166,53 @@ def Root(request, format=None):
})
class NodesChildrenDuplicates(APIView):
    """Report groups of children of a node that share the same metadata values."""

    def get(self, request, node_id):
        """Return the duplicated metadata value combinations among children.

        The GET parameter ``keys`` is a comma-separated list of metadata
        names. Children of ``node_id`` are grouped by their values for those
        metadata, and only groups containing more than one node are returned,
        most frequent first.

        The response is a JSON list of ``{'count': ..., 'values': [...]}``
        objects. The order of ``values`` follows the order in which the
        metadata rows come back from the database, which is not necessarily
        the order of ``keys`` in the request.

        Raises:
            APIException: with code 400 when ``keys`` is missing, or when
                none of the requested keys matches a known metadata.
        """
        # input validation
        if 'keys' not in request.GET:
            raise APIException('Missing GET parameter: "keys"', 400)
        keys = request.GET['keys'].split(',')
        # metadata retrieval; the rows are materialized once so the columns
        # and the joins below are built from the very same sequence (running
        # the unordered query a second time could return a different order)
        metadata_list = list(
            Metadata
            .query(Metadata)
            .filter(Metadata.name.in_(keys))
        )
        if not metadata_list:
            # without any grouping column the query would collapse into a
            # single ungrouped count and report every child as a duplicate
            raise APIException(
                'None of the requested keys matches a known metadata', 400
            )
        # one Node_Metadata alias per requested metadata, kept paired with it
        joined_metadata = [
            (aliased(Node_Metadata), metadata)
            for metadata in metadata_list
        ]
        # what shall we retrieve? the typed value column of each alias
        columns = [
            getattr(node_metadata, 'value_' + metadata.type)
            for node_metadata, metadata in joined_metadata
        ]
        # build the query!
        duplicates_query = (get_session()
            .query(func.count(), *columns)
            .select_from(Node)
        )
        for node_metadata, metadata in joined_metadata:
            # NOTE: the filter on the joined alias discards the NULL rows
            # produced by the outer join, so nodes lacking this metadata
            # are excluded from the result
            duplicates_query = duplicates_query.outerjoin(
                node_metadata, node_metadata.node_id == Node.id
            )
            duplicates_query = duplicates_query.filter(
                node_metadata.metadata_id == metadata.id
            )
        duplicates_query = duplicates_query.filter(Node.parent_id == node_id)
        duplicates_query = duplicates_query.group_by(*columns)
        duplicates_query = duplicates_query.order_by(func.count().desc())
        # keep only value combinations shared by at least two nodes
        duplicates_query = duplicates_query.having(func.count() > 1)
        # return results
        return JsonHttpResponse([
            {
                'count': duplicate[0],
                'values': duplicate[1:],
            }
            for duplicate in duplicates_query
        ])
class NodesChildrenMetatadata(APIView):
def get(self, request, node_id):
......
......@@ -48,6 +48,7 @@ urlpatterns = patterns('',
url(r'^api$', gargantext_web.api.Root),
url(r'^api/nodes/(\d+)/children/metadata$', gargantext_web.api.NodesChildrenMetatadata.as_view()),
url(r'^api/nodes/(\d+)/children/queries$', gargantext_web.api.NodesChildrenQueries.as_view()),
url(r'^api/nodes/(\d+)/children/duplicates$', gargantext_web.api.NodesChildrenDuplicates.as_view()),
url(r'^api/nodes/(\d+)$', gargantext_web.api.Nodes.as_view()),
url(r'^api/nodes$', gargantext_web.api.NodesList.as_view()),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment