Commit fef1f1b1 authored by delanoe

Merge branch 'romain-refactoring' into refactoring

parents ef4ba14a bbaa365b
...@@ -36,6 +36,8 @@ NODETYPES = [ ...@@ -36,6 +36,8 @@ NODETYPES = [
'CVALUE', # 12 'CVALUE', # 12
'TFIDF-CORPUS', # 13 'TFIDF-CORPUS', # 13
'TFIDF-GLOBAL', # 14 'TFIDF-GLOBAL', # 14
# docs subset
'FAVORITES' # 15
] ]
INDEXED_HYPERDATA = { INDEXED_HYPERDATA = {
......
...@@ -124,6 +124,12 @@ class Node(Base): ...@@ -124,6 +124,12 @@ class Node(Base):
def resources(self): def resources(self):
"""Return all the resources attached to a given node. """Return all the resources attached to a given node.
Mainly used for corpora. Mainly used for corpora.
example:
[{'extracted': True,
'path': '/home/me/gargantext/uploads/corpora/0c/0c5b/0c5b50/0c5b50ad8ebdeb2ae33d8e54141a52ee_Corpus_Europresse-Français-2015-12-11.zip',
'type': 1,
'url': None}]
""" """
if 'resources' not in self.hyperdata: if 'resources' not in self.hyperdata:
self['resources'] = MutableList() self['resources'] = MutableList()
...@@ -132,6 +138,14 @@ class Node(Base): ...@@ -132,6 +138,14 @@ class Node(Base):
def add_resource(self, type, path=None, url=None): def add_resource(self, type, path=None, url=None):
"""Attach a resource to a given node. """Attach a resource to a given node.
Mainly used for corpora. Mainly used for corpora.
this just adds metadata to the CORPUS node (NOT for adding documents)
example:
{'extracted': True,
'path': '/home/me/gargantext/uploads/corpora/0c/0c5b/0c5b50/0c5b50ad8ebdeb2ae33d8e54141a52ee_Corpus_Europresse-Français-2015-12-11.zip',
'type': 1,
'url': None}
""" """
self.resources().append(MutableDict( self.resources().append(MutableDict(
{'type': type, 'path':path, 'url':url, 'extracted': False} {'type': type, 'path':path, 'url':url, 'extracted': False}
...@@ -173,10 +187,9 @@ class Node(Base): ...@@ -173,10 +187,9 @@ class Node(Base):
{'action':action, 'progress':progress, 'complete':complete, 'error':error, 'date':date} {'action':action, 'progress':progress, 'complete':complete, 'error':error, 'date':date}
)) ))
return self['statuses'][-1] return self['statuses'][-1]
class NodeNode(Base): class NodeNode(Base):
__tablename__ = 'nodes_nodes' __tablename__ = 'nodes_nodes'
id = Column(Integer, primary_key=True)
node1_id = Column(Integer, ForeignKey(Node.id, ondelete='CASCADE'), primary_key=True) node1_id = Column(Integer, ForeignKey(Node.id, ondelete='CASCADE'), primary_key=True)
node2_id = Column(Integer, ForeignKey(Node.id, ondelete='CASCADE'), primary_key=True) node2_id = Column(Integer, ForeignKey(Node.id, ondelete='CASCADE'), primary_key=True)
score = Column(Float(precision=24)) score = Column(Float(precision=24))
...@@ -56,7 +56,15 @@ def parse_extract_indexhyperdata(corpus): ...@@ -56,7 +56,15 @@ def parse_extract_indexhyperdata(corpus):
print('CORPUS #%d: extracted ngrams' % (corpus.id)) print('CORPUS #%d: extracted ngrams' % (corpus.id))
index_hyperdata(corpus) index_hyperdata(corpus)
print('CORPUS #%d: indexed hyperdata' % (corpus.id)) print('CORPUS #%d: indexed hyperdata' % (corpus.id))
# -> 'favorites' node
favs = corpus.add_child(
typename='FAVORITES', name='favorite docs in "%s"' % corpus.name
)
session.add(favs)
session.commit()
print('CORPUS #%d: [%s] new favorites node #%i' % (corpus.id, t(), favs.id))
# ------------------------------- # -------------------------------
# temporary ngram lists workflow # temporary ngram lists workflow
# ------------------------------- # -------------------------------
......
from gargantext.models import Node, Ngram, NodeNgram, NodeNodeNgram from gargantext.models import Node, Ngram, NodeNgram, NodeNodeNgram, NodeNode
from gargantext.constants import NODETYPES from gargantext.constants import NODETYPES
from gargantext.util.db import session, delete, func from gargantext.util.db import session, delete, func, bulk_insert
from gargantext.util.db_cache import cache, or_ from gargantext.util.db_cache import cache, or_
from gargantext.util.validation import validate from gargantext.util.validation import validate
from gargantext.util.http import ValidationException, APIView \ from gargantext.util.http import ValidationException, APIView \
, get_parameters, JsonHttpResponse, Http404 , get_parameters, JsonHttpResponse, Http404\
, HttpResponse
from collections import defaultdict from collections import defaultdict
...@@ -73,7 +74,7 @@ class NodeListResource(APIView): ...@@ -73,7 +74,7 @@ class NodeListResource(APIView):
] ]
}) })
def post(self, request): def post(self, request):
"""Create a new node. """Create a new node.
NOT IMPLEMENTED NOT IMPLEMENTED
...@@ -113,7 +114,7 @@ class NodeListHaving(APIView): ...@@ -113,7 +114,7 @@ class NodeListHaving(APIView):
def get(self, request, corpus_id): def get(self, request, corpus_id):
parameters = get_parameters(request) parameters = get_parameters(request)
parameters = validate(parameters, {'score': str, 'ngram_ids' : list} ) parameters = validate(parameters, {'score': str, 'ngram_ids' : list} )
try : try :
ngram_ids = [int(n) for n in parameters['ngram_ids'].split(',')] ngram_ids = [int(n) for n in parameters['ngram_ids'].split(',')]
except : except :
...@@ -121,9 +122,9 @@ class NodeListHaving(APIView): ...@@ -121,9 +122,9 @@ class NodeListHaving(APIView):
limit=5 limit=5
nodes_list = [] nodes_list = []
corpus = session.query(Node).filter(Node.id==corpus_id).first() corpus = session.query(Node).filter(Node.id==corpus_id).first()
tfidf_id = ( session.query( Node.id ) tfidf_id = ( session.query( Node.id )
.filter( Node.typename == "TFIDF-CORPUS" .filter( Node.typename == "TFIDF-CORPUS"
, Node.parent_id == corpus.id , Node.parent_id == corpus.id
...@@ -131,7 +132,7 @@ class NodeListHaving(APIView): ...@@ -131,7 +132,7 @@ class NodeListHaving(APIView):
.first() .first()
) )
tfidf_id = tfidf_id[0] tfidf_id = tfidf_id[0]
print(tfidf_id) print(tfidf_id)
# request data # request data
...@@ -199,6 +200,119 @@ class NodeResource(APIView): ...@@ -199,6 +200,119 @@ class NodeResource(APIView):
return JsonHttpResponse({'deleted': result.rowcount}) return JsonHttpResponse({'deleted': result.rowcount})
class CorpusFavorites(APIView):
    """Retrieve/update/delete a corpus node's associated favorite docs

    (url: GET /api/nodes/<corpus_id>/favorites)
    (url: DEL /api/nodes/<corpus_id>/favorites?docs[]=doc1,doc2)
    (url: PUT /api/nodes/<corpus_id>/favorites?docs[]=doc1,doc2)

    Favorites are stored as NodeNode rows whose node1_id is the corpus'
    FAVORITES child node and whose node2_id is the favorite document node.
    """

    @staticmethod
    def _parse_doc_ids(raw_docs):
        """Turn the raw 'docs' request parameter into unique integer ids.

        Accepts either a comma-separated string ("53,54") or an iterable
        of tokens. Blank tokens are skipped (so an empty parameter gives
        an empty list) and duplicates are dropped, keeping first-seen order.

        Raises:
            ValidationException: if a token is not an integer.
        """
        if isinstance(raw_docs, str):
            tokens = raw_docs.split(',')
        else:
            tokens = list(raw_docs or [])

        doc_ids = []
        seen = set()
        for token in tokens:
            text = str(token).strip()
            if not text:
                continue
            try:
                doc_id = int(text)
            except ValueError:
                raise ValidationException(
                    "Invalid document id in 'docs' parameter: %r" % text)
            if doc_id not in seen:
                seen.add(doc_id)
                doc_ids.append(doc_id)
        return doc_ids

    def _get_fav_node(self, corpus_id):
        """Return the FAVORITES child node of the given corpus (or None).

        Side effect: stores the corpus node in self.corpus.

        NB: fav_node can be None if no node is defined

        this query could be faster if we didn't check that corpus_id is a CORPUS
        ie: session.query(Node)
            .filter(Node.parent_id==corpus_id)
            .filter(Node.typename =='FAVORITES')

        Raises:
            ValidationException: if the node is not of type CORPUS.
        """
        corpus = cache.Node[corpus_id]
        if corpus.typename != 'CORPUS':
            raise ValidationException(
                "Only nodes of type CORPUS can accept favorites queries" +
                " (but this node has type %s)..." % corpus.typename)

        self.corpus = corpus
        return self.corpus.children('FAVORITES').first()

    def get(self, request, corpus_id):
        """Return the ids of the corpus' favorite documents as JSON.

        The response is {'doc_ids': [...]}; a 'warning' key is added (and
        'doc_ids' is empty) when the corpus has no FAVORITES node.
        """
        fav_node = self._get_fav_node(corpus_id)

        if fav_node is None:
            return JsonHttpResponse({
                'warning': 'No favorites node is defined for this corpus (\'%s\')'
                           % self.corpus.name,
                'doc_ids': []
            })

        # each docnode associated to the favnode of this corpusnode
        q = (session
             .query(NodeNode.node2_id)
             .filter(NodeNode.node1_id == fav_node.id))
        doc_ids = [row.node2_id for row in q.all()]
        return JsonHttpResponse({'doc_ids': doc_ids})

    def delete(self, request, corpus_id):
        """Remove documents from the corpus' favorites.

        DELETE http://localhost:8000/api/nodes/2/favorites?docs=53,54
        (will delete docs 53 and 54 from the favorites of corpus 2)

        Only the favorite links are removed, not the document nodes.
        Returns {'count_removed': <number of links deleted>}.
        """
        # NOTE(review): no authentication check is enforced here (the
        # original check was commented out) -- confirm this is intended.
        fav_node = self._get_fav_node(corpus_id)

        req_params = validate(
            get_parameters(request),
            {'docs': list, 'default': ""}
        )
        nodeids_to_delete = self._parse_doc_ids(req_params['docs'])

        # nothing can be removed without a favorites node or without ids
        if fav_node is None or not nodeids_to_delete:
            return JsonHttpResponse({'count_removed': 0})

        # it deletes from favourites but not from DB
        result = session.execute(
            delete(NodeNode)
            .where(NodeNode.node1_id == fav_node.id)
            .where(NodeNode.node2_id.in_(nodeids_to_delete))
        )
        session.commit()
        return JsonHttpResponse({'count_removed': result.rowcount})

    def put(self, request, corpus_id, check_each_doc=True):
        """Add documents to the corpus' favorites.

        PUT /api/nodes/<corpus_id>/favorites?docs=53,54

        When check_each_doc is true, every requested id must be a DOCUMENT
        node whose parent is this corpus. Ids that are already favorites
        are skipped, because (node1_id, node2_id) is the primary key of
        NodeNode. Returns {'count_added': <number of links inserted>}.

        Raises:
            ValidationException: if the corpus has no FAVORITES node, if
                an id is not an integer, or if a requested doc fails the
                check_each_doc verification.
        """
        # NOTE(review): no authentication check is enforced here (the
        # original check was commented out) -- confirm this is intended.
        fav_node = self._get_fav_node(corpus_id)
        if fav_node is None:
            raise ValidationException(
                "No favorites node is defined for this corpus ('%s')"
                % self.corpus.name)

        req_params = validate(
            get_parameters(request),
            {'docs': list, 'default': ""}
        )
        nodeids_to_add = self._parse_doc_ids(req_params['docs'])
        if not nodeids_to_add:
            return JsonHttpResponse({'count_added': 0})

        if check_each_doc:
            # check that these really are documents of the right corpus;
            # only the requested ids are looked up, not the whole corpus
            known_docs_q = (session
                            .query(Node.id)
                            .filter(Node.parent_id == corpus_id)
                            .filter(Node.typename == 'DOCUMENT')
                            .filter(Node.id.in_(nodeids_to_add))
                            )
            known_ids = {known_doc.id for known_doc in known_docs_q.all()}
            rejected_list = [doc_node_id for doc_node_id in nodeids_to_add
                             if doc_node_id not in known_ids]
            if rejected_list:
                raise ValidationException(
                    "Error on some requested docs: %s (Only nodes of type 'doc' AND belonging to corpus %i can be added to favorites.)"
                    % (str(rejected_list), int(corpus_id)))

        # skip docs that are already favorites to avoid primary key conflicts
        already_q = (session
                     .query(NodeNode.node2_id)
                     .filter(NodeNode.node1_id == fav_node.id)
                     .filter(NodeNode.node2_id.in_(nodeids_to_add)))
        already_favorites = {row.node2_id for row in already_q.all()}
        new_ids = [doc_node_id for doc_node_id in nodeids_to_add
                   if doc_node_id not in already_favorites]

        # add them
        if new_ids:
            bulk_insert(
                NodeNode,
                ('node1_id', 'node2_id', 'score'),
                ((fav_node.id, doc_node_id, 1.0) for doc_node_id in new_ids)
            )

        return JsonHttpResponse({'count_added': len(new_ids)})
class CorpusFacet(APIView): class CorpusFacet(APIView):
"""Loop through a corpus node's docs => do counts by a hyperdata field """Loop through a corpus node's docs => do counts by a hyperdata field
(url: /api/nodes/<node_id>/facets?hyperfield=<journal>) (url: /api/nodes/<node_id>/facets?hyperfield=<journal>)
...@@ -264,6 +378,3 @@ class CorpusFacet(APIView): ...@@ -264,6 +378,3 @@ class CorpusFacet(APIView):
# // if subfield not in corpus.aggs: # // if subfield not in corpus.aggs:
# // corpus.aggs[subfield] = xcounts # // corpus.aggs[subfield] = xcounts
return (xcounts, total) return (xcounts, total)
...@@ -5,11 +5,14 @@ from . import ngramlists ...@@ -5,11 +5,14 @@ from . import ngramlists
urlpatterns = [ url(r'^nodes$' , nodes.NodeListResource.as_view()) urlpatterns = [ url(r'^nodes$' , nodes.NodeListResource.as_view())
, url(r'^nodes/(\d+)$' , nodes.NodeResource.as_view() ) , url(r'^nodes/(\d+)$' , nodes.NodeResource.as_view() )
, url(r'^nodes/(\d+)/facets$' , nodes.CorpusFacet.as_view() )
, url(r'^nodes/(\d+)/having$' , nodes.NodeListHaving.as_view() ) , url(r'^nodes/(\d+)/having$' , nodes.NodeListHaving.as_view() )
# get a list of ngram_ids or ngram_infos by list_id # get a list of ngram_ids or ngram_infos by list_id
# url(r'^ngramlists/(\d+)$', ngramlists.List.as_view()), # url(r'^ngramlists/(\d+)$', ngramlists.List.as_view()),
, url(r'^nodes/(\d+)/facets$' , nodes.CorpusFacet.as_view() )
, url(r'^nodes/(\d+)/favorites$', nodes.CorpusFavorites.as_view() )
# in these two routes the node is supposed to be a *corpus* node
, url(r'^ngramlists/change$', ngramlists.ListChange.as_view() ) , url(r'^ngramlists/change$', ngramlists.ListChange.as_view() )
# add or remove ngram from a list # add or remove ngram from a list
# ex: add <=> PUT ngramlists/change?list=42&ngrams=1,2 # ex: add <=> PUT ngramlists/change?list=42&ngrams=1,2
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment