Commit 95e0f482 authored by Romain Loth

[FEAT] new view/route for adding an ngram via PUT...

[FEAT] new view/route for adding an ngram via PUT 'api/ngrams/?text=mynewngramstring' or adding and indexing via PUT 'api/ngrams/?text=mynewngramstring&corpus=42'
parent b37575f7
......@@ -267,6 +267,9 @@ class NodeNgramsQueries(APIView):
# ?? TODO put in an ngrams.py file separately ?
# remark:
# the put() function is not used for analytics
# but was added here to use the same url "api/ngrams"
class ApiNgrams(APIView):
def get(self, request):
......@@ -323,6 +326,119 @@ class ApiNgrams(APIView):
})
def put(self, request):
    """
    Basic external access for *creating an ngram*
    ---------------------------------------------

     1 - checks user authentication before any changes

     2 - adds the ngram to Ngram table in DB
         (if an ngram with the same normalized terms already exists,
          its id is reused and nothing is inserted)

     3 - (if corpus param is present)
         adds the ngram doc counts to NodeNgram table in DB
         (aka "index the ngram" through the docs of the corpus)

     4 - returns json with:
         'msg'   => a success msg
         'text'  => the initial text content
         'term'  => the normalized text content
         'id'    => the new ngram_id
         'count' => the number of docs with the ngram in the corpus
                    (if corpus param is present)

    possible inline parameters
    --------------------------
    @param    text=<ngram_string>         [required]
    @param    corpus=<CORPUS_ID>          [optional]

    responses
    ---------
     - 401 "Unauthorized" (plain text) if the user is not authenticated,
       or is not the owner (user_id) of the requested corpus
     - ValidationException is raised if 'text' is missing or empty once
       normalized, or if 'corpus' is not an integer
     - 400 json {'msg', 'text'} if anything fails during the DB
       insertion / indexation (the session is rolled back first)
     - 200 json described in (4) otherwise
    """
    import logging
    from re import findall
    from gargantext.util.db_cache import cache

    # import will implement the same text cleaning procedures as toolchain
    from gargantext.util.toolchain.parsing import normalize_chars
    from gargantext.util.toolchain.ngrams_extraction import normalize_forms

    # for indexing
    from gargantext.util.toolchain.ngrams_addition import index_new_ngrams

    # 1 - check user authentication
    if not request.user.is_authenticated():
        res = HttpResponse("Unauthorized")
        res.status_code = 401
        return res

    # the params
    params = get_parameters(request)
    # debug-level log instead of an unconditional print to stdout
    logging.getLogger(__name__).debug(
        "PUT api/ngrams params: %s", list(params.items())
    )

    if 'text' not in params:
        raise ValidationException(
            'The route PUT /api/ngrams/ is used to create a new ngram. '
            'It requires a "text" parameter, '
            'for instance /api/ngrams?text=hydrometallurgy'
        )

    original_text = str(params.pop('text'))
    ngram_str = normalize_forms(normalize_chars(original_text))

    # refuse to store an empty term (e.g. ?text= or whitespace only input)
    if not ngram_str:
        raise ValidationException(
            'The "text" parameter is empty after normalization'
        )

    # if we have a 'corpus' param (to do the indexing)...
    do_indexation = False
    if 'corpus' in params:
        # we retrieve the corpus...
        try:
            corpus_id = int(params.pop('corpus'))
        except (TypeError, ValueError):
            raise ValidationException(
                'The "corpus" parameter must be an integer corpus id'
            )
        corpus_node = cache.Node[corpus_id]

        # and the user must also have rights on the corpus
        # NOTE(review): 403 would describe this case better, 401 is kept
        #               so existing clients see the same status code
        if request.user.id != corpus_node.user_id:
            res = HttpResponse("Unauthorized")
            res.status_code = 401
            return res
        do_indexation = True

    # number of "words" in the ngram (runs of spaces are the separators)
    ngram_size = len(findall(r' +', ngram_str)) + 1

    # do the additions
    try:
        log_msg = ""

        preexisting = (
            session.query(Ngram).filter(Ngram.terms == ngram_str).first()
        )

        if preexisting is not None:
            ngram_id = preexisting.id
            log_msg += "ngram already existed (id %i)\n" % ngram_id
        else:
            # 2 - insert into Ngrams
            new_ngram = Ngram(terms=ngram_str, n=ngram_size)
            session.add(new_ngram)
            session.commit()
            ngram_id = new_ngram.id
            log_msg += "ngram was added with new id %i\n" % ngram_id

        # 3 - index the term
        count = 'no corpus provided for indexation'
        if do_indexation:
            count = index_new_ngrams([ngram_id], corpus_node)
            log_msg += 'ngram indexed in corpus %i\n' % corpus_id

        return JsonHttpResponse({
            'msg': log_msg,
            'text': original_text,
            'term': ngram_str,
            'id': ngram_id,
            'count': count,
        }, 200)

    # just in case
    except Exception as e:
        # a failed flush/commit leaves the session in an invalid state:
        # roll it back so it stays usable after this request
        session.rollback()
        return JsonHttpResponse({
            'msg': str(e),
            'text': original_text,
        }, 400)
_operators_dict = {
"=": lambda field, value: (field == value),
"!=": lambda field, value: (field != value),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment