[FEAT] new view/route for adding an ngram via PUT...

[FEAT] new view/route for adding an ngram via PUT 'api/ngrams/?text=mynewngramstring' or adding and indexing via PUT 'api/ngrams/?text=mynewngramstring&corpus=42'

[FEAT] new view/route for adding an ngram via PUT...
[FEAT] new view/route for adding an ngram via PUT 'api/ngrams/?text=mynewngramstring' or adding and indexing via PUT 'api/ngrams/?text=mynewngramstring&corpus=42'
95e0f482 · Romain Loth · b37575f7 · 95e0f482
Commit 95e0f482 authored Jul 21, 2016 by Romain Loth
Hide whitespace changes
Inline Side-by-side

Showing with 116 additions and 0 deletions

analytics.py gargantext/views/api/analytics.py +116 -0

No files found.
--- a/gargantext/views/api/analytics.py
+++ b/gargantext/views/api/analytics.py
@@ -267,6 +267,9 @@ class NodeNgramsQueries(APIView):


 # ?? TODO put in an ngrams.py file separately ?
+# remark: 
+#     the put() function is not used for analytics
+#     but was added here to use the same url "api/ngrams"
 class ApiNgrams(APIView):

    def get(self, request):
@@ -323,6 +326,119 @@ class ApiNgrams(APIView):
        })


+    def put(self, request):
+        """
+        Basic external access for *creating an ngram*
+        ---------------------------------------------
+
+         1 - checks user authentication before any changes
+             (responds 401 "Unauthorized" if not authenticated)
+         2 - adds the ngram to Ngram table in DB
+             (or reuses the id of an existing Ngram with the same terms)
+         3 - (if corpus param is present)
+             adds the ngram doc counts to NodeNgram table in DB
+             (aka "index the ngram" through the docs of the corpus)
+             (responds 401 if the corpus user_id is not the request user id)
+         4 - returns json (status 200) with:
+             'msg'   => a success msg
+             'text'  => the initial text content
+             'term'  => the normalized text content
+             'id'    => the ngram_id (new or pre-existing)
+             'count' => the number of docs with the ngram in the corpus
+                        (if corpus param is present, else an explanatory string)
+             on any exception in steps 2-3: json {'msg', 'text'} with status 400
+        possible inline parameters
+        --------------------------
+        @param    text=<ngram_string>         [required, else ValidationException]
+        @param    corpus=<CORPUS_ID>          [optional]
+        """
+
+        from sqlalchemy     import insert
+        from sqlalchemy.exc import IntegrityError
+        from re             import findall
+        # NOTE(review): insert and IntegrityError are not used in this method
+        from gargantext.util.db_cache  import cache
+
+        # these imports apply the same text cleaning procedures as the toolchain
+        from gargantext.util.toolchain.parsing           import normalize_chars
+        from gargantext.util.toolchain.ngrams_extraction import normalize_forms
+
+        # for indexing
+        from gargantext.util.toolchain.ngrams_addition   import index_new_ngrams
+
+        # 1 - check user authentication
+        if not request.user.is_authenticated():
+            res = HttpResponse("Unauthorized")
+            res.status_code = 401
+            return res
+
+        # the request parameters
+        params = get_parameters(request)
+        # debug trace of the received parameters (printed to stdout)
+        print("PARAMS", [(i,v) for (i,v) in params.items()])
+
+        if 'text' in params:
+            original_text = str(params.pop('text'))
+            ngram_str = normalize_forms(normalize_chars(original_text))
+        else:
+            raise ValidationException('The route PUT /api/ngrams/ is used to create a new ngram\
+                                        It requires a "text" parameter,\
+                                        for instance /api/ngrams?text=hydrometallurgy')
+        # NOTE(review): int() and the cache lookup below run outside the try => no json 400 for their errors
+        # if we have a 'corpus' param (to do the indexing)...
+        do_indexation = False
+        if 'corpus' in params:
+            # we retrieve the corpus...
+            corpus_id = int(params.pop('corpus'))
+            corpus_node = cache.Node[corpus_id]
+            # and the user must own the corpus (same user_id)
+            if request.user.id == corpus_node.user_id:
+                do_indexation = True
+            else:
+                res = HttpResponse("Unauthorized")
+                res.status_code = 401
+                return res
+
+        # number of "words" in the ngram (= runs of spaces + 1)
+        ngram_size = len(findall(r' +', ngram_str)) + 1
+
+        # do the additions (lookup, optional insert, optional indexing)
+        try:
+            log_msg = ""
+            ngram_id = None
+            preexisting = session.query(Ngram).filter(Ngram.terms==ngram_str).first()
+            if preexisting is not None:
+                ngram_id = preexisting.id
+                log_msg += "ngram already existed (id %i)\n" % ngram_id
+            else:
+                # 2 - insert into Ngrams (only when no Ngram has these terms yet)
+                new_ngram = Ngram(terms=ngram_str, n=ngram_size)
+                session.add(new_ngram)
+                session.commit()
+                ngram_id = new_ngram.id
+                log_msg += "ngram was added with new id %i\n" % ngram_id
+
+            # 3 - index the term (only when a corpus param was given and accepted)
+            if do_indexation:
+                n_added = index_new_ngrams([ngram_id], corpus_node)
+                log_msg += 'ngram indexed in corpus %i\n' % corpus_id
+            # 4 - build the json response
+            return JsonHttpResponse({
+                'msg': log_msg,
+                'text': original_text,
+                'term': ngram_str,
+                'id' : ngram_id,
+                'count': n_added if do_indexation else 'no corpus provided for indexation'
+                }, 200)
+
+        # just in case: any error raised in the try block is reported as json with status 400
+        except Exception as e:
+            return JsonHttpResponse({
+                'msg': str(e),
+                'text': original_text
+                }, 400)
+
+
 _operators_dict = {
    "=":                lambda field, value: (field == value),
    "!=":               lambda field, value: (field != value),