[TIDY] move ApiNgrams class from views/api/analytics.py to a new file:...

[TIDY] move ApiNgrams class from views/api/analytics.py to a new file: views/api/ngrams.py, because it serves the generic route /api/ngrams and is not used only for analytics

[TIDY] move ApiNgrams class from views/api/analytics.py to a new file:...
[TIDY] move ApiNgrams class from views/api/analytics.py to a new file: views/api/ngrams.py, because it serves the generic route /api/ngrams and is not used only for analytics
ff38f3d1 · Romain Loth · 213d626b · ff38f3d1 · ff38f3d1 · ff38f3d1
Commit ff38f3d1 authored Jul 21, 2016 by Romain Loth
Hide whitespace changes
Inline Side-by-side

Showing with 180 additions and 177 deletions

analytics.py gargantext/views/api/analytics.py +0 -176

ngrams.py gargantext/views/api/ngrams.py +176 -0

urls.py gargantext/views/api/urls.py +4 -1

No files found.
--- a/gargantext/views/api/analytics.py
+++ b/gargantext/views/api/analytics.py
 from gargantext.util.http       import ValidationException, APIView \
                                     , get_parameters, JsonHttpResponse, Http404\
                                     , HttpResponse
@@ -265,180 +263,6 @@ class NodeNgramsQueries(APIView):
            return CsvHttpResponse(sorted(result.items()), ('date', 'value'), 201)
-# ?? TODO put in an ngrams.py file separately ?
-# remark: 
-#     the post() function is not used for analytics
-#     but was added here to use the same url "api/ngrams"
-class ApiNgrams(APIView):
-    def get(self, request):
-        # parameters retrieval and validation
-        startwith = request.GET.get('startwith', '').replace("'", "\\'")
-        # query ngrams
-        ParentNode = aliased(Node)
-        ngrams_query = (session
-            .query(Ngram.id, Ngram.terms, func.sum(NodeNgram.weight).label('count'))
-            .join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
-            .join(Node, Node.id == NodeNgram.node_id)
-            .group_by(Ngram.id, Ngram.terms)
-            # .group_by(Ngram)
-            .order_by(func.sum(NodeNgram.weight).desc(), Ngram.terms)
-        )
-        # filters
-        if 'startwith' in request.GET:
-            ngrams_query = ngrams_query.filter(Ngram.terms.startswith(request.GET['startwith']))
-        if 'contain' in request.GET:
-            print("request.GET['contain']")
-            print(request.GET['contain'])
-            ngrams_query = ngrams_query.filter(Ngram.terms.contains(request.GET['contain']))
-        if 'corpus_id' in request.GET:
-            corpus_id_list = list(map(int, request.GET.get('corpus_id', '').split(',')))
-            if corpus_id_list and corpus_id_list[0]:
-                ngrams_query = ngrams_query.filter(Node.parent_id.in_(corpus_id_list))
-        if 'ngram_id' in request.GET:
-            ngram_id_list = list(map(int, request.GET.get('ngram_id', '').split(',')))
-            if ngram_id_list and ngram_id_list[0]:
-                ngrams_query = ngrams_query.filter(Ngram.id.in_(ngram_id_list))
-        # pagination
-        offset = int(request.GET.get('offset', 0))
-        limit = int(request.GET.get('limit', 20))
-        total = ngrams_query.count()
-        # return formatted result
-        return JsonHttpResponse({
-            'pagination': {
-                'offset': offset,
-                'limit': limit,
-                'total': total,
-            },
-            'data': [
-                {
-                    'id': ngram.id,
-                    'terms': ngram.terms,
-                    'count': ngram.count,
-                }
-                for ngram in ngrams_query[offset : offset+limit]
-            ],
-        })
-    def put(self, request):
-        """
-        Basic external access for *creating an ngram*
-        ---------------------------------------------
-         1 - checks user authentication before any changes
-         2 - adds the ngram to Ngram table in DB
-         3 - (if corpus param is present)
-             adds the ngram doc counts to NodeNgram table in DB
-             (aka "index the ngram" throught the docs of the corpus)
-         4 - returns json with:
-             'msg'   => a success msg 
-             'text'  => the initial text content
-             'term'  => the normalized text content
-             'id'    => the new ngram_id
-             'count' => the number of docs with the ngram in the corpus
-                        (if corpus param is present)
-        possible inline parameters
-        --------------------------
-        @param    text=<ngram_string>         [required]
-        @param    corpus=<CORPUS_ID>          [optional]
-        """
-        from sqlalchemy     import insert
-        from sqlalchemy.exc import IntegrityError
-        from re             import findall
-        from gargantext.util.db_cache  import cache
-        # import will implement the same text cleaning procedures as toolchain
-        from gargantext.util.toolchain.parsing           import normalize_chars
-        from gargantext.util.toolchain.ngrams_extraction import normalize_forms
-        # for indexing
-        from gargantext.util.toolchain.ngrams_addition   import index_new_ngrams
-        # 1 - check user authentication
-        if not request.user.is_authenticated():
-            res = HttpResponse("Unauthorized")
-            res.status_code = 401
-            return res
-        # the params
-        params = get_parameters(request)
-        print("PARAMS", [(i,v) for (i,v) in params.items()])
-        if 'text' in params:
-            original_text = str(params.pop('text'))
-            ngram_str = normalize_forms(normalize_chars(original_text))
-        else:
-            raise ValidationException('The route PUT /api/ngrams/ is used to create a new ngram\
-                                        It requires a "text" parameter,\
-                                        for instance /api/ngrams?text=hydrometallurgy')
-        # if we have a 'corpus' param (to do the indexing)...
-        do_indexation = False
-        if 'corpus' in params:
-            # we retrieve the corpus...
-            corpus_id = int(params.pop('corpus'))
-            corpus_node = cache.Node[corpus_id]
-            # and the user must also have rights on the corpus
-            if request.user.id == corpus_node.user_id:
-                do_indexation = True
-            else:
-                res = HttpResponse("Unauthorized")
-                res.status_code = 401
-                return res
-        # number of "words" in the ngram
-        ngram_size = len(findall(r' +', ngram_str)) + 1
-        # do the additions
-        try:
-            log_msg = ""
-            ngram_id = None
-            preexisting = session.query(Ngram).filter(Ngram.terms==ngram_str).first()
-            if preexisting is not None:
-                ngram_id = preexisting.id
-                log_msg += "ngram already existed (id %i)\n" % ngram_id
-            else:
-                # 2 - insert into Ngrams
-                new_ngram = Ngram(terms=ngram_str, n=ngram_size)
-                session.add(new_ngram)
-                session.commit()
-                ngram_id = new_ngram.id
-                log_msg += "ngram was added with new id %i\n" % ngram_id
-            # 3 - index the term
-            if do_indexation:
-                n_added = index_new_ngrams([ngram_id], corpus_node)
-                log_msg += 'ngram indexed in corpus %i\n' % corpus_id
-            return JsonHttpResponse({
-                'msg': log_msg,
-                'text': original_text,
-                'term': ngram_str,
-                'id' : ngram_id,
-                'count': n_added if do_indexation else 'no corpus provided for indexation'
-                }, 200)
-        # just in case
-        except Exception as e:
-            return JsonHttpResponse({
-                'msg': str(e),
-                'text': original_text
-                }, 400)
 _operators_dict = {
    "=":                lambda field, value: (field == value),
    "!=":               lambda field, value: (field != value),

--- a/gargantext/views/api/ngrams.py
+++ b/gargantext/views/api/ngrams.py
+from gargantext.util.http       import ValidationException, APIView \
+                                     , get_parameters, JsonHttpResponse\
+                                     , HttpResponse
+from gargantext.util.db         import session, func
+from gargantext.util.db_cache   import cache 
+from gargantext.models          import Node, Ngram, NodeNgram
+from sqlalchemy.orm             import aliased
+from re                         import findall
+# ngrams put() reuses the same text cleaning procedures as the toolchain
+from gargantext.util.toolchain.parsing           import normalize_chars
+from gargantext.util.toolchain.ngrams_extraction import normalize_forms
+# for indexing
+from gargantext.util.toolchain.ngrams_addition  import index_new_ngrams
+class ApiNgrams(APIView):
+    def get(self, request):
+        """
+        Used for analytics
+        ------------------
+        Get ngram listing + counts in a given scope
+        """
+        # parameters retrieval and validation
+        startwith = request.GET.get('startwith', '').replace("'", "\\'")
+        # query ngrams
+        ParentNode = aliased(Node)
+        ngrams_query = (session
+            .query(Ngram.id, Ngram.terms, func.sum(NodeNgram.weight).label('count'))
+            .join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
+            .join(Node, Node.id == NodeNgram.node_id)
+            .group_by(Ngram.id, Ngram.terms)
+            # .group_by(Ngram)
+            .order_by(func.sum(NodeNgram.weight).desc(), Ngram.terms)
+        )
+        # filters
+        if 'startwith' in request.GET:
+            ngrams_query = ngrams_query.filter(Ngram.terms.startswith(request.GET['startwith']))
+        if 'contain' in request.GET:
+            print("request.GET['contain']")
+            print(request.GET['contain'])
+            ngrams_query = ngrams_query.filter(Ngram.terms.contains(request.GET['contain']))
+        if 'corpus_id' in request.GET:
+            corpus_id_list = list(map(int, request.GET.get('corpus_id', '').split(',')))
+            if corpus_id_list and corpus_id_list[0]:
+                ngrams_query = ngrams_query.filter(Node.parent_id.in_(corpus_id_list))
+        if 'ngram_id' in request.GET:
+            ngram_id_list = list(map(int, request.GET.get('ngram_id', '').split(',')))
+            if ngram_id_list and ngram_id_list[0]:
+                ngrams_query = ngrams_query.filter(Ngram.id.in_(ngram_id_list))
+        # pagination
+        offset = int(request.GET.get('offset', 0))
+        limit = int(request.GET.get('limit', 20))
+        total = ngrams_query.count()
+        # return formatted result
+        return JsonHttpResponse({
+            'pagination': {
+                'offset': offset,
+                'limit': limit,
+                'total': total,
+            },
+            'data': [
+                {
+                    'id': ngram.id,
+                    'terms': ngram.terms,
+                    'count': ngram.count,
+                }
+                for ngram in ngrams_query[offset : offset+limit]
+            ],
+        })
+    def put(self, request):
+        """
+        Basic external access for *creating an ngram*
+        ---------------------------------------------
+         1 - checks user authentication before any changes
+         2 - adds the ngram to Ngram table in DB
+         3 - (if corpus param is present)
+             adds the ngram doc counts to NodeNgram table in DB
+             (aka "index the ngram" through the docs of the corpus)
+         4 - returns json with:
+             'msg'   => a success msg 
+             'text'  => the initial text content
+             'term'  => the normalized text content
+             'id'    => the new ngram_id
+             'count' => the number of docs with the ngram in the corpus
+                        (if corpus param is present)
+        possible inline parameters
+        --------------------------
+        @param    text=<ngram_string>         [required]
+        @param    corpus=<CORPUS_ID>          [optional]
+        """
+        # 1 - check user authentication
+        if not request.user.is_authenticated():
+            res = HttpResponse("Unauthorized")
+            res.status_code = 401
+            return res
+        # the params
+        params = get_parameters(request)
+        print("PARAMS", [(i,v) for (i,v) in params.items()])
+        if 'text' in params:
+            original_text = str(params.pop('text'))
+            ngram_str = normalize_forms(normalize_chars(original_text))
+        else:
+            raise ValidationException('The route PUT /api/ngrams/ is used to create a new ngram\
+                                        It requires a "text" parameter,\
+                                        for instance /api/ngrams?text=hydrometallurgy')
+        # if we have a 'corpus' param (to do the indexing)...
+        do_indexation = False
+        if 'corpus' in params:
+            # we retrieve the corpus...
+            corpus_id = int(params.pop('corpus'))
+            corpus_node = cache.Node[corpus_id]
+            # and the user must also have rights on the corpus
+            if request.user.id == corpus_node.user_id:
+                do_indexation = True
+            else:
+                res = HttpResponse("Unauthorized")
+                res.status_code = 401
+                return res
+        # number of "words" in the ngram
+        ngram_size = len(findall(r' +', ngram_str)) + 1
+        # do the additions
+        try:
+            log_msg = ""
+            ngram_id = None
+            preexisting = session.query(Ngram).filter(Ngram.terms==ngram_str).first()
+            if preexisting is not None:
+                ngram_id = preexisting.id
+                log_msg += "ngram already existed (id %i)\n" % ngram_id
+            else:
+                # 2 - insert into Ngrams
+                new_ngram = Ngram(terms=ngram_str, n=ngram_size)
+                session.add(new_ngram)
+                session.commit()
+                ngram_id = new_ngram.id
+                log_msg += "ngram was added with new id %i\n" % ngram_id
+            # 3 - index the term
+            if do_indexation:
+                n_added = index_new_ngrams([ngram_id], corpus_node)
+                log_msg += 'ngram indexed in corpus %i\n' % corpus_id
+            return JsonHttpResponse({
+                'msg': log_msg,
+                'text': original_text,
+                'term': ngram_str,
+                'id' : ngram_id,
+                'count': n_added if do_indexation else 'no corpus provided for indexation'
+                }, 200)
+        # just in case
+        except Exception as e:
+            return JsonHttpResponse({
+                'msg': str(e),
+                'text': original_text
+                }, 400)
--- a/gargantext/views/api/urls.py
+++ b/gargantext/views/api/urls.py
 from django.conf.urls import url
 from . import nodes
+from . import ngrams
 from . import metrics
 from . import ngramlists
 from . import analytics
@@ -9,9 +10,11 @@ urlpatterns = [ url(r'^nodes$'                , nodes.NodeListResource.as_view()
              , url(r'^nodes/(\d+)$'          , nodes.NodeResource.as_view()         )
              , url(r'^nodes/(\d+)/having$'   , nodes.NodeListHaving.as_view()       )
+               # Ngrams
+               , url(r'^ngrams/?$'             , ngrams.ApiNgrams.as_view()          )
               # Analytics
              , url(r'^nodes/(\d+)/histories$', analytics.NodeNgramsQueries.as_view())
-              , url(r'^ngrams/$'              , analytics.ApiNgrams.as_view()        )
              , url(r'hyperdata$'             , analytics.ApiHyperdata.as_view()     )
                # get a list of ngram_ids or ngram_infos by list_id
                # url(r'^ngramlists/(\d+)$', ngramlists.List.as_view()),