modifié : ngram/lists.py

Some functions to manage lists and ngrams. modifié : annotations/views.py Code annotated to give some examples for the REST API that should be in /srv/gargantext/api modifié : test-list-management.py Run tests (more tests are needed to finish it)

modifié : ngram/lists.py
Some functions to manage lists and ngrams. modifié : annotations/views.py Code annotated to give some examples for the REST API that should be in /srv/gargantext/api modifié : test-list-management.py Run tests (more tests are needed to finish it)
90cf28ad · Administrator · b0a0cef0 · 90cf28ad · 90cf28ad · 90cf28ad
Commit 90cf28ad authored Jun 04, 2015 by Administrator
Hide whitespace changes
Inline Side-by-side

Showing with 170 additions and 21 deletions

views.py annotations/views.py +34 -0

lists.py ngram/lists.py +110 -7

test-list-management.py test-list-management.py +26 -14

No files found.
--- a/annotations/views.py
+++ b/annotations/views.py
@@ -11,6 +11,14 @@ from rest_framework.renderers import JSONRenderer
 from node.models import Node
 from gargantext_web.db import *

+from ngram.lists import listIds, listNgramIds, ngramList
+import sqlalchemy
+from sqlalchemy.sql import func
+from sqlalchemy import desc, asc, or_, and_, Date, cast, select
+from sqlalchemy import literal_column
+from sqlalchemy.orm import aliased
+
+

 def demo(request):
    """Demo page, temporary"""
@@ -19,6 +27,11 @@ def demo(request):
    }, context_instance=RequestContext(request))


+# The class below duplicates the class Nodes in
+# /srv/gargantext/gargantext_web/api.py
+# All the information you need from Nodes in api.py is in hyperdata
+# You may modify api.py (keeping compatibility) for your own needs
+# See the urls file for the url pattern to use
 class Document(APIView):
    """Read-only Document"""
    renderer_classes = (JSONRenderer,)
@@ -49,6 +62,23 @@ class NgramList(APIView):
        """Get All for on List ID"""
        doc_id = request.GET.get('docId')
        # TODO DB query
+        # Example with 'MiamList', same with 'StopList'
+        corpus_id = session.query(Node.parent_id).filter(Node.id == doc_id).first()
+        miamlist_ids = listIds(user_id=request.user.id,
+                              corpus_id=corpus_id,
+                              typeList='MiamList')
+
+        miamlist_id, miamlist_name = miamlist_ids[0]
+
+        # ngrams of list_id of corpus_id:
+        corpus_ngram_miam_list = listNgramIds(list_id=miamList_id)
+
+        # ngrams of list_id restricted to the document doc_id:
+        doc_ngram_miam_list = listNgramIds(list_id=miamList_id, doc_id=doc_id)
+
+        # now you can model your dict as you want (for doc or corpus level):
+        ngram_id, ngram_text, ngram_occurrences = doc_ngram_miam_list[0]
+
        data = { '%s' % list_id : { '%s' % doc_id : [
            {
                'uuid': '1',
@@ -193,6 +223,10 @@ class Ngram(APIView):
        annotationId = request.GET.get("annotationId")
        print(annotationDict)
        # TODO DB query
+        # Use the ngramList function in ngram.lists.py for that
+        # It can return True or False
+        ngramList(do='del', ngram_ids=[ngram_id,], list_id=list_id)
+
        return Response({})

    def post(self, request, list_id, ngram_id):

--- a/ngram/lists.py
+++ b/ngram/lists.py
@@ -13,7 +13,7 @@ from sqlalchemy.orm import aliased

 # from gargantext_web.db import Node, get_cursor

-def nodeList(user_id=None, corpus_id=None, typeList='MiamList'):
+def listIds(user_id=None, corpus_id=None, typeList='MiamList'):
    '''
    nodeList : get or create NodeList.
    nodeList :: Integer -> Integer -> String -> [Node]
@@ -26,7 +26,7 @@ def nodeList(user_id=None, corpus_id=None, typeList='MiamList'):

        # Nodes are either in root_list or user_list
        root_list = ['Stem', 'Lem']
-        user_list   = ['MiamList', 'StopList', 'MainList']
+        user_list   = ['MiamList', 'StopList', 'MainList', 'GroupList']

        if typeList in user_list:
            nodes = session.query(Node).filter(
@@ -58,20 +58,123 @@ def nodeList(user_id=None, corpus_id=None, typeList='MiamList'):
    else:
        print("Usage (Warning): Need corpus_id and user_id")

-def stopList(user_id=None, corpus_id=None,
+# Some functions to manage ngrams according to the lists
+
+def listNgramIds(list_id=None, typeList=None,
+                  corpus_id=None, doc_id=None, user_id=None):
+    '''
+    listNgramIds :: Int | String, Int, Int, Int -> [(Int, String, Int)]
+    return has types: [(ngram_id, ngram_terms, occurrences)]
+
+    Return the list of tuples of ngram id, ngram terms and occurrences
+    for the ngrams of a list; occurrences is the func.count() of the
+    joined rows grouped by ngram id.
+
+    list_id   : Node.id of the list expected
+    typeList  : needed if no list_id, use typeList such as 'MiamList' or 'StopList'
+    corpus_id : needed to get list_id
+    doc_id    : to get specific ngrams related to a document with Node.id=doc_id
+    user_id   : needed to create list if it does not exist
+
+    When list_id is None it is looked up with listIds(), which needs
+    corpus_id, typeList and user_id: a missing one prints a message and
+    stops through sys.exit(). If the lookup itself fails the exception is
+    printed and an empty list is returned.
+    '''
+
+    if list_id is None :
+        # Guard clauses, checked in the same order as before
+        if corpus_id is None :
+            # str() keeps the message printable when typeList is None too
+            print('Need a node_id to take default list of type' + str(typeList))
+            sys.exit()
+        if typeList is None :
+            print('Need a typeList parameter')
+            sys.exit()
+        if user_id is None :
+            print('Need a user_id to create list if needed')
+            sys.exit()
+        try:
+            list_id = listIds(user_id=user_id,
+                              corpus_id=corpus_id,
+                              typeList=typeList)[0][0]
+        except Exception:
+            PrintException()
+            return([])
+
+    # The query runs for a given list_id as well as for a resolved one
+    ListNgram = aliased(NodeNgram)
+    query     = (session.query(Ngram.id, Ngram.terms, func.count())
+                        .join(ListNgram, ListNgram.ngram_id == Ngram.id)
+                        .filter(ListNgram.node_id == list_id)
+                        .group_by(Ngram.id)
+                )
+    if doc_id is not None :
+        # Keep only the ngrams that are also attached to the document
+        Doc      = aliased(Node)
+        DocNgram = aliased(NodeNgram)
+
+        query = (query
+                     .join(DocNgram, DocNgram.ngram_id == Ngram.id)
+                     .join(Doc, Doc.id == doc_id)
+                     .filter(DocNgram.node_id == Doc.id)
+                )
+
+    return(query.all())
+
+
+def ngramList(do=None, ngram_ids=[], list_id=None) :
+    '''
+    ngramList :: ([Int], Int, String) -> Bool
+    Do (delete | add) [ngram_id] (from | to) the list_id
+
+    options:
+        do        = String : action 'del' or 'add'
+        ngram_ids = [Int]  : list of Ngrams id (Ngrams.id)
+        list_id   = Int    : list id (Node.id)
+
+    Return True once the changes are committed.
+    Return False when an option is missing, when the action is unknown
+    or when the database work fails (the session is then rolled back).
+    '''
+    if do is None or not ngram_ids or list_id is None :
+        print('Need more options: do, ngram_id, list_id')
+        return(False)
+    if do not in ('add', 'del') :
+        print('Unknown action: ' + str(do))
+        return(False)
+
+    try:
+        for ngram_id in ngram_ids:
+            # Need to be optimized with list of ids
+            node_ngram = (session.query(NodeNgram)
+                    .filter(NodeNgram.ngram_id == ngram_id)
+                    .filter(NodeNgram.node_id  == list_id)
+                    .first()
+                    )
+            if do == 'add' and node_ngram is None:
+                # Only a missing link has to be created.
+                # NOTE(review): assumes node_id and ngram_id are enough to
+                # build a NodeNgram row (no other required column) - confirm
+                session.add(NodeNgram(node_id=list_id, ngram_id=ngram_id))
+            elif do == 'del' and node_ngram is not None:
+                # A link that is already absent is skipped, not an error
+                session.delete(node_ngram)
+
+        session.commit()
+        return(True)
+
+    except Exception:
+        PrintException()
+        # Leave the session usable for the next caller
+        session.rollback()
+        return(False)
+
+
+
+# Some functions to manage automatically the lists
+
+def doStopList(user_id=None, corpus_id=None,
            stop_id=None,
            reset=False, limit=None
             ):
    '''
-    Compute the stopList and returns its Node.id
+    Compute automatically the stopList and returns its Node.id
+    Algo: TODO tfidf according type of corpora
+
+    NOTE: unfinished, only the lookup of the StopList Node.id is done
+    here so far and nothing is returned yet.
    '''

    if stop_id is None:
-        stop_id = nodeList(user_id=user_id,
+        # The other callers in this commit read the id as listIds(...)[0][0]
+        stop_id = listIds(user_id=user_id,
                            corpus_id=corpus_id,
-                            typeList='StopList')
+                            typeList='StopList')[0][0]
    # according to type of corpus, choose the right default stopList

+
+
 def doList(
            type_list='miam',
            user_id=None, corpus_id=None,
@@ -119,7 +222,7 @@ def doList(

        for list_ in list_dict.keys():
            if  list_dict[list_]['id'] is None:
-                list_dict[list_]['id'] = nodeList(user_id=user_id,
+                list_dict[list_]['id'] = nodeListIds(user_id=user_id,
                                        corpus_id=corpus_id,
                                        typeList=list_dict[list_]['type'])[0][0]
        # Delete previous List ?

--- a/test-list-management.py
+++ b/test-list-management.py
@@ -36,6 +36,9 @@ if project is None:
 corpus = session.query(Node).filter(Node.parent_id == project.id,
                                    Node.type_id   == cache.NodeType['Corpus'].id).first()

+doc_id = session.query(Node.id).filter(Node.parent_id == corpus.id,
+                                       Node.type_id   == cache.NodeType['Document'].id).all()[1]
+
 if corpus is None:
    corpus = Node(
        parent_id = project.id,
@@ -56,26 +59,35 @@ if corpus is None:
    extract_ngrams(corpus, ('title', 'abstract'))
    compute_tfidf(corpus)

-
+print('Miam list', listIds(typeList='MiamList', corpus_id=corpus.id, user_id=user.id)[0][0])

 # Stemming the corpus
 print('Working on corpus:', corpus.id, corpus.name)
 stem_id = stem_corpus(corpus_id=corpus.id)
 print('Stem Node.id is', stem_id)

-for typeList in ['MiamList', 'StopList', 'MainList', 'Stem']:
-    n = nodeList(user_id=user.id,
+for typeList in ['MiamList', 'StopList', 'MainList', 'GroupList']:
+    n = listIds(user_id=user.id,
                           corpus_id=corpus.id,
                           typeList=typeList)
-    print(n)
-
-
-type_list='miam'
-try:
-    d = doList(type_list=type_list, user_id = user.id, corpus_id = corpus.id, stem_id=stem_id, limit=150)
-    print('Size of the ' + type_list + ' list:',
-          session.query(NodeNgram).filter(NodeNgram.node_id == d).count()
-          )
-except:
-    PrintException()
+    #print(n[0][0])
+    print('Test having list_id')
+    print(n, listNgramIds(list_id=n[0][0])[:3])
+#
+    print('Test having typeList and corpus.id')
+    print(n, listNgramIds(typeList=typeList, corpus_id=corpus.id, user_id=user.id)[:3])
+#
+#    print('Test having typeList and corpus.id and doc_id')
+#    print(n, listNgramIds(typeList=typeList, corpus_id=corpus.id, doc_id=doc_id, user_id=user.id)[:3])

+#
+#
+#type_list='miam'
+#try:
+#    d = doList(type_list=type_list, user_id = user.id, corpus_id = corpus.id, stem_id=stem_id, limit=150)
+#    print('Size of the ' + type_list + ' list:',
+#          session.query(NodeNgram).filter(NodeNgram.node_id == d).count()
+#          )
+#except:
+#    PrintException()
+#