Commit 90cf28ad authored by Administrator's avatar Administrator

modifié : ngram/lists.py

Some functions to manage lists and ngrams.

	modifié :         annotations/views.py
Code annotated to give some examples for the REST API that should be in
/srv/gargantext/api

	modifié :         test-list-management.py
Run tests (need more tests to finish it)
parent b0a0cef0
......@@ -11,6 +11,14 @@ from rest_framework.renderers import JSONRenderer
from node.models import Node
from gargantext_web.db import *
from ngram.lists import listIds, listNgramIds, ngramList
import sqlalchemy
from sqlalchemy.sql import func
from sqlalchemy import desc, asc, or_, and_, Date, cast, select
from sqlalchemy import literal_column
from sqlalchemy.orm import aliased
def demo(request):
"""Demo page, temporary"""
......@@ -19,6 +27,11 @@ def demo(request):
}, context_instance=RequestContext(request))
# This class below is a duplicate with the class Nodes in
# /srv/gargantext/gargantext_web/api.py
# All information you need from Nodes in api.py is in hyperdata
# You may modify api.py (keeping compatibility) for your own needs
# See in urls the url pattern to use
class Document(APIView):
"""Read-only Document"""
renderer_classes = (JSONRenderer,)
......@@ -49,6 +62,23 @@ class NgramList(APIView):
"""Get All for on List ID"""
doc_id = request.GET.get('docId')
# TODO DB query
# Example with 'MiamList', same with 'StopList'
corpus_id = session.query(Node.parent_id).filter(Node.id == doc_id).first()
miamlist_ids = listIds(user_id=request.user.id,
corpus_id=corpus_id,
typeList='MiamList')
miamlist_id, miamlist_name = miamlist_ids[0]
# ngrams of list_id of corpus_id:
corpus_ngram_miam_list = listNgramIds(list_id=miamList_id)
# ngrams of list_id of corpus_id:
doc_ngram_miam_list = listNgramIds(list_id=miamList_id, doc_id=doc_id)
# now you can model your dict as you want (for doc or corpus level):
ngram_id, ngram_text, ngram_occurrences = doc_ngram_miam_list[0]
data = { '%s' % list_id : { '%s' % doc_id : [
{
'uuid': '1',
......@@ -193,6 +223,10 @@ class Ngram(APIView):
annotationId = request.GET.get("annotationId")
print(annotationDict)
# TODO DB query
# Use the ngramList function in ngram.lists.py for that
# It can return True or False
ngramList(do='del', ngram_ids=[ngram_id,], list_id=list_id)
return Response({})
def post(self, request, list_id, ngram_id):
......
......@@ -13,7 +13,7 @@ from sqlalchemy.orm import aliased
# from gargantext_web.db import Node, get_cursor
def nodeList(user_id=None, corpus_id=None, typeList='MiamList'):
def listIds(user_id=None, corpus_id=None, typeList='MiamList'):
'''
nodeList : get or create NodeList.
nodeList :: Integer -> Integer -> String -> [Node]
......@@ -26,7 +26,7 @@ def nodeList(user_id=None, corpus_id=None, typeList='MiamList'):
# Nodes are either in root_list or user_list
root_list = ['Stem', 'Lem']
user_list = ['MiamList', 'StopList', 'MainList']
user_list = ['MiamList', 'StopList', 'MainList', 'GroupList']
if typeList in user_list:
nodes = session.query(Node).filter(
......@@ -58,20 +58,123 @@ def nodeList(user_id=None, corpus_id=None, typeList='MiamList'):
else:
print("Usage (Warning): Need corpus_id and user_id")
def stopList(user_id=None, corpus_id=None,
# Some functions to manage ngrams according to the lists
def listNgramIds(list_id=None, typeList=None,
                 corpus_id=None, doc_id=None, user_id=None):
    '''
    listNgramIds :: Int | String, Int, Int, Int -> [(Int, String, Int)]
    return has types: [(ngram_id, ngram_terms, count)]

    Return the ngrams attached to a list, one tuple per ngram holding
    Ngram.id, Ngram.terms and the row count computed by func.count()
    (the "occurrences" of the ngram at the requested level).

    list_id   : Node.id of the list expected
    typeList  : needed if no list_id, use typeList such as 'MiamList' or 'StopList'
    corpus_id : needed to get list_id
    doc_id    : to get specific ngrams related to a document with Node.id=doc_id
    user_id   : needed to create list if it does not exist

    When list_id is not given, it is resolved with listIds() from
    corpus_id, typeList and user_id. If one of those is missing, a usage
    message is printed and sys.exit() is called. If the lookup itself
    fails, the exception is reported with PrintException() and an empty
    list is returned.
    '''
    if list_id is None:
        # Guard clauses, checked in the same order as before:
        # corpus_id, then typeList, then user_id.
        if corpus_id is None:
            # str() because typeList may itself be None on this path.
            print('Need a node_id to take default list of type' + str(typeList))
            sys.exit()
        if typeList is None:
            print('Need a typeList parameter')
            sys.exit()
        if user_id is None:
            print('Need a user_id to create list if needed')
            sys.exit()

        try:
            # listIds yields (id, name) pairs; keep the id of the first one.
            list_id = listIds(user_id=user_id,
                              corpus_id=corpus_id,
                              typeList=typeList)[0][0]
        except Exception:
            PrintException()
            return []

    # From here on list_id is known, whether it was given or resolved above.
    ListNgram = aliased(NodeNgram)
    query = (session.query(Ngram.id, Ngram.terms, func.count())
             .join(ListNgram, ListNgram.ngram_id == Ngram.id)
             .filter(ListNgram.node_id == list_id)
             .group_by(Ngram.id)
             )

    if doc_id is not None:
        # Restrict to the ngrams that are also attached to the document node.
        Doc = aliased(Node)
        DocNgram = aliased(NodeNgram)
        query = (query
                 .join(DocNgram, DocNgram.ngram_id == Ngram.id)
                 .join(Doc, Doc.id == doc_id)
                 .filter(DocNgram.node_id == Doc.id)
                 )

    return query.all()
def ngramList(do=None, ngram_ids=None, list_id=None):
    '''
    ngramList :: (String, [Int], Int) -> Bool
    Do (delete | add) [ngram_id] (from | to) the list_id

    options:
        do        = String : action 'del' or 'add'
        ngram_ids = [Int]  : list of Ngrams id (Ngrams.id)
        list_id   = Int    : list id (Node.id)

    Returns True once the changes are committed, False when the action is
    unknown or when the database work raised (the exception is reported
    with PrintException() and the session is rolled back).
    If an option is missing, a usage message is printed and sys.exit(0)
    is called.

    'add' inserts the (list_id, ngram_id) row only when it is not there
    yet; 'del' removes it only when it exists, so both actions can be
    repeated safely.
    '''
    if do is None or not ngram_ids or list_id is None:
        print('Need more options: do, ngram_id, list_id')
        sys.exit(0)

    if do not in ('add', 'del'):
        # Unknown action: report failure instead of committing nothing
        # and claiming success.
        print('Need more options: do, ngram_id, list_id')
        return False

    try:
        for ngram_id in ngram_ids:
            # Need to be optimized with list of ids
            node_ngram = (session.query(NodeNgram)
                          .filter(NodeNgram.ngram_id == ngram_id)
                          .filter(NodeNgram.node_id == list_id)
                          .first()
                          )
            if do == 'add':
                if node_ngram is None:
                    # NOTE(review): only the two key columns are set here;
                    # if NodeNgram has other required columns (e.g. a
                    # weight), give them a value too - TODO confirm.
                    session.add(NodeNgram(node_id=list_id, ngram_id=ngram_id))
            elif node_ngram is not None:
                session.delete(node_ngram)
        session.commit()
        return True
    except Exception:
        PrintException()
        # Leave the session usable for the next request.
        session.rollback()
        return False
# Some functions to manage automatically the lists
def doStopList(user_id=None, corpus_id=None,
stop_id=None,
reset=False, limit=None
):
'''
Compute the stopList and returns its Node.id
Compute automatically the stopList and returns its Node.id
Algo: TODO tfidf according type of corpora
'''
if stop_id is None:
stop_id = nodeList(user_id=user_id,
stop_id = nodeListIds(user_id=user_id,
corpus_id=corpus_id,
typeList='StopList')
typeList='StopList')[0]
# according to type of corpus, choose the right default stopList
def doList(
type_list='miam',
user_id=None, corpus_id=None,
......@@ -119,7 +222,7 @@ def doList(
for list_ in list_dict.keys():
if list_dict[list_]['id'] is None:
list_dict[list_]['id'] = nodeList(user_id=user_id,
list_dict[list_]['id'] = nodeListIds(user_id=user_id,
corpus_id=corpus_id,
typeList=list_dict[list_]['type'])[0][0]
# Delete previous List ?
......
......@@ -36,6 +36,9 @@ if project is None:
corpus = session.query(Node).filter(Node.parent_id == project.id,
Node.type_id == cache.NodeType['Corpus'].id).first()
doc_id = session.query(Node.id).filter(Node.parent_id == corpus.id,
Node.type_id == cache.NodeType['Document'].id).all()[1]
if corpus is None:
corpus = Node(
parent_id = project.id,
......@@ -56,26 +59,35 @@ if corpus is None:
extract_ngrams(corpus, ('title', 'abstract'))
compute_tfidf(corpus)
print('Miam list', listIds(typeList='MiamList', corpus_id=corpus.id, user_id=user.id)[0][0])
# Stemming the corpus
print('Working on corpus:', corpus.id, corpus.name)
stem_id = stem_corpus(corpus_id=corpus.id)
print('Stem Node.id is', stem_id)
for typeList in ['MiamList', 'StopList', 'MainList', 'Stem']:
n = nodeList(user_id=user.id,
for typeList in ['MiamList', 'StopList', 'MainList', 'GroupList']:
n = listIds(user_id=user.id,
corpus_id=corpus.id,
typeList=typeList)
print(n)
type_list='miam'
try:
d = doList(type_list=type_list, user_id = user.id, corpus_id = corpus.id, stem_id=stem_id, limit=150)
print('Size of the ' + type_list + ' list:',
session.query(NodeNgram).filter(NodeNgram.node_id == d).count()
)
except:
PrintException()
#print(n[0][0])
print('Test having list_id')
print(n, listNgramIds(list_id=n[0][0])[:3])
#
print('Test having typeList and corpus.id')
print(n, listNgramIds(typeList=typeList, corpus_id=corpus.id, user_id=user.id)[:3])
#
# print('Test having typeList and corpus.id and doc_id')
# print(n, listNgramIds(typeList=typeList, corpus_id=corpus.id, doc_id=doc_id, user_id=user.id)[:3])
#
#
#type_list='miam'
#try:
# d = doList(type_list=type_list, user_id = user.id, corpus_id = corpus.id, stem_id=stem_id, limit=150)
# print('Size of the ' + type_list + ' list:',
# session.query(NodeNgram).filter(NodeNgram.node_id == d).count()
# )
#except:
# PrintException()
#
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment