Commit 4efb2168 authored by delanoe

[FIX] merge.

parent 3fa340d6
......@@ -63,4 +63,3 @@ def empty_trash(corpus_id):
node.delete()
print("Nodes deleted")
......@@ -276,5 +276,3 @@ def get_or_create_node(nodetype=None,corpus=None,corpus_id=None,name_str=None,hy
session.commit()
#print(parent_id, n.parent_id, n.id, n.name)
return(node)
......@@ -25,7 +25,7 @@ def ngram_workflow(corpus, n=5000):
compute_cvalue(corpus,limit=1000) # size
part = round(part * 0.8)
print('spec part:', part)
#print('spec part:', part)
compute_specificity(corpus,limit=part)
......@@ -33,7 +33,7 @@ def ngram_workflow(corpus, n=5000):
limit_inf = round(part * 1)
limit_sup = round(part * 5)
print(limit_inf,limit_sup)
#print(limit_inf,limit_sup)
compute_groups(corpus,limit_inf=limit_inf, limit_sup=limit_sup)
compute_mapList(corpus,limit=1000) # size
......
# Without this, we couldn't use the Django environment
from admin.env import *
from ngram.stemLem import *
from ngram.lists import *
# Look up the account that will own the test project and corpus.
#user = session.query(User).all()[0]
user = session.query(User).filter(User.username=='alexandre').first()
if user is None:
    # .first() yields None when nothing matches (the script relies on this
    # for `project` and `corpus` too); stop with a readable message instead
    # of an AttributeError on `user.username`.
    raise SystemExit("User 'alexandre' not found; create it before running this script.")
print('Current user is:', user.username)
# Reuse the node named 'Test' if there is one; otherwise create it as a
# Project node owned by `user`.
project = session.query(Node).filter(Node.name == 'Test').first()
if project is None:
    project = Node(
        name='Test',
        type_id=cache.NodeType['Project'].id,
        user_id=user.id,
    )
    session.add(project)
    # Only a freshly created project needs to be saved.
    session.commit()
#corpora = session.query(Node).filter(Node.parent_id == project.id,
# Node.type_id == cache.NodeType['Corpus'].id
# ).delete()
#
#models.Node.objects(parent_id = project.id, type_id = cache.NodeType['Corpus']).all().delete()
#
# Reuse the first Corpus node found under the test project; if there is none,
# create 'Test Corpus' and run the ingestion steps on it.
corpus = session.query(Node).filter(Node.parent_id == project.id,
                                    Node.type_id == cache.NodeType['Corpus'].id).first()
print('Corpus is', corpus)
if corpus is None:
    corpus = Node(
        parent_id=project.id,
        name='Test Corpus',
        type_id=cache.NodeType['Corpus'].id,
        user_id=user.id,
    )
    session.add(corpus)
    session.commit()
    # NOTE(review): the original indentation was lost; the ingestion steps
    # below are assumed to belong to the "corpus was just created" branch so
    # that an existing corpus is not re-imported on every run -- confirm.
    add_resource(corpus,
        file='/srv/gargantext_lib/data_samples/pubmed.zip',
        # #file = '/srv/gargantext_lib/data_samples/pubmed_2013-04-01_HoneyBeesBeeBees.xml',
        type_id=cache.ResourceType['Pubmed (xml format)'].id,
    )
    parse_resources(corpus)
    extract_ngrams(corpus, ('title', 'abstract'))
    compute_tfidf(corpus)
# Pick the third Document row found under the corpus as the sample document.
# NOTE(review): this is whatever the query returns at index 2 (presumably a
# one-column row rather than a bare integer) and it raises IndexError when
# the corpus has fewer than three documents -- confirm this is intended.
document_rows = session.query(Node.id).filter(
    Node.parent_id == corpus.id,
    Node.type_id == cache.NodeType['Document'].id,
).all()
doc_id = document_rows[2]

# Show the first id reported for the corpus' MiamList.
first_miam_id = listIds(typeList='MiamList', corpus_id=corpus.id, user_id=user.id)[0][0]
print('Miam list', first_miam_id)

# Stemming the corpus
print('Working on corpus:', corpus.id, corpus.name)
stem_id = stem_corpus(corpus_id=corpus.id)
print('Stem Node.id is', stem_id)
#for typeList in ['MiamList', 'StopList', 'MainList', 'Group']:
# n = listIds(user_id=user.id,
# corpus_id=corpus.id,
# typeList=typeList)
# #print(n[0][0])
# print('Test having list_id')
# print(n, listNgramIds(list_id=n[0][0])[:3])
#
# Resolve the ids of the StopList and MiamList attached to this corpus/user;
# only the first field of the first row returned by listIds() is kept.
stop_list_id = listIds(typeList='StopList', corpus_id=corpus.id, user_id=user.id)[0][0]
miam_list_id = listIds(typeList='MiamList', corpus_id=corpus.id, user_id=user.id)[0][0]

print('StopList', stop_list_id)
print('MiamList', miam_list_id)

# Print the first WhiteList node id found under the corpus (None if absent).
whitelist_query = session.query(Node.id).filter(
    Node.parent_id==corpus.id,
    Node.type_id==cache.NodeType['WhiteList'].id,
)
print(whitelist_query.first())

#ngrams2miam(user_id=user.id, corpus_id=corpus.id)

# List the ngrams attached to the sample document and print them.
doc_ngram_list = listNgramIds(corpus_id=corpus.id, doc_id=doc_id, user_id=user.id)
print(doc_ngram_list)
#print(listNgramIds(list_id=stop_list_id, user_id=user.id, corpus_id=corpus.id))
#type_list='MiamList'
#try:
# d = doList(type_list=type_list, user_id = user.id, corpus_id = corpus.id, limit=150)
## print('Size of the ' + type_list + ' list:',
## session.query(NodeNgram).filter(NodeNgram.node_id == d).count()
## )
#except:
# PrintException()
##
#print(listNgramIds(list_id=miam_list_id, user_id=user.id, corpus_id=corpus.id))
#
#ngram_id = listNgramIds(list_id=miam_list_id, user_id=user.id, corpus_id=corpus.id)[0][0]
#print('ngram_id', ngram_id)
#
#ngramList(do='add', ngram_ids=[ngram_id,], list_id=stop_list_id)
# print('Test having typeList and corpus.id')
# print(n, listNgramIds(typeList=typeList, corpus_id=corpus.id, user_id=user.id)[:3])
##
# print('Test having typeList and corpus.id and doc_id')
# print(n, listNgramIds(typeList=typeList, corpus_id=corpus.id, doc_id=doc_id, user_id=user.id)[:3])
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment