Commit 08dc8379 authored by PkSM3's avatar PkSM3

Merge branch 'unstable' of ssh://delanoe.org:1979/gargantext into samuel

parents 7cd20adb 5fc5153d
...@@ -237,5 +237,21 @@ class Ngram(APIView): ...@@ -237,5 +237,21 @@ class Ngram(APIView):
doc_id = request.GET.get('docId') doc_id = request.GET.get('docId')
annotationDict = json.loads(request.POST.get("annotation")) annotationDict = json.loads(request.POST.get("annotation"))
print(annotationDict) print(annotationDict)
# There is 2 main actions:
# 1) add ngram to the miamList : this step is tricky if the ngram does
# not exist yet; it is experimental in this case.
# But according to your function, you have the ngram_id already
# The function is:
ngramList(do='add', ngram_ids=[ngram_id,], list_id=list_id)
#ngramList(do='add', ngram_ids=[ngram_id,], list_id=list_id)
# Note : depending on the list, maybe I should adapt the function to
# delete from a list when added to a specific type of list
# 2) get the list of ngrams of one miamList: for this step see above
# Use the ngramList function in ngram.lists.py for that
# TODO DB query # TODO DB query
return Response(annotationDict) return Response(annotationDict)
...@@ -6,22 +6,46 @@ ...@@ -6,22 +6,46 @@
import random import random
def get_team(): def get_team():
''' '''
Function to get list of each member as dict of personal informations. Function to get list of each member as dict of personal informations.
You are free to fill the form which is verbose indeed but clear enough for You are free to fill the form which is verbose indeed but clear enough for
manual entries (I could zip lists but not clear enough). manual entries (I could zip lists but not clear enough).
For your picture, please ask Alexandre to take your picture with his camera For your picture, please ask Alexandre to take your picture with his camera
in order to follow the design shape of the website. in order to follow the design shape of the website.
''' '''
team = [ team = [
{ 'first_name' : 'Alexandre', 'last_name' : 'Delanoë', 'mail' : 'alexandre+gargantextATdelanoe.org', 'website' : 'http://alexandre.delanoe.org', 'picture' : 'alexandre.jpg', 'role' : 'project manager, scientific board, developer'}, { 'first_name' : 'Alexandre', 'last_name' : 'Delanoë',
{ 'first_name' : 'David', 'last_name' : 'Chavalarias', 'mail' : 'david.chavalariasATiscpif.fr', 'website' : 'http://chavalarias.com', 'picture' : 'david.jpg', 'role':'scientific board'}, 'mail' : 'alexandre+gargantextATdelanoe.org',
{ 'first_name' : 'Mathieu', 'last_name' : 'Rodic', 'mail' : '', 'website' : 'http://rodic.fr', 'picture' : 'mathieu.jpg', 'role' : 'developer'}, 'website' : 'http://alexandre.delanoe.org',
{ 'first_name' : 'Samuel', 'last_name' : 'Castillo J.', 'mail' : 'kaisleanATgmail.com', 'website' : 'http://www.pksm3.droppages.com', 'picture' : 'samuel.jpg', 'role' : 'developer'}, 'picture' : 'alexandre.jpg',
{ 'first_name' : 'Elias', 'last_name' : 'Showk', 'mail' : '', 'website' : 'https://github.com/elishowk', 'picture' : '', 'role' : 'developer'}, 'role' : 'project investigator, developer'},
{ 'first_name' : 'David', 'last_name' : 'Chavalarias',
'mail' : 'david.chavalariasATiscpif.fr',
'website' : 'http://chavalarias.com',
'picture' : 'david.jpg',
'role':'project investigator'},
{ 'first_name' : 'Mathieu', 'last_name' : 'Rodic',
'mail' : '',
'website' : 'http://rodic.fr',
'picture' : 'mathieu.jpg',
'role' : 'developer'},
{ 'first_name' : 'Samuel', 'last_name' : 'Castillo J.',
'mail' : 'kaisleanATgmail.com',
'website' : 'http://www.pksm3.droppages.com',
'picture' : 'samuel.jpg',
'role' : 'developer'},
{ 'first_name' : 'Elias', 'last_name' : 'Showk',
'mail' : '',
'website' : 'https://github.com/elishowk',
'picture' : '', 'role' : 'developer'},
#{ 'first_name' : '', 'name' : '', 'mail' : '', 'website' : '', 'picture' : ''}, #{ 'first_name' : '', 'name' : '', 'mail' : '', 'website' : '', 'picture' : ''},
# copy paste the line above and write your informations please # copy paste the line above and write your informations please
] ]
...@@ -30,7 +54,7 @@ def get_team(): ...@@ -30,7 +54,7 @@ def get_team():
return(team) return(team)
def get_sponsors(): def get_sponsors():
''' '''
Function to get list of each sponsor as dict of institutional informations. Function to get list of each sponsor as dict of institutional informations.
''' '''
......
...@@ -11,7 +11,6 @@ from gargantext_web.views import move_to_trash ...@@ -11,7 +11,6 @@ from gargantext_web.views import move_to_trash
from gargantext_web.db import * from gargantext_web.db import *
from node import models from node import models
def DebugHttpResponse(data): def DebugHttpResponse(data):
return HttpResponse('<html><body style="background:#000;color:#FFF"><pre>%s</pre></body></html>' % (str(data), )) return HttpResponse('<html><body style="background:#000;color:#FFF"><pre>%s</pre></body></html>' % (str(data), ))
...@@ -44,20 +43,17 @@ _ngrams_order_columns = { ...@@ -44,20 +43,17 @@ _ngrams_order_columns = {
} }
from rest_framework.authentication import SessionAuthentication, BasicAuthentication from rest_framework.authentication import SessionAuthentication, BasicAuthentication
from rest_framework.views import APIView from rest_framework.views import APIView
from rest_framework.response import Response from rest_framework.response import Response
from rest_framework.exceptions import APIException as _APIException from rest_framework.exceptions import APIException as _APIException
class APIException(_APIException): class APIException(_APIException):
def __init__(self, message, code=500): def __init__(self, message, code=500):
self.status_code = code self.status_code = code
self.detail = message self.detail = message
_operators = { _operators = {
"=": lambda field, value: (field == value), "=": lambda field, value: (field == value),
"!=": lambda field, value: (field != value), "!=": lambda field, value: (field != value),
...@@ -71,6 +67,7 @@ _operators = { ...@@ -71,6 +67,7 @@ _operators = {
} }
from rest_framework.decorators import api_view from rest_framework.decorators import api_view
@api_view(('GET',)) @api_view(('GET',))
def Root(request, format=None): def Root(request, format=None):
return Response({ return Response({
...@@ -78,7 +75,6 @@ def Root(request, format=None): ...@@ -78,7 +75,6 @@ def Root(request, format=None):
'snippets': reverse('snippet-list', request=request, format=format) 'snippets': reverse('snippet-list', request=request, format=format)
}) })
class NodesChildrenNgrams(APIView): class NodesChildrenNgrams(APIView):
def get(self, request, node_id): def get(self, request, node_id):
...@@ -121,7 +117,6 @@ class NodesChildrenNgrams(APIView): ...@@ -121,7 +117,6 @@ class NodesChildrenNgrams(APIView):
], ],
}) })
class NodesChildrenDuplicates(APIView): class NodesChildrenDuplicates(APIView):
def _fetch_duplicates(self, request, node_id, extra_columns=None, min_count=1): def _fetch_duplicates(self, request, node_id, extra_columns=None, min_count=1):
...@@ -210,8 +205,6 @@ class NodesChildrenDuplicates(APIView): ...@@ -210,8 +205,6 @@ class NodesChildrenDuplicates(APIView):
'deleted': count 'deleted': count
}) })
class NodesChildrenMetatadata(APIView): class NodesChildrenMetatadata(APIView):
def get(self, request, node_id): def get(self, request, node_id):
...@@ -271,8 +264,6 @@ class NodesChildrenMetatadata(APIView): ...@@ -271,8 +264,6 @@ class NodesChildrenMetatadata(APIView):
'data': collection, 'data': collection,
}) })
class NodesChildrenQueries(APIView): class NodesChildrenQueries(APIView):
def _parse_filter(self, filter): def _parse_filter(self, filter):
...@@ -551,8 +542,6 @@ class NodesChildrenQueries(APIView): ...@@ -551,8 +542,6 @@ class NodesChildrenQueries(APIView):
"results": results, "results": results,
}, 201) }, 201)
class NodesList(APIView): class NodesList(APIView):
authentication_classes = (SessionAuthentication, BasicAuthentication) authentication_classes = (SessionAuthentication, BasicAuthentication)
...@@ -573,7 +562,6 @@ class NodesList(APIView): ...@@ -573,7 +562,6 @@ class NodesList(APIView):
for node in query.all() for node in query.all()
]}) ]})
class Nodes(APIView): class Nodes(APIView):
def get(self, request, node_id): def get(self, request, node_id):
...@@ -609,7 +597,6 @@ class Nodes(APIView): ...@@ -609,7 +597,6 @@ class Nodes(APIView):
except Exception as error: except Exception as error:
msgres ="error deleting : " + node_id + str(error) msgres ="error deleting : " + node_id + str(error)
class CorpusController: class CorpusController:
@classmethod @classmethod
...@@ -665,3 +652,39 @@ class CorpusController: ...@@ -665,3 +652,39 @@ class CorpusController:
) )
else: else:
raise ValidationError('Unrecognized "format=%s", should be "csv" or "json"' % (format, )) raise ValidationError('Unrecognized "format=%s", should be "csv" or "json"' % (format, ))
from ngram.lists import listIds, ngramList
class ListManagement(APIView):
    """Endpoint exposing a corpus' ngram lists (MiamList / StopList)."""
    # TODO: Be careful, this needs authentication!
    #authentication_classes = (SessionAuthentication, BasicAuthentication)

    def get(self, request, corpus_id):
        """Return the node ids of the MiamList and StopList of this corpus."""
        user_id = session.query(User.id).filter(User.username == str(request.user)).first()[0]
        lists = {}
        for list_type in ['MiamList', 'StopList']:
            # listIds returns rows of (id, name, ...); keep the first id.
            # (The original also assigned `list_id = list()` first — dead code.)
            ids = listIds(user_id=user_id, corpus_id=int(corpus_id), typeList=list_type)
            lists[list_type] = int(ids[0][0])
        return JsonHttpResponse({
            'MiamList' : lists['MiamList'],
            'StopList' : lists['StopList']
        })

    def post(self, request, corpus_id):
        """Add ngrams to a list."""
        list_id = request.POST.get('list_id')
        # NOTE(review): POST.get returns a raw string; ngramList iterates
        # over ngram_ids — confirm the client sends an actual id sequence.
        ngram_ids = request.POST.get('ngram_ids')
        ngramList(do='add', ngram_ids=ngram_ids, list_id=list_id)
        # The original returned None, which makes DRF raise a 500.
        return Response({'operation': 'add', 'list_id': list_id})

    def delete(self, request, corpus_id):
        """Remove ngrams from a list."""
        list_id = request.POST.get('list_id')
        ngram_ids = request.POST.get('ngram_ids')
        ngramList(do='del', ngram_ids=ngram_ids, list_id=list_id)
        return Response({'operation': 'del', 'list_id': list_id})
...@@ -64,6 +64,8 @@ urlpatterns = patterns('', ...@@ -64,6 +64,8 @@ urlpatterns = patterns('',
url(r'^api/nodes/(\d+)/children/duplicates$', gargantext_web.api.NodesChildrenDuplicates.as_view()), url(r'^api/nodes/(\d+)/children/duplicates$', gargantext_web.api.NodesChildrenDuplicates.as_view()),
# url(r'^api/nodes/(\d+)/children/duplicates/delete$', gargantext_web.api.NodesChildrenDuplicates.delete ), # url(r'^api/nodes/(\d+)/children/duplicates/delete$', gargantext_web.api.NodesChildrenDuplicates.delete ),
url(r'^api/corpus/(\d+)/lists$', gargantext_web.api.ListManagement.as_view()),
url(r'^api/nodes/(\d+)/ngrams$', gargantext_web.api.CorpusController.ngrams), url(r'^api/nodes/(\d+)/ngrams$', gargantext_web.api.CorpusController.ngrams),
url(r'^annotations/', include(annotations_urls)), url(r'^annotations/', include(annotations_urls)),
......
...@@ -556,7 +556,11 @@ def corpus_csv(request, project_id, corpus_id): ...@@ -556,7 +556,11 @@ def corpus_csv(request, project_id, corpus_id):
type_document_id = cache.NodeType['Document'].id type_document_id = cache.NodeType['Document'].id
documents = session.query(Node).filter(Node.parent_id==corpus_id, Node.type_id==type_document_id).all() documents = session.query(Node).filter(Node.parent_id==corpus_id, Node.type_id==type_document_id).all()
keys = list(documents[0].hyperdata.keys()) keys_list = list()
for d in documents[:10]:
keys_list += d.hyperdata.keys()
keys = list(set(keys_list))
writer.writerow(keys) writer.writerow(keys)
for doc in documents: for doc in documents:
......
#
from admin.env import *
from admin.utils import PrintException
#from gargantext_web.settings import
#
#
## Django models
##from node import models
#
## SQLA models
from gargantext_web.db import *
################################################################################
## If you need to reset all data
## use : ./manage.py flush
################################################################################
################################################################################
print('Initialize hyperdata...')
################################################################################
hyperdata = {
'publication_date': 'datetime',
'authors': 'string',
'language_fullname': 'string',
'abstract': 'text',
'title': 'string',
'source': 'string',
'volume': 'string',
'text': 'text',
'page': 'string',
'doi': 'string',
'journal': 'string',
}
# Ensure a Hyperdata row exists for every (name, type) pair declared above.
for name_, type_ in hyperdata.items():
    data = (session.query(Hyperdata)
            .filter(Hyperdata.name == str(name_),
                    Hyperdata.type == str(type_))
            .first())
    # Bug fix: the original tested `hyperdata is None` — the dict itself,
    # which is never None — so missing rows were never created. It also
    # referenced the undefined names `name` and `type_name` in the body.
    if data is None:
        print('Hyperdata "%s" does not exist, creating it' % name_)
        session.add(Hyperdata(name=name_, type=type_))
session.commit()
## Integration: languages
##
################################################################################
print('Initialize languages...')
################################################################################
import pycountry
##Language.objects.all().delete()
# Populate the Language table from pycountry; only English and French are
# flagged as implemented. Languages without an alpha2 code are skipped.
# (A stray `pass` at the top of the loop body was removed — dead code.)
for language in pycountry.languages:
    if 'alpha2' in language.__dict__:
        lang = Language(
            iso2 = language.alpha2,
            iso3 = language.bibliographic,
            fullname = language.name,
            # `x in [...]` already yields a bool; no ternary needed.
            implemented = language.alpha2 in ['en', 'fr'],
        )
        existing = session.query(Language).filter(Language.iso2 == lang.iso2).first()
        if existing is None:
            session.add(lang)
session.commit()
################################################################################
print('Initialize users...')
################################################################################
# Make sure the 'gargantua' system user exists; create it with a freshly
# generated random password when it does not.
gargantua = (session.query(User)
             .filter(User.username == 'gargantua')
             .first())
if gargantua is None:
    from node.models import User as U
    new_user = U()
    new_user.username = 'gargantua'
    # Read specific email address here:
    new_user.email = 'contact@gargantext.org'
    new_user.active_user = True
    password = U.objects.make_random_password()
    print('Gargantua, password: ', password)
    new_user.set_password(password)
    new_user.save()
    gargantua = new_user
################################################################################
print('Initialize node types...')
################################################################################
# Node types required by the application, created idempotently.
node_types = ['Root', 'Trash',
              'Project', 'Corpus', 'Document',
              'MiamList', 'StopList', 'MainList',
              'Stem', 'Lem', 'Group', 'Tfidf',
              'Cooccurrence', 'WhiteList', 'BlackList']
for type_name in node_types:
    already_there = (session.query(NodeType)
                     .filter(NodeType.name == type_name)
                     .first())
    if already_there is None:
        session.add(NodeType(name=type_name))
session.commit()
################################################################################
print('Initialize main nodes...')
################################################################################
# Create the three base nodes (Root, plus Stem/Lem under it), idempotently.
nodes = []
node_root = session.query(Node).filter(Node.name=='Root', Node.user_id==gargantua.id).first()
if node_root is None:
    node_root = Node(user_id=gargantua.id, type_id=cache.NodeType['Root'].id, name='Root')
    session.add(node_root)
    # Bug fix: flush so node_root.id is assigned before being used as
    # parent_id; previously the children were created with parent_id=None.
    session.flush()
nodes.append(node_root)
node_stem = Node(user_id=gargantua.id, type_id=cache.NodeType['Stem'].id, name='Stem', parent_id=node_root.id)
nodes.append(node_stem)
node_lem = Node(user_id=gargantua.id, type_id=cache.NodeType['Lem'].id, name='Lem', parent_id=node_root.id)
nodes.append(node_lem)
for node in (node_stem, node_lem):
    if session.query(Node).filter(Node.name==node.name, Node.user_id==node.user_id).first() is None:
        session.add(node)
session.commit()
################################################################################
print('Initialize resource...')
################################################################################
# Register one ResourceType per configured parser, skipping existing ones.
from parsing.parsers_config import parsers
for parser_name in parsers:
    found = (session.query(ResourceType)
             .filter(ResourceType.name == parser_name)
             .first())
    if found is None:
        session.add(ResourceType(name=parser_name))
session.commit()
################################################################################
#### Instantiate table NgramTag:
################################################################################
###f = open("part_of_speech_labels.txt", 'r')
###
###for line in f.readlines():
### name, description = line.strip().split('\t')
### _tag = Tag(name=name, description=description)
### session.add(_tag)
###session.commit()
###
###f.close()
##
##
#exit()
# to be executed like this:
# ./manage.py shell < init.py
#NodeType.objects.all().delete()
from node.models import *
import pycountry
# Best-effort bulk load of languages from pycountry; entries lacking an
# alpha2 code (or otherwise failing to save) are silently skipped.
for language in pycountry.languages:
    try:
        # NOTE(review): 1/0 rather than True/False — presumably an integer
        # column; confirm against the Language model definition.
        implemented = 1 if language.alpha2 in ['en', 'fr'] else 0
        Language(iso2=language.alpha2, iso3=language.terminology, fullname=language.name, implemented=implemented).save()
    except:
        pass

# Handy references to the two implemented languages.
english = Language.objects.get(iso2='en')
french = Language.objects.get(iso2='fr')

# Ensure the development user exists, creating it on first run.
try:
    me = User.objects.get(username='pksm3')
except:
    me = User(username='pksm3')
    me.save()
for node_type in ['Trash', 'Root', ]:
NodeType.objects.get_or_create(name=node_type)
def _get_or_create_nodetype(type_name):
    """Return the NodeType named `type_name`, creating and saving it if missing.

    Mirrors the original nine copy-pasted try/except blocks exactly: the
    lookup error is printed, then a fresh NodeType is created and saved.
    """
    try:
        return NodeType.objects.get(name=type_name)
    except Exception as error:
        print(error)
        node_type = NodeType(name=type_name)
        node_type.save()
        return node_type

typeProject = _get_or_create_nodetype('Project')
typeCorpus = _get_or_create_nodetype('Corpus')
typeDoc = _get_or_create_nodetype('Document')
typeStem = _get_or_create_nodetype('Stem')
typeTfidf = _get_or_create_nodetype('Tfidf')
# NOTE(review): the original rebound `typeDoc` for each of the following,
# leaving it pointing at 'Cooccurrence' afterwards; preserved as-is.
typeDoc = _get_or_create_nodetype('WhiteList')
typeDoc = _get_or_create_nodetype('BlackList')
typeDoc = _get_or_create_nodetype('Synonyme')
typeDoc = _get_or_create_nodetype('Cooccurrence')
# In[33]:
# Rebuild the ResourceType table from the configured parsers.
from parsing.parsers_config import parsers
ResourceType.objects.all().delete()
for key in parsers.keys():
    try:
        ResourceType.objects.get_or_create(name=key)
    except Exception as error:
        print("Ressource Error: ", error)

# In[34]:
#Node.objects.all().delete()
# Ensure the root 'Stem' node exists, owned by the development user.
try:
    stem = Node.objects.get(name='Stem')
except:
    stem = Node(name='Stem', type=typeStem, user=me)
    stem.save()
from gargantext_web.db import *
# Instantiate table NgramTag from the tab-separated POS label file.
# `with` guarantees the file handle is released even if a line fails to
# parse (the original leaked it on exception); iterating the file object
# also avoids loading the whole file into memory via readlines().
with open("part_of_speech_labels.txt", 'r') as f:
    for line in f:
        name, description = line.strip().split('\t')
        session.add(Tag(name=name, description=description))
session.commit()
#!/bin/bash
# Bootstrap the gargandb database:
#  1. run the raw SQL initialization,
#  2. let Django create its tables (syncdb),
#  3. apply the second SQL pass,
#  4. seed the data through the Django shell.
psql -d gargandb -f init.sql
sleep 2
../manage.py syncdb
psql -d gargandb -f init2.sql
sleep 2
#../manage.py shell < init.py
../manage.py shell < init_gargantext.py
#psql -d gargandb -f hstore2jsonb.sql
# Without this, we couldn't use the Django environment
import os
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "gargantext_web.settings")
os.environ.setdefault("DJANGO_HSTORE_GLOBAL_REGISTER", "False")
# We're gonna use all the models!
# Django models
from node import models
# SQLA models
from gargantext_web.db import *
# Reset: all data
#
#tables_to_empty = [
# Node,
# Node_Hyperdata,
# Hyperdata,
# NodeType,
# ResourceType,
# Resource,
#]
#for table in tables_to_empty:
# print('Empty table "%s"...' % (table._meta.db_table, ))
# table.objects.all().delete()
# Integration: hyperdata types
print('Initialize hyperdata...')

# Mapping of hyperdata field name -> storage type.
hyperdata = {
    'publication_date': 'datetime',
    'authors': 'string',
    'language_fullname': 'string',
    'abstract': 'text',
    'title': 'string',
    'source': 'string',
    'volume': 'string',
    'text': 'text',
    'page': 'string',
    'doi': 'string',
    'journal': 'string',
}
# Create one Hyperdata row per declared (name, type) pair.
for hyperdata_name, hyperdata_type in hyperdata.items():
    models.Hyperdata(name=hyperdata_name, type=hyperdata_type).save()
# Integration: languages
print('Initialize languages...')
import pycountry

# Reload the Language table from pycountry; only languages carrying an
# alpha2 code are kept, and only 'en'/'fr' are flagged as implemented.
Language.objects.all().delete()
for language in pycountry.languages:
    if 'alpha2' not in language.__dict__:
        continue
    record = models.Language(
        iso2=language.alpha2,
        iso3=language.bibliographic,
        fullname=language.name,
        implemented=int(language.alpha2 in ['en', 'fr']),
    )
    record.save()
# Integration: users
# (A stray `<<<<<<< Updated upstream` merge-conflict marker was left here
#  by a botched merge — removed, since it is a syntax error in Python.)
print('Initialize users...')
# Bug fix: get_or_create returns an (object, created) tuple; the original
# bound the tuple itself to `me`.
me, me_created = models.User.objects.get_or_create(username='alexandre')
gargantua, created = models.User.objects.get_or_create(username='gargantua')
node_root = Node(user_id=gargantua.id, type_id=cache.NodeType['Root'].id, name='Root')
session.add(node_root)
# Bug fix: flush so node_root.id is populated before being used as
# parent_id below; previously the children got parent_id=None.
session.flush()
node_stem = Node(user_id=gargantua.id, type_id=cache.NodeType['Stem'].id, name='Stem', parent_id=node_root.id)
node_lem = Node(user_id=gargantua.id, type_id=cache.NodeType['Lem'].id, name='Lem', parent_id=node_root.id)
session.add(node_stem)
session.add(node_lem)
session.commit()
# Integration: node types
print('Initialize node types...')
# Every node type the application relies on; get_or_create keeps this
# idempotent across runs.
node_types = ['Root', 'Trash',
              'Project', 'Corpus', 'Document',
              'MiamList', 'StopList', 'MainList',
              'Stem', 'Lem', 'Group', 'Tfidf',
              'Cooccurrence', 'WhiteList', 'BlackList']
for type_name in node_types:
    models.NodeType.objects.get_or_create(name=type_name)
# Integration: resource types
print('Initialize users...')
me = session.query(User).filter(User.username=='alexandre').first()
gargantua = session.query(User).filter(User.username=='gargantua').first()
# Bug fix: the same Root/Stem/Lem nodes are created unconditionally a few
# lines above, so this block inserted duplicate rows (and, like the one
# above, used node_root.id before it was ever assigned). Only insert the
# nodes that are still missing, and flush so the root id is available.
node_root = session.query(Node).filter(Node.name=='Root', Node.user_id==gargantua.id).first()
if node_root is None:
    node_root = Node(user_id=gargantua.id, type_id=cache.NodeType['Root'].id, name='Root')
    session.add(node_root)
    session.flush()  # populate node_root.id before using it as parent_id
for child_name in ('Stem', 'Lem'):
    child = session.query(Node).filter(Node.name==child_name, Node.user_id==gargantua.id).first()
    if child is None:
        session.add(Node(user_id=gargantua.id,
                         type_id=cache.NodeType[child_name].id,
                         name=child_name,
                         parent_id=node_root.id))
session.commit()
print('Initialize resource...')
from parsing.parsers_config import parsers
# One ResourceType per configured parser; a dict iterates its keys.
for parser_name in parsers:
    models.ResourceType.objects.get_or_create(name=parser_name)
# Instantiate table NgramTag from the tab-separated POS label file.
# `with` guarantees the file handle is released even if a line fails to
# parse (the original leaked it on exception); iterating the file object
# also avoids loading the whole file into memory via readlines().
with open("part_of_speech_labels.txt", 'r') as f:
    for line in f:
        name, description = line.strip().split('\t')
        session.add(Tag(name=name, description=description))
session.commit()
exit()
...@@ -136,16 +136,22 @@ def ngramList(do=None, ngram_ids=[], list_id=None) : ...@@ -136,16 +136,22 @@ def ngramList(do=None, ngram_ids=[], list_id=None) :
) )
for ngram_id in ngram_ids: for ngram_id in ngram_ids:
# First we test to know if ngram exist in database already
#ngram = (session.query(Ngram).filter(Ngram.id == ngram_id).first()
# Need to be optimized with list of ids # Need to be optimized with list of ids
ngram = (session.query(NodeNgram) node_ngram = (session.query(NodeNgram)
.filter(NodeNgram.ngram_id == ngram_id) .filter(NodeNgram.ngram_id == ngram_id)
.filter(NodeNgram.node_id == list_id) .filter(NodeNgram.node_id == list_id)
.first() .first()
) )
if do == 'add': if node_ngram is None :
session.add(ngram) node_ngram = NodeNgram(node_id = list_id,
elif do == 'del': ngram_id=ngram_id,
session.delete(ngram) weight=1)
if do == 'add' :
session.add(node_ngram)
elif do == 'del' :
session.delete(node_ngram)
session.commit() session.commit()
return(True) return(True)
......
...@@ -9,7 +9,7 @@ parsers = { ...@@ -9,7 +9,7 @@ parsers = {
#'Europress' : EuropressFileParser, #'Europress' : EuropressFileParser,
'Europress (French)' : EuropressFileParser, 'Europress (French)' : EuropressFileParser,
'Europress (English)' : EuropressFileParser, 'Europress (English)' : EuropressFileParser,
'CSVParser' : CSVParser, 'CSVParser' : CSVParser,
'ISTex' : ISTex, 'ISTex' : ISTex,
} }
...@@ -36,9 +36,6 @@ if project is None: ...@@ -36,9 +36,6 @@ if project is None:
corpus = session.query(Node).filter(Node.parent_id == project.id, corpus = session.query(Node).filter(Node.parent_id == project.id,
Node.type_id == cache.NodeType['Corpus'].id).first() Node.type_id == cache.NodeType['Corpus'].id).first()
doc_id = session.query(Node.id).filter(Node.parent_id == corpus.id,
Node.type_id == cache.NodeType['Document'].id).all()[1]
if corpus is None: if corpus is None:
corpus = Node( corpus = Node(
parent_id = project.id, parent_id = project.id,
...@@ -59,6 +56,9 @@ if corpus is None: ...@@ -59,6 +56,9 @@ if corpus is None:
extract_ngrams(corpus, ('title', 'abstract')) extract_ngrams(corpus, ('title', 'abstract'))
compute_tfidf(corpus) compute_tfidf(corpus)
doc_id = session.query(Node.id).filter(Node.parent_id == corpus.id,
Node.type_id == cache.NodeType['Document'].id).all()[1]
print('Miam list', listIds(typeList='MiamList', corpus_id=corpus.id, user_id=user.id)[0][0]) print('Miam list', listIds(typeList='MiamList', corpus_id=corpus.id, user_id=user.id)[0][0])
# Stemming the corpus # Stemming the corpus
...@@ -66,17 +66,40 @@ print('Working on corpus:', corpus.id, corpus.name) ...@@ -66,17 +66,40 @@ print('Working on corpus:', corpus.id, corpus.name)
stem_id = stem_corpus(corpus_id=corpus.id) stem_id = stem_corpus(corpus_id=corpus.id)
print('Stem Node.id is', stem_id) print('Stem Node.id is', stem_id)
for typeList in ['MiamList', 'StopList', 'MainList', 'GroupList']: for typeList in ['MiamList', 'StopList', 'MainList', 'Group']:
n = listIds(user_id=user.id, n = listIds(user_id=user.id,
corpus_id=corpus.id, corpus_id=corpus.id,
typeList=typeList) typeList=typeList)
#print(n[0][0]) #print(n[0][0])
print('Test having list_id') print('Test having list_id')
print(n, listNgramIds(list_id=n[0][0])[:3]) print(n, listNgramIds(list_id=n[0][0])[:3])
stop_list_id = listIds(user_id=user.id,
corpus_id=corpus.id,
typeList='StopList')[0][0]
miam_list_id = listIds(user_id=user.id,
corpus_id=corpus.id,
typeList='MiamList')[0][0]
print('Stop List', stop_list_id)
print('Miam List', miam_list_id)
ngram_id = listNgramIds(list_id=miam_list_id)[0][0]
print('ngram_id', ngram_id)
ngramList(do='add', ngram_ids=[ngram_id,], list_id=stop_list_id)
# #
print('Test having typeList and corpus.id') # print('Test having typeList and corpus.id')
print(n, listNgramIds(typeList=typeList, corpus_id=corpus.id, user_id=user.id)[:3]) # print(n, listNgramIds(typeList=typeList, corpus_id=corpus.id, user_id=user.id)[:3])
# ##
# print('Test having typeList and corpus.id and doc_id') # print('Test having typeList and corpus.id and doc_id')
# print(n, listNgramIds(typeList=typeList, corpus_id=corpus.id, doc_id=doc_id, user_id=user.id)[:3]) # print(n, listNgramIds(typeList=typeList, corpus_id=corpus.id, doc_id=doc_id, user_id=user.id)[:3])
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment