Commit 08dc8379 authored by PkSM3's avatar PkSM3

Merge branch 'unstable' of ssh://delanoe.org:1979/gargantext into samuel

parents 7cd20adb 5fc5153d
......@@ -237,5 +237,21 @@ class Ngram(APIView):
doc_id = request.GET.get('docId')
annotationDict = json.loads(request.POST.get("annotation"))
print(annotationDict)
# There are 2 main actions:
# 1) add an ngram to the miamList: this step is tricky if the ngram does
# not exist yet; that case is still experimental.
# But according to your function, you already have the ngram_id
# The function is:
ngramList(do='add', ngram_ids=[ngram_id,], list_id=list_id)
#ngramList(do='add', ngram_ids=[ngram_id,], list_id=list_id)
# Note: depending on the list, the function may need to be adapted to
# remove the ngram from one list when it is added to a specific type of list
# 2) get the ngrams of one miamList: for this step, see above;
# use the ngramList function in ngram/lists.py for that
# TODO DB query
return Response(annotationDict)
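For reference, a rough sketch of how the two actions described in the comments above could be wired together once the TODO is resolved, reusing ngramList and listNgramIds from ngram.lists; the 'ngram_id' and 'list_id' keys of the posted annotation are assumptions, not something this commit defines:

from ngram.lists import ngramList, listNgramIds

def handle_annotation(annotationDict):
    # 1) add the annotated ngram to the target list (e.g. the MiamList)
    ngramList(do='add',
              ngram_ids=[annotationDict['ngram_id']],
              list_id=annotationDict['list_id'])
    # 2) return the ngrams currently in that list
    return listNgramIds(list_id=annotationDict['list_id'])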
......@@ -17,11 +17,35 @@ def get_team():
'''
team = [
{ 'first_name' : 'Alexandre', 'last_name' : 'Delanoë', 'mail' : 'alexandre+gargantextATdelanoe.org', 'website' : 'http://alexandre.delanoe.org', 'picture' : 'alexandre.jpg', 'role' : 'project manager, scientific board, developer'},
{ 'first_name' : 'David', 'last_name' : 'Chavalarias', 'mail' : 'david.chavalariasATiscpif.fr', 'website' : 'http://chavalarias.com', 'picture' : 'david.jpg', 'role':'scientific board'},
{ 'first_name' : 'Mathieu', 'last_name' : 'Rodic', 'mail' : '', 'website' : 'http://rodic.fr', 'picture' : 'mathieu.jpg', 'role' : 'developer'},
{ 'first_name' : 'Samuel', 'last_name' : 'Castillo J.', 'mail' : 'kaisleanATgmail.com', 'website' : 'http://www.pksm3.droppages.com', 'picture' : 'samuel.jpg', 'role' : 'developer'},
{ 'first_name' : 'Elias', 'last_name' : 'Showk', 'mail' : '', 'website' : 'https://github.com/elishowk', 'picture' : '', 'role' : 'developer'},
{ 'first_name' : 'Alexandre', 'last_name' : 'Delanoë',
'mail' : 'alexandre+gargantextATdelanoe.org',
'website' : 'http://alexandre.delanoe.org',
'picture' : 'alexandre.jpg',
'role' : 'project investigator, developer'},
{ 'first_name' : 'David', 'last_name' : 'Chavalarias',
'mail' : 'david.chavalariasATiscpif.fr',
'website' : 'http://chavalarias.com',
'picture' : 'david.jpg',
'role':'project investigator'},
{ 'first_name' : 'Mathieu', 'last_name' : 'Rodic',
'mail' : '',
'website' : 'http://rodic.fr',
'picture' : 'mathieu.jpg',
'role' : 'developer'},
{ 'first_name' : 'Samuel', 'last_name' : 'Castillo J.',
'mail' : 'kaisleanATgmail.com',
'website' : 'http://www.pksm3.droppages.com',
'picture' : 'samuel.jpg',
'role' : 'developer'},
{ 'first_name' : 'Elias', 'last_name' : 'Showk',
'mail' : '',
'website' : 'https://github.com/elishowk',
'picture' : '', 'role' : 'developer'},
#{ 'first_name' : '', 'last_name' : '', 'mail' : '', 'website' : '', 'picture' : '', 'role' : ''},
# copy-paste the line above and fill in your information, please
]
......
......@@ -11,7 +11,6 @@ from gargantext_web.views import move_to_trash
from gargantext_web.db import *
from node import models
def DebugHttpResponse(data):
return HttpResponse('<html><body style="background:#000;color:#FFF"><pre>%s</pre></body></html>' % (str(data), ))
......@@ -44,20 +43,17 @@ _ngrams_order_columns = {
}
from rest_framework.authentication import SessionAuthentication, BasicAuthentication
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework.exceptions import APIException as _APIException
class APIException(_APIException):
def __init__(self, message, code=500):
self.status_code = code
self.detail = message
_operators = {
"=": lambda field, value: (field == value),
"!=": lambda field, value: (field != value),
......@@ -71,6 +67,7 @@ _operators = {
}
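These operator lambdas are presumably used to turn (column, operator, value) triples from a query description into SQLAlchemy filter clauses; a hedged usage sketch follows (the column and value are illustrative only, not part of this commit):

# hypothetical usage inside a query-building helper
predicate = _operators['!='](Node.type_id, cache.NodeType['Trash'].id)
trashless = session.query(Node).filter(predicate)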
from rest_framework.decorators import api_view
@api_view(('GET',))
def Root(request, format=None):
return Response({
......@@ -78,7 +75,6 @@ def Root(request, format=None):
'snippets': reverse('snippet-list', request=request, format=format)
})
class NodesChildrenNgrams(APIView):
def get(self, request, node_id):
......@@ -121,7 +117,6 @@ class NodesChildrenNgrams(APIView):
],
})
class NodesChildrenDuplicates(APIView):
def _fetch_duplicates(self, request, node_id, extra_columns=None, min_count=1):
......@@ -210,8 +205,6 @@ class NodesChildrenDuplicates(APIView):
'deleted': count
})
class NodesChildrenMetatadata(APIView):
def get(self, request, node_id):
......@@ -271,8 +264,6 @@ class NodesChildrenMetatadata(APIView):
'data': collection,
})
class NodesChildrenQueries(APIView):
def _parse_filter(self, filter):
......@@ -551,8 +542,6 @@ class NodesChildrenQueries(APIView):
"results": results,
}, 201)
class NodesList(APIView):
authentication_classes = (SessionAuthentication, BasicAuthentication)
......@@ -573,7 +562,6 @@ class NodesList(APIView):
for node in query.all()
]})
class Nodes(APIView):
def get(self, request, node_id):
......@@ -609,7 +597,6 @@ class Nodes(APIView):
except Exception as error:
msgres = "error deleting: " + node_id + str(error)
class CorpusController:
@classmethod
......@@ -665,3 +652,39 @@ class CorpusController:
)
else:
raise ValidationError('Unrecognized "format=%s", should be "csv" or "json"' % (format, ))
from ngram.lists import listIds, ngramList
class ListManagement(APIView):
#authentication_classes = (SessionAuthentication, BasicAuthentication)
# TODO: be careful, this view needs authentication!
def get(self, request, corpus_id):
user_id = session.query(User.id).filter(User.username==str(request.user)).first()[0]
lists = dict()
for list_type in ['MiamList', 'StopList']:
list_id = listIds(user_id=user_id, corpus_id=int(corpus_id), typeList=list_type)
lists[list_type] = int(list_id[0][0])
# lists[list_type]['id']['name'] = r[0][1]
return JsonHttpResponse({
'MiamList' : lists['MiamList'],
'StopList' : lists['StopList']
})
def post(self, request, corpus_id):
list_id = request.POST.get('list_id')
ngram_ids = request.POST.get('ngram_ids')
ngramList(do='add', ngram_ids=ngram_ids, list_id=list_id)
def delete(self, request, corpus_id):
list_id = request.POST.get('list_id')
ngram_ids = request.POST.get('ngram_ids')
ngramList(do='del', ngram_ids=ngram_ids, list_id=list_id)
......@@ -64,6 +64,8 @@ urlpatterns = patterns('',
url(r'^api/nodes/(\d+)/children/duplicates$', gargantext_web.api.NodesChildrenDuplicates.as_view()),
# url(r'^api/nodes/(\d+)/children/duplicates/delete$', gargantext_web.api.NodesChildrenDuplicates.delete ),
url(r'^api/corpus/(\d+)/lists$', gargantext_web.api.ListManagement.as_view()),
url(r'^api/nodes/(\d+)/ngrams$', gargantext_web.api.CorpusController.ngrams),
url(r'^annotations/', include(annotations_urls)),
......
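With the new /api/corpus/(id)/lists route above, a client can exercise ListManagement roughly as follows (a sketch using the requests library; the host, the session cookie and the numeric ids are placeholders, not part of this commit):

import requests

corpus_id = 1                          # placeholder corpus id
ngram_id = 42                          # placeholder ngram id
cookies = {'sessionid': 'XXXX'}        # an authenticated Django session
base = 'http://localhost:8000/api/corpus/%d/lists' % corpus_id

# read the MiamList / StopList node ids of this corpus
lists = requests.get(base, cookies=cookies).json()

# add an ngram to the MiamList
requests.post(base, cookies=cookies,
              data={'list_id': lists['MiamList'], 'ngram_ids': ngram_id})

# remove it again
requests.delete(base, cookies=cookies,
                data={'list_id': lists['MiamList'], 'ngram_ids': ngram_id})

Note that Django only populates request.POST for POST requests, so the DELETE handler may need to parse request.body itself for this last call to take effect.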
......@@ -556,7 +556,11 @@ def corpus_csv(request, project_id, corpus_id):
type_document_id = cache.NodeType['Document'].id
documents = session.query(Node).filter(Node.parent_id==corpus_id, Node.type_id==type_document_id).all()
keys = list(documents[0].hyperdata.keys())
keys_list = list()
for d in documents[:10]:
keys_list += d.hyperdata.keys()
keys = list(set(keys_list))
writer.writerow(keys)
for doc in documents:
......
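Since documents can carry different hyperdata keys, the row-writing loop that follows (cut off in this hunk) presumably has to fall back to an empty value for missing keys; a minimal sketch of that idea:

for doc in documents:
    writer.writerow([doc.hyperdata.get(key, '') for key in keys])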
#
from admin.env import *
from admin.utils import PrintException
#from gargantext_web.settings import
#
#
## Django models
##from node import models
#
## SQLA models
from gargantext_web.db import *
################################################################################
## If you need to reset all data
## use : ./manage.py flush
################################################################################
################################################################################
print('Initialize hyperdata...')
################################################################################
hyperdata = {
'publication_date': 'datetime',
'authors': 'string',
'language_fullname': 'string',
'abstract': 'text',
'title': 'string',
'source': 'string',
'volume': 'string',
'text': 'text',
'page': 'string',
'doi': 'string',
'journal': 'string',
}
for name_, type_ in hyperdata.items():
data = (session.query(Hyperdata).filter(
Hyperdata.name == str(name_),
Hyperdata.type == str(type_)
).first()
)
if data is None:
print('Hyperdata ' + name_ + ' does not exist, creating it')
data = Hyperdata(name=name_, type=type_)
session.add(data)
session.commit()
## Integration: languages
##
################################################################################
print('Initialize languages...')
################################################################################
import pycountry
##Language.objects.all().delete()
for language in pycountry.languages:
if 'alpha2' in language.__dict__:
lang = Language(
iso2 = language.alpha2,
iso3 = language.bibliographic,
fullname = language.name,
implemented = True if language.alpha2 in ['en', 'fr'] else False
)
l = session.query(Language).filter(Language.iso2 == lang.iso2).first()
if l is None:
session.add(lang)
session.commit()
################################################################################
print('Initialize users...')
################################################################################
gargantua = session.query(User).filter(User.username=='gargantua').first()
if gargantua is None:
from node.models import User as U
gargantua = U()
gargantua.username = 'gargantua'
# Read specific email address here:
gargantua.email = 'contact@gargantext.org'
gargantua.active_user = True
password = U.objects.make_random_password()
print('Gargantua, password: ', password)
gargantua.set_password(password)
gargantua.save()
################################################################################
print('Initialize node types...')
################################################################################
node_types = [
'Root', 'Trash',
'Project', 'Corpus', 'Document',
'MiamList', 'StopList', 'MainList',
'Stem', 'Lem', 'Group', 'Tfidf',
'Cooccurrence', 'WhiteList', 'BlackList'
]
for node_type in node_types:
nt = NodeType(name=node_type)
if session.query(NodeType).filter(NodeType.name==nt.name).first() is None:
session.add(nt)
session.commit()
################################################################################
print('Initialize main nodes...')
################################################################################
nodes = []
node_root = Node(user_id=gargantua.id, type_id=cache.NodeType['Root'].id, name='Root')
nodes.append(node_root)
node_stem = Node(user_id=gargantua.id, type_id=cache.NodeType['Stem'].id, name='Stem', parent_id=node_root.id)
nodes.append(node_stem)
node_lem = Node(user_id=gargantua.id, type_id=cache.NodeType['Lem'].id, name='Lem', parent_id=node_root.id)
nodes.append(node_lem)
for node in nodes:
if session.query(Node).filter(Node.name==node.name, Node.user_id==node.user_id).first() is None:
session.add(node)
session.commit()
################################################################################
print('Initialize resource...')
################################################################################
from parsing.parsers_config import parsers
for parser in parsers.keys():
resource = ResourceType(name=parser)
if session.query(ResourceType).filter(ResourceType.name==resource.name).first() is None:
session.add(resource)
session.commit()
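The hyperdata, node-type, node and resource initialisations above all repeat the same query-then-add pattern; a small helper along these lines (hypothetical, not part of this script) could factor it out:

def get_or_create(session, model, **filters):
    # return the existing row matching the filters, or add and commit a new one
    instance = session.query(model).filter_by(**filters).first()
    if instance is None:
        instance = model(**filters)
        session.add(instance)
        session.commit()
    return instance

# e.g. get_or_create(session, NodeType, name='Corpus')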
################################################################################
#### Instantiate table NgramTag:
################################################################################
###f = open("part_of_speech_labels.txt", 'r')
###
###for line in f.readlines():
### name, description = line.strip().split('\t')
### _tag = Tag(name=name, description=description)
### session.add(_tag)
###session.commit()
###
###f.close()
##
##
#exit()
# to be executed like this:
# ./manage.py shell < init.py
#NodeType.objects.all().delete()
from node.models import *
import pycountry
for language in pycountry.languages:
try:
implemented = 1 if language.alpha2 in ['en', 'fr'] else 0
Language(iso2=language.alpha2, iso3=language.terminology, fullname=language.name, implemented=implemented).save()
except:
pass
english = Language.objects.get(iso2='en')
french = Language.objects.get(iso2='fr')
try:
me = User.objects.get(username='pksm3')
except:
me = User(username='pksm3')
me.save()
for node_type in ['Trash', 'Root', ]:
NodeType.objects.get_or_create(name=node_type)
try:
typeProject = NodeType.objects.get(name='Project')
except Exception as error:
print(error)
typeProject = NodeType(name='Project')
typeProject.save()
try:
typeCorpus = NodeType.objects.get(name='Corpus')
except Exception as error:
print(error)
typeCorpus = NodeType(name='Corpus')
typeCorpus.save()
try:
typeDoc = NodeType.objects.get(name='Document')
except Exception as error:
print(error)
typeDoc = NodeType(name='Document')
typeDoc.save()
try:
typeStem = NodeType.objects.get(name='Stem')
except Exception as error:
print(error)
typeStem = NodeType(name='Stem')
typeStem.save()
try:
typeTfidf = NodeType.objects.get(name='Tfidf')
except Exception as error:
print(error)
typeTfidf = NodeType(name='Tfidf')
typeTfidf.save()
try:
typeDoc = NodeType.objects.get(name='WhiteList')
except Exception as error:
print(error)
typeDoc = NodeType(name='WhiteList')
typeDoc.save()
try:
typeDoc = NodeType.objects.get(name='BlackList')
except Exception as error:
print(error)
typeDoc = NodeType(name='BlackList')
typeDoc.save()
try:
typeDoc = NodeType.objects.get(name='Synonyme')
except Exception as error:
print(error)
typeDoc = NodeType(name='Synonyme')
typeDoc.save()
try:
typeDoc = NodeType.objects.get(name='Cooccurrence')
except Exception as error:
print(error)
typeDoc = NodeType(name='Cooccurrence')
typeDoc.save()
# In[33]:
from parsing.parsers_config import parsers
ResourceType.objects.all().delete()
for key in parsers.keys():
try:
ResourceType.objects.get_or_create(name=key)
except Exception as error:
print("Ressource Error: ", error)
# In[34]:
#Node.objects.all().delete()
try:
stem = Node.objects.get(name='Stem')
except:
stem = Node(name='Stem', type=typeStem, user=me)
stem.save()
from gargantext_web.db import *
# Instantiate table NgramTag:
f = open("part_of_speech_labels.txt", 'r')
for line in f.readlines():
name, description = line.strip().split('\t')
_tag = Tag(name=name, description=description)
session.add(_tag)
session.commit()
f.close()
#!/bin/bash
psql -d gargandb -f init.sql
sleep 2
../manage.py syncdb
psql -d gargandb -f init2.sql
sleep 2
#../manage.py shell < init.py
../manage.py shell < init_gargantext.py
#psql -d gargandb -f hstore2jsonb.sql
# Without this, we couldn't use the Django environment
import os
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "gargantext_web.settings")
os.environ.setdefault("DJANGO_HSTORE_GLOBAL_REGISTER", "False")
# We're gonna use all the models!
# Django models
from node import models
# SQLA models
from gargantext_web.db import *
# Reset: all data
#
#tables_to_empty = [
# Node,
# Node_Hyperdata,
# Hyperdata,
# NodeType,
# ResourceType,
# Resource,
#]
#for table in tables_to_empty:
# print('Empty table "%s"...' % (table._meta.db_table, ))
# table.objects.all().delete()
# Integration: hyperdata types
print('Initialize hyperdata...')
hyperdata = {
'publication_date': 'datetime',
'authors': 'string',
'language_fullname': 'string',
'abstract': 'text',
'title': 'string',
'source': 'string',
'volume': 'string',
'text': 'text',
'page': 'string',
'doi': 'string',
'journal': 'string',
}
for name, type_name in hyperdata.items():
models.Hyperdata(name=name, type=type_name).save()
# Integration: languages
print('Initialize languages...')
import pycountry
Language.objects.all().delete()
for language in pycountry.languages:
if 'alpha2' in language.__dict__:
models.Language(
iso2 = language.alpha2,
iso3 = language.bibliographic,
fullname = language.name,
implemented = 1 if language.alpha2 in ['en', 'fr'] else 0,
).save()
# Integration: users
print('Initialize users...')
me = models.User.objects.get_or_create(username='alexandre')
gargantua, created = models.User.objects.get_or_create(username='gargantua')
node_root = Node(user_id=gargantua.id, type_id=cache.NodeType['Root'].id, name='Root')
node_stem = Node(user_id=gargantua.id, type_id=cache.NodeType['Stem'].id, name='Stem', parent_id=node_root.id)
node_lem = Node(user_id=gargantua.id, type_id=cache.NodeType['Lem'].id, name='Lem', parent_id=node_root.id)
session.add(node_root)
session.add(node_stem)
session.add(node_lem)
session.commit()
# Integration: node types
print('Initialize node types...')
node_types = [
'Root', 'Trash',
'Project', 'Corpus', 'Document',
'MiamList', 'StopList', 'MainList',
'Stem', 'Lem', 'Group', 'Tfidf',
'Cooccurrence', 'WhiteList', 'BlackList'
]
for node_type in node_types:
models.NodeType.objects.get_or_create(name=node_type)
# Integration: resource types
print('Initialize users...')
me = session.query(User).filter(User.username=='alexandre').first()
gargantua = session.query(User).filter(User.username=='gargantua').first()
node_root = Node(user_id=gargantua.id, type_id=cache.NodeType['Root'].id, name='Root')
node_stem = Node(user_id=gargantua.id, type_id=cache.NodeType['Stem'].id, name='Stem', parent_id=node_root.id)
node_lem = Node(user_id=gargantua.id, type_id=cache.NodeType['Lem'].id, name='Lem', parent_id=node_root.id)
session.add(node_root)
session.add(node_stem)
session.add(node_lem)
session.commit()
print('Initialize resource...')
from parsing.parsers_config import parsers
for parser in parsers.keys():
models.ResourceType.objects.get_or_create(name=parser)
# Instantiate table NgramTag:
f = open("part_of_speech_labels.txt", 'r')
for line in f.readlines():
name, description = line.strip().split('\t')
_tag = Tag(name=name, description=description)
session.add(_tag)
session.commit()
f.close()
exit()
......@@ -136,16 +136,22 @@ def ngramList(do=None, ngram_ids=[], list_id=None) :
)
for ngram_id in ngram_ids:
# First we check whether this ngram is already attached to the list
#ngram = (session.query(Ngram).filter(Ngram.id == ngram_id).first()
# Needs to be optimized with a list of ids (one query instead of one per ngram)
ngram = (session.query(NodeNgram)
node_ngram = (session.query(NodeNgram)
.filter(NodeNgram.ngram_id == ngram_id)
.filter(NodeNgram.node_id == list_id)
.first()
)
if do == 'add':
session.add(ngram)
elif do == 'del':
session.delete(ngram)
if node_ngram is None and do == 'add':
node_ngram = NodeNgram(node_id=list_id,
ngram_id=ngram_id,
weight=1)
if do == 'add':
session.add(node_ngram)
elif do == 'del' and node_ngram is not None:
session.delete(node_ngram)
session.commit()
return(True)
......
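A quick illustration of the corrected behaviour (the ids are placeholders): adding an ngram that is not yet in the list now creates the NodeNgram row, and deleting only touches rows that actually exist:

ngramList(do='add', ngram_ids=[42], list_id=5)   # creates the NodeNgram row if missing
ngramList(do='del', ngram_ids=[42], list_id=5)   # removes it from list 5 again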
......@@ -36,9 +36,6 @@ if project is None:
corpus = session.query(Node).filter(Node.parent_id == project.id,
Node.type_id == cache.NodeType['Corpus'].id).first()
doc_id = session.query(Node.id).filter(Node.parent_id == corpus.id,
Node.type_id == cache.NodeType['Document'].id).all()[1]
if corpus is None:
corpus = Node(
parent_id = project.id,
......@@ -59,6 +56,9 @@ if corpus is None:
extract_ngrams(corpus, ('title', 'abstract'))
compute_tfidf(corpus)
doc_id = session.query(Node.id).filter(Node.parent_id == corpus.id,
Node.type_id == cache.NodeType['Document'].id).all()[1]
print('Miam list', listIds(typeList='MiamList', corpus_id=corpus.id, user_id=user.id)[0][0])
# Stemming the corpus
......@@ -66,17 +66,40 @@ print('Working on corpus:', corpus.id, corpus.name)
stem_id = stem_corpus(corpus_id=corpus.id)
print('Stem Node.id is', stem_id)
for typeList in ['MiamList', 'StopList', 'MainList', 'GroupList']:
for typeList in ['MiamList', 'StopList', 'MainList', 'Group']:
n = listIds(user_id=user.id,
corpus_id=corpus.id,
typeList=typeList)
#print(n[0][0])
print('Test having list_id')
print(n, listNgramIds(list_id=n[0][0])[:3])
stop_list_id = listIds(user_id=user.id,
corpus_id=corpus.id,
typeList='StopList')[0][0]
miam_list_id = listIds(user_id=user.id,
corpus_id=corpus.id,
typeList='MiamList')[0][0]
print('Stop List', stop_list_id)
print('Miam List', miam_list_id)
ngram_id = listNgramIds(list_id=miam_list_id)[0][0]
print('ngram_id', ngram_id)
ngramList(do='add', ngram_ids=[ngram_id,], list_id=stop_list_id)
#
print('Test having typeList and corpus.id')
print(n, listNgramIds(typeList=typeList, corpus_id=corpus.id, user_id=user.id)[:3])
#
# print('Test having typeList and corpus.id')
# print(n, listNgramIds(typeList=typeList, corpus_id=corpus.id, user_id=user.id)[:3])
##
# print('Test having typeList and corpus.id and doc_id')
# print(n, listNgramIds(typeList=typeList, corpus_id=corpus.id, doc_id=doc_id, user_id=user.id)[:3])
......