Commit d61d3cea authored by Administrator's avatar Administrator

[FEAT] Init of Gargantext cleaned

	nouveau fichier : init.py
	Run this file inside the project environment to initialise a fresh
	installation or to pick up the latest NodeTypes. Please run it after
	big merges.

	supprimé :        init/init.py
	File useless now.

	supprimé :        init/init.sh
	File useless now.

	supprimé :        init/init_gargantext.py
	File useless now.

	modifié :         test-list-management.py
	Test file that will evolve into one big test of the whole
	workflow.
parent 0348908c
#
from admin.env import *
from admin.utils import PrintException
#from gargantext_web.settings import
#
#
## Django models
##from node import models
#
## SQLA models
from gargantext_web.db import *
################################################################################
## If you need to reset all data
## use : ./manage.py flush
################################################################################
################################################################################
print('Initialize hyperdata...')
################################################################################
# Hyperdata fields to register, mapped to the type name stored with them.
hyperdata = {
    'publication_date': 'datetime',
    'authors': 'string',
    'language_fullname': 'string',
    'abstract': 'text',
    'title': 'string',
    'source': 'string',
    'volume': 'string',
    'text': 'text',
    'page': 'string',
    'doi': 'string',
    'journal': 'string',
}

# Insert only the (name, type) pairs that are not stored yet, so that the
# script can be run again without duplicating rows.
for name_, type_ in hyperdata.items():
    existing = (
        session.query(Hyperdata)
        .filter(Hyperdata.name == str(name_), Hyperdata.type == str(type_))
        .first()
    )
    # Test the query result, not the `hyperdata` dict (which is never None).
    if existing is None:
        print('Hyper Data ' + name_ + ' does not exist, creating it')
        # Do not rebind `hyperdata` here: it is the dict being iterated.
        session.add(Hyperdata(name=name_, type=type_))

# One commit for all the rows added above.
session.commit()
## Integration: languages
##
################################################################################
print('Initialize languages...')
################################################################################
import pycountry
##Language.objects.all().delete()
# Register every pycountry language that carries a two-letter code and is
# not already stored under that code.
for language in pycountry.languages:
    if 'alpha2' not in language.__dict__:
        continue
    candidate = Language(
        iso2=language.alpha2,
        iso3=language.bibliographic,
        fullname=language.name,
        # Only English and French are flagged as implemented.
        implemented=language.alpha2 in ['en', 'fr'],
    )
    stored = session.query(Language).filter(Language.iso2 == candidate.iso2).first()
    if stored is None:
        session.add(candidate)

session.commit()
################################################################################
print('Initialize users...')
################################################################################
# Look the 'gargantua' account up through SQLAlchemy; create it through the
# Django model (which provides password hashing) only when it is missing.
gargantua = session.query(User).filter(User.username == 'gargantua').first()
if gargantua is None:
    from node.models import User as U
    password = U.objects.make_random_password()
    gargantua = U()
    gargantua.username = 'gargantua'
    # Put the installation-specific e-mail address here:
    gargantua.email = 'contact@gargantext.org'
    # NOTE(review): Django's stock User model exposes `is_active`; confirm
    # that `active_user` is a real field on node.models.User.
    gargantua.active_user = True
    # The generated password is shown once on stdout and never stored in clear.
    print('Gargantua, password: ', password)
    gargantua.set_password(password)
    gargantua.save()
################################################################################
print('Initialize node types...')
################################################################################
# Names of all node types the application expects to find.
node_types = [
    'Root', 'Trash',
    'Project', 'Corpus', 'Document',
    'MiamList', 'StopList', 'MainList',
    'Stem', 'Lem', 'Group', 'Tfidf',
    'Cooccurrence', 'WhiteList', 'BlackList'
]

# Add each node type whose name is not stored yet.
for node_type in node_types:
    known = session.query(NodeType).filter(NodeType.name == node_type).first()
    if known is None:
        session.add(NodeType(name=node_type))

session.commit()
################################################################################
print('Initialize main nodes...')
################################################################################
def _ensure_main_node(name, parent_id=None):
    """Return gargantua's node called *name*, inserting it when missing.

    The node type is looked up under the same name as the node, which holds
    for the three main nodes created below ('Root', 'Stem', 'Lem').
    """
    node = (
        session.query(Node)
        .filter(Node.name == name, Node.user_id == gargantua.id)
        .first()
    )
    if node is None:
        node = Node(
            user_id=gargantua.id,
            type_id=cache.NodeType[name].id,
            name=name,
            parent_id=parent_id,
        )
        session.add(node)
        # Flush now so the database assigns node.id; the children created
        # afterwards need the real id of the root as their parent_id.
        session.flush()
    return node


# The root is resolved (or created and flushed) first: reading node_root.id
# on a Node that was never persisted yields None, which would leave the
# 'Stem' and 'Lem' nodes without a parent.
node_root = _ensure_main_node('Root')
node_stem = _ensure_main_node('Stem', parent_id=node_root.id)
node_lem = _ensure_main_node('Lem', parent_id=node_root.id)
nodes = [node_root, node_stem, node_lem]

session.commit()
################################################################################
print('Initialize resource...')
################################################################################
from parsing.parsers_config import parsers
# One ResourceType per configured parser; names already stored are skipped.
for parser in parsers.keys():
    stored = session.query(ResourceType).filter(ResourceType.name == parser).first()
    if stored is None:
        session.add(ResourceType(name=parser))

session.commit()
################################################################################
#### Instantiate table NgramTag:
################################################################################
###f = open("part_of_speech_labels.txt", 'r')
###
###for line in f.readlines():
### name, description = line.strip().split('\t')
### _tag = Tag(name=name, description=description)
### session.add(_tag)
###session.commit()
###
###f.close()
##
##
#exit()
# to be executed like this:
# ./manage.py shell < init.py
#NodeType.objects.all().delete()
from node.models import *
import pycountry
# Save one Language row per pycountry entry. Anything that goes wrong for an
# entry (for instance a missing attribute) makes that entry be skipped.
for language in pycountry.languages:
    try:
        code = language.alpha2
        # 1 for English and French, 0 for everything else.
        implemented = int(code in ['en', 'fr'])
        record = Language(
            iso2=code,
            iso3=language.terminology,
            fullname=language.name,
            implemented=implemented,
        )
        record.save()
    except:
        # NOTE(review): bare except kept as in the original; it also hides
        # genuine database errors.
        pass

# Keep handles on the two languages flagged as implemented.
english = Language.objects.get(iso2='en')
french = Language.objects.get(iso2='fr')

# Fetch the default user, creating it when the lookup fails for any reason.
try:
    me = User.objects.get(username='pksm3')
except:
    me = User(username='pksm3')
    me.save()

def _node_type(name):
    """Fetch the NodeType called *name*; on any lookup error, print it and save a new one."""
    try:
        return NodeType.objects.get(name=name)
    except Exception as error:
        print(error)
        fresh = NodeType(name=name)
        fresh.save()
        return fresh


for node_type in ['Trash', 'Root', ]:
    NodeType.objects.get_or_create(name=node_type)

typeProject = _node_type('Project')
typeCorpus = _node_type('Corpus')
typeDoc = _node_type('Document')
typeStem = _node_type('Stem')
typeTfidf = _node_type('Tfidf')

# The remaining types are all bound to `typeDoc` in turn, exactly as the
# original did, so `typeDoc` ends up referring to 'Cooccurrence'.
for remaining in ('WhiteList', 'BlackList', 'Synonyme', 'Cooccurrence'):
    typeDoc = _node_type(remaining)

# In[33]:
from parsing.parsers_config import parsers
# Start from an empty table, then register one ResourceType per parser.
ResourceType.objects.all().delete()
for parser_name in parsers.keys():
    try:
        ResourceType.objects.get_or_create(name=parser_name)
    except Exception as error:
        # Report the failure and carry on with the next parser.
        print("Ressource Error: ", error)

# In[34]:
#Node.objects.all().delete()
# Reuse the node named 'Stem' when the lookup succeeds; on any failure create
# one owned by the default user.
try:
    stem = Node.objects.get(name='Stem')
except:
    stem = Node(name='Stem', type=typeStem, user=me)
    stem.save()

from gargantext_web.db import *
# Instantiate table NgramTag: each non-empty line of the labels file holds a
# tag name and its description separated by a tab.
# The context manager closes the file even when a line fails to parse.
with open("part_of_speech_labels.txt", 'r') as f:
    for line in f:
        line = line.strip()
        if not line:
            # Blank lines (e.g. a trailing newline) carry no tag.
            continue
        name, description = line.split('\t')
        _tag = Tag(name=name, description=description)
        session.add(_tag)

session.commit()
#!/bin/bash
# Bootstrap sequence: load init.sql into the gargandb database, run the
# Django syncdb command, load init2.sql, then feed init_gargantext.py to the
# Django shell. The steps run in this fixed order; a failing step does not
# stop the following ones.
psql -d gargandb -f init.sql
# Two-second pause before the next step (reason not stated in this script).
sleep 2
../manage.py syncdb
psql -d gargandb -f init2.sql
sleep 2
# The older init.py entry point is disabled in favour of init_gargantext.py.
#../manage.py shell < init.py
../manage.py shell < init_gargantext.py
# Optional hstore-to-jsonb SQL step, currently disabled.
#psql -d gargandb -f hstore2jsonb.sql
# Without this, we couldn't use the Django environment
import os
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "gargantext_web.settings")
os.environ.setdefault("DJANGO_HSTORE_GLOBAL_REGISTER", "False")
# We're gonna use all the models!
# Django models
from node import models
# SQLA models
from gargantext_web.db import *
# Reset: all data
#
#tables_to_empty = [
# Node,
# Node_Hyperdata,
# Hyperdata,
# NodeType,
# ResourceType,
# Resource,
#]
#for table in tables_to_empty:
# print('Empty table "%s"...' % (table._meta.db_table, ))
# table.objects.all().delete()
# Integration: hyperdata types
print('Initialize hyperdata...')
# Hyperdata fields to register, mapped to the type name stored with them.
hyperdata = {
    'publication_date': 'datetime',
    'authors': 'string',
    'language_fullname': 'string',
    'abstract': 'text',
    'title': 'string',
    'source': 'string',
    'volume': 'string',
    'text': 'text',
    'page': 'string',
    'doi': 'string',
    'journal': 'string',
}

# Save one Django Hyperdata row per entry; no check for existing rows is made.
for name, type_name in hyperdata.items():
    entry = models.Hyperdata(name=name, type=type_name)
    entry.save()

# Integration: languages
print('Initialize languages...')
import pycountry
# Wipe the language table, then store every pycountry language that carries
# a two-letter code.
Language.objects.all().delete()
for language in pycountry.languages:
    if 'alpha2' not in language.__dict__:
        continue
    record = models.Language(
        iso2=language.alpha2,
        iso3=language.bibliographic,
        fullname=language.name,
        # 1 for English and French, 0 for everything else.
        implemented=int(language.alpha2 in ['en', 'fr']),
    )
    record.save()

# Integration: users
# The leftover merge-conflict marker and the duplicated "users + main nodes"
# section have been folded into a single pass: users first, then node types
# (the main nodes look their type up in cache.NodeType), then the nodes.
print('Initialize users...')
# get_or_create returns an (object, created) pair, so it is unpacked here
# rather than bound whole to a single name.
_, _ = models.User.objects.get_or_create(username='alexandre')
_, created = models.User.objects.get_or_create(username='gargantua')
# Re-read both accounts through SQLAlchemy, as the second half of the
# original did, so `me` and `gargantua` end up bound to the same kind of object.
me = session.query(User).filter(User.username == 'alexandre').first()
gargantua = session.query(User).filter(User.username == 'gargantua').first()

# Integration: node types
print('Initialize node types...')
node_types = [
    'Root', 'Trash',
    'Project', 'Corpus', 'Document',
    'MiamList', 'StopList', 'MainList',
    'Stem', 'Lem', 'Group', 'Tfidf',
    'Cooccurrence', 'WhiteList', 'BlackList'
]
for node_type in node_types:
    models.NodeType.objects.get_or_create(name=node_type)

# Integration: main nodes
print('Initialize main nodes...')
node_root = Node(user_id=gargantua.id, type_id=cache.NodeType['Root'].id, name='Root')
session.add(node_root)
# Flush so the database assigns node_root.id before the children use it as
# parent_id; on an unpersisted Node the id is still None.
session.flush()
node_stem = Node(user_id=gargantua.id, type_id=cache.NodeType['Stem'].id, name='Stem', parent_id=node_root.id)
node_lem = Node(user_id=gargantua.id, type_id=cache.NodeType['Lem'].id, name='Lem', parent_id=node_root.id)
session.add(node_stem)
session.add(node_lem)
session.commit()
print('Initialize resource...')
from parsing.parsers_config import parsers
# Make sure a ResourceType exists for every configured parser.
for parser_name in parsers.keys():
    models.ResourceType.objects.get_or_create(name=parser_name)

# Instantiate table NgramTag: each non-empty line of the labels file holds a
# tag name and its description separated by a tab.
# The context manager closes the file even when a line fails to parse.
with open("part_of_speech_labels.txt", 'r') as f:
    for line in f:
        line = line.strip()
        if not line:
            # Blank lines (e.g. a trailing newline) carry no tag.
            continue
        name, description = line.split('\t')
        _tag = Tag(name=name, description=description)
        session.add(_tag)

session.commit()
# Leave the shell once initialisation is done.
exit()
...@@ -36,9 +36,6 @@ if project is None: ...@@ -36,9 +36,6 @@ if project is None:
corpus = session.query(Node).filter(Node.parent_id == project.id, corpus = session.query(Node).filter(Node.parent_id == project.id,
Node.type_id == cache.NodeType['Corpus'].id).first() Node.type_id == cache.NodeType['Corpus'].id).first()
doc_id = session.query(Node.id).filter(Node.parent_id == corpus.id,
Node.type_id == cache.NodeType['Document'].id).all()[1]
if corpus is None: if corpus is None:
corpus = Node( corpus = Node(
parent_id = project.id, parent_id = project.id,
...@@ -59,6 +56,9 @@ if corpus is None: ...@@ -59,6 +56,9 @@ if corpus is None:
extract_ngrams(corpus, ('title', 'abstract')) extract_ngrams(corpus, ('title', 'abstract'))
compute_tfidf(corpus) compute_tfidf(corpus)
doc_id = session.query(Node.id).filter(Node.parent_id == corpus.id,
Node.type_id == cache.NodeType['Document'].id).all()[1]
print('Miam list', listIds(typeList='MiamList', corpus_id=corpus.id, user_id=user.id)[0][0]) print('Miam list', listIds(typeList='MiamList', corpus_id=corpus.id, user_id=user.id)[0][0])
# Stemming the corpus # Stemming the corpus
...@@ -66,7 +66,7 @@ print('Working on corpus:', corpus.id, corpus.name) ...@@ -66,7 +66,7 @@ print('Working on corpus:', corpus.id, corpus.name)
stem_id = stem_corpus(corpus_id=corpus.id) stem_id = stem_corpus(corpus_id=corpus.id)
print('Stem Node.id is', stem_id) print('Stem Node.id is', stem_id)
for typeList in ['MiamList', 'StopList', 'MainList', 'GroupList']: for typeList in ['MiamList', 'StopList', 'MainList', 'Group']:
n = listIds(user_id=user.id, n = listIds(user_id=user.id,
corpus_id=corpus.id, corpus_id=corpus.id,
typeList=typeList) typeList=typeList)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment