# Configure the process environment before any model module is imported:
# DJANGO_SETTINGS_MODULE names the settings module Django will load.
# NOTE(review): DJANGO_HSTORE_GLOBAL_REGISTER is presumably read by the
# hstore integration -- confirm against that package's documentation.

import os

# setdefault() leaves any value already exported by the caller untouched.
for _env_name, _env_value in (
    ("DJANGO_SETTINGS_MODULE", "gargantext_web.settings"),
    ("DJANGO_HSTORE_GLOBAL_REGISTER", "False"),
):
    os.environ.setdefault(_env_name, _env_value)


# Import every public name from node.models; the rest of the script refers to the models by name.

from node.models import *


# Reset: wipe every row from the tables listed below, in this order.

tables_to_empty = (
    Node,
    Node_Metadata,
    Metadata,
    NodeType,
    ResourceType,
    Resource,
)
for model in tables_to_empty:
    # Report the database table name before deleting its rows.
    table_name = model._meta.db_table
    print('Empty table "%s"...' % (table_name, ))
    model.objects.all().delete()


# Integration: metadata types

print('Initialize metadata...')
# Maps each metadata field name to the name of its value type.
metadata = {
    'publication_date': 'datetime',
    'authors': 'string',
    'language_fullname': 'string',
    'abstract': 'text',
    'title': 'string',
    'source': 'string',
    'volume': 'string',
    'text': 'text',
    'page': 'string',
    'doi': 'string',
    'journal': 'string',
}
# The loop variable is deliberately not called `type`, so the builtin of that
# name stays usable for the rest of the script.
for name, metadata_type in metadata.items():
    Metadata(name=name, type=metadata_type).save()


# Integration: languages

print('Initialize languages...')
import pycountry

# Start from an empty table, then insert one row for every pycountry language
# that carries a two-letter code.
Language.objects.all().delete()
implemented_codes = ('en', 'fr')
for entry in pycountry.languages:
    if 'alpha2' not in entry.__dict__:
        continue
    code2 = entry.alpha2
    Language(
        iso2=code2,
        iso3=entry.bibliographic,
        fullname=entry.name,
        # Stored as 1/0: only the codes listed above are flagged.
        implemented=int(code2 in implemented_codes),
    ).save()

# Handles on the two languages flagged as implemented.
english = Language.objects.get(iso2='en')
french = Language.objects.get(iso2='fr')


# Integration: users

print('Initialize users...')
try:
    me = User.objects.get(username='alexandre')
except User.DoesNotExist:
    # Only a missing account triggers creation; any other failure (database
    # error, interrupt, ...) is left to propagate instead of being hidden.
    me = User(username='alexandre')
    me.save()


# Integration: node types

print('Initialize node types...')


def _get_or_create_node_type(name):
    """Return the NodeType called *name*, creating and saving it if absent.

    The lookup error is printed when the row is missing. Any failure other
    than a missing row propagates to the caller.
    """
    try:
        return NodeType.objects.get(name=name)
    except NodeType.DoesNotExist as error:
        print(error)
        node_type = NodeType(name=name)
        node_type.save()
        return node_type


# Each type is bound to its own name, so that typeProject refers to 'Project'
# and typeDoc keeps referring to 'Document' once all types are initialized.
typeRoot = _get_or_create_node_type('Root')
typeProject = _get_or_create_node_type('Project')
typeCorpus = _get_or_create_node_type('Corpus')
typeDoc = _get_or_create_node_type('Document')
typeStem = _get_or_create_node_type('Stem')
typeTfidf = _get_or_create_node_type('Tfidf')
typeWhiteList = _get_or_create_node_type('WhiteList')
typeBlackList = _get_or_create_node_type('BlackList')
typeSynonyme = _get_or_create_node_type('Synonyme')
typeCooccurrence = _get_or_create_node_type('Cooccurrence')



# Integration: resource types

print('Initialize resource...')


def _get_or_create_resource_type(name):
    """Return the ResourceType called *name*, creating and saving it if absent.

    Each type is looked up on its own, so one missing row never causes the
    types that already exist to be created a second time. The lookup error is
    printed when the row is missing; any other failure propagates.
    """
    try:
        return ResourceType.objects.get(name=name)
    except ResourceType.DoesNotExist as error:
        print(error)
        resource_type = ResourceType(name=name)
        resource_type.save()
        return resource_type


typePubmed = _get_or_create_resource_type('pubmed')
typeIsi = _get_or_create_resource_type('isi')
typeRis = _get_or_create_resource_type('ris')
typePresseFr = _get_or_create_resource_type('europress_french')
typePresseEn = _get_or_create_resource_type('europress_english')


# Integration: Stem node (looked up by name only)
try:
    stem = Node.objects.get(name='Stem')
except Node.DoesNotExist:
    # Only a missing node triggers creation; other errors propagate.
    stem = Node(name='Stem', type=typeStem, user=me)
    stem.save()



# Integration: project

print('Initialize project...')
try:
    project = Node.objects.get(name='Bees project')
except Node.DoesNotExist:
    # Only a missing node triggers creation; other errors propagate.
    project = Node(name='Bees project', type=typeProject, user=me)
    project.save()


# Integration: corpus

print('Initialize corpus...')
try:
    # The lookup is by name only; the parent is not part of the filter.
    corpus_pubmed = Node.objects.get(name='PubMed corpus')
except Node.DoesNotExist:
    # Only a missing node triggers creation; other errors propagate.
    corpus_pubmed = Node(parent=project, name='PubMed corpus', type=typeCorpus, user=me)
    corpus_pubmed.save()

print('Initialize resource...')
# Alternative sample files, kept for reference:
#   ./data_samples/pubmed.zip
#   ./data_samples/pubmed_2013-04-01_HoneyBeesBeeBees.xml
resource_options = dict(
    file='/srv/gargantext_lib/data_samples/pubmed.xml',
    type=typePubmed,
    user=me,
)
corpus_pubmed.add_resource(**resource_options)

# List every resource now attached to the corpus.
for attached in corpus_pubmed.get_resources():
    summary = (attached.id, attached.digest, attached.file)
    print('Resource #%d - %s - %s' % summary)

# Parse the attached resources, then extract ngrams from the 'title' field of
# the corpus children.
corpus_ref = (corpus_pubmed.id, )
print('Parse corpus #%d...' % corpus_ref)
corpus_pubmed.parse_resources(verbose=True)
print('Extract corpus #%d...' % corpus_ref)
documents = corpus_pubmed.children.all()
documents.extract_ngrams(['title'])
print('Parsed corpus #%d.' % corpus_ref)

# End the script explicitly.
exit()