from admin.utils import PrintException

from gargantext_web.db import NodeNgram
from gargantext_web.db import *
from parsing.corpustools import *

import sqlalchemy
from sqlalchemy.sql import func
from sqlalchemy import desc, asc, or_, and_, Date, cast, select
from sqlalchemy import literal_column
from sqlalchemy.orm import aliased


def listIds(typeList=None, user_id=None, corpus_id=None):
    '''
    listIds : get the list Nodes of a given type, or create a first one.
    listIds :: String -> Integer -> Integer -> [(Integer, String)]
    typeList  :: String, type of the list Node to look up or create
                 ('MiamList' when None)
    user_id   :: Integer, required
    corpus_id :: Integer, required
    returns   :: [(Node.id, Node.name)] of the matching Nodes, highest
                 Node.id first; when nothing matches, a Node is created,
                 committed, and returned as the single element
    raises    :: Exception if user_id or corpus_id is None, or if typeList
                 is not one of the types listed below
    '''
    wanted_type = 'MiamList' if typeList is None else typeList

    if corpus_id is None or user_id is None:
        raise Exception("Usage (Warning): Need corpus_id and user_id")

    # User lists are filtered by user and corpus; root lists by type only.
    per_user_types = ('MiamList', 'StopList', 'MapList', 'Group')
    root_types     = ('Stem', 'Lem')

    if wanted_type in per_user_types:
        found = (session.query(Node)
                        .filter(Node.user_id == user_id,
                                Node.parent_id == corpus_id,
                                Node.type_id == cache.NodeType[wanted_type].id)
                        .order_by(desc(Node.id))
                        .all())
    elif wanted_type in root_types:
        found = (session.query(Node)
                        .filter(Node.type_id == cache.NodeType[wanted_type].id)
                        .order_by(desc(Node.id))
                        .all())
    else:
        raise Exception("typeList %s not supported yet" % wanted_type)

    if found != []:
        return [(existing.id, existing.name) for existing in found]

    # Nothing matched: create a default Node attached to the user and corpus.
    created = Node(user_id=user_id,
                   parent_id=corpus_id,
                   type_id=cache.NodeType[wanted_type].id,
                   name="First default Node " + str(wanted_type))
    session.add(created)
    session.commit()
    return [(created.id, created.name)]

# Some functions to manage ngrams according to the lists

def listNgramIds(list_id=None, typeList=None,
                 corpus_id=None, doc_id=None, user_id=None):
    '''
    listNgramIds :: Int | String, Int, Int, Int -> [(Int, String, Int, Int)]
    return has types: [(ngram_id, ngram_terms, summed weight, list node_id)]

    Return one row per (ngram, list) pair for the lists of the requested
    type(s), optionally restricted to the ngrams of one document.

    list_id   : Node.id of the list expected
                NOTE(review): only checked for presence below; the lists
                queried are always the ones listIds resolves from
                corpus_id / user_id -- confirm whether it should filter.
    typeList  : a type name such as 'MiamList', or a list of type names;
                defaults to ['MiamList', 'StopList']
    corpus_id : needed to get list_id
    doc_id    : to get specific ngrams related to a document with Node.id=doc_id
    user_id   : needed to create list if it does not exist

    Raises Exception when both list_id and corpus_id are None, when user_id
    is None, and re-raises whatever listIds raises.
    '''
    if typeList is None:
        typeList = ['MiamList', 'StopList']
    elif isinstance(typeList, str):
        # A single type name: wrap it so the loop below iterates over one
        # type instead of over the characters of the string.
        typeList = [typeList]

    if list_id is None and corpus_id is None:
        raise Exception('Need a listId or corpusId to query')

    if user_id is None:
        raise Exception("Need a user_id to create list if needed")

    # iterate over every list in a corpus
    try:
        allLists = []
        for aType in typeList:
            allLists += listIds(user_id=user_id, corpus_id=corpus_id, typeList=aType)
    except Exception:
        PrintException()
        # bare raise keeps the original traceback
        raise

    ListNgram = aliased(NodeNgram)
    # one equality per resolved list; l is a (node_id, node_name) tuple
    or_args = [ListNgram.node_id == l[0] for l in allLists]
    query = (session.query(Ngram.id, Ngram.terms, func.sum(ListNgram.weight), ListNgram.node_id)
            .join(ListNgram, ListNgram.ngram_id == Ngram.id)
            .filter(or_(*or_args))
            .group_by(Ngram.id, ListNgram.node_id)
            )

    if doc_id is not None:
        # keep only ngrams that are also attached to the given document
        Doc      = aliased(Node)
        DocNgram = aliased(NodeNgram)
        query = (query
                     .join(DocNgram, DocNgram.ngram_id == Ngram.id)
                     .join(Doc, Doc.id == doc_id)
                     .filter(DocNgram.node_id == Doc.id)
                )

    return(query.all())

def ngramList(do, list_id, ngram_ids=None) :
    '''
    ngramList :: (String, Int, [Int] | [String]) -> [(Int, String, Int)]
    Do (delete | add) [ngram_id] (from | to) the list_id

    options:
        do        = String : action 'del', 'add' or 'create'
        list_id   = Int    : list id (Node.id)
        ngram_ids = [Int]  : list of Ngrams id (Ngrams.id); with
                             do == 'create' it is a list of terms (String)
                             that are first turned into Ngram ids.
                             None is treated as an empty list.

    Returns the (id, terms, count) rows of the ngrams handled by 'add';
    the result is empty for every other action. The session is committed
    once at the end.

    NOTE(review): after 'create' resolves the ngram ids, nothing attaches
    them to list_id (only 'add' does) -- confirm whether that is intended.
    '''
    results = []

    # The default None used to fail on iteration; treat it as "no ngrams".
    if ngram_ids is None:
        ngram_ids = []

    if do == 'create':
        terms = list(ngram_ids)
        ngram_ids = []
        for ngram_term in terms:
            # TODO set the language correctly
            # n is the number of words of this term (not of the whole list)
            fetched = Ngram.objects.get_or_create(terms=ngram_term,
                                                  n=len(ngram_term.split()),
                                                  language='en')
            # Django-style managers return (object, created); accept a bare
            # object as well. NOTE(review): confirm what Ngram.objects is.
            ngram = fetched[0] if isinstance(fetched, tuple) else fetched
            ngram_ids.append(ngram.id)

    for ngram_id in ngram_ids:
        # Fetch the ngram from database
        ngram = session.query(Ngram.id, Ngram.terms, func.count()).filter(Ngram.id == ngram_id).first()
        # Need to be optimized with list of ids
        node_ngram = (session.query(NodeNgram)
                .filter(NodeNgram.ngram_id == ngram_id)
                .filter(NodeNgram.node_id  == list_id)
                .first()
                )

        if do == 'add' :
            # create NodeNgram if does not exists
            if node_ngram is None :
                node_ngram = NodeNgram(node_id = list_id, ngram_id=ngram_id,
                                        weight=1)
            session.add(node_ngram)
            results += [ngram]

        elif do == 'del' :
            # Only a persisted row can be deleted; a missing association
            # means there is nothing to remove.
            if node_ngram is not None :
                session.delete(node_ngram)

    session.commit()
    return(results)

# Some functions to manage automatically the lists
def doStopList(user_id=None, corpus_id=None, stop_id=None, reset=False, limit=None):
    '''
    Compute automatically the stopList and returns its Node.id
    Algo: TODO tfidf according type of corpora

    user_id, corpus_id : used to look up (or create) the StopList when
                         stop_id is not given
    stop_id            : Node.id of an already known StopList; returned as is
    reset, limit       : accepted but not used yet

    The list content is not computed yet: only the id is resolved.
    '''

    if stop_id is None:
        # listIds returns [(node_id, node_name), ...]; keep the first id
        stop_id = listIds(typeList='StopList',
                          user_id=user_id,
                          corpus_id=corpus_id)[0][0]
    # TODO according to type of corpus, choose the right default stopList
    return stop_id

def ngrams2miam(user_id=None, corpus_id=None):
    '''
    Fill the MiamList of a corpus with every ngram of its documents.

    The first list returned by listIds for 'MiamList' is used as target.
    Each ngram attached to a 'Document' node whose parent is corpus_id is
    inserted once, weighted by the number of joined rows.
    '''
    miam_lists = listIds(typeList='MiamList', user_id=user_id, corpus_id=corpus_id)
    miam_id = miam_lists[0][0]
    print(miam_id)

    # Rows are shaped (node_id, ngram_id, weight) to match the bulk insert.
    occurrences = session.query(
        literal_column(str(miam_id)).label("node_id"),
        Ngram.id,
        func.count(),
    )
    occurrences = occurrences.select_from(Ngram)
    occurrences = occurrences.join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
    occurrences = occurrences.join(Node, NodeNgram.node_id == Node.id)
    occurrences = occurrences.filter(
        Node.parent_id == corpus_id,
        Node.type_id == cache.NodeType['Document'].id,
    )
    occurrences = occurrences.group_by(Ngram.id)

    rows = occurrences.all()
    bulk_insert(NodeNgram, ['node_id', 'ngram_id', 'weight'], rows)

from gargantext_web.db import get_or_create_node
from analysis.lists import Translations, UnweightedList

def ngrams2miamBis(corpus):
    '''
    Create a Miam List only

    corpus : corpus node; its id selects the documents whose ngrams are
             copied into the MiamList.

    Every ngram attached to a 'Document' node whose parent is the corpus is
    inserted into the MiamList, weighted by the number of joined rows.
    '''

    miam = get_or_create_node(corpus=corpus, nodetype='MiamList')
    # Kept from the original although its result is unused here.
    # NOTE(review): presumably ensures the StopList node exists -- confirm.
    get_or_create_node(corpus=corpus, nodetype='StopList')

    # NOTE(review): get_or_create_node may return the node rather than its
    # id; accept both so the literal below is always the numeric id.
    miam_id = getattr(miam, 'id', miam)

    query = (session.query(
                literal_column(str(miam_id)).label("node_id"),
                Ngram.id,
                func.count(),
                )
                .select_from(Ngram)
                .join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
                .join(Node, NodeNgram.node_id == Node.id)
                # corpus_id was never defined in this function
                .filter(Node.parent_id == corpus.id)
                .filter(Node.type_id == cache.NodeType['Document'].id)

                .group_by(Ngram.id)
                .all()
                )
    bulk_insert(NodeNgram, ['node_id', 'ngram_id', 'weight'], query)

def doList(
        type_list='MiamList',
        user_id=None, corpus_id=None,
        miam_id=None, stop_id=None, main_id=None,
        lem_id=None, stem_id=None, cvalue_id=None, group_id=None,
        reset=True, limit=None
    ):
    '''
    Compute the miamList and returns its Node.id
    miamList = allList - stopList
    where:
        allList  = all Ngrams
        stopList = all Stop Ngrams

    OR

    Compute the mainList : main Forms
    mainList = miamList - (stem|lem|group|cvalue) List
    where:
        group   = Words grouped manually by user
        stem    = equivalent Words which are stemmed (but the main form)
        lem     = equivalent Words which are lemmatized (but the main form)
        cvalue  = equivalent N-Words according to C-Value (but the main form)

    options:
        type_list = 'MiamList' or 'MainList' : the list to compute
        user_id, corpus_id = Int : used to resolve missing list ids
                                   through listIds
        miam_id, stop_id, main_id = Int : Node.id of the lists when already
                    known. main_id has to be given for 'MainList' because
                    listIds does not support that type.
        stem_id   = Int : when given (MainList only), ngrams are filtered
                    through the stem node; not looked up otherwise
        lem_id, cvalue_id, group_id : accepted but not used yet
        reset     = Bool : delete the current content of the computed list
        limit     = Int  : keep only the `limit` most frequent ngrams

    Returns the Node.id of the computed list.
    Raises Exception for an unsupported type_list; failures while resolving
    the lists or resetting are printed with PrintException and re-raised.
    '''

    if type_list not in ['MiamList', 'MainList']:
        raise Exception("Type List (%s) not supported, try: \'MiamList\' or \'MainList\'" % type_list)

    # list_dict is keyed by short names, not by the node type names
    target = 'miam' if type_list == 'MiamList' else 'main'

    list_dict = {
        'miam' : { 'type' : 'MiamList', 'id' : miam_id},
        'stop' : { 'type' : 'StopList', 'id' : stop_id},
                }
    if type_list == 'MainList':
        list_dict['main'] = { 'type' : 'MainList', 'id' : main_id}

    try:
        for entry in list_dict.values():
            if entry['id'] is None:
                # listIds returns [(node_id, node_name), ...]
                entry['id'] = listIds(user_id=user_id,
                                      corpus_id=corpus_id,
                                      typeList=entry['type'])[0][0]
        # Delete previous List ?
        # By default, the list is computed each time
        if reset is True:
            session.query(NodeNgram).filter(
                    NodeNgram.node_id == list_dict[target]['id']
                    ).delete()
    except Exception:
        # Going on with an unresolved id would insert rows for a bogus node.
        PrintException()
        raise

    if type_list == 'MiamList' :
        stopNgram = aliased(NodeNgram)
        query = (session.query(
                literal_column(str(list_dict['miam']['id'])).label("node_id"),
                Ngram.id,
                func.count(),
                )
                .select_from(Ngram)
                .join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
                .join(Node, NodeNgram.node_id == Node.id)
                # anti-join: the rows kept are those without a stop match
                .outerjoin(stopNgram,
                            and_(stopNgram.ngram_id == Ngram.id,
                                stopNgram.node_id == list_dict['stop']['id']))

                .filter(Node.parent_id == corpus_id)
                .filter(Node.type_id == cache.NodeType['Document'].id)
                # "== None" is rendered as IS NULL by SQLAlchemy
                .filter(stopNgram.id == None )

                .group_by(Ngram.id)
                )

    else:
        # Query to get Ngrams for main list
        query = (session.query(
                literal_column(str(list_dict['main']['id'])).label("node_id"),
                Ngram.id,
                func.count(),
                )
                .select_from(Ngram)
                .join(NodeNgram, NodeNgram.ngram_id == Ngram.id)

                .filter(NodeNgram.node_id == list_dict['miam']['id'])
                )

        if stem_id is not None:
            # Query with Stems Result need to be checked before prod
            snn1   = aliased(NodeNgramNgram)
            snn2   = aliased(NodeNgramNgram)
            query = (query.outerjoin(snn1,
                          and_(snn1.ngramx_id == Ngram.id,
                               snn1.node_id   == stem_id
                              )
                                    )
                          .outerjoin(snn2,
                          and_(snn1.ngramy_id == snn2.ngramy_id,
                               snn2.node_id   == stem_id,
                               snn1.ngramx_id < snn2.ngramx_id
                              )
                                    )

                    .filter(snn2.id == None)
                    # Specific group by, only valid once snn1 is joined
                    .group_by(Ngram.id, snn1.ngramx_id)
                    )
        else:
            query = query.group_by(Ngram.id)

    # here add filter for size of the ngram

    # Order result by occurrences descending
    query = query.order_by(desc(func.count()))
    # Adding specific filters
    if limit is not None:
        query = query.limit(limit)
    else:
        query = query.all()

    bulk_insert(NodeNgram, ['node_id', 'ngram_id', 'weight'], query)

    return(list_dict[target]['id'])