from admin.utils import PrintException
from gargantext_web.db import *
from collections import defaultdict
from django.db import connection, transaction

import math
from math import log
import scipy

from gargantext_web.db import get_or_create_node
from analysis.cooccurrences import do_cooc
from analysis.distance import do_distance

import pandas as pd
from copy import copy, deepcopy
import numpy as np
import networkx as nx
from networkx.readwrite import json_graph

from rest_v1_0.api import JsonHttpResponse
from analysis.louvain import best_partition, generate_dendogram, partition_at_level
from ngram.lists import listIds
from sqlalchemy.orm import aliased


def get_cooc(request=None, corpus=None
            , field1='ngrams', field2='ngrams'
            , cooc_id=None, type='node_link', size=1000
            , start=None, end=None
            , hapax=1
            , distance='conditional'
            , bridgeness=5
            ):
    '''
    get_cooc: compute the cooccurrence graph.
    '''
    data = {}
    #if session.query(Node).filter(Node.type_id==type_cooc_id, Node.parent_id==corpus_id).first() is None:
    print("Cooccurrences do not exist yet, creating them.")
    miam_id  = get_or_create_node(nodetype='MapList',  corpus=corpus).id
    stop_id  = get_or_create_node(nodetype='StopList', corpus=corpus).id
    group_id = get_or_create_node(nodetype='Group',    corpus=corpus).id

    SamuelFlag = False
    # if field1 == field2 == 'ngrams' :
    #     isMonopartite = True
    #     SamuelFlag = True
    # else:
    #     isMonopartite = False
    isMonopartite = True  # Always: compute graph B first, then build graph A from the B-nodes.

    # data deleted each time
    #cooc_id = get_or_create_node(nodetype='Cooccurrence', corpus=corpus).id
    cooc_id = do_cooc(corpus=corpus, field1="ngrams", field2="ngrams"
                     , miam_id=miam_id, group_id=group_id, stop_id=stop_id, limit=size
                     , isMonopartite=True, start=start, end=end
                     , hapax=hapax)

    # G: the graph; partition: community id per node;
    # ids: node_id -> (type, pk); weight: node_id -> node weight
    G, partition, ids, weight = do_distance(cooc_id, field1="ngrams", field2="ngrams"
                                           , isMonopartite=True, distance=distance)

    if type == "node_link":
        nodesB_dict = {}
        for node_id in G.nodes():
            try:
                #node,type(labels[node])
                G.node[node_id]['pk'] = ids[node_id][1]
                nodesB_dict[ids[node_id][1]] = True
                # TODO the query below is not optimized (do it in do_distance).
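                # A batched alternative (a sketch, not wired in): fetch all labels
                # in one query before the loop instead of one query per node, e.g.
                #     labels = dict(session.query(Ngram.id, Ngram.terms)
                #                          .filter(Ngram.id.in_(list(G.nodes()))))
                # and then read labels[node_id] here.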
                the_label = session.query(Ngram.terms).filter(Ngram.id==node_id).first()
                the_label = ", ".join(the_label)
                G.node[node_id]['label'] = the_label
                G.node[node_id]['size'] = weight[node_id]
                G.node[node_id]['type'] = ids[node_id][0].replace("ngrams", "terms")
                G.node[node_id]['attributes'] = { "clust_default": partition[node_id] }  # new format
                # G.add_edge(node, "cluster " + str(partition[node]), weight=3)
            except Exception as error:
                pass
                #PrintException()
                #print("error01: ",error)

        links = []
        if bridgeness > 0:
            com_link = defaultdict(lambda: defaultdict(list))
            com_ids = defaultdict(list)
            for k, v in partition.items():
                com_ids[v].append(k)

        for e in G.edges_iter():
            s = e[0]
            t = e[1]
            # renamed from `weight` so it does not shadow the dict returned by do_distance
            edge_weight = G[ids[s][1]][ids[t][1]]["weight"]
            if bridgeness < 0:
                info = { "s": ids[s][1]
                       , "t": ids[t][1]
                       , "w": edge_weight
                       }
                links.append(info)
            else:
                if partition[s] == partition[t]:
                    info = { "s": ids[s][1]
                           , "t": ids[t][1]
                           , "w": edge_weight
                           }
                    links.append(info)
                if bridgeness > 0:
                    if partition[s] < partition[t]:
                        com_link[partition[s]][partition[t]].append((s, t, edge_weight))

        if bridgeness > 0:
            # between each pair of communities, keep a quota of the strongest
            # inter-community links, proportional to bridgeness
            for c1 in com_link.keys():
                for c2 in com_link[c1].keys():
                    index = round(bridgeness * len(com_link[c1][c2]) / (len(com_ids[c1]) + len(com_ids[c2])))
                    #print((c1,len(com_ids[c1])), (c2,len(com_ids[c2])), index)
                    if index > 0:
                        for link in sorted(com_link[c1][c2], key=lambda x: x[2], reverse=True)[:index]:
                            #print(c1, c2, link[2])
                            info = { "s": link[0], "t": link[1], "w": link[2] }
                            links.append(info)

        B = json_graph.node_link_data(G)
        B["links"] = links

        if field1 == field2 == 'ngrams':
            data["nodes"] = B["nodes"]
            data["links"] = B["links"]
        else:
            A = get_graphA("journal", nodesB_dict, B["links"], corpus)
            print("#nodesA:", len(A["nodes"]))
            print("#linksAA + #linksAB:", len(A["links"]))
            print("#nodesB:", len(B["nodes"]))
            print("#linksBB:", len(B["links"]))
            data["nodes"] = A["nodes"] + B["nodes"]
            data["links"] = A["links"] + B["links"]
            print(" total nodes :", len(data["nodes"]))
            print(" total links :", len(data["links"]))
            print("")

    elif type == "adjacency":
        for node in G.nodes():
            try:
                #node,type(labels[node])
                #G.node[node]['label'] = node
                G.node[node]['name'] = node
                #G.node[node]['size'] = weight[node]
                G.node[node]['group'] = partition[node]
                #G.add_edge(node, partition[node], weight=3)
            except Exception as error:
                print("error02: ", error)
        data = json_graph.node_link_data(G)

    elif type == 'bestpartition':
        return partition

    return data


def get_graphA(nodeA_type, NodesB, links, corpus):
    '''
    Build graph A (e.g. journals) on top of the B-nodes (ngrams) of the semantic map.
    '''
    from analysis.InterUnion import Utils
    print(" = = = = = = = ")
    print("In get_graphA(), corpus id:", corpus.id)

    nodeA_type_id = cache.Hyperdata[nodeA_type].id
    threshold_cotainf = 0.02
    max_nodeid = -1
    for nodeid in NodesB:
        if nodeid > max_nodeid:
            max_nodeid = nodeid

    # = = = = [ 01. Getting ALL documents of the corpus ] = = = = #
    Docs = {}
    document_type_id = cache.NodeType['Document'].id
    sql_query = 'select id from node_node where parent_id='+str(corpus.id)+' and type_id='+str(document_type_id)
    cursor = connection.cursor()
    cursor.execute(sql_query)
    results = cursor.fetchall()
    for i in results:
        Docs[i[0]] = True
    print("docs:", len(Docs.keys()))
    # = = = = [ / 01. Getting ALL documents of the corpus ] = = = = #

    # = = = = [ 02. Getting ALL documents related to the ngrams of the semantic map ] = = = = #
    sql_query = 'select nodey_id,ngram_id from node_nodenodengram where ngram_id IN (' + ','.join(map(str, NodesB.keys())) + ")"
    cursor = connection.cursor()
    cursor.execute(sql_query)
    results = cursor.fetchall()
    # = = = = [ / 02. Getting ALL documents related to the ngrams of the semantic map ] = = = = #
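    # Note: the raw IN (...) queries here are built by string concatenation.
    # A parameterized sketch (assuming a PostgreSQL/psycopg2 backend, where a
    # Python list adapts to an array), not wired in:
    #     cursor.execute('select nodey_id,ngram_id from node_nodenodengram '
    #                    'where ngram_id = ANY(%s)', [list(NodesB.keys())])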
    # = = = = [ 03. Limiting the documents retrieved in step 02 to those of the corpus ] = = = = #
    Docs_and_ = { "nodesA": {}, "nodesB": {} }
    NodesB_and_Docs = {}
    for i in results:
        doc_id = i[0]
        ngram_id = i[1]
        if ngram_id in NodesB and doc_id in Docs:
            if doc_id not in Docs_and_["nodesB"]:
                Docs_and_["nodesB"][doc_id] = []
            Docs_and_["nodesB"][doc_id].append(ngram_id)
            if ngram_id not in NodesB_and_Docs:
                NodesB_and_Docs[ngram_id] = []
            NodesB_and_Docs[ngram_id].append(doc_id)
    # = = = = [ / 03. Limiting the documents retrieved in step 02 to those of the corpus ] = = = = #

    # # = = = = [ Getting Authors ] = = = = #
    # Authors = {}
    # sql_query = 'select node_id,value_string from node_node_hyperdata where node_id IN (' + ','.join(map(str, Docs_and_["nodesB"].keys())) + ")"+' and hyperdata_id=10' # 10 -> authors
    # cursor = connection.cursor()
    # cursor.execute(sql_query)
    # results = cursor.fetchall()
    # for i in results:
    #     doc_id = i[0]
    #     authors = i[1].split(",")
    #     for a in authors:
    #         if a not in Authors:
    #             Authors[a] = 0
    #         Authors[a] += 1
    # print("")
    # print("#authors:")
    # import pprint
    # pprint.pprint(Authors)
    # print("")
    # # = = = = [ / Getting Authors ] = = = = #

    # = = = = [ 04. Getting A-elems and making the dictionaries ] = = = = #
    sql_query = 'select node_id,value_string from node_node_hyperdata where node_id IN (' + \
                ','.join(map(str, Docs_and_["nodesB"].keys())) + ")" + ' and hyperdata_id=' + str(nodeA_type_id)
    cursor = connection.cursor()
    cursor.execute(sql_query)
    results = cursor.fetchall()

    A_Freq = {}
    A_int2str = {}
    A_str2int = {}
    counter = max_nodeid + 1
    # first pass: give each distinct A-label an integer id, starting past
    # max_nodeid so A-node ids cannot collide with B-node ids
    for i in results:
        doc_id = i[0]
        a = i[1]
        if a not in A_str2int:
            A_str2int[a] = counter
            A_int2str[counter] = a
            counter += 1
    # second pass: map each document to its A-node and count A-node frequencies
    for i in results:
        doc_id = i[0]
        a = A_str2int[i[1]]
        Docs_and_["nodesA"][doc_id] = a
        if a not in A_Freq:
            A_Freq[a] = 0
        A_Freq[a] += 1
    # = = = = [ / 04. Getting A-elems and making the dictionaries ] = = = = #
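    # The next step projects the ngram-document relation onto A-nodes: all
    # A-nodes sharing an ngram (the `k_A_clique` keys below) get pairwise
    # edges. A rough networkx sketch of what Utils.addCompleteSubGraph is
    # assumed to do (incrementing edge weights on repeated cliques), where
    # `G_A` is a hypothetical nx.Graph:
    #     import itertools
    #     for u, v in itertools.combinations(k_A_clique.keys(), 2):
    #         w = G_A.get_edge_data(u, v, {'weight': 0})['weight']
    #         G_A.add_edge(u, v, weight=w + 1)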
    # = = = = [ Filling graph-A ] = = = = #
    Graph_A = Utils()
    for i in NodesB_and_Docs:
        ngram = i
        docs = NodesB_and_Docs[i]
        k_A_clique = {}
        for doc in docs:
            k_A = Docs_and_["nodesA"][doc]
            k_A_clique[k_A] = True
        if len(k_A_clique.keys()) > 1:
            Graph_A.addCompleteSubGraph(k_A_clique.keys())
    # = = = = [ / Filling graph-A ] = = = = #

    # = = = = [ graph-A to JSON ] = = = = #
    A = Graph_A.G
    for node_id in A.nodes():
        A.node[node_id]['label'] = A_int2str[node_id]
        A.node[node_id]['size'] = A_Freq[node_id]
        A.node[node_id]['type'] = nodeA_type
        A.node[node_id]['attributes'] = { "clust_default": 1 }

    A_links = []
    min_weight = 999999
    max_weight = -1
    Weights_Dist = {}
    # first pass over the edges: weight histogram plus min/max
    for e in A.edges_iter():
        s = e[0]
        t = e[1]
        w = A[s][t]["weight"]
        if w not in Weights_Dist:
            Weights_Dist[w] = { "freq": 0, "deleted": 0 }
        Weights_Dist[w]["freq"] += 1
        if min_weight > w:
            min_weight = w
        if max_weight < w:
            max_weight = w

    # second pass: keep edges whose weight value is rare enough, drop the rest
    edges2remove = []
    for e in A.edges_iter():
        s = e[0]
        t = e[1]
        w = A[s][t]["weight"]
        if Weights_Dist[w]["freq"] < (len(A) * 3):  # weight-threshold
            info = { "s": s
                   , "t": t
                   , "w": w / max_weight  # normalization
                   }
            A_links.append(info)
        else:
            # if Weights_Dist[w]["deleted"] < round(Weights_Dist[w]["freq"]*0.95):
            atuple = (s, t)
            edges2remove.append(atuple)
            Weights_Dist[w]["deleted"] += 1
    A.remove_edges_from(edges2remove)
    A.remove_nodes_from(nx.isolates(A))

    data = json_graph.node_link_data(A)  # saving nodesA

    # bipartite AB-links: one edge between an A-node and each ngram (B-node)
    # occurring in its documents
    AB = nx.Graph()
    for i in NodesB_and_Docs:
        b = i
        docs = NodesB_and_Docs[i]
        for doc in docs:
            a = Docs_and_["nodesA"][doc]
            if A.has_node(a):
                AB.add_edge(a, b)
    AB_links = []
    for e in AB.edges_iter():
        info = { "s": e[0], "t": e[1], "w": 1 }
        AB_links.append(info)
    data["links"] = A_links + AB_links  # saving AA-links and AB-links
    # = = = = [ / graph-A to JSON ] = = = = #

    return data
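
# Example usage (a sketch; `my_corpus` stands for a hypothetical Corpus
# instance from gargantext_web.db):
#     data = get_cooc(corpus=my_corpus, type='node_link',
#                     distance='conditional', bridgeness=5)
#     # data["nodes"] / data["links"] are ready for a node-link JSON response.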