"""MMD Evaluation on graph structure statistics. Modified from https://github.com/uoguelph-mlrg/GGM-metrics"""

import numpy as np
import networkx as nx
import numpy as np
# from scipy.linalg import toeplitz
# import pyemd
import concurrent.futures
from scipy.linalg import eigvalsh
from functools import partial


class Descriptor():
    def __init__(self, is_parallel=False, bins=100, kernel='rbf', sigma_type='single', **kwargs):
        self.is_parallel = is_parallel
        self.bins = bins
        self.max_workers = kwargs.get('max_workers')

        if kernel == 'rbf':
            self.distance = self.l2
            self.name += '_rbf'
        else:
            ValueError

        if sigma_type == 'argmax':
            log_sigmas = np.linspace(-5., 5., 50)
            # the first 30 sigma values is usually enough
            log_sigmas = log_sigmas[:30]
            self.sigmas = [np.exp(log_sigma) for log_sigma in log_sigmas]
        elif sigma_type == 'single':
            self.sigmas = kwargs['sigma']
        else:
            raise ValueError

    def evaluate(self, graph_ref_list, graph_pred_list):
        """Compute the distance between the distributions of two unordered sets of graphs.
        Args:
            graph_ref_list, graph_pred_list: two lists of networkx graphs to be evaluated.
        """

        graph_pred_list = [G for G in graph_pred_list if not G.number_of_nodes() == 0]

        sample_pred = self.extract_features(graph_pred_list)
        sample_ref = self.extract_features(graph_ref_list)

        GG = self.disc(sample_pred, sample_pred, distance_scaling=self.distance_scaling)
        GR = self.disc(sample_pred, sample_ref, distance_scaling=self.distance_scaling)
        RR = self.disc(sample_ref, sample_ref, distance_scaling=self.distance_scaling)

        sigmas = self.sigmas
        max_mmd = 0
        mmd_dict = []
        for sigma in sigmas:
            gamma = 1 / (2 * sigma ** 2)

            K_GR = np.exp(-gamma * GR)
            K_GG = np.exp(-gamma * GG)
            K_RR = np.exp(-gamma * RR)

            mmd = K_GG.mean() + K_RR.mean() - (2 * K_GR.mean())
            mmd_dict.append((sigma, mmd))
            max_mmd = mmd if mmd > max_mmd else max_mmd

        # print(self.name, mmd_dict)

        return max_mmd

    def pad_histogram(self, x, y):
        # convert histogram values x and y to float, and pad them for equal length
        support_size = max(len(x), len(y))
        x = x.astype(np.float)
        y = y.astype(np.float)
        if len(x) < len(y):
            x = np.hstack((x, [0.] * (support_size - len(x))))
        elif len(y) < len(x):
            y = np.hstack((y, [0.] * (support_size - len(y))))

        return x, y

    # def emd(self, x, y, distance_scaling=1.0):
    #     support_size = max(len(x), len(y))
    #     x, y = self.pad_histogram(x, y)
    #
    #     d_mat = toeplitz(range(support_size)).astype(np.float)
    #     distance_mat = d_mat / distance_scaling
    #
    #     dist = pyemd.emd(x, y, distance_mat)
    #     return dist ** 2

    def l2(self, x, y, **kwargs):
        # gaussian rbf
        x, y = self.pad_histogram(x, y)
        dist = np.linalg.norm(x - y, 2)
        return dist ** 2

    def kernel_parallel_unpacked(self, x, samples2, kernel):
        dist = []
        for s2 in samples2:
            dist += [kernel(x, s2)]
        return dist

    def kernel_parallel_worker(self, t):
        return self.kernel_parallel_unpacked(*t)

    def disc(self, samples1, samples2, **kwargs):
        # Discrepancy between 2 samples
        tot_dist = []
        if not self.is_parallel:
            for s1 in samples1:
                for s2 in samples2:
                    tot_dist += [self.distance(s1, s2)]
        else:
            with concurrent.futures.ProcessPoolExecutor(max_workers=self.max_workers) as executor:
                for dist in executor.map(self.kernel_parallel_worker,
                                         [(s1, samples2, partial(self.distance, **kwargs)) for s1 in samples1]):
                    tot_dist += [dist]
        return np.array(tot_dist)


class degree(Descriptor):
    def __init__(self, *args, **kwargs):
        self.name = 'degree'
        self.sigmas = [kwargs.get('sigma', 1.0)]
        self.distance_scaling = 1.0
        super().__init__(*args, **kwargs)

    def extract_features(self, dataset):
        res = []
        if self.is_parallel:
            with concurrent.futures.ProcessPoolExecutor(max_workers=self.max_workers) as executor:
                for deg_hist in executor.map(self.degree_worker, dataset):
                    res.append(deg_hist)
        else:
            for g in dataset:
                degree_hist = self.degree_worker(g)
                res.append(degree_hist)

        res = [s1 / np.sum(s1) for s1 in res]
        return res

    def degree_worker(self, G):
        return np.array(nx.degree_histogram(G))


class cluster(Descriptor):
    def __init__(self, *args, **kwargs):
        self.name = 'cluster'
        self.sigmas = [kwargs.get('sigma', [1.0 / 10])]
        super().__init__(*args, **kwargs)
        self.distance_scaling = self.bins

    def extract_features(self, dataset):
        res = []
        if self.is_parallel:
            with concurrent.futures.ProcessPoolExecutor(max_workers=self.max_workers) as executor:
                for clustering_hist in executor.map(self.clustering_worker, [(G, self.bins) for G in dataset]):
                    res.append(clustering_hist)
        else:
            for g in dataset:
                clustering_hist = self.clustering_worker((g, self.bins))
                res.append(clustering_hist)

        res = [s1 / np.sum(s1) for s1 in res]
        return res

    def clustering_worker(self, param):
        G, bins = param
        clustering_coeffs_list = list(nx.clustering(G).values())
        hist, _ = np.histogram(
            clustering_coeffs_list, bins=bins, range=(0.0, 1.0), density=False)
        return hist


class spectral(Descriptor):
    def __init__(self, *args, **kwargs):
        self.name = 'spectral'
        self.sigmas = [kwargs.get('sigma', 1.0)]
        self.distance_scaling = 1
        super().__init__(*args, **kwargs)

    def extract_features(self, dataset):
        res = []
        if self.is_parallel:
            with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
                for spectral_density in executor.map(self.spectral_worker, dataset):
                    res.append(spectral_density)
        else:
            for g in dataset:
                spectral_temp = self.spectral_worker(g)
                res.append(spectral_temp)
        return res

    def spectral_worker(self, G):
        eigs = eigvalsh(nx.normalized_laplacian_matrix(G).todense())
        spectral_pmf, _ = np.histogram(eigs, bins=200, range=(-1e-5, 2), density=False)
        spectral_pmf = spectral_pmf / spectral_pmf.sum()
        return spectral_pmf


def mmd_eval(graph_ref_list, graph_pred_list, methods):
    evaluators = []
    for (method, sigma, sigma_type) in methods:
        evaluators.append(eval(method)(sigma=sigma, sigma_type=sigma_type))

    results = {}
    for evaluator in evaluators:
        results[evaluator.name] = evaluator.evaluate(graph_ref_list, graph_pred_list)

    return results