#!/usr/bin/env python
import numpy as np
from scipy.interpolate import interp1d

class probability_of_multivariate_function:
    """
    Probability of a multivariate function.

    Calculates the PDF of `y`, which is some function of multiple random variables: ``y = f(x1, x2, x3, ...)``, given the continuous PDFs of each of its variables and the ranges where these PDFs are to be sampled.

    Parameters
    ----------
    func : callable
        A function of multiple variables f(x1, x2, x3, ...) it should be vectorizable at least on x1 (i.e. return elementwise result when x1 is a numpy vector and all the others are either scalars or vectors of the same length).
    pdfs : list
        A list of callables, each one is the PDF of one of the variables.
    ranges : list
        List of tuples defining the minimum and maximum values for each of the random variables. If the domain of a variable is infinite, one should set the limits such that a very large fraction of the probability distribution is contained between them. #TODO automate this choice.
    points : int or array_like
        The number of samples to take for each variable's distribution. The samples are equally spaced between the given limits. #TODO sample in a smarter way.
    y_min : float
        When constructing the PDF of `y`, values below y=y_min are set to zero.
    y_max : float
        When constructing the PDF of `y`, values above y=y_max are set to zero.
    y_points : int
         When constructing the PDF of `y`, y_points are used. A very large number leads to a noisy result, while a small number leads to inaccuracies.

    Methods
    -------
    cdf
    pdf
    """

    def __init__(self, func, pdfs, ranges, points=512, y_min=None, y_max=None, y_points=256):
        self.n = len(pdfs)
        points = np.array(points)
        if len(points)==1: points.repeat(self.n)
        self.arrays_domain = [np.linspace(ranges[i][0], ranges[i][1], points[i]) for i in range(self.n)]
        self.arrays_image  = [pdfs[i](self.arrays_domain[i]) for i in range(self.n)]
        self.func = func
        self.get_all_combinations()
        i = self.func_result.argsort()
        self.y_values = self.func_result[i] # That's the domain of the new probability space
        self.cdf_values = self.probability[i] # This will be the CDF
        self.cdf_values = np.cumsum(self.cdf_values)
        self.cdf_values /= self.cdf_values[-1]
        # Now attempt to estimate the PDF from the CDF
        self.cdf = interp1d(self.y_values, self.cdf_values)
        if y_min is None: y_min = self.y_values[0]
        if y_max is None: y_max = self.y_values[-1]
        y_new = np.linspace(max(y_min,self.y_values[0]), min(y_max,self.y_values[-1]), y_points)
        c_new = self.cdf(y_new)
        pdf_values = np.diff(c_new)/np.diff(y_new)
        self.pdf = interp1d(0.5*(y_new[1:]+y_new[:-1]), pdf_values, assume_sorted=True, bounds_error=False, fill_value=0)
        del i, self.func_result, self.probability

    def get_all_combinations(self):
        self.j = 0
        self.probability = np.empty(np.prod([len(arr) for arr in self.arrays_domain]))
        self.func_result = np.empty_like(self.probability)
        self.stored_values_domain = np.empty(self.n-1)
        self.stored_values_image  = np.empty(self.n-1)
        self.recursive_part(self.n-1)
        del self.stored_values_domain, self.stored_values_image

    def recursive_part(self, n):
        if n>0:
            for i in range(len(self.arrays_domain[n])):
                self.stored_values_domain[n-1] = self.arrays_domain[n][i]
                self.stored_values_image[n-1]  = self.arrays_image[n][i]
                self.recursive_part(n-1)
        if n==0:
            partial_product = np.prod(self.stored_values_image)
            prod_array = self.arrays_image[0]*partial_product # This is the probability
            self.probability[self.j:self.j+len(self.arrays_domain[0])] = prod_array
            args = [self.stored_values_domain[j] for j in range(self.n-1)]
            args.insert(0, self.arrays_domain[0])
            self.func_result[self.j:self.j+len(self.arrays_domain[0])] = self.func(*args)
            self.j += len(self.arrays_domain[0])