#!/usr/bin/env python import numpy as np from scipy.interpolate import interp1d class probability_of_multivariate_function: """ Probability of a multivariate function. Calculates the PDF of `y`, which is some function of multiple random variables: ``y = f(x1, x2, x3, ...)``, given the continuous PDFs of each of its variables and the ranges where these PDFs are to be sampled. Parameters ---------- func : callable A function of multiple variables f(x1, x2, x3, ...) it should be vectorizable at least on x1 (i.e. return elementwise result when x1 is a numpy vector and all the others are either scalars or vectors of the same length). pdfs : list A list of callables, each one is the PDF of one of the variables. ranges : list List of tuples defining the minimum and maximum values for each of the random variables. If the domain of a variable is infinite, one should set the limits such that a very large fraction of the probability distribution is contained between them. #TODO automate this choice. points : int or array_like The number of samples to take for each variable's distribution. The samples are equally spaced between the given limits. #TODO sample in a smarter way. y_min : float When constructing the PDF of `y`, values below y=y_min are set to zero. y_max : float When constructing the PDF of `y`, values above y=y_max are set to zero. y_points : int When constructing the PDF of `y`, y_points are used. A very large number leads to a noisy result, while a small number leads to inaccuracies. Methods ------- cdf pdf """ def __init__(self, func, pdfs, ranges, points=512, y_min=None, y_max=None, y_points=256): self.n = len(pdfs) points = np.array(points) if len(points)==1: points.repeat(self.n) self.arrays_domain = [np.linspace(ranges[i][0], ranges[i][1], points[i]) for i in range(self.n)] self.arrays_image = [pdfs[i](self.arrays_domain[i]) for i in range(self.n)] self.func = func self.get_all_combinations() i = self.func_result.argsort() self.y_values = self.func_result[i] # That's the domain of the new probability space self.cdf_values = self.probability[i] # This will be the CDF self.cdf_values = np.cumsum(self.cdf_values) self.cdf_values /= self.cdf_values[-1] # Now attempt to estimate the PDF from the CDF self.cdf = interp1d(self.y_values, self.cdf_values) if y_min is None: y_min = self.y_values[0] if y_max is None: y_max = self.y_values[-1] y_new = np.linspace(max(y_min,self.y_values[0]), min(y_max,self.y_values[-1]), y_points) c_new = self.cdf(y_new) pdf_values = np.diff(c_new)/np.diff(y_new) self.pdf = interp1d(0.5*(y_new[1:]+y_new[:-1]), pdf_values, assume_sorted=True, bounds_error=False, fill_value=0) del i, self.func_result, self.probability def get_all_combinations(self): self.j = 0 self.probability = np.empty(np.prod([len(arr) for arr in self.arrays_domain])) self.func_result = np.empty_like(self.probability) self.stored_values_domain = np.empty(self.n-1) self.stored_values_image = np.empty(self.n-1) self.recursive_part(self.n-1) del self.stored_values_domain, self.stored_values_image def recursive_part(self, n): if n>0: for i in range(len(self.arrays_domain[n])): self.stored_values_domain[n-1] = self.arrays_domain[n][i] self.stored_values_image[n-1] = self.arrays_image[n][i] self.recursive_part(n-1) if n==0: partial_product = np.prod(self.stored_values_image) prod_array = self.arrays_image[0]*partial_product # This is the probability self.probability[self.j:self.j+len(self.arrays_domain[0])] = prod_array args = [self.stored_values_domain[j] for j in range(self.n-1)] args.insert(0, self.arrays_domain[0]) self.func_result[self.j:self.j+len(self.arrays_domain[0])] = self.func(*args) self.j += len(self.arrays_domain[0])