# Source code for cyclum.evaluation

import numpy as np
import scipy as sp
import scipy.stats


def parzen_estimate(x, lim, half_granularity=100, window=lambda x, rho: sp.stats.norm.pdf(x, 0, rho), scale=0.5):
    """
    Calculate parzen window estimation (a non-parametric density estimation method)

    The estimate is computed on an evenly spaced grid that covers the domain
    padded by half of its length on each side. Every instance is snapped to
    its nearest grid point, a discretized window (normalized to sum to 1) is
    centred on that point, and the windows of all instances are averaged.
    Window mass that would fall outside the grid is dropped, not wrapped
    around to the opposite edge.

    :param x: instances, a 1-D array-like of numbers
    :param lim: limit of domain, a pair ``(lower, upper)`` with ``lower < upper``
    :param half_granularity: resolution; the padded grid has
        ``4 * half_granularity + 1`` points, of which
        ``2 * half_granularity + 1`` lie inside ``lim``
    :param window: callable ``window(displacement, rho)`` returning the
        (possibly unnormalized) window value at each displacement from the
        window centre; the default is a zero-mean normal pdf with standard
        deviation ``rho``
    :param scale: bandwidth exponent, the bandwidth is ``rho = len(x) ** -scale``
    :return: ``(grid, res)``: the grid positions and the estimated probability
        mass at each grid point; ``res`` sums to (nearly) 1 when the windows
        fit inside the padded grid
    :raises AssertionError: if ``scale`` is not strictly between 0 and 1
    :raises ValueError: if ``lim[0] < lim[1]`` does not hold
    :raises ZeroDivisionError: if ``x`` is empty
    """
    # Kept as an assert (rather than a ValueError) so that callers observe the
    # same exception type and message as before.
    assert 0 < scale < 1, "scale must be in (0, 1) to perform a unbiased estimation"

    lower, upper = lim[0], lim[1]
    span = upper - lower
    if not span > 0:
        raise ValueError("lim must satisfy lim[0] < lim[1]")

    x = np.asarray(x, dtype=float)
    n = len(x)
    rho = n ** -scale

    n_grid = half_granularity * 4 + 1
    grid = np.linspace(lower - span / 2, upper + span / 2, n_grid)

    # Window sampled at every displacement that can occur between two grid
    # points (from -(n_grid - 1) to +(n_grid - 1) grid steps), so it is centred
    # on zero regardless of where the domain starts.
    kernel = np.asarray(window(np.linspace(-2 * span, 2 * span, 2 * n_grid - 1), rho), dtype=float)
    kernel = kernel / np.sum(kernel)

    # Nearest grid index of every instance; the grid spans 2 * span in
    # n_grid - 1 steps.
    idx = np.round((x - grid[0]) / (2 * span) * (n_grid - 1)).astype(int)
    # Instances beyond the padded grid cannot be placed on it; they still
    # count towards n, so their mass is simply missing from the result.
    inside = (idx >= 0) & (idx < n_grid)
    counts = np.bincount(idx[inside], minlength=n_grid)

    # Full linear convolution places a copy of the kernel at every occupied
    # grid point; the slice keeps the part aligned with the grid
    # (kernel index n_grid - 1 is displacement zero).
    res = np.convolve(counts, kernel)[n_grid - 1: 2 * n_grid - 1] / n
    return grid, res
def periodic_parzen_estimate(x, period=3.14, half_granularity=100, window=lambda x, rho: sp.stats.norm.pdf(x, 0, rho), scale=0.5):
    """
    Calculate parzen window estimation specifically for periodic domain

    The circle of circumference ``period`` is divided into
    ``2 * half_granularity + 1`` equally wide bins. Every instance is reduced
    modulo ``period`` and snapped to its nearest bin, a discretized window
    (normalized to sum to 1) is centred on that bin with wrap-around, and the
    windows of all instances are averaged.

    :param x: instances, a 1-D array-like of numbers
    :param period: length of the periodic domain; note the default is the
        literal ``3.14``, not ``np.pi``
    :param half_granularity: resolution; the number of bins is
        ``2 * half_granularity + 1``
    :param window: callable ``window(displacement, rho)`` returning the
        (possibly unnormalized) window value at each displacement from the
        window centre; the default is a zero-mean normal pdf with standard
        deviation ``rho``
    :param scale: bandwidth exponent, the bandwidth is ``rho = len(x) ** -scale``
    :return: ``(grid, res)``: the bin positions in ``[0, period)`` and the
        estimated probability mass of each bin; ``res`` sums to 1
    :raises AssertionError: if ``scale`` is not strictly between 0 and 1
    :raises ZeroDivisionError: if ``x`` is empty
    """
    # Kept as an assert (rather than a ValueError) so that callers observe the
    # same exception type and message as before.
    assert 0 < scale < 1, "scale must be in (0, 1) to perform a unbiased estimation"

    gran = half_granularity * 2 + 1
    x = np.asarray(x, dtype=float)
    n = len(x)
    rho = n ** -scale

    # Bin k sits at k * period / gran. The window is sampled with that same
    # spacing so that window, binning and returned grid all agree.
    bin_width = period / gran
    displacements = (np.arange(gran) - half_granularity) * bin_width
    kernel = np.asarray(window(displacements, rho), dtype=float)
    kernel = kernel / np.sum(kernel)

    # Nearest bin of every instance; an index of gran wraps back to bin 0.
    bins = (np.round((x % period) / period * gran) % gran).astype(int)
    counts = np.bincount(bins, minlength=gran)

    # One roll per occupied bin (at most gran) instead of one per instance.
    # The kernel peak is at index half_granularity, so rolling by
    # k - half_granularity moves it onto bin k.
    res = np.zeros(gran)
    for k in np.flatnonzero(counts):
        res += counts[k] * np.roll(kernel, k - half_granularity)
    res /= n

    # endpoint=False: `period` is the same point on the circle as 0.
    return np.linspace(0, period, gran, endpoint=False), res
def precision_estimate(distr_vector_list, label_vector, possible_label_list):
    """
    Sum, over positions, of the largest frequency-weighted distribution value

    Each distribution is weighted by the relative frequency of its label in
    ``label_vector``; at every position the largest weighted value among the
    distributions is taken, and these maxima are summed. (Presumably the
    distributions are per-label density estimates on a shared grid, which
    would make the result the best achievable classification precision --
    to be confirmed against the callers.)

    :param distr_vector_list: one distribution vector per possible label, in
        the same order as ``possible_label_list``; each is squeezed to 1-D and
        all must have the same length
    :param label_vector: observed labels, used only to count how often each
        possible label occurs
    :param possible_label_list: the labels to count in ``label_vector``
    :return: scalar sum of the position-wise maxima; ``nan`` if none of the
        possible labels occurs in ``label_vector``
    """
    # One row per label, one column per position.
    distributions = np.vstack([np.squeeze(distr) for distr in distr_vector_list])

    # Column vector against row of candidate labels broadcasts to an
    # (instances x labels) match table; column sums are per-label counts.
    labels = np.reshape(label_vector, [-1, 1])
    counts = np.sum(labels == np.array(possible_label_list), axis=0)
    frequencies = counts / np.sum(counts)

    weighted = distributions * np.reshape(frequencies, [-1, 1])
    return np.sum(np.max(weighted, axis=0))