Source code for libuplift.metrics.bins

"""Measures based on comparing treatment and control statistics within
bins, such as quantiles."""

import numpy as np

from sklearn.utils.validation import check_array, check_consistent_length

from ..utils.validation import check_trt
from ..utils.stats import quantile

def iter_quantiles(scores, trt, n_trt, n=10, joint=False, sample_weight=None):
    """Walk through score quantile bins for every treatment group in lockstep.

    This is a generator.  Each item it produces is a list with one index
    array per treatment group (``n_trt + 1`` arrays, group 0 first); every
    array holds positions into the original ``scores`` / ``trt`` arrays of
    the samples of that group that fall into the current bin.  Bins are
    produced from the highest-numbered bin down to bin 0.

    Parameters
    ----------
    scores : array
        Scores to be binned.
    trt : array of non-negative ints
        Treatment group of each sample (consumed by ``np.bincount``).
    n_trt : int
        Number of treatments; groups ``0 .. n_trt`` are iterated.
    n : int, default 10
        Number of bins.  Bin edges are the quantiles at levels
        ``0, 1/n, ..., (n-1)/n``.
    joint : bool, default False
        If true, a single set of edges is computed from all scores and
        shared by every group; otherwise edges are computed separately
        from each group's own scores.
    sample_weight : array or None
        Passed to ``quantile`` as ``weights`` (sliced per group when
        ``joint`` is false).
    """

    def _bin_edges(values, weights):
        # Lower edges of the n bins: quantile levels 0, 1/n, ..., (n-1)/n.
        return quantile(values, np.linspace(0, 1, n, endpoint=False),
                        weights=weights)

    # Group sample positions by treatment: ``order`` lists positions sorted
    # by treatment id, ``group_bounds[g]:group_bounds[g + 1]`` delimits
    # group ``g`` inside it.
    order = np.argsort(trt)
    group_bounds = np.r_[[0], np.cumsum(np.bincount(trt, minlength=n_trt + 1))]

    bin_offsets = []
    sorted_members = []
    shared_edges = _bin_edges(scores, sample_weight) if joint else None

    for group in range(n_trt + 1):
        members = order[group_bounds[group]:group_bounds[group + 1]]
        group_scores = scores[members]
        if joint:
            edges = shared_edges
        else:
            group_weights = (None if sample_weight is None
                             else sample_weight[members])
            edges = _bin_edges(group_scores, group_weights)
        # digitize() is 1-based with respect to the edges, hence the -1;
        # the result is stored in the smallest dtype able to hold n + 1.
        bin_of = (np.digitize(group_scores, edges) - 1).astype(
            np.min_scalar_type(n + 1))
        # Cumulative bin sizes give slice offsets into the bin-sorted members.
        offsets = np.r_[[0], np.cumsum(np.bincount(bin_of, minlength=n))]
        bin_offsets.append(offsets)
        sorted_members.append(members[np.argsort(bin_of)])

    for k in reversed(range(n)):
        yield [ids[offs[k]:offs[k + 1]]
               for ids, offs in zip(sorted_members, bin_offsets)]
def _binned_measure(per_q_func, aggreg_func, name, y_true, y_pred, trt,
                    n_trt=None, sample_weight=None, n_bins=10,
                    allow_nans=False, joint_quantiles=False):
    """Evaluate a per-bin statistic on score quantiles and aggregate it.

    Parameters
    ----------
    per_q_func : callable
        Called as ``per_q_func(y_true, y_pred, idx_t, idx_c)`` for every
        bin, where ``idx_t`` / ``idx_c`` are the index arrays of treated /
        control samples in that bin.
    aggreg_func : callable
        Called as ``aggreg_func(per_bin_values, allow_nans)`` to reduce the
        list of per-bin values to a single number.
    name : str
        Name of the measure, used in error messages.
    y_true, y_pred, trt : array-like
        Outcomes, predicted scores and treatment indicators; validated and
        checked for consistent length.
    n_trt : int or None
        Number of treatments, resolved by ``check_trt``.
    sample_weight : array-like or None
        Validated and length-checked when given.
        NOTE(review): the weights are not passed on to ``iter_quantiles``
        or to ``per_q_func``, so they currently have no influence on the
        result -- confirm whether that is intended.
    n_bins : int, default 10
        Number of quantile bins.
    allow_nans : bool, default False
        Forwarded to ``aggreg_func``.
    joint_quantiles : bool, default False
        Forwarded to ``iter_quantiles`` as ``joint``.

    Raises
    ------
    ValueError
        If more than one treatment is present.
    RuntimeError
        If the aggregated value is NaN.
    """
    y_true = check_array(y_true, ensure_2d=False)
    y_pred = check_array(y_pred, ensure_2d=False)
    trt, n_trt = check_trt(trt, n_trt)
    if sample_weight is not None:
        sample_weight = check_array(sample_weight, ensure_2d=False)
        check_consistent_length(y_true, y_pred, trt, sample_weight)
    else:
        check_consistent_length(y_true, y_pred, trt)
    if n_trt > 1:
        raise ValueError(name + " only supported for a single treatment.")

    # With a single treatment each bin yields exactly [control, treated].
    bins = iter_quantiles(y_pred, trt, n_trt, n_bins, joint=joint_quantiles)
    per_bin = [per_q_func(y_true, y_pred, idx_t, idx_c)
               for idx_c, idx_t in bins]

    result = aggreg_func(per_bin, allow_nans)
    if np.isnan(result):
        raise RuntimeError("NaN's present in " + name + " computation")
    return result


def _q_aggreg_mean(stats_q_s, allow_nans):
    """Average the per-bin values, skipping NaNs only if ``allow_nans``."""
    reducer = np.nanmean if allow_nans else np.mean
    return reducer(stats_q_s)


def _q_aggreg_max(stats_q_s, allow_nans):
    """Take the largest per-bin value, skipping NaNs only if ``allow_nans``."""
    reducer = np.nanmax if allow_nans else np.max
    return reducer(stats_q_s)
def QMSE(y_true, y_pred, trt, n_trt=None, sample_weight=None, n_bins=10,
         allow_nans=False, joint_quantiles=False):
    """The per-quantile MSE measure by Rudaś, Jaroszewicz.

    Samples are split into ``n_bins`` quantile bins of ``y_pred``.  In each
    bin the observed uplift is the difference between the mean of ``y_true``
    over treated and over control samples; the bin's value is the mean
    squared difference between the predicted scores of the *treated*
    samples and that observed uplift.  The measure is the mean of the
    per-bin values.

    Parameters
    ----------
    y_true, y_pred, trt : array-like
        Outcomes, predicted uplift scores and treatment indicators.
    n_trt : int or None
        Number of treatments.
    sample_weight : array-like or None
        Forwarded to the shared binned-measure helper.
    n_bins : int, default 10
        Number of quantile bins.
    allow_nans : bool, default False
        If true, bins whose value is NaN (a bin with no treated or no
        control samples) are ignored when averaging.
    joint_quantiles : bool, default False
        If true, bin edges are computed jointly for both groups.

    Returns
    -------
    float
        Mean of the per-bin squared errors.

    Raises
    ------
    ValueError
        If more than one treatment is present.
    RuntimeError
        If the aggregated value is NaN.
    """

    def _per_q_qmse(y_true, y_pred, idx_t, idx_c):
        # A bin lacking either group has no observable uplift.
        if len(idx_c) == 0:
            return np.nan
        mi_c = y_true[idx_c].mean()
        if len(idx_t) == 0:
            return np.nan
        mi_t = y_true[idx_t].mean()
        return np.mean((y_pred[idx_t] - (mi_t - mi_c)) ** 2)

    # Forward the caller's n_bins; it used to be overridden with a literal 10.
    return _binned_measure(_per_q_qmse, _q_aggreg_mean, "QMSE",
                           y_true, y_pred, trt, n_trt=n_trt,
                           sample_weight=sample_weight, n_bins=n_bins,
                           allow_nans=allow_nans,
                           joint_quantiles=joint_quantiles)
def QMSE_j(y_true, y_pred, trt, n_trt=None, sample_weight=None, n_bins=10,
           allow_nans=False):
    """QMSE computed with quantiles shared by treatment and control.

    Equivalent to calling ``QMSE`` with ``joint_quantiles=True``; all other
    arguments are passed through unchanged.  (They were previously replaced
    by hard-coded defaults, so the caller's ``n_trt``, ``sample_weight``,
    ``n_bins`` and ``allow_nans`` were silently ignored.)

    Parameters
    ----------
    y_true, y_pred, trt : array-like
        Outcomes, predicted uplift scores and treatment indicators.
    n_trt : int or None
        Number of treatments.
    sample_weight : array-like or None
        Forwarded to ``QMSE``.
    n_bins : int, default 10
        Number of quantile bins.
    allow_nans : bool, default False
        Forwarded to ``QMSE``.

    Returns
    -------
    float
        The value returned by ``QMSE``.
    """
    return QMSE(y_true, y_pred, trt, n_trt=n_trt,
                sample_weight=sample_weight, n_bins=n_bins,
                allow_nans=allow_nans, joint_quantiles=True)
def _per_q_euce(y_true, y_pred, idx_t, idx_c):
    """Absolute calibration error of the predicted uplift within one bin.

    The observed uplift is the mean of ``y_true`` over the treated samples
    (``idx_t``) minus its mean over the control samples (``idx_c``).  The
    predicted uplift is the mean of ``y_pred`` over both groups together.
    Returns the absolute difference of the two, or NaN when either group
    is empty.
    """
    if len(idx_c) == 0:
        return np.nan
    control_mean = y_true[idx_c].mean()

    if len(idx_t) == 0:
        return np.nan
    treated_mean = y_true[idx_t].mean()

    everyone = np.concatenate([idx_c, idx_t])
    predicted_uplift = np.mean(y_pred[everyone])
    observed_uplift = treated_mean - control_mean
    return np.abs(predicted_uplift - observed_uplift)
def EUCE(y_true, y_pred, trt, n_trt=None, sample_weight=None, n_bins=100,
         allow_nans=False, joint_quantiles=True):
    """The EUCE measure by Nyberg and Klami.

    Samples are split into ``n_bins`` quantile bins of ``y_pred``; in each
    bin the absolute difference between the mean predicted score and the
    observed uplift (treated mean minus control mean of ``y_true``) is
    computed, and the per-bin values are averaged.

    Parameters
    ----------
    y_true, y_pred, trt : array-like
        Outcomes, predicted uplift scores and treatment indicators.
    n_trt : int or None
        Number of treatments.
    sample_weight : array-like or None
        Forwarded to the shared binned-measure helper.
    n_bins : int, default 100
        Number of quantile bins.
    allow_nans : bool, default False
        If true, NaN bin values are ignored when averaging.
    joint_quantiles : bool, default True
        If true, bin edges are computed jointly for both groups.

    Returns
    -------
    float
        Mean of the per-bin absolute calibration errors.
    """
    # Forward the caller's n_bins; a literal 10 used to be passed here,
    # contradicting the documented default of 100.
    return _binned_measure(_per_q_euce, _q_aggreg_mean, "EUCE",
                           y_true, y_pred, trt, n_trt=n_trt,
                           sample_weight=sample_weight, n_bins=n_bins,
                           allow_nans=allow_nans,
                           joint_quantiles=joint_quantiles)
def MUCE(y_true, y_pred, trt, n_trt=None, sample_weight=None, n_bins=100,
         allow_nans=False, joint_quantiles=True):
    """The MUCE measure by Nyberg and Klami.

    Samples are split into ``n_bins`` quantile bins of ``y_pred``; in each
    bin the absolute difference between the mean predicted score and the
    observed uplift (treated mean minus control mean of ``y_true``) is
    computed, and the largest per-bin value is returned.

    Parameters
    ----------
    y_true, y_pred, trt : array-like
        Outcomes, predicted uplift scores and treatment indicators.
    n_trt : int or None
        Number of treatments.
    sample_weight : array-like or None
        Forwarded to the shared binned-measure helper.
    n_bins : int, default 100
        Number of quantile bins.
    allow_nans : bool, default False
        If true, NaN bin values are ignored when taking the maximum.
    joint_quantiles : bool, default True
        If true, bin edges are computed jointly for both groups.

    Returns
    -------
    float
        Maximum of the per-bin absolute calibration errors.
    """
    # Forward the caller's n_bins; a literal 10 used to be passed here,
    # contradicting the documented default of 100.
    return _binned_measure(_per_q_euce, _q_aggreg_max, "MUCE",
                           y_true, y_pred, trt, n_trt=n_trt,
                           sample_weight=sample_weight, n_bins=n_bins,
                           allow_nans=allow_nans,
                           joint_quantiles=joint_quantiles)