Source code for libuplift.meta.target_transform

"""Uplift models based on target transform."""

import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression

from .base import UpliftMetaModelBase
from ..base import UpliftRegressorMixin
from ..base import UpliftClassifierMixin


class _TargetTransformUpliftModelBase(UpliftMetaModelBase):
    """Common machinery for uplift models built on a transformed target.

    One sub-model is trained per treatment.  Each sub-model sees the
    samples of its treatment together with the control samples
    (``trt == 0``), after the subclass specific ``_transform`` has been
    applied to them.

    The attributes ``n_trt_`` and ``n_models_`` are read from ``self``;
    they are not set anywhere in this class (presumably the parent class
    sets them during fitting -- confirm against ``UpliftMetaModelBase``).
    """

    def _get_model_names_list(self, X=None, y=None, trt=None):
        """Return the list of sub-model names, one per treatment.

        With a single treatment the only name is ``"model_ct"``; with
        several treatments the zero-based treatment index is appended
        (``"model_ct0"``, ``"model_ct1"``, ...).  The arguments are not
        used.
        """
        numbered = self.n_trt_ > 1
        return ["model_ct" + (str(idx) if numbered else "")
                for idx in range(self.n_trt_)]

    def _iter_training_subsets(self, X, y, trt, n_trt, sample_weight):
        """Yield ``(X, y, sample_weight)`` training triples, one per model.

        For the k-th treatment (k = 1 .. ``n_models_``) the rows with
        ``trt == k`` or ``trt == 0`` are selected, passed through
        ``_transform`` and yielded.  The unsliced ``y`` is forwarded to
        ``_transform`` as ``full_y``.
        """
        is_control = (trt == 0)
        for treatment in range(1, self.n_models_ + 1):
            keep = (trt == treatment) | is_control
            weights = None if sample_weight is None else sample_weight[keep]
            X_sub, y_sub, w_sub = self._transform(
                X[keep], y[keep], trt[keep], n_trt, weights, y)
            yield X_sub, y_sub, w_sub

    def _transform(self, X, y, trt, n_trt, sample_weight, full_y):
        """Transform the target for model building; subclasses override.

        ``full_y`` is the caller's complete target array.  Implementations
        use it to detect whether ``y`` aliases it, so that (e.g. in tests
        calling this method directly) the original is not overwritten.
        """
        raise NotImplementedError()


[docs]
class TargetTransformUpliftRegressor(UpliftRegressorMixin,
                                     _TargetTransformUpliftModelBase):
    """Target transform based uplift regressor.

    For every treatment a copy of ``base_estimator`` is expected to be
    trained (by the parent class machinery, not shown here) on a target
    that has been rescaled per group by ``_transform``.

    Parameters
    ----------
    base_estimator : estimator, default=LinearRegression()
        Regressor used for the sub-models.
    """

    def __init__(self, base_estimator=LinearRegression()):
        super().__init__(base_estimator=base_estimator)

    def _transform(self, X, y, trt, n_trt, sample_weight, full_y):
        """Transform target for model building.

        Control rows (``trt == 0``) are multiplied by ``-n / nc`` and all
        other rows by ``n / nt``, where ``nt`` / ``nc`` are the (weighted,
        if ``sample_weight`` is given) sizes of the treated / control
        groups and ``n = nt + nc``.

        ``full_y`` is the caller's complete target array; if the float
        version of ``y`` may share memory with it, ``y`` is copied first
        so the original is not overwritten.

        Returns
        -------
        tuple
            ``(X, transformed_y, sample_weight)``; ``X`` and
            ``sample_weight`` are passed through unchanged.
        """
        mask_c = (trt == 0)
        mask_t = ~mask_c
        if sample_weight is None:
            nt = mask_t.sum()
            nc = mask_c.sum()
        else:
            nt = sample_weight[mask_t].sum()
            nc = sample_weight[mask_c].sum()
        n = nt + nc
        # Cast to float so that integer (e.g. class label) targets can be
        # rescaled in place below.
        y = np.asarray(y, float)
        if np.may_share_memory(y, full_y):
            y = y.copy()
        y[mask_c] *= (-n / nc)
        y[mask_t] *= (n / nt)
        return X, y, sample_weight

    def predict(self, X):
        """Return the predictions of the fitted sub-models for ``X``.

        With a single treatment the prediction of the only sub-model is
        returned as is; otherwise the per-treatment predictions are
        combined with ``np.column_stack`` (one column per treatment).
        """
        preds = [m_i.predict(X) for _, m_i in self.models_]
        if self.n_trt_ == 1:
            return preds[0]
        return np.column_stack(preds)





[docs]
class TargetTransformUpliftClassifier(UpliftClassifierMixin,
                                      _TargetTransformUpliftModelBase):
    """Target transform based uplift classifier.

    Parameters
    ----------
    base_estimator : estimator, default=LogisticRegression()
        Classifier used for the sub-models; it must provide
        ``predict_proba``.
    balance_treatments : bool, default=True
        If True, sample weights are introduced/modified to ensure the
        weighted probability of both treatments is 1/2.  This is
        necessary for correct uplift estimation.  For multiple
        treatments this is ensured in all submodels.
    """

    def __init__(self, base_estimator=LogisticRegression(),
                 balance_treatments=True):
        super().__init__(base_estimator=base_estimator)
        self.balance_treatments = balance_treatments

    def _transform(self, X, y, trt, n_trt, sample_weight, full_y):
        """Transform target for model building.

        The target of control rows (``trt == 0``) is flipped
        (``y -> 1 - y``).  When ``balance_treatments`` is set, the weights
        of the treated rows are additionally rescaled so that the total
        treated weight equals the total control weight.

        ``full_y`` is the caller's complete target array; if ``y`` may
        share memory with it, ``y`` is copied first so the original is
        not overwritten.

        Returns
        -------
        tuple
            ``(X, transformed_y, sample_weight)``.  ``sample_weight`` is a
            new float array when balancing is enabled, otherwise the
            argument is passed through unchanged.
        """
        # Treated rows are "everything that is not control": the subsets
        # produced by _iter_training_subsets keep the original treatment
        # code (i + 1), so testing for trt == 1 would miss every treatment
        # but the first one.
        mask_c = (trt == 0)
        mask_t = ~mask_c
        if self.balance_treatments:
            if sample_weight is None:
                sample_weight = np.ones(X.shape[0])
            else:
                # Work on a float copy: the caller's array must not be
                # modified, and integer weights cannot be scaled in place
                # by a float factor.
                sample_weight = np.array(sample_weight, dtype=float)
            nt = sample_weight[mask_t].sum()
            nc = sample_weight[mask_c].sum()
            if nt > 0:
                # After scaling, the treated weights sum to nc.
                sample_weight[mask_t] *= nc / nt
        if np.may_share_memory(y, full_y):
            y = y.copy()
        y[mask_c] = 1 - y[mask_c]
        return X, y, sample_weight

    def predict(self, X):
        """Return ``2 * predict_proba(X) - 1`` for the fitted sub-models.

        With a single treatment the result of the only sub-model is
        returned; otherwise the per-treatment results are combined with
        ``np.dstack`` (treatments along the last axis).
        """
        preds = [2 * m_i.predict_proba(X) - 1 for _, m_i in self.models_]
        if self.n_trt_ == 1:
            return preds[0]
        return np.dstack(preds)