Source code for libuplift.classifiers.memoized

"""A memoized classifier class.

Used to avoid recomputing the same classifier twice e.g. when both
T-learner and Response models are computed.

"""

import os
from tempfile import gettempdir

from sklearn.base import BaseEstimator
from sklearn.base import clone
from sklearn.utils.validation import check_memory

def _do_fit(estimator, model_params, *args, **kwargs):
    """Fit a fresh clone of ``estimator`` and return the result of ``fit``.

    Parameters
    ----------
    estimator : estimator object
        Template estimator.  It is cloned first, so the object passed
        in is left unfitted.

    model_params : object
        Not used by the body.  It is part of the signature so that a
        caching wrapper which ignores ``estimator`` can still tell
        differently configured estimators apart.

    *args, **kwargs
        Forwarded unchanged to the clone's ``fit`` method.

    Returns
    -------
    Whatever the clone's ``fit`` call returns.
    """
    fresh_estimator = clone(estimator)
    fit_result = fresh_estimator.fit(*args, **kwargs)
    return fit_result


[docs]
class MemoizedClassifier(BaseEstimator):
    """Creates a memoized version of an estimator.

    Subsequent calls to fit with the same arguments will reuse a
    prefitted model from a joblib cache instead of fitting again.

    Attributes which are not found on the wrapper itself are looked
    up on the fitted model once ``fit`` has been called, and on the
    unfitted ``estimator`` before that.

    Parameters
    ----------

    estimator : a scikit-learn estimator
        The estimator whose fitted models are cached.

    memory : a path or a joblib.Memory object, default=None
        Location of the cache.  If None a default path is used:
        "libuplift_cache" in the system's default temporary
        directory.

    Attributes
    ----------

    memory_ : object returned by ``check_memory``
        The cache backend used by the last call to ``fit``.

    do_fit_cached_ : callable
        Cached version of ``_do_fit`` bound to ``memory_``.

    fitted_estimator_ : object
        Result of the (possibly cached) fit.

    """

    def __init__(self, estimator, memory=None):
        self.estimator = estimator
        self.memory = memory

    def fit(self, *args, **kwargs):
        """Fit the wrapped estimator, reusing a cached fit when possible.

        Parameters
        ----------
        *args, **kwargs
            Forwarded unchanged to the wrapped estimator's ``fit``.
            They are part of the cache key.

        Returns
        -------
        self
        """
        memory = self.memory
        if memory is None:
            memory = os.path.join(gettempdir(), "libuplift_cache")
        self.memory_ = check_memory(memory)
        # Rebuilt on every call so that changing ``memory`` between two
        # fits takes effect instead of silently reusing the old cache.
        self.do_fit_cached_ = self.memory_.cache(_do_fit, ignore=["estimator"])
        # The estimator object itself is excluded from the cache key, so
        # the key must identify it by other means.  Parameters alone are
        # not enough: two different estimator classes can expose
        # identical parameters, hence the class name is included too.
        estimator_type = type(self.estimator)
        cache_key = {
            "estimator_class": (
                f"{estimator_type.__module__}.{estimator_type.__qualname__}"
            ),
            "params": self.estimator.get_params(),
        }
        self.fitted_estimator_ = self.do_fit_cached_(self.estimator,
                                                     cache_key,
                                                     *args, **kwargs)
        return self

    def __getattr__(self, name):
        """Delegate unknown attributes to the wrapped estimator.

        The fitted model is used once ``fit`` has been called, the
        unfitted ``estimator`` otherwise.

        Raises
        ------
        AttributeError
            If ``name`` is one of the wrapper's own attributes and is
            not set, or if the delegate does not have it either.
        """
        # __getattr__ only runs after normal lookup has failed, so the
        # wrapper's own attributes are really missing at this point.
        # They must not be delegated; for "estimator" in particular the
        # delegation below would re-enter this method without end.
        if name in ("estimator", "fitted_estimator_", "do_fit_cached_"):
            raise AttributeError(f"MemoizedClassifier has no attribute {name}")
        if "fitted_estimator_" not in self.__dict__:
            return getattr(self.estimator, name)
        return getattr(self.fitted_estimator_, name)