Source code for libuplift.classifiers.memoized

"""A memoized classifier class.

Used to avoid recomputing the same classifier twice, e.g. when both
T-learner and Response models are computed.

"""

import os
from tempfile import gettempdir

from sklearn.base import BaseEstimator
from sklearn.base import clone
from sklearn.utils.validation import check_memory

def _do_fit(estimator, model_params, *args, **kwargs):
    """Fit a fresh clone of ``estimator`` and return the fitted clone.

    The estimator passed in is never fitted itself; a copy produced by
    ``clone`` is fitted instead.

    Parameters
    ----------
    estimator : estimator object
        Template estimator to clone.
    model_params : object
        Not read by this function.  The caller in this module caches this
        function with ``estimator`` ignored and passes
        ``estimator.get_params()`` here, so this argument appears to exist
        only to take part in the cache key.
    *args, **kwargs
        Forwarded unchanged to the clone's ``fit`` method.

    Returns
    -------
    The value returned by ``fit`` on the clone.
    """
    fresh_copy = clone(estimator)
    fitted = fresh_copy.fit(*args, **kwargs)
    return fitted

class MemoizedClassifier(BaseEstimator):
    """Creates a memoized version of an estimator.

    Subsequent calls to fit with the same arguments will reuse a
    prefitted model.

    memory is either a path or a joblib.Memory object.  If None a
    default path is used: "libuplift_cache" in the system's default
    temporary directory.

    Attribute lookups that are not resolved on the wrapper itself are
    forwarded to the fitted estimator once ``fit`` has been called, and
    to the unfitted wrapped estimator before that.

    Parameters
    ----------
    estimator : a scikit-learn estimator
        The classifier to wrap in a memoized interface.

    memory : a path or a joblib.Memory object, default=None
        Where fitted models are cached.

    """

    def __init__(self, estimator, memory=None):
        self.estimator = estimator
        self.memory = memory

    def fit(self, *args, **kwargs):
        """Fit the wrapped estimator, reusing a cached fit when available.

        All arguments are forwarded to the wrapped estimator's ``fit``
        through a cached helper.  The estimator object itself is excluded
        from the cache key; its ``get_params()`` result is passed
        alongside so that it takes part in the key instead.

        Returns
        -------
        self
        """
        memory = self.memory
        if memory is None:
            memory = os.path.join(gettempdir(), "libuplift_cache")
        self.memory_ = check_memory(memory)
        # NOTE(review): the cached wrapper is created once and kept, so a
        # later change of ``self.memory`` may not be picked up -- confirm
        # whether that is intended.
        if not hasattr(self, "do_fit_cached_"):
            self.do_fit_cached_ = self.memory_.cache(
                _do_fit, ignore=["estimator"])
        self.fitted_estimator_ = self.do_fit_cached_(
            self.estimator, self.estimator.get_params(), *args, **kwargs)
        return self

    def __getattr__(self, name):
        """Forward unknown attributes to the (fitted) wrapped estimator.

        Python calls this only after normal attribute lookup has failed.

        Raises
        ------
        AttributeError
            If ``name`` is one of the wrapper's own attributes and it has
            not been set yet, or if the delegate lacks the attribute.
        """
        # The wrapper's own attributes must never be forwarded.
        # ``estimator`` is included because the delegation below reads
        # ``self.estimator``: if it were missing, that read would re-enter
        # this method and recurse without bound.
        if name in ("estimator", "fitted_estimator_", "do_fit_cached_"):
            try:
                return self.__dict__[name]
            except KeyError:
                raise AttributeError(
                    f"MemoizedClassifier has no attribute {name}") from None
        if "fitted_estimator_" not in self.__dict__:
            return getattr(self.estimator, name)
        return getattr(self.fitted_estimator_, name)