diff --git a/doubleml/data/__init__.py b/doubleml/data/__init__.py index 8343c228..73fd71d3 100644 --- a/doubleml/data/__init__.py +++ b/doubleml/data/__init__.py @@ -11,6 +11,7 @@ from .ssm_data import DoubleMLSSMData +# TODO: Remove DoubleMLClusterData with version 0.12.0 class DoubleMLClusterData(DoubleMLData): """ Backwards compatibility wrapper for DoubleMLData with cluster_cols. diff --git a/doubleml/data/did_data.py b/doubleml/data/did_data.py index 57d486a3..30cc2900 100644 --- a/doubleml/data/did_data.py +++ b/doubleml/data/did_data.py @@ -1,4 +1,5 @@ import io +import warnings import pandas as pd from sklearn.utils import assert_all_finite @@ -7,6 +8,7 @@ from doubleml.data.base_data import DoubleMLData +# TODO: Remove DoubleMLDIDData with version 0.12.0 class DoubleMLDIDData(DoubleMLData): """Double machine learning data-backend for Difference-in-Differences models. @@ -81,7 +83,13 @@ def __init__( use_other_treat_as_covariate=True, force_all_x_finite=True, force_all_d_finite=True, - ): # Initialize _t_col to None first to avoid AttributeError during parent init + ): + warnings.warn( + "DoubleMLDIDData is deprecated and will be removed with version 0.12.0." 
"Use DoubleMLPanelData instead.", + FutureWarning, + stacklevel=2, + ) + # Initialize _t_col to None first to avoid AttributeError during parent init self._t_col = None # Store whether x_cols was originally None to reset it later diff --git a/doubleml/did/did.py b/doubleml/did/did.py index 9307ae78..87eb4aaa 100644 --- a/doubleml/did/did.py +++ b/doubleml/did/did.py @@ -7,11 +7,11 @@ from doubleml.data.did_data import DoubleMLDIDData from doubleml.double_ml import DoubleML from doubleml.double_ml_score_mixins import LinearScoreMixin -from doubleml.utils._checks import _check_finite_predictions, _check_is_propensity, _check_score, _check_trimming +from doubleml.utils._checks import _check_finite_predictions, _check_is_propensity, _check_score from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls -from doubleml.utils._propensity_score import _trimm +# TODO: Remove DoubleMLDID with version 0.12.0 class DoubleMLDID(LinearScoreMixin, DoubleML): """Double machine learning for difference-in-differences models with panel data (two time periods). @@ -50,12 +50,8 @@ class DoubleMLDID(LinearScoreMixin, DoubleML): Indicates whether to use a slightly different normalization from Sant'Anna and Zhao (2020). Default is ``True``. - trimming_rule : str - A str (``'truncate'`` is the only choice) specifying the trimming approach. - Default is ``'truncate'``. - - trimming_threshold : float - The threshold used for trimming. + clipping_threshold : float + The threshold used for clipping. Default is ``1e-2``. draw_sample_splitting : bool @@ -89,10 +85,14 @@ def __init__( n_rep=1, score="observational", in_sample_normalization=True, - trimming_rule="truncate", - trimming_threshold=1e-2, + clipping_threshold=1e-2, draw_sample_splitting=True, ): + warnings.warn( + "DoubleMLDID is deprecated and will be removed with version 0.12.0. 
" "Please use DoubleMLDIDBinary instead.", + DeprecationWarning, + stacklevel=2, + ) super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting) self._check_data(self._dml_data) @@ -142,9 +142,7 @@ def __init__( self._predict_method["ml_m"] = "predict_proba" self._initialize_ml_nuisance_params() - self._trimming_rule = trimming_rule - self._trimming_threshold = trimming_threshold - _check_trimming(self._trimming_rule, self._trimming_threshold) + self._clipping_threshold = clipping_threshold self._sensitivity_implemented = True self._external_predictions_implemented = True @@ -156,18 +154,11 @@ def in_sample_normalization(self): return self._in_sample_normalization @property - def trimming_rule(self): + def clipping_threshold(self): """ - Specifies the used trimming rule. + Specifies the used clipping threshold. """ - return self._trimming_rule - - @property - def trimming_threshold(self): - """ - Specifies the used trimming threshold. - """ - return self._trimming_threshold + return self._clipping_threshold def _initialize_ml_nuisance_params(self): if self.score == "observational": @@ -269,9 +260,10 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa method=self._predict_method["ml_m"], return_models=return_models, ) + _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls) _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12) - m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold) + m_hat["preds"] = np.clip(m_hat["preds"], self.clipping_threshold, 1 - self.clipping_threshold) # nuisance estimates of the uncond. treatment prob. 
p_hat = np.full_like(d, d.mean(), dtype="float64") diff --git a/doubleml/did/did_binary.py b/doubleml/did/did_binary.py index 6e3a95f2..d92ebf19 100644 --- a/doubleml/did/did_binary.py +++ b/doubleml/did/did_binary.py @@ -1,4 +1,5 @@ import warnings +from typing import Optional import numpy as np from sklearn.utils import check_X_y @@ -19,14 +20,13 @@ from doubleml.utils._checks import ( _check_bool, _check_finite_predictions, - _check_is_propensity, _check_score, - _check_trimming, ) from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls -from doubleml.utils._propensity_score import _trimm +from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor +# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). class DoubleMLDIDBinary(LinearScoreMixin, DoubleML): """Double machine learning for difference-in-differences models with panel data (binary setting in terms of group and time combinations). @@ -83,13 +83,16 @@ class DoubleMLDIDBinary(LinearScoreMixin, DoubleML): Indicates whether to use a slightly different normalization from Sant'Anna and Zhao (2020). Default is ``True``. - trimming_rule : str - A str (``'truncate'`` is the only choice) specifying the trimming approach. - Default is ``'truncate'``. + trimming_rule : str, optional, deprecated + (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach. + Use `ps_processor_config` instead. Will be removed in a future version. - trimming_threshold : float - The threshold used for trimming. - Default is ``1e-2``. + trimming_threshold : float, optional, deprecated + (DEPRECATED) The threshold used for trimming. + Use `ps_processor_config` instead. Will be removed in a future version. + + ps_processor_config : PSProcessorConfig, optional + Configuration for propensity score processing (clipping, calibration, etc.). 
draw_sample_splitting : bool Indicates whether the sample splitting should be drawn during initialization of the object. @@ -115,8 +118,9 @@ def __init__( n_rep=1, score="observational", in_sample_normalization=True, - trimming_rule="truncate", - trimming_threshold=1e-2, + trimming_rule="truncate", # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + trimming_threshold=1e-2, # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + ps_processor_config: Optional[PSProcessorConfig] = None, draw_sample_splitting=True, print_periods=False, ): @@ -232,9 +236,12 @@ def __init__( self._predict_method["ml_m"] = "predict_proba" self._initialize_ml_nuisance_params() + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + self._ps_processor_config, self._ps_processor = init_ps_processor( + ps_processor_config, trimming_rule, trimming_threshold + ) self._trimming_rule = trimming_rule - self._trimming_threshold = trimming_threshold - _check_trimming(self._trimming_rule, self._trimming_threshold) + self._trimming_threshold = self._ps_processor.clipping_threshold self._sensitivity_implemented = True self._external_predictions_implemented = True @@ -321,19 +328,44 @@ def in_sample_normalization(self): """ return self._in_sample_normalization + @property + def ps_processor_config(self): + """ + Configuration for propensity score processing (clipping, calibration, etc.). + """ + return self._ps_processor_config + + @property + def ps_processor(self): + """ + Propensity score processor. + """ + return self._ps_processor + + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_rule(self): """ Specifies the used trimming rule. """ + warnings.warn( + "'trimming_rule' is deprecated and will be removed in a future version. 
", DeprecationWarning, stacklevel=2 + ) return self._trimming_rule + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_threshold(self): """ Specifies the used trimming threshold. """ - return self._trimming_threshold + warnings.warn( + "'trimming_threshold' is deprecated and will be removed in a future version. " + "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self._ps_processor.clipping_threshold @property def n_obs_subset(self): @@ -499,9 +531,9 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa method=self._predict_method["ml_m"], return_models=return_models, ) + _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls) - _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12) - m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold) + m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], d, cv=smpls, learner_name="ml_m") # nuisance estimates of the uncond. treatment prob. 
p_hat = np.full_like(d, d.mean(), dtype="float64") diff --git a/doubleml/did/did_cs.py b/doubleml/did/did_cs.py index 11c467b5..da833fd5 100644 --- a/doubleml/did/did_cs.py +++ b/doubleml/did/did_cs.py @@ -7,11 +7,11 @@ from doubleml.data.did_data import DoubleMLDIDData from doubleml.double_ml import DoubleML from doubleml.double_ml_score_mixins import LinearScoreMixin -from doubleml.utils._checks import _check_finite_predictions, _check_is_propensity, _check_score, _check_trimming +from doubleml.utils._checks import _check_finite_predictions, _check_is_propensity, _check_score from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls_2d -from doubleml.utils._propensity_score import _trimm +# TODO: Remove DoubleMLDIDCS with version 0.12.0 class DoubleMLDIDCS(LinearScoreMixin, DoubleML): """Double machine learning for difference-in-difference with repeated cross-sections. @@ -50,12 +50,8 @@ class DoubleMLDIDCS(LinearScoreMixin, DoubleML): Indicates whether to use a slightly different normalization from Sant'Anna and Zhao (2020). Default is ``True``. - trimming_rule : str - A str (``'truncate'`` is the only choice) specifying the trimming approach. - Default is ``'truncate'``. - - trimming_threshold : float - The threshold used for trimming. + clipping_threshold : float + The threshold used for clipping. Default is ``1e-2``. draw_sample_splitting : bool @@ -87,10 +83,14 @@ def __init__( n_rep=1, score="observational", in_sample_normalization=True, - trimming_rule="truncate", - trimming_threshold=1e-2, + clipping_threshold=1e-2, draw_sample_splitting=True, ): + warnings.warn( + "DoubleMLDIDCS is deprecated and will be removed with version 0.12.0. 
" "Please use DoubleMLDIDCSBinary instead.", + DeprecationWarning, + stacklevel=2, + ) super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting) self._check_data(self._dml_data) @@ -140,10 +140,7 @@ def __init__( self._predict_method["ml_m"] = "predict_proba" self._initialize_ml_nuisance_params() - self._trimming_rule = trimming_rule - self._trimming_threshold = trimming_threshold - _check_trimming(self._trimming_rule, self._trimming_threshold) - + self._clipping_threshold = clipping_threshold self._sensitivity_implemented = True self._external_predictions_implemented = True @@ -155,18 +152,11 @@ def in_sample_normalization(self): return self._in_sample_normalization @property - def trimming_rule(self): - """ - Specifies the used trimming rule. - """ - return self._trimming_rule - - @property - def trimming_threshold(self): + def clipping_threshold(self): """ - Specifies the used trimming threshold. + Specifies the used clipping threshold. """ - return self._trimming_threshold + return self._clipping_threshold def _initialize_ml_nuisance_params(self): if self.score == "observational": @@ -312,9 +302,10 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa method=self._predict_method["ml_m"], return_models=return_models, ) - _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls) - _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12) - m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold) + + _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls) + _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12) + m_hat["preds"] = np.clip(m_hat["preds"], self.clipping_threshold, 1 - self.clipping_threshold) psi_a, psi_b = self._score_elements( y, diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py index 5375011d..c547ff40 100644 --- a/doubleml/did/did_cs_binary.py 
+++ b/doubleml/did/did_cs_binary.py @@ -1,4 +1,5 @@ import warnings +from typing import Optional import numpy as np from sklearn.utils import check_X_y @@ -19,14 +20,13 @@ from doubleml.utils._checks import ( _check_bool, _check_finite_predictions, - _check_is_propensity, _check_score, - _check_trimming, ) from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls_2d -from doubleml.utils._propensity_score import _trimm +from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor +# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). class DoubleMLDIDCSBinary(LinearScoreMixin, DoubleML): """Double machine learning for difference-in-differences models with repeated cross sections (binary setting in terms of group and time combinations). @@ -83,13 +83,16 @@ class DoubleMLDIDCSBinary(LinearScoreMixin, DoubleML): Indicates whether to use a slightly different normalization from Sant'Anna and Zhao (2020). Default is ``True``. - trimming_rule : str - A str (``'truncate'`` is the only choice) specifying the trimming approach. - Default is ``'truncate'``. + trimming_rule : str, optional, deprecated + (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach. + Use `ps_processor_config` instead. Will be removed in a future version. - trimming_threshold : float - The threshold used for trimming. - Default is ``1e-2``. + trimming_threshold : float, optional, deprecated + (DEPRECATED) The threshold used for trimming. + Use `ps_processor_config` instead. Will be removed in a future version. + + ps_processor_config : PSProcessorConfig, optional + Configuration for propensity score processing (clipping, calibration, etc.). draw_sample_splitting : bool Indicates whether the sample splitting should be drawn during initialization of the object. 
@@ -115,8 +118,9 @@ def __init__( n_rep=1, score="observational", in_sample_normalization=True, - trimming_rule="truncate", - trimming_threshold=1e-2, + trimming_rule="truncate", # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + trimming_threshold=1e-2, # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + ps_processor_config: Optional[PSProcessorConfig] = None, draw_sample_splitting=True, print_periods=False, ): @@ -221,9 +225,12 @@ def __init__( self._predict_method["ml_m"] = "predict_proba" self._initialize_ml_nuisance_params() + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + self._ps_processor_config, self._ps_processor = init_ps_processor( + ps_processor_config, trimming_rule, trimming_threshold + ) self._trimming_rule = trimming_rule - self._trimming_threshold = trimming_threshold - _check_trimming(self._trimming_rule, self._trimming_threshold) + self._trimming_threshold = self._ps_processor.clipping_threshold self._sensitivity_implemented = True self._external_predictions_implemented = True @@ -312,19 +319,44 @@ def in_sample_normalization(self): """ return self._in_sample_normalization + @property + def ps_processor_config(self): + """ + Configuration for propensity score processing (clipping, calibration, etc.). + """ + return self._ps_processor_config + + @property + def ps_processor(self): + """ + Propensity score processor. + """ + return self._ps_processor + + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_rule(self): """ Specifies the used trimming rule. """ + warnings.warn( + "'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2 + ) return self._trimming_rule + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). 
@property def trimming_threshold(self): """ Specifies the used trimming threshold. """ - return self._trimming_threshold + warnings.warn( + "'trimming_threshold' is deprecated and will be removed in a future version. " + "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self._ps_processor.clipping_threshold @property def n_obs_subset(self): @@ -480,8 +512,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa ) _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls) - _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12) - m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold) + m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], d, cv=smpls, learner_name="ml_m") psi_a, psi_b = self._score_elements( y, diff --git a/doubleml/did/did_multi.py b/doubleml/did/did_multi.py index 66e7f837..a9e9e790 100644 --- a/doubleml/did/did_multi.py +++ b/doubleml/did/did_multi.py @@ -1,5 +1,6 @@ import copy import warnings +from typing import Optional import matplotlib.pyplot as plt import numpy as np @@ -33,11 +34,13 @@ from doubleml.did.utils._plot import add_jitter from doubleml.double_ml import DoubleML from doubleml.double_ml_framework import concat -from doubleml.utils._checks import _check_bool, _check_score, _check_trimming +from doubleml.utils._checks import _check_bool, _check_score from doubleml.utils._descriptive import generate_summary from doubleml.utils.gain_statistics import gain_statistics +from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor +# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). class DoubleMLDIDMulti: """Double machine learning for multi-period difference-in-differences models. 
@@ -96,13 +99,16 @@ class DoubleMLDIDMulti: A str (``'truncate'`` is the only choice) specifying the trimming approach. Default is ``'truncate'``. - trimming_threshold : float - The threshold used for trimming. - Default is ``1e-2``. + trimming_rule : str, optional, deprecated + (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach. + Use `ps_processor_config` instead. Will be removed in a future version. - draw_sample_splitting : bool - Indicates whether the sample splitting should be drawn during initialization. - Default is ``True``. + trimming_threshold : float, optional, deprecated + (DEPRECATED) The threshold used for trimming. + Use `ps_processor_config` instead. Will be removed in a future version. + + ps_processor_config : PSProcessorConfig, optional + Configuration for propensity score processing (clipping, calibration, etc.). print_periods : bool Indicates whether to print information about the evaluated periods. @@ -165,8 +171,9 @@ def __init__( score="observational", panel=True, in_sample_normalization=True, - trimming_rule="truncate", - trimming_threshold=1e-2, + trimming_rule="truncate", # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + trimming_threshold=1e-2, # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + ps_processor_config: Optional[PSProcessorConfig] = None, draw_sample_splitting=True, print_periods=False, ): @@ -214,10 +221,12 @@ def __init__( # initialize framework which is constructed after the fit method is called self._framework = None - # initialize and check trimming + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). 
+ self._ps_processor_config, self._ps_processor = init_ps_processor( + ps_processor_config, trimming_rule, trimming_threshold + ) self._trimming_rule = trimming_rule - self._trimming_threshold = trimming_threshold - _check_trimming(self._trimming_rule, self._trimming_threshold) + self._trimming_threshold = self._ps_processor.clipping_threshold ml_g_is_classifier = DoubleML._check_learner(ml_g, "ml_g", regressor=True, classifier=True) if self.score == "observational": @@ -378,19 +387,44 @@ def in_sample_normalization(self): """ return self._in_sample_normalization + @property + def ps_processor_config(self): + """ + Configuration for propensity score processing (clipping, calibration, etc.). + """ + return self._ps_processor_config + + @property + def ps_processor(self): + """ + Propensity score processor. + """ + return self._ps_processor + + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_rule(self): """ Specifies the used trimming rule. """ + warnings.warn( + "'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2 + ) return self._trimming_rule + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_threshold(self): """ Specifies the used trimming threshold. """ - return self._trimming_threshold + warnings.warn( + "'trimming_threshold' is deprecated and will be removed in a future version. 
" + "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self._ps_processor.clipping_threshold @property def n_folds(self): @@ -1355,8 +1389,7 @@ def _initialize_models(self): "score": self.score, "n_folds": self.n_folds, "n_rep": self.n_rep, - "trimming_rule": self.trimming_rule, - "trimming_threshold": self.trimming_threshold, + "ps_processor_config": self.ps_processor_config, "in_sample_normalization": self.in_sample_normalization, "draw_sample_splitting": True, "print_periods": self._print_periods, diff --git a/doubleml/did/tests/_utils_did_cs_manual.py b/doubleml/did/tests/_utils_did_cs_manual.py index ce6f8870..0353efef 100644 --- a/doubleml/did/tests/_utils_did_cs_manual.py +++ b/doubleml/did/tests/_utils_did_cs_manual.py @@ -21,7 +21,7 @@ def fit_did_cs( g_d1_t0_params=None, g_d1_t1_params=None, m_params=None, - trimming_threshold=1e-2, + clipping_threshold=1e-2, ): n_obs = len(y) @@ -54,7 +54,7 @@ def fit_did_cs( g_d1_t0_params=g_d1_t0_params, g_d1_t1_params=g_d1_t1_params, m_params=m_params, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) ) @@ -149,7 +149,7 @@ def fit_nuisance_did_cs( g_d1_t0_params=None, g_d1_t1_params=None, m_params=None, - trimming_threshold=1e-12, + clipping_threshold=1e-12, ): ml_g_d0_t0 = clone(learner_g) ml_g_d0_t1 = clone(learner_g) @@ -169,7 +169,7 @@ def fit_nuisance_did_cs( g_hat_d1_t1_list = fit_predict(y, x, ml_g_d1_t1, g_d1_t1_params, smpls, train_cond=train_cond_d1_t1) if score == "observational": ml_m = clone(learner_m) - m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls, trimming_threshold=trimming_threshold) + m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls, clipping_threshold=clipping_threshold) else: assert score == "experimental" m_hat_list = list() diff --git a/doubleml/did/tests/_utils_did_manual.py b/doubleml/did/tests/_utils_did_manual.py index b067e44d..f0713332 
100644 --- a/doubleml/did/tests/_utils_did_manual.py +++ b/doubleml/did/tests/_utils_did_manual.py @@ -18,7 +18,7 @@ def fit_did( g0_params=None, g1_params=None, m_params=None, - trimming_threshold=1e-2, + clipping_threshold=1e-2, ): n_obs = len(y) @@ -44,7 +44,7 @@ def fit_did( g0_params=g0_params, g1_params=g1_params, m_params=m_params, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) all_g_hat0.append(g_hat0_list) @@ -83,7 +83,7 @@ def fit_did( def fit_nuisance_did( - y, x, d, learner_g, learner_m, smpls, score, g0_params=None, g1_params=None, m_params=None, trimming_threshold=1e-12 + y, x, d, learner_g, learner_m, smpls, score, g0_params=None, g1_params=None, m_params=None, clipping_threshold=1e-12 ): ml_g0 = clone(learner_g) ml_g1 = clone(learner_g) @@ -101,7 +101,7 @@ def fit_nuisance_did( else: assert score == "observational" ml_m = clone(learner_m) - m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls, trimming_threshold=trimming_threshold) + m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls, clipping_threshold=clipping_threshold) p_hat_list = [] for _ in smpls: diff --git a/doubleml/did/tests/test_did.py b/doubleml/did/tests/test_did.py index 79feb110..a476adb4 100644 --- a/doubleml/did/tests/test_did.py +++ b/doubleml/did/tests/test_did.py @@ -37,12 +37,12 @@ def in_sample_normalization(request): @pytest.fixture(scope="module", params=[0.1]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @pytest.fixture(scope="module") -def dml_did_fixture(generate_data_did, learner, score, in_sample_normalization, trimming_threshold): +def dml_did_fixture(generate_data_did, learner, score, in_sample_normalization, clipping_threshold): boot_methods = ["normal"] n_folds = 2 n_rep_boot = 499 @@ -68,7 +68,7 @@ def dml_did_fixture(generate_data_did, learner, score, in_sample_normalization, score=score, in_sample_normalization=in_sample_normalization, draw_sample_splitting=False, - 
trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) # synchronize the sample splitting @@ -85,7 +85,7 @@ def dml_did_fixture(generate_data_did, learner, score, in_sample_normalization, all_smpls, score, in_sample_normalization, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) res_dict = { diff --git a/doubleml/did/tests/test_did_binary_ps_processor.py b/doubleml/did/tests/test_did_binary_ps_processor.py new file mode 100644 index 00000000..226e6932 --- /dev/null +++ b/doubleml/did/tests/test_did_binary_ps_processor.py @@ -0,0 +1,67 @@ +import numpy as np +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +from doubleml.did import DoubleMLDIDBinary +from doubleml.utils.propensity_score_processing import PSProcessorConfig + + +@pytest.mark.ci +@pytest.mark.parametrize( + "ps_config", + [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ], +) +def test_did_binary_ml_m_predictions_ps_processor(generate_data_did_binary, ps_config): + dml_data = generate_data_did_binary + np.random.seed(3141) + dml_did = DoubleMLDIDBinary( + obj_dml_data=dml_data, + g_value=1, + t_value_pre=0, + t_value_eval=1, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + ps_processor_config=ps_config, + n_rep=1, + score="observational", + ) + dml_did.fit(store_predictions=True) + ml_m_preds = dml_did.predictions["ml_m"][:, 0, 0] + # Just check that predictions are within [clipping_threshold, 1-clipping_threshold] + assert np.all(ml_m_preds >= ps_config.clipping_threshold) + assert np.all(ml_m_preds <= 1 - ps_config.clipping_threshold) + + +@pytest.mark.ci +def 
test_did_binary_ml_m_predictions_ps_processor_differences(generate_data_did_binary): + dml_data = generate_data_did_binary + np.random.seed(3141) + configs = [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ] + preds = [] + for cfg in configs: + dml_did = DoubleMLDIDBinary( + obj_dml_data=dml_data, + g_value=1, + t_value_pre=0, + t_value_eval=1, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + ps_processor_config=cfg, + n_rep=1, + score="observational", + ) + dml_did.fit(store_predictions=True) + preds.append(dml_did.predictions["ml_m"][:, 0, 0]) + # Check that at least two configurations yield different predictions (element-wise) + diffs = [not np.allclose(preds[i], preds[j], atol=1e-6) for i in range(len(preds)) for j in range(i + 1, len(preds))] + assert any(diffs) diff --git a/doubleml/did/tests/test_did_binary_vs_did_panel.py b/doubleml/did/tests/test_did_binary_vs_did_panel.py index 2eddccaf..9abee475 100644 --- a/doubleml/did/tests/test_did_binary_vs_did_panel.py +++ b/doubleml/did/tests/test_did_binary_vs_did_panel.py @@ -9,6 +9,7 @@ import doubleml as dml from doubleml.did.datasets import make_did_CS2021 from doubleml.did.utils._did_utils import _get_id_positions +from doubleml.utils import PSProcessorConfig @pytest.fixture( @@ -36,7 +37,7 @@ def in_sample_normalization(request): @pytest.fixture(scope="module", params=[0.1]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @@ -46,7 +47,7 @@ def time_type(request): @pytest.fixture(scope="module") -def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normalization, trimming_threshold): +def 
dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normalization, clipping_threshold): n_obs = 500 dpg = 1 @@ -65,7 +66,6 @@ def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normaliza "n_folds": 3, "score": score, "in_sample_normalization": in_sample_normalization, - "trimming_threshold": trimming_threshold, "draw_sample_splitting": True, } @@ -74,6 +74,7 @@ def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normaliza g_value=dml_panel_data.g_values[0], t_value_pre=dml_panel_data.t_values[0], t_value_eval=dml_panel_data.t_values[1], + ps_processor_config=PSProcessorConfig(clipping_threshold=clipping_threshold), **dml_args, ) dml_did_binary_obj.fit() @@ -82,6 +83,7 @@ def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normaliza dml_data = dml.data.DoubleMLDIDData(df_wide, y_col="y_diff", d_cols="G_indicator", x_cols=["Z1", "Z2", "Z3", "Z4"]) dml_did_obj = dml.DoubleMLDID( dml_data, + clipping_threshold=clipping_threshold, **dml_args, ) diff --git a/doubleml/did/tests/test_did_binary_vs_did_two_period.py b/doubleml/did/tests/test_did_binary_vs_did_two_period.py index 74575664..25416a20 100644 --- a/doubleml/did/tests/test_did_binary_vs_did_two_period.py +++ b/doubleml/did/tests/test_did_binary_vs_did_two_period.py @@ -37,12 +37,12 @@ def in_sample_normalization(request): @pytest.fixture(scope="module", params=[0.1]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @pytest.fixture(scope="module") -def dml_did_binary_vs_did_fixture(generate_data_did_binary, learner, score, in_sample_normalization, trimming_threshold): +def dml_did_binary_vs_did_fixture(generate_data_did_binary, learner, score, in_sample_normalization, clipping_threshold): boot_methods = ["normal"] n_folds = 2 n_rep_boot = 499 @@ -68,7 +68,6 @@ def dml_did_binary_vs_did_fixture(generate_data_did_binary, learner, score, in_s "n_folds": n_folds, "score": score, "in_sample_normalization": 
in_sample_normalization, - "trimming_threshold": trimming_threshold, "draw_sample_splitting": False, } @@ -77,11 +76,13 @@ def dml_did_binary_vs_did_fixture(generate_data_did_binary, learner, score, in_s g_value=1, t_value_pre=0, t_value_eval=1, + ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold), **dml_args, ) dml_did_obj = dml.DoubleMLDID( obj_dml_data, + clipping_threshold=clipping_threshold, **dml_args, ) @@ -107,7 +108,7 @@ def dml_did_binary_vs_did_fixture(generate_data_did_binary, learner, score, in_s all_smpls, score, in_sample_normalization, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) res_dict = { diff --git a/doubleml/did/tests/test_did_cs.py b/doubleml/did/tests/test_did_cs.py index bc8e2da6..bc56def2 100644 --- a/doubleml/did/tests/test_did_cs.py +++ b/doubleml/did/tests/test_did_cs.py @@ -38,12 +38,12 @@ def in_sample_normalization(request): @pytest.fixture(scope="module", params=[0.1]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @pytest.fixture(scope="module") -def dml_did_cs_fixture(generate_data_did_cs, learner, score, in_sample_normalization, trimming_threshold): +def dml_did_cs_fixture(generate_data_did_cs, learner, score, in_sample_normalization, clipping_threshold): boot_methods = ["normal"] n_folds = 2 n_rep_boot = 499 @@ -70,7 +70,7 @@ def dml_did_cs_fixture(generate_data_did_cs, learner, score, in_sample_normaliza score=score, in_sample_normalization=in_sample_normalization, draw_sample_splitting=False, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) # synchronize the sample splitting @@ -88,7 +88,7 @@ def dml_did_cs_fixture(generate_data_did_cs, learner, score, in_sample_normaliza all_smpls, score, in_sample_normalization, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) res_dict = { diff --git a/doubleml/did/tests/test_did_cs_binary_ps_processor.py 
b/doubleml/did/tests/test_did_cs_binary_ps_processor.py new file mode 100644 index 00000000..1594ee93 --- /dev/null +++ b/doubleml/did/tests/test_did_cs_binary_ps_processor.py @@ -0,0 +1,67 @@ +import numpy as np +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +from doubleml.did import DoubleMLDIDCSBinary +from doubleml.utils.propensity_score_processing import PSProcessorConfig + + +@pytest.mark.ci +@pytest.mark.parametrize( + "ps_config", + [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ], +) +def test_did_cs_binary_ml_m_predictions_ps_processor(generate_data_did_binary, ps_config): + dml_data = generate_data_did_binary + np.random.seed(3141) + dml_did = DoubleMLDIDCSBinary( + obj_dml_data=dml_data, + g_value=1, + t_value_pre=0, + t_value_eval=1, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + ps_processor_config=ps_config, + n_rep=1, + score="observational", + ) + dml_did.fit(store_predictions=True) + ml_m_preds = dml_did.predictions["ml_m"][:, 0, 0] + # Just check that predictions are within [clipping_threshold, 1-clipping_threshold] + assert np.all(ml_m_preds >= ps_config.clipping_threshold) + assert np.all(ml_m_preds <= 1 - ps_config.clipping_threshold) + + +@pytest.mark.ci +def test_did_cs_binary_ml_m_predictions_ps_processor_differences(generate_data_did_binary): + dml_data = generate_data_did_binary + np.random.seed(3141) + configs = [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, 
calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ] + preds = [] + for cfg in configs: + dml_did = DoubleMLDIDCSBinary( + obj_dml_data=dml_data, + g_value=1, + t_value_pre=0, + t_value_eval=1, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + ps_processor_config=cfg, + n_rep=1, + score="observational", + ) + dml_did.fit(store_predictions=True) + preds.append(dml_did.predictions["ml_m"][:, 0, 0]) + # Check that at least two configurations yield different predictions (element-wise) + diffs = [not np.allclose(preds[i], preds[j], atol=1e-6) for i in range(len(preds)) for j in range(i + 1, len(preds))] + assert any(diffs) diff --git a/doubleml/did/tests/test_did_cs_binary_vs_did_cs_panel.py b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_panel.py index da7db085..eaedba49 100644 --- a/doubleml/did/tests/test_did_cs_binary_vs_did_cs_panel.py +++ b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_panel.py @@ -36,7 +36,7 @@ def in_sample_normalization(request): @pytest.fixture(scope="module", params=[0.1]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @@ -46,7 +46,7 @@ def time_type(request): @pytest.fixture(scope="module") -def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normalization, trimming_threshold): +def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normalization, clipping_threshold): n_obs = 500 dpg = 1 @@ -62,7 +62,6 @@ def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normaliza "n_folds": 3, "score": score, "in_sample_normalization": in_sample_normalization, - "trimming_threshold": trimming_threshold, "draw_sample_splitting": True, } @@ -71,6 +70,7 @@ def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normaliza g_value=dml_panel_data.g_values[0], t_value_pre=dml_panel_data.t_values[0], t_value_eval=dml_panel_data.t_values[1], 
+ ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold), **dml_args, ) dml_did_binary_obj.fit() @@ -81,6 +81,7 @@ def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normaliza ) dml_did_obj = dml.DoubleMLDIDCS( dml_data, + clipping_threshold=clipping_threshold, **dml_args, ) diff --git a/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py index b9e267ce..a9ba726e 100644 --- a/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py +++ b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py @@ -38,12 +38,12 @@ def in_sample_normalization(request): @pytest.fixture(scope="module", params=[0.1]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @pytest.fixture(scope="module") -def dml_did_cs_binary_vs_did_cs_fixture(generate_data_did_binary, learner, score, in_sample_normalization, trimming_threshold): +def dml_did_cs_binary_vs_did_cs_fixture(generate_data_did_binary, learner, score, in_sample_normalization, clipping_threshold): boot_methods = ["normal"] n_folds = 2 n_rep_boot = 499 @@ -70,7 +70,6 @@ def dml_did_cs_binary_vs_did_cs_fixture(generate_data_did_binary, learner, score "n_folds": n_folds, "score": score, "in_sample_normalization": in_sample_normalization, - "trimming_threshold": trimming_threshold, "draw_sample_splitting": False, } @@ -79,11 +78,13 @@ def dml_did_cs_binary_vs_did_cs_fixture(generate_data_did_binary, learner, score g_value=1, t_value_pre=0, t_value_eval=1, + ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold), **dml_args, ) dml_did_obj = dml.DoubleMLDIDCS( obj_dml_data, + clipping_threshold=clipping_threshold, **dml_args, ) @@ -111,7 +112,7 @@ def dml_did_cs_binary_vs_did_cs_fixture(generate_data_did_binary, learner, score all_smpls, score, in_sample_normalization, - trimming_threshold=trimming_threshold, + 
clipping_threshold=clipping_threshold, ) res_dict = { diff --git a/doubleml/did/tests/test_did_deprecation_warnings.py b/doubleml/did/tests/test_did_deprecation_warnings.py new file mode 100644 index 00000000..75158fdb --- /dev/null +++ b/doubleml/did/tests/test_did_deprecation_warnings.py @@ -0,0 +1,29 @@ +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +from doubleml.data.did_data import DoubleMLDIDData +from doubleml.did.did import DoubleMLDID +from doubleml.did.did_cs import DoubleMLDIDCS + + +@pytest.mark.ci +def test_deprecation_DoubleMLDIDData(generate_data_did): + (x, y, d, _) = generate_data_did + with pytest.warns(FutureWarning, match="DoubleMLDIDData is deprecated"): + _ = DoubleMLDIDData.from_arrays(x, y, d) + + +@pytest.mark.ci +def test_deprecation_DoubleMLDID(generate_data_did): + (x, y, d, _) = generate_data_did + obj_dml_data = DoubleMLDIDData.from_arrays(x, y, d) + with pytest.warns(DeprecationWarning, match="DoubleMLDID is deprecated"): + _ = DoubleMLDID(obj_dml_data, ml_g=LinearRegression(), ml_m=LogisticRegression()) + + +@pytest.mark.ci +def test_deprecation_DoubleMLDIDCS(generate_data_did_cs): + (x, y, d, t) = generate_data_did_cs + obj_dml_data = DoubleMLDIDData.from_arrays(x, y, d, t=t) + with pytest.warns(DeprecationWarning, match="DoubleMLDIDCS is deprecated"): + _ = DoubleMLDIDCS(obj_dml_data, ml_g=LinearRegression(), ml_m=LogisticRegression()) diff --git a/doubleml/did/tests/test_did_multi_aggregation_single_gt.py b/doubleml/did/tests/test_did_multi_aggregation_single_gt.py index a6ffcd49..ede8ed74 100644 --- a/doubleml/did/tests/test_did_multi_aggregation_single_gt.py +++ b/doubleml/did/tests/test_did_multi_aggregation_single_gt.py @@ -38,7 +38,7 @@ def in_sample_normalization(request): @pytest.fixture(scope="module", params=[0.1]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @@ -48,7 +48,7 @@ def time_type(request): @pytest.fixture(scope="module") -def 
dml_single_gt_aggregation(aggregation, time_type, learner, score, panel, in_sample_normalization, trimming_threshold): +def dml_single_gt_aggregation(aggregation, time_type, learner, score, panel, in_sample_normalization, clipping_threshold): n_obs = 500 dpg = 1 @@ -63,7 +63,7 @@ def dml_single_gt_aggregation(aggregation, time_type, learner, score, panel, in_ "score": score, "panel": panel, "in_sample_normalization": in_sample_normalization, - "trimming_threshold": trimming_threshold, + "ps_processor_config": dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold), "draw_sample_splitting": True, } gt_combination = [(dml_panel_data.g_values[0], dml_panel_data.t_values[0], dml_panel_data.t_values[3])] diff --git a/doubleml/did/tests/test_did_multi_exceptions.py b/doubleml/did/tests/test_did_multi_exceptions.py index c53d79d3..a9e432a5 100644 --- a/doubleml/did/tests/test_did_multi_exceptions.py +++ b/doubleml/did/tests/test_did_multi_exceptions.py @@ -62,22 +62,6 @@ def test_input(): invalid_arguments = {"score": "test"} _ = dml.did.DoubleMLDIDMulti(**(valid_arguments | invalid_arguments)) - # trimming - msg = "Invalid trimming_rule discard. Valid trimming_rule truncate." - with pytest.raises(ValueError, match=msg): - invalid_arguments = {"trimming_rule": "discard"} - _ = dml.did.DoubleMLDIDMulti(**(valid_arguments | invalid_arguments)) - - msg = "trimming_threshold has to be a float. Object of type passed." - with pytest.raises(TypeError, match=msg): - invalid_arguments = {"trimming_threshold": "test"} - _ = dml.did.DoubleMLDIDMulti(**(valid_arguments | invalid_arguments)) - - msg = "Invalid trimming_threshold 0.6. trimming_threshold has to be between 0 and 0.5." 
- with pytest.raises(ValueError, match=msg): - invalid_arguments = {"trimming_threshold": 0.6} - _ = dml.did.DoubleMLDIDMulti(**(valid_arguments | invalid_arguments)) - @pytest.mark.ci def test_exception_learners(): diff --git a/doubleml/did/tests/test_did_multi_vs_binary.py b/doubleml/did/tests/test_did_multi_vs_binary.py index 15d3fd0c..86cb2ae4 100644 --- a/doubleml/did/tests/test_did_multi_vs_binary.py +++ b/doubleml/did/tests/test_did_multi_vs_binary.py @@ -35,7 +35,7 @@ def in_sample_normalization(request): @pytest.fixture(scope="module", params=[0.1]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @@ -45,7 +45,7 @@ def time_type(request): @pytest.fixture(scope="module") -def dml_did_binary_vs_did_multi_fixture(time_type, learner, score, in_sample_normalization, trimming_threshold): +def dml_did_binary_vs_did_multi_fixture(time_type, learner, score, in_sample_normalization, clipping_threshold): n_obs = 500 dpg = 1 boot_methods = ["normal"] @@ -61,7 +61,7 @@ def dml_did_binary_vs_did_multi_fixture(time_type, learner, score, in_sample_nor "n_folds": 3, "score": score, "in_sample_normalization": in_sample_normalization, - "trimming_threshold": trimming_threshold, + "ps_processor_config": dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold), "draw_sample_splitting": True, } gt_combination = [(dml_panel_data.g_values[0], dml_panel_data.t_values[0], dml_panel_data.t_values[1])] diff --git a/doubleml/did/tests/test_did_multi_vs_cs_binary.py b/doubleml/did/tests/test_did_multi_vs_cs_binary.py index 7af8d74d..a658aeee 100644 --- a/doubleml/did/tests/test_did_multi_vs_cs_binary.py +++ b/doubleml/did/tests/test_did_multi_vs_cs_binary.py @@ -35,7 +35,7 @@ def in_sample_normalization(request): @pytest.fixture(scope="module", params=[0.1]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @@ -50,7 +50,7 @@ def lambda_t(request): @pytest.fixture(scope="module") -def 
dml_did_binary_vs_did_multi_fixture(time_type, lambda_t, learner, score, in_sample_normalization, trimming_threshold): +def dml_did_binary_vs_did_multi_fixture(time_type, lambda_t, learner, score, in_sample_normalization, clipping_threshold): n_obs = 500 dpg = 1 boot_methods = ["normal"] @@ -66,7 +66,7 @@ def dml_did_binary_vs_did_multi_fixture(time_type, lambda_t, learner, score, in_ "n_folds": 3, "score": score, "in_sample_normalization": in_sample_normalization, - "trimming_threshold": trimming_threshold, + "ps_processor_config": dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold), "draw_sample_splitting": True, } gt_combination = [(dml_panel_data.g_values[0], dml_panel_data.t_values[0], dml_panel_data.t_values[1])] diff --git a/doubleml/irm/apo.py b/doubleml/irm/apo.py index 93379e90..7441d112 100644 --- a/doubleml/irm/apo.py +++ b/doubleml/irm/apo.py @@ -1,4 +1,5 @@ import warnings +from typing import Optional import numpy as np import pandas as pd @@ -9,14 +10,13 @@ from doubleml.utils._checks import ( _check_binary_predictions, _check_finite_predictions, - _check_is_propensity, _check_score, - _check_trimming, _check_weights, ) from doubleml.utils._estimation import _cond_targets, _dml_cv_predict, _dml_tune, _get_cond_smpls -from doubleml.utils._propensity_score import _propensity_score_adjustment, _trimm +from doubleml.utils._propensity_score import _propensity_score_adjustment from doubleml.utils.blp import DoubleMLBLP +from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor class DoubleMLAPO(LinearScoreMixin, DoubleML): @@ -66,13 +66,16 @@ class DoubleMLAPO(LinearScoreMixin, DoubleML): Indicates whether the inverse probability weights are normalized. Default is ``False``. - trimming_rule : str - A str (``'truncate'`` is the only choice) specifying the trimming approach. - Default is ``'truncate'``. 
+ trimming_rule : str, optional, deprecated + (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach. + Use `ps_processor_config` instead. Will be removed in a future version. - trimming_threshold : float - The threshold used for trimming. - Default is ``1e-2``. + trimming_threshold : float, optional, deprecated + (DEPRECATED) The threshold used for trimming. + Use `ps_processor_config` instead. Will be removed in a future version. + + ps_processor_config : PSProcessorConfig, optional + Configuration for propensity score processing (clipping, calibration, etc.). draw_sample_splitting : bool Indicates whether the sample splitting should be drawn during initialization of the object. @@ -91,8 +94,9 @@ def __init__( score="APO", weights=None, normalize_ipw=False, - trimming_rule="truncate", - trimming_threshold=1e-2, + trimming_rule="truncate", # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + trimming_threshold=1e-2, # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + ps_processor_config: Optional[PSProcessorConfig] = None, draw_sample_splitting=True, ): super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting) @@ -131,9 +135,13 @@ def __init__( raise TypeError( "Normalization indicator has to be boolean. " + f"Object of type {str(type(self.normalize_ipw))} passed." ) + + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). 
+ self._ps_processor_config, self._ps_processor = init_ps_processor( + ps_processor_config, trimming_rule, trimming_threshold + ) self._trimming_rule = trimming_rule - self._trimming_threshold = trimming_threshold - _check_trimming(self._trimming_rule, self._trimming_threshold) + self._trimming_threshold = self._ps_processor.clipping_threshold self._sensitivity_implemented = True self._external_predictions_implemented = True @@ -163,19 +171,44 @@ def normalize_ipw(self): """ return self._normalize_ipw + @property + def ps_processor_config(self): + """ + Configuration for propensity score processing (clipping, calibration, etc.). + """ + return self._ps_processor_config + + @property + def ps_processor(self): + """ + Propensity score processor. + """ + return self._ps_processor + + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_rule(self): """ Specifies the used trimming rule. """ + warnings.warn( + "'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2 + ) return self._trimming_rule + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_threshold(self): """ Specifies the used trimming threshold. """ - return self._trimming_threshold + warnings.warn( + "'trimming_threshold' is deprecated and will be removed in a future version. 
" + "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self._ps_processor.clipping_threshold @property def weights(self): @@ -288,10 +321,9 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa return_models=return_models, ) _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls) - _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12) # also trimm external predictions - m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold) + m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], self.treated, cv=smpls) psi_a, psi_b = self._score_elements(y, treated, g_hat_d_lvl0["preds"], g_hat_d_lvl1["preds"], m_hat["preds"], smpls) psi_elements = {"psi_a": psi_a, "psi_b": psi_b} diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py index 5a6d41fc..23e7085e 100644 --- a/doubleml/irm/apos.py +++ b/doubleml/irm/apos.py @@ -1,5 +1,7 @@ import copy +import warnings from collections.abc import Iterable +from typing import Optional import numpy as np import pandas as pd @@ -11,10 +13,11 @@ from doubleml.double_ml_framework import concat from doubleml.double_ml_sampling_mixins import SampleSplittingMixin from doubleml.irm.apo import DoubleMLAPO -from doubleml.utils._checks import _check_score, _check_trimming, _check_weights +from doubleml.utils._checks import _check_score, _check_weights from doubleml.utils._descriptive import generate_summary from doubleml.utils._sensitivity import _compute_sensitivity_bias from doubleml.utils.gain_statistics import gain_statistics +from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor class DoubleMLAPOS(SampleSplittingMixin): @@ -31,8 +34,9 @@ def __init__( score="APO", weights=None, normalize_ipw=False, - trimming_rule="truncate", - trimming_threshold=1e-2, + trimming_rule="truncate", # TODO 
[v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + trimming_threshold=1e-2, # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + ps_processor_config: Optional[PSProcessorConfig] = None, draw_sample_splitting=True, ): self._dml_data = obj_dml_data @@ -58,10 +62,12 @@ def __init__( # initialize framework which is constructed after the fit method is called self._framework = None - # initialize and check trimming + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + self._ps_processor_config, self._ps_processor = init_ps_processor( + ps_processor_config, trimming_rule, trimming_threshold + ) self._trimming_rule = trimming_rule - self._trimming_threshold = trimming_threshold - _check_trimming(self._trimming_rule, self._trimming_threshold) + self._trimming_threshold = self._ps_processor.clipping_threshold if not isinstance(self.normalize_ipw, bool): raise TypeError( @@ -131,19 +137,44 @@ def normalize_ipw(self): """ return self._normalize_ipw + @property + def ps_processor_config(self): + """ + Configuration for propensity score processing (clipping, calibration, etc.). + """ + return self._ps_processor_config + + @property + def ps_processor(self): + """ + Propensity score processor. + """ + return self._ps_processor + + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_rule(self): """ Specifies the used trimming rule. """ + warnings.warn( + "'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2 + ) return self._trimming_rule + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_threshold(self): """ Specifies the used trimming threshold. """ - return self._trimming_threshold + warnings.warn( + "'trimming_threshold' is deprecated and will be removed in a future version. 
" + "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self._ps_processor.clipping_threshold @property def weights(self): @@ -819,8 +850,7 @@ def _initialize_models(self): "n_folds": self.n_folds, "n_rep": self.n_rep, "weights": self.weights, - "trimming_rule": self.trimming_rule, - "trimming_threshold": self.trimming_threshold, + "ps_processor_config": self.ps_processor_config, "normalize_ipw": self.normalize_ipw, "draw_sample_splitting": False, } diff --git a/doubleml/irm/cvar.py b/doubleml/irm/cvar.py index 6d29f5e2..64e82ad8 100644 --- a/doubleml/irm/cvar.py +++ b/doubleml/irm/cvar.py @@ -1,3 +1,6 @@ +import warnings +from typing import Optional + import numpy as np from sklearn.base import clone from sklearn.model_selection import StratifiedKFold, train_test_split @@ -11,7 +14,6 @@ _check_quantile, _check_score, _check_treatment, - _check_trimming, _check_zero_one_treatment, ) from doubleml.utils._estimation import ( @@ -22,9 +24,11 @@ _predict_zero_one_propensity, _solve_ipw_score, ) -from doubleml.utils._propensity_score import _normalize_ipw, _trimm +from doubleml.utils._propensity_score import _normalize_ipw +from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor +# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). class DoubleMLCVAR(LinearScoreMixin, DoubleML): """Double machine learning for conditional value at risk for potential outcomes @@ -66,13 +70,16 @@ class DoubleMLCVAR(LinearScoreMixin, DoubleML): Indicates whether the inverse probability weights are normalized. Default is ``True``. - trimming_rule : str - A str (``'truncate'`` is the only choice) specifying the trimming approach. - Default is ``'truncate'``. + trimming_rule : str, optional, deprecated + (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach. + Use `ps_processor_config` instead. 
Will be removed in a future version. + + trimming_threshold : float, optional, deprecated + (DEPRECATED) The threshold used for trimming. + Use `ps_processor_config` instead. Will be removed in a future version. - trimming_threshold : float - The threshold used for trimming. - Default is ``1e-2``. + ps_processor_config : PSProcessorConfig, optional + Configuration for propensity score processing (clipping, calibration, etc.). draw_sample_splitting : bool Indicates whether the sample splitting should be drawn during initialization of the object. @@ -107,8 +114,9 @@ def __init__( n_rep=1, score="CVaR", normalize_ipw=True, - trimming_rule="truncate", - trimming_threshold=1e-2, + trimming_rule="truncate", # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + trimming_threshold=1e-2, # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + ps_processor_config: Optional[PSProcessorConfig] = None, draw_sample_splitting=True, ): super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting) @@ -139,10 +147,12 @@ def __init__( if draw_sample_splitting: self.draw_sample_splitting() - # initialize and check trimming + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). 
+ self._ps_processor_config, self._ps_processor = init_ps_processor( + ps_processor_config, trimming_rule, trimming_threshold + ) self._trimming_rule = trimming_rule - self._trimming_threshold = trimming_threshold - _check_trimming(self._trimming_rule, self._trimming_threshold) + self._trimming_threshold = self._ps_processor.clipping_threshold _ = self._check_learner(ml_g, "ml_g", regressor=True, classifier=False) _ = self._check_learner(ml_m, "ml_m", regressor=False, classifier=True) @@ -172,19 +182,44 @@ def normalize_ipw(self): """ return self._normalize_ipw + @property + def ps_processor_config(self): + """ + Configuration for propensity score processing (clipping, calibration, etc.). + """ + return self._ps_processor_config + + @property + def ps_processor(self): + """ + Propensity score processor. + """ + return self._ps_processor + + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_rule(self): """ Specifies the used trimming rule. """ + warnings.warn( + "'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2 + ) return self._trimming_rule + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_threshold(self): """ Specifies the used trimming threshold. """ - return self._trimming_threshold + warnings.warn( + "'trimming_threshold' is deprecated and will be removed in a future version. 
" + "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self._ps_processor.clipping_threshold def _compute_ipw_score(self, theta, d, y, prop): score = (d == self.treatment) / prop * (y <= theta) - self.quantile @@ -254,7 +289,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa "preds" ] - m_hat_prelim = _trimm(m_hat_prelim, self.trimming_rule, self.trimming_threshold) + m_hat_prelim = self._ps_processor.adjust_ps(m_hat_prelim, d_train_1, cv=smpls_prelim) if self._normalize_ipw: m_hat_prelim = _normalize_ipw(m_hat_prelim, d_train_1) @@ -304,9 +339,7 @@ def ipw_score(theta): g_hat["models"] = fitted_models["ml_g"] m_hat["models"] = fitted_models["ml_m"] - # clip propensities and normalize ipw weights - m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold) - + m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], d, cv=smpls) # this is not done in the score to be equivalent to PQ models if self._normalize_ipw: m_hat_adj = _normalize_ipw(m_hat["preds"], d) diff --git a/doubleml/irm/iivm.py b/doubleml/irm/iivm.py index 4eaa1d50..7f330cfb 100644 --- a/doubleml/irm/iivm.py +++ b/doubleml/irm/iivm.py @@ -1,3 +1,6 @@ +import warnings +from typing import Optional + import numpy as np from scipy.stats import norm from sklearn.utils import check_X_y @@ -11,10 +14,10 @@ _check_finite_predictions, _check_is_propensity, _check_score, - _check_trimming, ) from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls, _solve_quadratic_inequality -from doubleml.utils._propensity_score import _normalize_ipw, _trimm +from doubleml.utils._propensity_score import _normalize_ipw +from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor class DoubleMLIIVM(LinearScoreMixin, DoubleML): @@ -64,13 +67,16 @@ class DoubleMLIIVM(LinearScoreMixin, DoubleML): Indicates whether 
the inverse probability weights are normalized. Default is ``False``. - trimming_rule : str - A str (``'truncate'`` is the only choice) specifying the trimming approach. - Default is ``'truncate'``. + trimming_rule : str, optional, deprecated + (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach. + Use `ps_processor_config` instead. Will be removed in a future version. + + trimming_threshold : float, optional, deprecated + (DEPRECATED) The threshold used for trimming. + Use `ps_processor_config` instead. Will be removed in a future version. - trimming_threshold : float - The threshold used for trimming. - Default is ``1e-2``. + ps_processor_config : PSProcessorConfig, optional + Configuration for propensity score processing (clipping, calibration, etc.). draw_sample_splitting : bool Indicates whether the sample splitting should be drawn during initialization of the object. @@ -135,8 +141,9 @@ def __init__( score="LATE", subgroups=None, normalize_ipw=False, - trimming_rule="truncate", - trimming_threshold=1e-2, + trimming_rule="truncate", # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + trimming_threshold=1e-2, # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + ps_processor_config: Optional[PSProcessorConfig] = None, draw_sample_splitting=True, ): super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting) @@ -172,9 +179,13 @@ def __init__( raise TypeError( "Normalization indicator has to be boolean. " + f"Object of type {str(type(self.normalize_ipw))} passed." ) + + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). 
+ self._ps_processor_config, self._ps_processor = init_ps_processor( + ps_processor_config, trimming_rule, trimming_threshold + ) self._trimming_rule = trimming_rule - self._trimming_threshold = trimming_threshold - _check_trimming(self._trimming_rule, self._trimming_threshold) + self._trimming_threshold = self._ps_processor.clipping_threshold if subgroups is None: # this is the default for subgroups; via None to prevent a mutable default argument @@ -213,19 +224,44 @@ def normalize_ipw(self): """ return self._normalize_ipw + @property + def ps_processor_config(self): + """ + Configuration for propensity score processing (clipping, calibration, etc.). + """ + return self._ps_processor_config + + @property + def ps_processor(self): + """ + Propensity score processor. + """ + return self._ps_processor + + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_rule(self): """ Specifies the used trimming rule. """ + warnings.warn( + "'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2 + ) return self._trimming_rule + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_threshold(self): """ Specifies the used trimming threshold. """ - return self._trimming_threshold + warnings.warn( + "'trimming_threshold' is deprecated and will be removed in a future version. 
" + "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self._ps_processor.clipping_threshold def _initialize_ml_nuisance_params(self): valid_learner = ["ml_g0", "ml_g1", "ml_m", "ml_r0", "ml_r1"] @@ -330,9 +366,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa return_models=return_models, ) _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls) - _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12) - # also trimm external predictions - m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold) + + m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], z, cv=smpls) # nuisance r r0 = external_predictions["ml_r0"] is not None diff --git a/doubleml/irm/irm.py b/doubleml/irm/irm.py index 343b7878..01d288bd 100644 --- a/doubleml/irm/irm.py +++ b/doubleml/irm/irm.py @@ -1,4 +1,5 @@ import warnings +from typing import Optional import numpy as np import pandas as pd @@ -12,17 +13,17 @@ _check_binary_predictions, _check_finite_predictions, _check_integer, - _check_is_propensity, _check_score, - _check_trimming, _check_weights, ) from doubleml.utils._estimation import _cond_targets, _dml_cv_predict, _dml_tune, _get_cond_smpls -from doubleml.utils._propensity_score import _propensity_score_adjustment, _trimm +from doubleml.utils._propensity_score import _propensity_score_adjustment from doubleml.utils.blp import DoubleMLBLP from doubleml.utils.policytree import DoubleMLPolicyTree +from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor +# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). 
class DoubleMLIRM(LinearScoreMixin, DoubleML): """Double machine learning for interactive regression models @@ -68,13 +69,16 @@ class DoubleMLIRM(LinearScoreMixin, DoubleML): Indicates whether the inverse probability weights are normalized. Default is ``False``. - trimming_rule : str - A str (``'truncate'`` is the only choice) specifying the trimming approach. - Default is ``'truncate'``. + trimming_rule : str, optional, deprecated + (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach. + Use `ps_processor_config` instead. Will be removed in a future version. - trimming_threshold : float - The threshold used for trimming. - Default is ``1e-2``. + trimming_threshold : float, optional, deprecated + (DEPRECATED) The threshold used for trimming. + Use `ps_processor_config` instead. Will be removed in a future version. + + ps_processor_config : PSProcessorConfig, optional + Configuration for propensity score processing (clipping, calibration, etc.). draw_sample_splitting : bool Indicates whether the sample splitting should be drawn during initialization of the object. @@ -131,8 +135,9 @@ def __init__( score="ATE", weights=None, normalize_ipw=False, - trimming_rule="truncate", - trimming_threshold=1e-2, + trimming_rule="truncate", # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + trimming_threshold=1e-2, # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + ps_processor_config: Optional[PSProcessorConfig] = None, draw_sample_splitting=True, ): super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting) @@ -167,9 +172,13 @@ def __init__( raise TypeError( "Normalization indicator has to be boolean. " + f"Object of type {str(type(self.normalize_ipw))} passed." ) + + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). 
+ self._ps_processor_config, self._ps_processor = init_ps_processor( + ps_processor_config, trimming_rule, trimming_threshold + ) self._trimming_rule = trimming_rule - self._trimming_threshold = trimming_threshold - _check_trimming(self._trimming_rule, self._trimming_threshold) + self._trimming_threshold = self._ps_processor.clipping_threshold self._sensitivity_implemented = True self._external_predictions_implemented = True @@ -184,19 +193,44 @@ def normalize_ipw(self): """ return self._normalize_ipw + @property + def ps_processor_config(self): + """ + Configuration for propensity score processing (clipping, calibration, etc.). + """ + return self._ps_processor_config + + @property + def ps_processor(self): + """ + Propensity score processor. + """ + return self._ps_processor + + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_rule(self): """ Specifies the used trimming rule. """ + warnings.warn( + "'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2 + ) return self._trimming_rule + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_threshold(self): """ Specifies the used trimming threshold. """ - return self._trimming_threshold + warnings.warn( + "'trimming_threshold' is deprecated and will be removed in a future version. 
" + "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self._ps_processor.clipping_threshold @property def weights(self): @@ -327,9 +361,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa return_models=return_models, ) _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls) - _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12) - # also trimm external predictions - m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold) + + m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], d, cv=smpls, learner_name="ml_m") psi_a, psi_b = self._score_elements(y, d, g_hat0["preds"], g_hat1["preds"], m_hat["preds"], smpls) psi_elements = {"psi_a": psi_a, "psi_b": psi_b} diff --git a/doubleml/irm/lpq.py b/doubleml/irm/lpq.py index 962b383b..bd62794c 100644 --- a/doubleml/irm/lpq.py +++ b/doubleml/irm/lpq.py @@ -1,3 +1,6 @@ +import warnings +from typing import Optional + import numpy as np from sklearn.base import clone from sklearn.model_selection import StratifiedKFold, train_test_split @@ -7,7 +10,7 @@ from doubleml.data.base_data import DoubleMLData from doubleml.double_ml import DoubleML from doubleml.double_ml_score_mixins import NonLinearScoreMixin -from doubleml.utils._checks import _check_quantile, _check_score, _check_treatment, _check_trimming, _check_zero_one_treatment +from doubleml.utils._checks import _check_quantile, _check_score, _check_treatment, _check_zero_one_treatment from doubleml.utils._estimation import ( _cond_targets, _default_kde, @@ -17,9 +20,11 @@ _predict_zero_one_propensity, _solve_ipw_score, ) -from doubleml.utils._propensity_score import _normalize_ipw, _trimm +from doubleml.utils._propensity_score import _normalize_ipw +from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor +# TODO [v0.12.0]: 
Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). class DoubleMLLPQ(NonLinearScoreMixin, DoubleML): """Double machine learning for local potential quantiles @@ -67,13 +72,16 @@ class DoubleMLLPQ(NonLinearScoreMixin, DoubleML): Default is ``'None'``, which uses :py:class:`statsmodels.nonparametric.kde.KDEUnivariate` with a gaussian kernel and silverman for bandwidth determination. - trimming_rule : str - A str (``'truncate'`` is the only choice) specifying the trimming approach. - Default is ``'truncate'``. + trimming_rule : str, optional, deprecated + (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach. + Use `ps_processor_config` instead. Will be removed in a future version. + + trimming_threshold : float, optional, deprecated + (DEPRECATED) The threshold used for trimming. + Use `ps_processor_config` instead. Will be removed in a future version. - trimming_threshold : float - The threshold used for trimming. - Default is ``1e-2``. + ps_processor_config : PSProcessorConfig, optional + Configuration for propensity score processing (clipping, calibration, etc.). draw_sample_splitting : bool Indicates whether the sample splitting should be drawn during initialization of the object. @@ -108,8 +116,9 @@ def __init__( score="LPQ", normalize_ipw=True, kde=None, - trimming_rule="truncate", - trimming_threshold=1e-2, + trimming_rule="truncate", # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + trimming_threshold=1e-2, # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). 
+ ps_processor_config: Optional[PSProcessorConfig] = None, draw_sample_splitting=True, ): super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting) @@ -148,10 +157,12 @@ def __init__( self._external_predictions_implemented = True - # initialize and check trimming + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + self._ps_processor_config, self._ps_processor = init_ps_processor( + ps_processor_config, trimming_rule, trimming_threshold + ) self._trimming_rule = trimming_rule - self._trimming_threshold = trimming_threshold - _check_trimming(self._trimming_rule, self._trimming_threshold) + self._trimming_threshold = self._ps_processor.clipping_threshold _ = self._check_learner(ml_g, "ml_g", regressor=False, classifier=True) _ = self._check_learner(ml_m, "ml_m", regressor=False, classifier=True) @@ -200,19 +211,44 @@ def normalize_ipw(self): """ return self._normalize_ipw + @property + def ps_processor_config(self): + """ + Configuration for propensity score processing (clipping, calibration, etc.). + """ + return self._ps_processor_config + + @property + def ps_processor(self): + """ + Propensity score processor. + """ + return self._ps_processor + + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_rule(self): """ Specifies the used trimming rule. """ + warnings.warn( + "'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2 + ) return self._trimming_rule + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_threshold(self): """ Specifies the used trimming threshold. """ - return self._trimming_threshold + warnings.warn( + "'trimming_threshold' is deprecated and will be removed in a future version. 
" + "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self._ps_processor.clipping_threshold @property def _score_element_names(self): @@ -386,7 +422,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa ml_m_z_prelim, x_train_1, z_train_1, method="predict_proba", smpls=smpls_prelim )["preds"] - m_z_hat_prelim = _trimm(m_z_hat_prelim, self.trimming_rule, self.trimming_threshold) + m_z_hat_prelim = self._ps_processor.adjust_ps(m_z_hat_prelim, z_train_1, cv=smpls_prelim) if self._normalize_ipw: m_z_hat_prelim = _normalize_ipw(m_z_hat_prelim, z_train_1) @@ -501,11 +537,12 @@ def ipw_score(theta): g_du_z0_hat["models"] = fitted_models["ml_g_du_z0"] g_du_z1_hat["models"] = fitted_models["ml_g_du_z1"] - # clip propensities - m_z_hat_adj = _trimm(m_z_hat["preds"], self.trimming_rule, self.trimming_threshold) - + # adjust propensity scores + m_z_hat["preds"] = self._ps_processor.adjust_ps(m_z_hat["preds"], z, cv=smpls) if self._normalize_ipw: - m_z_hat_adj = _normalize_ipw(m_z_hat_adj, z) + m_z_hat_adj = _normalize_ipw(m_z_hat["preds"], z) + else: + m_z_hat_adj = m_z_hat["preds"] # this could be adjusted to be compatible with dml1 # estimate final nuisance parameter diff --git a/doubleml/irm/pq.py b/doubleml/irm/pq.py index baf43b7e..f3b72e2c 100644 --- a/doubleml/irm/pq.py +++ b/doubleml/irm/pq.py @@ -1,3 +1,6 @@ +import warnings +from typing import Optional + import numpy as np from sklearn.base import clone from sklearn.model_selection import StratifiedKFold, train_test_split @@ -11,7 +14,6 @@ _check_quantile, _check_score, _check_treatment, - _check_trimming, _check_zero_one_treatment, ) from doubleml.utils._estimation import ( @@ -23,9 +25,11 @@ _predict_zero_one_propensity, _solve_ipw_score, ) -from doubleml.utils._propensity_score import _normalize_ipw, _trimm +from doubleml.utils._propensity_score import _normalize_ipw +from 
doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor +# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). class DoubleMLPQ(NonLinearScoreMixin, DoubleML): """Double machine learning for potential quantiles @@ -74,13 +78,16 @@ class DoubleMLPQ(NonLinearScoreMixin, DoubleML): Default is ``'None'``, which uses :py:class:`statsmodels.nonparametric.kde.KDEUnivariate` with a gaussian kernel and silverman for bandwidth determination. - trimming_rule : str - A str (``'truncate'`` is the only choice) specifying the trimming approach. - Default is ``'truncate'``. + trimming_rule : str, optional, deprecated + (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach. + Use `ps_processor_config` instead. Will be removed in a future version. + + trimming_threshold : float, optional, deprecated + (DEPRECATED) The threshold used for trimming. + Use `ps_processor_config` instead. Will be removed in a future version. - trimming_threshold : float - The threshold used for trimming. - Default is ``1e-2``. + ps_processor_config : PSProcessorConfig, optional + Configuration for propensity score processing (clipping, calibration, etc.). draw_sample_splitting : bool Indicates whether the sample splitting should be drawn during initialization of the object. @@ -115,8 +122,9 @@ def __init__( score="PQ", normalize_ipw=True, kde=None, - trimming_rule="truncate", - trimming_threshold=1e-2, + trimming_rule="truncate", # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + trimming_threshold=1e-2, # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). 
+ ps_processor_config: Optional[PSProcessorConfig] = None, draw_sample_splitting=True, ): super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting) @@ -155,10 +163,12 @@ def __init__( self._external_predictions_implemented = True - # initialize and check trimming + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + self._ps_processor_config, self._ps_processor = init_ps_processor( + ps_processor_config, trimming_rule, trimming_threshold + ) self._trimming_rule = trimming_rule - self._trimming_threshold = trimming_threshold - _check_trimming(self._trimming_rule, self._trimming_threshold) + self._trimming_threshold = self._ps_processor.clipping_threshold _ = self._check_learner(ml_g, "ml_g", regressor=False, classifier=True) _ = self._check_learner(ml_m, "ml_m", regressor=False, classifier=True) @@ -195,19 +205,44 @@ def normalize_ipw(self): """ return self._normalize_ipw + @property + def ps_processor_config(self): + """ + Configuration for propensity score processing (clipping, calibration, etc.). + """ + return self._ps_processor_config + + @property + def ps_processor(self): + """ + Propensity score processor. + """ + return self._ps_processor + + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_rule(self): """ Specifies the used trimming rule. """ + warnings.warn( + "'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2 + ) return self._trimming_rule + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_threshold(self): """ Specifies the used trimming threshold. """ - return self._trimming_threshold + warnings.warn( + "'trimming_threshold' is deprecated and will be removed in a future version. 
" + "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self._ps_processor.clipping_threshold @property def _score_element_names(self): @@ -326,7 +361,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa )["preds"] else: m_hat_prelim = m_hat["preds"][np.concatenate([test for _, test in smpls_prelim])] - m_hat_prelim = _trimm(m_hat_prelim, self.trimming_rule, self.trimming_threshold) + m_hat_prelim = self._ps_processor.adjust_ps(m_hat_prelim, d_train_1, cv=smpls_prelim) + if self._normalize_ipw: m_hat_prelim = _normalize_ipw(m_hat_prelim, d_train_1) if self.treatment == 0: @@ -370,11 +406,10 @@ def ipw_score(theta): g_hat["models"] = fitted_models["ml_g"] m_hat["models"] = fitted_models["ml_m"] - # clip propensities and normalize ipw weights - m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold) - + m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], d, cv=smpls) # this is not done in the score to save computation due to multiple score evaluations # to be able to evaluate the raw models the m_hat['preds'] are not changed + if self._normalize_ipw: m_hat_adj = _normalize_ipw(m_hat["preds"], d) else: diff --git a/doubleml/irm/qte.py b/doubleml/irm/qte.py index f896b078..46c8f316 100644 --- a/doubleml/irm/qte.py +++ b/doubleml/irm/qte.py @@ -1,3 +1,6 @@ +import warnings +from typing import Optional + import numpy as np import pandas as pd from joblib import Parallel, delayed @@ -9,11 +12,13 @@ from doubleml.irm.cvar import DoubleMLCVAR from doubleml.irm.lpq import DoubleMLLPQ from doubleml.irm.pq import DoubleMLPQ -from doubleml.utils._checks import _check_score, _check_trimming, _check_zero_one_treatment +from doubleml.utils._checks import _check_score, _check_zero_one_treatment from doubleml.utils._descriptive import generate_summary from doubleml.utils._estimation import _default_kde +from 
doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor +# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). class DoubleMLQTE(SampleSplittingMixin): """Double machine learning for quantile treatment effects @@ -56,13 +61,16 @@ class DoubleMLQTE(SampleSplittingMixin): Default is ``'None'``, which uses :py:class:`statsmodels.nonparametric.kde.KDEUnivariate` with a gaussian kernel and silverman for bandwidth determination. - trimming_rule : str - A str (``'truncate'`` is the only choice) specifying the trimming approach. - Default is ``'truncate'``. + trimming_rule : str, optional, deprecated + (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach. + Use `ps_processor_config` instead. Will be removed in a future version. + + trimming_threshold : float, optional, deprecated + (DEPRECATED) The threshold used for trimming. + Use `ps_processor_config` instead. Will be removed in a future version. - trimming_threshold : float - The threshold used for trimming. - Default is ``1e-2``. + ps_processor_config : PSProcessorConfig, optional + Configuration for propensity score processing (clipping, calibration, etc.). draw_sample_splitting : bool Indicates whether the sample splitting should be drawn during initialization of the object. @@ -98,8 +106,9 @@ def __init__( score="PQ", normalize_ipw=True, kde=None, - trimming_rule="truncate", - trimming_threshold=1e-2, + trimming_rule="truncate", # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + trimming_threshold=1e-2, # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). 
+ ps_processor_config: Optional[PSProcessorConfig] = None, draw_sample_splitting=True, ): self._dml_data = obj_dml_data @@ -130,10 +139,12 @@ def __init__( # initialize framework which is constructed after the fit method is called self._framework = None - # initialize and check trimming + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + self._ps_processor_config, self._ps_processor = init_ps_processor( + ps_processor_config, trimming_rule, trimming_threshold + ) self._trimming_rule = trimming_rule - self._trimming_threshold = trimming_threshold - _check_trimming(self._trimming_rule, self._trimming_threshold) + self._trimming_threshold = self._ps_processor.clipping_threshold if not isinstance(self.normalize_ipw, bool): raise TypeError( @@ -250,19 +261,44 @@ def normalize_ipw(self): """ return self._normalize_ipw + @property + def ps_processor_config(self): + """ + Configuration for propensity score processing (clipping, calibration, etc.). + """ + return self._ps_processor_config + + @property + def ps_processor(self): + """ + Propensity score processor. + """ + return self._ps_processor + + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_rule(self): """ Specifies the used trimming rule. """ + warnings.warn( + "'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2 + ) return self._trimming_rule + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_threshold(self): """ Specifies the used trimming threshold. """ - return self._trimming_threshold + warnings.warn( + "'trimming_threshold' is deprecated and will be removed in a future version. 
" + "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self._ps_processor.clipping_threshold @property def coef(self): @@ -530,8 +566,7 @@ def _initialize_models(self): "ml_m": self._learner["ml_m"], "n_folds": self.n_folds, "n_rep": self.n_rep, - "trimming_rule": self.trimming_rule, - "trimming_threshold": self.trimming_threshold, + "ps_processor_config": self.ps_processor_config, "normalize_ipw": self.normalize_ipw, "draw_sample_splitting": False, } diff --git a/doubleml/irm/ssm.py b/doubleml/irm/ssm.py index 00a49191..fdc2ab6e 100644 --- a/doubleml/irm/ssm.py +++ b/doubleml/irm/ssm.py @@ -1,5 +1,6 @@ import copy import warnings +from typing import Optional import numpy as np from sklearn.base import clone @@ -9,11 +10,12 @@ from doubleml.data.ssm_data import DoubleMLSSMData from doubleml.double_ml import DoubleML from doubleml.double_ml_score_mixins import LinearScoreMixin -from doubleml.utils._checks import _check_finite_predictions, _check_score, _check_trimming +from doubleml.utils._checks import _check_finite_predictions, _check_score from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls_2d, _predict_zero_one_propensity -from doubleml.utils._propensity_score import _trimm +from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor +# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). class DoubleMLSSM(LinearScoreMixin, DoubleML): """Double machine learning for sample selection models @@ -50,13 +52,16 @@ class DoubleMLSSM(LinearScoreMixin, DoubleML): Indicates whether the inverse probability weights are normalized. Default is ``False``. - trimming_rule : str - A str (``'truncate'`` is the only choice) specifying the trimming approach. - Default is ``'truncate'``. 
+ trimming_rule : str, optional, deprecated + (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach. + Use `ps_processor_config` instead. Will be removed in a future version. - trimming_threshold : float - The threshold used for trimming. - Default is ``1e-2``. + trimming_threshold : float, optional, deprecated + (DEPRECATED) The threshold used for trimming. + Use `ps_processor_config` instead. Will be removed in a future version. + + ps_processor_config : PSProcessorConfig, optional + Configuration for propensity score processing (clipping, calibration, etc.). draw_sample_splitting : bool Indicates whether the sample splitting should be drawn during initialization of the object. @@ -109,8 +114,9 @@ def __init__( n_rep=1, score="missing-at-random", normalize_ipw=False, - trimming_rule="truncate", - trimming_threshold=1e-2, + trimming_rule="truncate", # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + trimming_threshold=1e-2, # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + ps_processor_config: Optional[PSProcessorConfig] = None, draw_sample_splitting=True, ): super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting) @@ -119,9 +125,12 @@ def __init__( self._sensitivity_implemented = False self._normalize_ipw = normalize_ipw + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). 
+ self._ps_processor_config, self._ps_processor = init_ps_processor( + ps_processor_config, trimming_rule, trimming_threshold + ) self._trimming_rule = trimming_rule - self._trimming_threshold = trimming_threshold - _check_trimming(self._trimming_rule, self._trimming_threshold) + self._trimming_threshold = self._ps_processor.clipping_threshold self._check_data(self._dml_data) self._is_cluster_data = self._dml_data.is_cluster_data @@ -165,19 +174,44 @@ def normalize_ipw(self): """ return self._normalize_ipw + @property + def ps_processor_config(self): + """ + Configuration for propensity score processing (clipping, calibration, etc.). + """ + return self._ps_processor_config + + @property + def ps_processor(self): + """ + Propensity score processor. + """ + return self._ps_processor + + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_rule(self): """ Specifies the used trimming rule. """ + warnings.warn( + "'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2 + ) return self._trimming_rule + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_threshold(self): """ Specifies the used trimming threshold. """ - return self._trimming_threshold + warnings.warn( + "'trimming_threshold' is deprecated and will be removed in a future version. 
" + "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self._ps_processor.clipping_threshold def _initialize_ml_nuisance_params(self): valid_learner = ["ml_g_d0", "ml_g_d1", "ml_pi", "ml_m"] @@ -369,7 +403,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa pi_hat["models"] = fitted_models["ml_pi"] m_hat["models"] = fitted_models["ml_m"] - m_hat["preds"] = _trimm(m_hat["preds"], self._trimming_rule, self._trimming_threshold) + m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], d, cv=smpls) # treatment indicator dtreat = d == 1 diff --git a/doubleml/irm/tests/_utils_apo_manual.py b/doubleml/irm/tests/_utils_apo_manual.py index 0ec84417..8abcb029 100644 --- a/doubleml/irm/tests/_utils_apo_manual.py +++ b/doubleml/irm/tests/_utils_apo_manual.py @@ -21,7 +21,7 @@ def fit_apo( g1_params=None, m_params=None, normalize_ipw=False, - trimming_threshold=1e-2, + clipping_threshold=1e-2, ): n_obs = len(y) treated = d == treatment_level @@ -46,7 +46,7 @@ def fit_apo( g0_params=g0_params, g1_params=g1_params, m_params=m_params, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) all_g_hat0.append(g_hat0) @@ -83,7 +83,7 @@ def fit_nuisance_apo( g0_params=None, g1_params=None, m_params=None, - trimming_threshold=1e-12, + clipping_threshold=1e-12, ): ml_g0 = clone(learner_g) ml_g1 = clone(learner_g) @@ -102,7 +102,7 @@ def fit_nuisance_apo( g_hat1_list = fit_predict(y, x, ml_g1, g1_params, smpls, train_cond=train_cond1) ml_m = clone(learner_m) - m_hat_list = fit_predict_proba(treated, x, ml_m, m_params, smpls, trimming_threshold=trimming_threshold) + m_hat_list = fit_predict_proba(treated, x, ml_m, m_params, smpls, clipping_threshold=clipping_threshold) return g_hat0_list, g_hat1_list, m_hat_list diff --git a/doubleml/irm/tests/_utils_apos_manual.py b/doubleml/irm/tests/_utils_apos_manual.py index 
88fc59c2..d1eb575f 100644 --- a/doubleml/irm/tests/_utils_apos_manual.py +++ b/doubleml/irm/tests/_utils_apos_manual.py @@ -1,6 +1,8 @@ import numpy as np from sklearn.base import clone +from doubleml.utils.propensity_score_processing import PSProcessorConfig + from ...data.base_data import DoubleMLData from ...tests._utils_boot import draw_weights from ..apo import DoubleMLAPO @@ -16,9 +18,8 @@ def fit_apos( all_smpls, score, n_rep=1, - trimming_rule="truncate", normalize_ipw=False, - trimming_threshold=1e-2, + clipping_threshold=1e-2, ): n_obs = len(y) n_treatments = len(treatment_levels) @@ -39,8 +40,7 @@ def fit_apos( n_folds=n_folds, n_rep=n_rep, score=score, - trimming_rule=trimming_rule, - trimming_threshold=trimming_threshold, + ps_processor_config=PSProcessorConfig(clipping_threshold=clipping_threshold), normalize_ipw=normalize_ipw, draw_sample_splitting=False, ) diff --git a/doubleml/irm/tests/_utils_cvar_manual.py b/doubleml/irm/tests/_utils_cvar_manual.py index dd6935b6..8d9f0120 100644 --- a/doubleml/irm/tests/_utils_cvar_manual.py +++ b/doubleml/irm/tests/_utils_cvar_manual.py @@ -18,7 +18,7 @@ def fit_cvar( treatment, normalize_ipw=True, n_rep=1, - trimming_threshold=1e-2, + clipping_threshold=1e-2, g_params=None, m_params=None, ): @@ -40,7 +40,7 @@ def fit_cvar( smpls, treatment, normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, g_params=g_params, m_params=m_params, ) @@ -56,7 +56,7 @@ def fit_cvar( def fit_nuisance_cvar( - y, x, d, quantile, learner_g, learner_m, smpls, treatment, normalize_ipw, trimming_threshold, g_params, m_params + y, x, d, quantile, learner_g, learner_m, smpls, treatment, normalize_ipw, clipping_threshold, g_params, m_params ): n_folds = len(smpls) n_obs = len(y) @@ -95,7 +95,7 @@ def fit_nuisance_cvar( x_train_1 = x[train_inds_1, :] # todo change prediction method m_hat_prelim_list = fit_predict_proba( - d_train_1, x_train_1, ml_m, params=None, 
trimming_threshold=trimming_threshold, smpls=smpls_prelim + d_train_1, x_train_1, ml_m, params=None, clipping_threshold=clipping_threshold, smpls=smpls_prelim ) m_hat_prelim = np.full_like(y_train_1, np.nan, dtype="float64") @@ -104,8 +104,8 @@ def fit_nuisance_cvar( m_hat_prelim = _dml_cv_predict(ml_m, x_train_1, d_train_1, method="predict_proba", smpls=smpls_prelim)["preds"] - m_hat_prelim[m_hat_prelim < trimming_threshold] = trimming_threshold - m_hat_prelim[m_hat_prelim > 1 - trimming_threshold] = 1 - trimming_threshold + m_hat_prelim[m_hat_prelim < clipping_threshold] = clipping_threshold + m_hat_prelim[m_hat_prelim > 1 - clipping_threshold] = 1 - clipping_threshold if normalize_ipw: m_hat_prelim = _normalize_ipw(m_hat_prelim, d_train_1) @@ -141,8 +141,8 @@ def ipw_score(theta): ml_m.fit(x[train_inds, :], d[train_inds]) m_hat[test_inds] = ml_m.predict_proba(x[test_inds, :])[:, 1] - m_hat[m_hat < trimming_threshold] = trimming_threshold - m_hat[m_hat > 1 - trimming_threshold] = 1 - trimming_threshold + m_hat[m_hat < clipping_threshold] = clipping_threshold + m_hat[m_hat > 1 - clipping_threshold] = 1 - clipping_threshold if normalize_ipw: m_hat = _normalize_ipw(m_hat, d) diff --git a/doubleml/irm/tests/_utils_iivm_manual.py b/doubleml/irm/tests/_utils_iivm_manual.py index b61526b2..601604ee 100644 --- a/doubleml/irm/tests/_utils_iivm_manual.py +++ b/doubleml/irm/tests/_utils_iivm_manual.py @@ -23,7 +23,7 @@ def fit_iivm( r0_params=None, r1_params=None, normalize_ipw=True, - trimming_threshold=1e-2, + clipping_threshold=1e-2, always_takers=True, never_takers=True, ): @@ -53,7 +53,7 @@ def fit_iivm( m_params=m_params, r0_params=r0_params, r1_params=r1_params, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, always_takers=always_takers, never_takers=never_takers, ) @@ -98,7 +98,7 @@ def fit_nuisance_iivm( m_params=None, r0_params=None, r1_params=None, - trimming_threshold=1e-12, + clipping_threshold=1e-12, always_takers=True, 
never_takers=True, ): @@ -117,7 +117,7 @@ def fit_nuisance_iivm( g_hat1_list = fit_predict(y, x, ml_g1, g1_params, smpls, train_cond=train_cond1) ml_m = clone(learner_m) - m_hat_list = fit_predict_proba(z, x, ml_m, m_params, smpls, trimming_threshold=trimming_threshold) + m_hat_list = fit_predict_proba(z, x, ml_m, m_params, smpls, clipping_threshold=clipping_threshold) ml_r0 = clone(learner_r) if always_takers: diff --git a/doubleml/irm/tests/_utils_irm_manual.py b/doubleml/irm/tests/_utils_irm_manual.py index f5a5bad7..d5bf3dc3 100644 --- a/doubleml/irm/tests/_utils_irm_manual.py +++ b/doubleml/irm/tests/_utils_irm_manual.py @@ -20,7 +20,7 @@ def fit_irm( g1_params=None, m_params=None, normalize_ipw=True, - trimming_threshold=1e-2, + clipping_threshold=1e-2, ): n_obs = len(y) @@ -44,7 +44,7 @@ def fit_irm( g0_params=g0_params, g1_params=g1_params, m_params=m_params, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) all_g_hat0.append(g_hat0) @@ -72,7 +72,7 @@ def fit_irm( def fit_nuisance_irm( - y, x, d, learner_g, learner_m, smpls, score, g0_params=None, g1_params=None, m_params=None, trimming_threshold=1e-12 + y, x, d, learner_g, learner_m, smpls, score, g0_params=None, g1_params=None, m_params=None, clipping_threshold=1e-12 ): ml_g0 = clone(learner_g) ml_g1 = clone(learner_g) @@ -89,7 +89,7 @@ def fit_nuisance_irm( g_hat1_list = fit_predict(y, x, ml_g1, g1_params, smpls, train_cond=train_cond1) ml_m = clone(learner_m) - m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls, trimming_threshold=trimming_threshold) + m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls, clipping_threshold=clipping_threshold) p_hat_list = [] for _ in smpls: diff --git a/doubleml/irm/tests/_utils_lpq_manual.py b/doubleml/irm/tests/_utils_lpq_manual.py index 376c7c46..839025fd 100644 --- a/doubleml/irm/tests/_utils_lpq_manual.py +++ b/doubleml/irm/tests/_utils_lpq_manual.py @@ -20,7 +20,7 @@ def fit_lpq( treatment, n_rep=1, 
trimming_rule="truncate", - trimming_threshold=1e-2, + clipping_threshold=1e-2, kde=_default_kde, normalize_ipw=True, m_z_params=None, @@ -48,7 +48,7 @@ def fit_lpq( smpls, treatment, trimming_rule=trimming_rule, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, normalize_ipw=normalize_ipw, m_z_params=m_z_params, m_d_z0_params=m_d_z0_params, @@ -80,7 +80,7 @@ def fit_nuisance_lpq( smpls, treatment, trimming_rule, - trimming_threshold, + clipping_threshold, normalize_ipw, m_z_params, m_d_z0_params, @@ -144,7 +144,7 @@ def fit_nuisance_lpq( "preds" ] - m_z_hat_prelim = _trimm(m_z_hat_prelim, trimming_rule, trimming_threshold) + m_z_hat_prelim = _trimm(m_z_hat_prelim, trimming_rule, clipping_threshold) if normalize_ipw: m_z_hat_prelim = _normalize_ipw(m_z_hat_prelim, z_train_1) @@ -222,7 +222,7 @@ def ipw_score(theta): m_d_z1_hat[test_inds] = ml_m_d_z1.predict_proba(x[test_inds, :])[:, 1] # clip propensities - m_z_hat = _trimm(m_z_hat, trimming_rule, trimming_threshold) + m_z_hat = _trimm(m_z_hat, trimming_rule, clipping_threshold) if normalize_ipw: m_z_hat = _normalize_ipw(m_z_hat, z) diff --git a/doubleml/irm/tests/_utils_pq_manual.py b/doubleml/irm/tests/_utils_pq_manual.py index b5b27c7c..526854a0 100644 --- a/doubleml/irm/tests/_utils_pq_manual.py +++ b/doubleml/irm/tests/_utils_pq_manual.py @@ -18,7 +18,7 @@ def fit_pq( all_smpls, treatment, n_rep=1, - trimming_threshold=1e-2, + clipping_threshold=1e-2, normalize_ipw=True, g_params=None, m_params=None, @@ -40,7 +40,7 @@ def fit_pq( learner_m, smpls, treatment, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, normalize_ipw=normalize_ipw, g_params=g_params, m_params=m_params, @@ -57,7 +57,7 @@ def fit_pq( def fit_nuisance_pq( - y, x, d, quantile, learner_g, learner_m, smpls, treatment, trimming_threshold, normalize_ipw, g_params, m_params + y, x, d, quantile, learner_g, learner_m, smpls, treatment, clipping_threshold, normalize_ipw, g_params, 
m_params ): n_folds = len(smpls) n_obs = len(y) @@ -96,8 +96,8 @@ def fit_nuisance_pq( # todo change prediction method m_hat_prelim = _dml_cv_predict(clone(ml_m), x_train_1, d_train_1, method="predict_proba", smpls=smpls_prelim)["preds"] - m_hat_prelim[m_hat_prelim < trimming_threshold] = trimming_threshold - m_hat_prelim[m_hat_prelim > 1 - trimming_threshold] = 1 - trimming_threshold + m_hat_prelim[m_hat_prelim < clipping_threshold] = clipping_threshold + m_hat_prelim[m_hat_prelim > 1 - clipping_threshold] = 1 - clipping_threshold if normalize_ipw: m_hat_prelim = _normalize_ipw(m_hat_prelim, d_train_1) @@ -129,8 +129,8 @@ def ipw_score(theta): ml_m.fit(x[train_inds, :], d[train_inds]) m_hat[test_inds] = ml_m.predict_proba(x[test_inds, :])[:, 1] - m_hat[m_hat < trimming_threshold] = trimming_threshold - m_hat[m_hat > 1 - trimming_threshold] = 1 - trimming_threshold + m_hat[m_hat < clipping_threshold] = clipping_threshold + m_hat[m_hat > 1 - clipping_threshold] = 1 - clipping_threshold if normalize_ipw: m_hat = _normalize_ipw(m_hat, d) diff --git a/doubleml/irm/tests/_utils_ssm_manual.py b/doubleml/irm/tests/_utils_ssm_manual.py index f14a1f66..07014018 100644 --- a/doubleml/irm/tests/_utils_ssm_manual.py +++ b/doubleml/irm/tests/_utils_ssm_manual.py @@ -19,7 +19,7 @@ def fit_selection( all_smpls, score, trimming_rule="truncate", - trimming_threshold=1e-2, + clipping_threshold=1e-2, normalize_ipw=True, n_rep=1, g_d0_params=None, @@ -55,7 +55,7 @@ def fit_selection( smpls, score, trimming_rule=trimming_rule, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, g_d0_params=g_d0_params, g_d1_params=g_d1_params, pi_params=pi_params, @@ -108,7 +108,7 @@ def fit_nuisance_selection( smpls, score, trimming_rule="truncate", - trimming_threshold=1e-2, + clipping_threshold=1e-2, g_d0_params=None, g_d1_params=None, pi_params=None, @@ -125,7 +125,7 @@ def fit_nuisance_selection( dx = np.column_stack((d, x, z)) if score == "missing-at-random": - 
pi_hat_list = fit_predict_proba(s, dx, ml_pi, pi_params, smpls, trimming_threshold=trimming_threshold) + pi_hat_list = fit_predict_proba(s, dx, ml_pi, pi_params, smpls, clipping_threshold=clipping_threshold) m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls) @@ -212,7 +212,7 @@ def fit_nuisance_selection( # predict conditional outcome g_hat_d0 = ml_g_d0.predict(xpi_test) - m_hat = _trimm(m_hat, trimming_rule, trimming_threshold) + m_hat = _trimm(m_hat, trimming_rule, clipping_threshold) # append predictions on test sample to final list of predictions g_hat_d1_list.append(g_hat_d1) diff --git a/doubleml/irm/tests/test_apo.py b/doubleml/irm/tests/test_apo.py index 7558b7c1..1b41705f 100644 --- a/doubleml/irm/tests/test_apo.py +++ b/doubleml/irm/tests/test_apo.py @@ -9,6 +9,7 @@ import doubleml as dml from doubleml.irm.datasets import make_irm_data, make_irm_data_discrete_treatments +from doubleml.utils.propensity_score_processing import PSProcessorConfig from ...tests._utils import draw_smpls from ._utils_apo_manual import boot_apo, fit_apo, fit_sensitivity_elements_apo @@ -34,7 +35,7 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.2, 0.15]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @@ -44,7 +45,7 @@ def treatment_level(request): @pytest.fixture(scope="module") -def dml_apo_fixture(learner, normalize_ipw, trimming_threshold, treatment_level): +def dml_apo_fixture(learner, normalize_ipw, clipping_threshold, treatment_level): boot_methods = ["normal"] n_folds = 2 n_rep_boot = 499 @@ -76,7 +77,7 @@ def dml_apo_fixture(learner, normalize_ipw, trimming_threshold, treatment_level) score="APO", normalize_ipw=normalize_ipw, draw_sample_splitting=False, - trimming_threshold=trimming_threshold, + ps_processor_config=PSProcessorConfig(clipping_threshold=clipping_threshold), ) # synchronize the sample splitting @@ -94,7 +95,7 @@ def dml_apo_fixture(learner, normalize_ipw, trimming_threshold, 
treatment_level) all_smpls=all_smpls, score="APO", normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) np.random.seed(3141) @@ -108,7 +109,7 @@ def dml_apo_fixture(learner, normalize_ipw, trimming_threshold, treatment_level) score="APO", normalize_ipw=normalize_ipw, draw_sample_splitting=False, - trimming_threshold=trimming_threshold, + ps_processor_config=PSProcessorConfig(clipping_threshold=clipping_threshold), ) # synchronize the sample splitting @@ -242,7 +243,12 @@ def test_dml_apo_capo_gapo(treatment_level, cov_type): ml_m = RandomForestClassifier(n_estimators=10) dml_obj = dml.DoubleMLAPO( - obj_dml_data, ml_m=ml_m, ml_g=ml_g, treatment_level=treatment_level, trimming_threshold=0.05, n_folds=5 + obj_dml_data, + ml_m=ml_m, + ml_g=ml_g, + treatment_level=treatment_level, + ps_processor_config=PSProcessorConfig(clipping_threshold=0.05), + n_folds=5, ) dml_obj.fit() diff --git a/doubleml/irm/tests/test_apo_classifier.py b/doubleml/irm/tests/test_apo_classifier.py index 042f3fe8..0b471956 100644 --- a/doubleml/irm/tests/test_apo_classifier.py +++ b/doubleml/irm/tests/test_apo_classifier.py @@ -7,6 +7,7 @@ from sklearn.linear_model import LogisticRegression import doubleml as dml +from doubleml.utils.propensity_score_processing import PSProcessorConfig from ...tests._utils import draw_smpls from ._utils_apo_manual import boot_apo, fit_apo @@ -32,12 +33,12 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.01, 0.05]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @pytest.fixture(scope="module") -def dml_apo_classifier_fixture(generate_data_irm_binary, learner, normalize_ipw, trimming_threshold): +def dml_apo_classifier_fixture(generate_data_irm_binary, learner, normalize_ipw, clipping_threshold): boot_methods = ["normal"] n_folds = 2 n_rep_boot = 499 @@ -64,7 +65,7 @@ def dml_apo_classifier_fixture(generate_data_irm_binary, learner, 
normalize_ipw, n_folds=n_folds, score=score, normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + ps_processor_config=PSProcessorConfig(clipping_threshold=clipping_threshold), draw_sample_splitting=False, ) # synchronize the sample splitting @@ -82,7 +83,7 @@ def dml_apo_classifier_fixture(generate_data_irm_binary, learner, normalize_ipw, all_smpls, score, normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) res_dict = { diff --git a/doubleml/irm/tests/test_apo_exceptions.py b/doubleml/irm/tests/test_apo_exceptions.py index 5991ee5e..f428de6b 100644 --- a/doubleml/irm/tests/test_apo_exceptions.py +++ b/doubleml/irm/tests/test_apo_exceptions.py @@ -76,22 +76,6 @@ def test_apo_exception_scores(): _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, score="MAR") -@pytest.mark.ci -def test_apo_exception_trimming_rule(): - msg = "Invalid trimming_rule discard. Valid trimming_rule truncate." - with pytest.raises(ValueError, match=msg): - _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, trimming_rule="discard") - - # check the trimming_threshold exceptions - msg = "trimming_threshold has to be a float. Object of type passed." - with pytest.raises(TypeError, match=msg): - _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, trimming_rule="truncate", trimming_threshold="0.1") - - msg = "Invalid trimming_threshold 0.6. trimming_threshold has to be between 0 and 0.5." - with pytest.raises(ValueError, match=msg): - _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, trimming_rule="truncate", trimming_threshold=0.6) - - @pytest.mark.ci def test_apo_exception_ipw_normalization(): msg = "Normalization indicator has to be boolean. Object of type passed." 
diff --git a/doubleml/irm/tests/test_apo_ps_processor.py b/doubleml/irm/tests/test_apo_ps_processor.py new file mode 100644 index 00000000..d70f2553 --- /dev/null +++ b/doubleml/irm/tests/test_apo_ps_processor.py @@ -0,0 +1,72 @@ +import numpy as np +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +from doubleml import DoubleMLAPO, DoubleMLData +from doubleml.irm.datasets import make_irm_data_discrete_treatments +from doubleml.utils.propensity_score_processing import PSProcessorConfig + + +@pytest.fixture(scope="module") +def generate_data_apo(): + np.random.seed(3141) + data = make_irm_data_discrete_treatments(n_obs=200) + x = data["x"] + y = data["y"] + d = data["d"] + return x, y, d + + +@pytest.mark.ci +@pytest.mark.parametrize( + "ps_config", + [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ], +) +def test_apo_ml_m_predictions_ps_processor(generate_data_apo, ps_config): + x, y, d = generate_data_apo + dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) + np.random.seed(3141) + dml_apo = DoubleMLAPO( + obj_dml_data=dml_data, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + ps_processor_config=ps_config, + n_rep=1, + treatment_level=0, + ) + dml_apo.fit(store_predictions=True) + ml_m_preds = dml_apo.predictions["ml_m"][:, 0, 0] + assert np.all(ml_m_preds >= ps_config.clipping_threshold) + assert np.all(ml_m_preds <= 1 - ps_config.clipping_threshold) + + +@pytest.mark.ci +def test_apo_ml_m_predictions_ps_processor_differences(generate_data_apo): + x, y, d = generate_data_apo + dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) + np.random.seed(3141) + configs = [ + 
PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ] + preds = [] + for cfg in configs: + dml_apo = DoubleMLAPO( + obj_dml_data=dml_data, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + ps_processor_config=cfg, + n_rep=1, + treatment_level=0, + ) + dml_apo.fit(store_predictions=True) + preds.append(dml_apo.predictions["ml_m"][:, 0, 0]) + diffs = [not np.allclose(preds[i], preds[j], atol=1e-6) for i in range(len(preds)) for j in range(i + 1, len(preds))] + assert any(diffs) diff --git a/doubleml/irm/tests/test_apo_weighted_scores.py b/doubleml/irm/tests/test_apo_weighted_scores.py index 63687ebd..b5ba8a32 100644 --- a/doubleml/irm/tests/test_apo_weighted_scores.py +++ b/doubleml/irm/tests/test_apo_weighted_scores.py @@ -5,6 +5,7 @@ from sklearn.linear_model import LinearRegression, LogisticRegression import doubleml as dml +from doubleml.utils.propensity_score_processing import PSProcessorConfig from ...tests._utils import draw_smpls @@ -39,7 +40,7 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.2, 0.15]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @@ -49,7 +50,7 @@ def treatment_level(request): @pytest.fixture(scope="module") -def weighted_apo_score_fixture(generate_data_irm, learner, score, n_rep, normalize_ipw, trimming_threshold, treatment_level): +def weighted_apo_score_fixture(generate_data_irm, learner, score, n_rep, normalize_ipw, clipping_threshold, treatment_level): n_folds = 2 # collect data @@ -67,7 +68,7 @@ def weighted_apo_score_fixture(generate_data_irm, learner, score, n_rep, normali "n_rep": n_rep, "score": score, "normalize_ipw": normalize_ipw, 
- "trimming_threshold": trimming_threshold, + "ps_processor_config": PSProcessorConfig(clipping_threshold=clipping_threshold), "draw_sample_splitting": False, } diff --git a/doubleml/irm/tests/test_apos.py b/doubleml/irm/tests/test_apos.py index 55a48ced..a3897352 100644 --- a/doubleml/irm/tests/test_apos.py +++ b/doubleml/irm/tests/test_apos.py @@ -7,6 +7,7 @@ import doubleml as dml from doubleml.irm.datasets import make_irm_data, make_irm_data_discrete_treatments +from doubleml.utils.propensity_score_processing import PSProcessorConfig from ...tests._utils import confint_manual from ._utils_apos_manual import boot_apos, fit_apos @@ -90,7 +91,7 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.2, 0.15]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @@ -100,7 +101,7 @@ def treatment_levels(request): @pytest.fixture(scope="module") -def dml_apos_fixture(learner, n_rep, normalize_ipw, trimming_threshold, treatment_levels): +def dml_apos_fixture(learner, n_rep, normalize_ipw, clipping_threshold, treatment_levels): boot_methods = ["normal"] n_folds = 2 n_rep_boot = 499 @@ -124,8 +125,7 @@ def dml_apos_fixture(learner, n_rep, normalize_ipw, trimming_threshold, treatmen "n_rep": n_rep, "score": "APO", "normalize_ipw": normalize_ipw, - "trimming_rule": "truncate", - "trimming_threshold": trimming_threshold, + "ps_processor_config": PSProcessorConfig(clipping_threshold=clipping_threshold), } unfitted_apos_model = dml.DoubleMLAPOS(**input_args) @@ -151,9 +151,8 @@ def dml_apos_fixture(learner, n_rep, normalize_ipw, trimming_threshold, treatmen all_smpls=all_smpls, n_rep=n_rep, score="APO", - trimming_rule="truncate", normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) ci = dml_obj.confint(joint=False, level=0.95) diff --git a/doubleml/irm/tests/test_apos_classfier.py b/doubleml/irm/tests/test_apos_classfier.py index f9cfc10c..a044a979 100644 --- 
a/doubleml/irm/tests/test_apos_classfier.py +++ b/doubleml/irm/tests/test_apos_classfier.py @@ -7,6 +7,7 @@ import doubleml as dml from doubleml.irm.datasets import make_irm_data_discrete_treatments +from doubleml.utils.propensity_score_processing import PSProcessorConfig from ...tests._utils import confint_manual from ._utils_apos_manual import boot_apos, fit_apos @@ -37,7 +38,7 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.2, 0.15]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @@ -47,7 +48,7 @@ def treatment_levels(request): @pytest.fixture(scope="module") -def dml_apos_classifier_fixture(learner, n_rep, normalize_ipw, trimming_threshold, treatment_levels): +def dml_apos_classifier_fixture(learner, n_rep, normalize_ipw, clipping_threshold, treatment_levels): boot_methods = ["normal"] n_folds = 2 n_rep_boot = 499 @@ -71,8 +72,7 @@ def dml_apos_classifier_fixture(learner, n_rep, normalize_ipw, trimming_threshol "n_rep": n_rep, "score": "APO", "normalize_ipw": normalize_ipw, - "trimming_rule": "truncate", - "trimming_threshold": trimming_threshold, + "ps_processor_config": PSProcessorConfig(clipping_threshold=clipping_threshold), } unfitted_apos_model = dml.DoubleMLAPOS(**input_args) @@ -97,9 +97,8 @@ def dml_apos_classifier_fixture(learner, n_rep, normalize_ipw, trimming_threshol treatment_levels=treatment_levels, all_smpls=all_smpls, score="APO", - trimming_rule="truncate", normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) ci = dml_obj.confint(joint=False, level=0.95) diff --git a/doubleml/irm/tests/test_apos_exceptions.py b/doubleml/irm/tests/test_apos_exceptions.py index 93274cee..f5b741ef 100644 --- a/doubleml/irm/tests/test_apos_exceptions.py +++ b/doubleml/irm/tests/test_apos_exceptions.py @@ -59,22 +59,6 @@ def test_apos_exception_scores(): _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0, score="MAR") 
-@pytest.mark.ci -def test_apos_exception_trimming_rule(): - msg = "Invalid trimming_rule discard. Valid trimming_rule truncate." - with pytest.raises(ValueError, match=msg): - _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0, trimming_rule="discard") - - # check the trimming_threshold exceptions - msg = "trimming_threshold has to be a float. Object of type passed." - with pytest.raises(TypeError, match=msg): - _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0, trimming_rule="truncate", trimming_threshold="0.1") - - msg = "Invalid trimming_threshold 0.6. trimming_threshold has to be between 0 and 0.5." - with pytest.raises(ValueError, match=msg): - _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0, trimming_rule="truncate", trimming_threshold=0.6) - - @pytest.mark.ci def test_apos_exception_ipw_normalization(): msg = "Normalization indicator has to be boolean. Object of type passed." diff --git a/doubleml/irm/tests/test_apos_weighted_scores.py b/doubleml/irm/tests/test_apos_weighted_scores.py index 6d0a7f65..e400532d 100644 --- a/doubleml/irm/tests/test_apos_weighted_scores.py +++ b/doubleml/irm/tests/test_apos_weighted_scores.py @@ -7,6 +7,7 @@ import doubleml as dml from doubleml.irm.datasets import make_irm_data_discrete_treatments +from doubleml.utils.propensity_score_processing import PSProcessorConfig @pytest.fixture( @@ -39,7 +40,7 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.2, 0.15]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @@ -49,7 +50,7 @@ def treatment_levels(request): @pytest.fixture(scope="module") -def weighted_apos_score_fixture(learner, score, n_rep, normalize_ipw, trimming_threshold, treatment_levels): +def weighted_apos_score_fixture(learner, score, n_rep, normalize_ipw, clipping_threshold, treatment_levels): n_obs = 500 n_folds = 2 @@ -71,8 +72,7 @@ def weighted_apos_score_fixture(learner, score, n_rep, normalize_ipw, trimming_t "n_rep": n_rep, 
"score": score, "normalize_ipw": normalize_ipw, - "trimming_threshold": trimming_threshold, - "trimming_rule": "truncate", + "ps_processor_config": PSProcessorConfig(clipping_threshold=clipping_threshold), } np.random.seed(42) diff --git a/doubleml/irm/tests/test_cvar.py b/doubleml/irm/tests/test_cvar.py index 0eee71c6..d6b08a1c 100644 --- a/doubleml/irm/tests/test_cvar.py +++ b/doubleml/irm/tests/test_cvar.py @@ -42,12 +42,12 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.01, 0.05]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @pytest.fixture(scope="module") -def dml_cvar_fixture(generate_data_quantiles, treatment, quantile, learner, normalize_ipw, trimming_threshold): +def dml_cvar_fixture(generate_data_quantiles, treatment, quantile, learner, normalize_ipw, clipping_threshold): n_folds = 3 # Set machine learning methods for m & g @@ -71,7 +71,7 @@ def dml_cvar_fixture(generate_data_quantiles, treatment, quantile, learner, norm n_folds=n_folds, n_rep=1, normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold), draw_sample_splitting=False, ) @@ -91,7 +91,7 @@ def dml_cvar_fixture(generate_data_quantiles, treatment, quantile, learner, norm treatment, normalize_ipw=normalize_ipw, n_rep=1, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) res_dict = { diff --git a/doubleml/irm/tests/test_cvar_ps_processor.py b/doubleml/irm/tests/test_cvar_ps_processor.py new file mode 100644 index 00000000..6c78162b --- /dev/null +++ b/doubleml/irm/tests/test_cvar_ps_processor.py @@ -0,0 +1,65 @@ +import numpy as np +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +import doubleml as dml +from doubleml.utils.propensity_score_processing import PSProcessorConfig + + +@pytest.mark.ci +@pytest.mark.parametrize( + "ps_config", + [ + 
PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ], +) +def test_cvar_ml_m_predictions_ps_processor(generate_data_quantiles, ps_config): + x, y, d = generate_data_quantiles + dml_data = dml.DoubleMLData.from_arrays(x=x, y=y, d=d) + np.random.seed(3141) + dml_cvar = dml.DoubleMLCVAR( + obj_dml_data=dml_data, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + treatment=1, + quantile=0.5, + ps_processor_config=ps_config, + n_rep=1, + ) + dml_cvar.fit(store_predictions=True) + ml_m_preds = dml_cvar.predictions["ml_m"][:, 0, 0] + # Just check that predictions are within [clipping_threshold, 1-clipping_threshold] + assert np.all(ml_m_preds >= ps_config.clipping_threshold) + assert np.all(ml_m_preds <= 1 - ps_config.clipping_threshold) + + +@pytest.mark.ci +def test_cvar_ml_m_predictions_ps_processor_differences(generate_data_quantiles): + x, y, d = generate_data_quantiles + dml_data = dml.DoubleMLData.from_arrays(x=x, y=y, d=d) + np.random.seed(3141) + configs = [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ] + preds = [] + for cfg in configs: + dml_cvar = dml.DoubleMLCVAR( + obj_dml_data=dml_data, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + treatment=1, + quantile=0.5, + ps_processor_config=cfg, + n_rep=1, + ) + dml_cvar.fit(store_predictions=True) + 
preds.append(dml_cvar.predictions["ml_m"][:, 0, 0]) + # Check that at least two configurations yield different predictions (element-wise) + diffs = [not np.allclose(preds[i], preds[j], atol=1e-6) for i in range(len(preds)) for j in range(i + 1, len(preds))] + assert any(diffs) diff --git a/doubleml/irm/tests/test_cvar_tune.py b/doubleml/irm/tests/test_cvar_tune.py index ade84769..d51e7852 100644 --- a/doubleml/irm/tests/test_cvar_tune.py +++ b/doubleml/irm/tests/test_cvar_tune.py @@ -71,7 +71,7 @@ def dml_cvar_fixture(generate_data_quantiles, treatment, quantile, learner_g, le n_folds=n_folds, n_rep=1, normalize_ipw=normalize_ipw, - trimming_threshold=0.01, + ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=0.01), draw_sample_splitting=False, ) @@ -130,7 +130,7 @@ def dml_cvar_fixture(generate_data_quantiles, treatment, quantile, learner_g, le all_smpls=all_smpls, treatment=treatment, n_rep=1, - trimming_threshold=0.01, + clipping_threshold=0.01, normalize_ipw=normalize_ipw, g_params=g_params, m_params=m_params, diff --git a/doubleml/irm/tests/test_iivm.py b/doubleml/irm/tests/test_iivm.py index 169f4175..1c049e26 100644 --- a/doubleml/irm/tests/test_iivm.py +++ b/doubleml/irm/tests/test_iivm.py @@ -34,12 +34,12 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.01, 0.05]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @pytest.fixture(scope="module") -def dml_iivm_fixture(generate_data_iivm, learner, score, normalize_ipw, trimming_threshold): +def dml_iivm_fixture(generate_data_iivm, learner, score, normalize_ipw, clipping_threshold): boot_methods = ["normal"] n_folds = 2 n_rep_boot = 491 @@ -72,7 +72,8 @@ def dml_iivm_fixture(generate_data_iivm, learner, score, normalize_ipw, trimming n_folds, draw_sample_splitting=False, normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold), + 
score=score, ) # synchronize the sample splitting dml_iivm_obj.set_sample_splitting(all_smpls=all_smpls) @@ -91,7 +92,7 @@ def dml_iivm_fixture(generate_data_iivm, learner, score, normalize_ipw, trimming all_smpls, score, normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) res_dict = { diff --git a/doubleml/irm/tests/test_iivm_classifier.py b/doubleml/irm/tests/test_iivm_classifier.py index 983c34a7..78096031 100644 --- a/doubleml/irm/tests/test_iivm_classifier.py +++ b/doubleml/irm/tests/test_iivm_classifier.py @@ -34,12 +34,12 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.01, 0.05]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @pytest.fixture(scope="module") -def dml_iivm_classifier_fixture(generate_data_iivm_binary, learner, score, normalize_ipw, trimming_threshold): +def dml_iivm_classifier_fixture(generate_data_iivm_binary, learner, score, normalize_ipw, clipping_threshold): boot_methods = ["normal"] n_folds = 2 n_rep_boot = 491 @@ -63,7 +63,7 @@ def dml_iivm_classifier_fixture(generate_data_iivm_binary, learner, score, norma ml_r, n_folds, normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold), draw_sample_splitting=False, ) # synchronize the sample splitting @@ -83,7 +83,7 @@ def dml_iivm_classifier_fixture(generate_data_iivm_binary, learner, score, norma all_smpls, score, normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) res_dict = { diff --git a/doubleml/irm/tests/test_iivm_ps_processor.py b/doubleml/irm/tests/test_iivm_ps_processor.py new file mode 100644 index 00000000..e8eed01f --- /dev/null +++ b/doubleml/irm/tests/test_iivm_ps_processor.py @@ -0,0 +1,68 @@ +import numpy as np +import pytest +from sklearn.linear_model import LinearRegression, 
LogisticRegression + +from doubleml import DoubleMLData +from doubleml.irm.iivm import DoubleMLIIVM +from doubleml.utils.propensity_score_processing import PSProcessorConfig + + +@pytest.fixture +def dml_data_iivm(generate_data_iivm): + data = generate_data_iivm + x_cols = data.columns[data.columns.str.startswith("X")].tolist() + dml_data = DoubleMLData(data, "y", ["d"], x_cols, "z") + return dml_data + + +@pytest.mark.ci +@pytest.mark.parametrize( + "ps_config", + [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ], +) +def test_iivm_ml_m_predictions_ps_processor(dml_data_iivm, ps_config): + np.random.seed(3141) + dml_iivm = DoubleMLIIVM( + obj_dml_data=dml_data_iivm, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + ml_r=LogisticRegression(), + ps_processor_config=ps_config, + n_rep=1, + ) + dml_iivm.fit(store_predictions=True) + ml_m_preds = dml_iivm.predictions["ml_m"][:, 0, 0] + # Just check that predictions are within [clipping_threshold, 1-clipping_threshold] + assert np.all(ml_m_preds >= ps_config.clipping_threshold) + assert np.all(ml_m_preds <= 1 - ps_config.clipping_threshold) + + +@pytest.mark.ci +def test_iivm_ml_m_predictions_ps_processor_differences(dml_data_iivm): + np.random.seed(3141) + configs = [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ] + preds = [] + for cfg in 
configs: + dml_iivm = DoubleMLIIVM( + obj_dml_data=dml_data_iivm, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + ml_r=LogisticRegression(), + ps_processor_config=cfg, + n_rep=1, + ) + dml_iivm.fit(store_predictions=True) + preds.append(dml_iivm.predictions["ml_m"][:, 0, 0]) + # Check that at least two configurations yield different predictions (element-wise) + diffs = [not np.allclose(preds[i], preds[j], atol=1e-6) for i in range(len(preds)) for j in range(i + 1, len(preds))] + assert any(diffs) diff --git a/doubleml/irm/tests/test_iivm_subgroups.py b/doubleml/irm/tests/test_iivm_subgroups.py index 906ed897..8633f6c8 100644 --- a/doubleml/irm/tests/test_iivm_subgroups.py +++ b/doubleml/irm/tests/test_iivm_subgroups.py @@ -30,7 +30,7 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.01]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @@ -47,7 +47,7 @@ def subgroups(request): @pytest.fixture(scope="module") -def dml_iivm_subgroups_fixture(generate_data_iivm, learner, score, normalize_ipw, trimming_threshold, subgroups): +def dml_iivm_subgroups_fixture(generate_data_iivm, learner, score, normalize_ipw, clipping_threshold, subgroups): boot_methods = ["normal"] n_folds = 2 n_rep_boot = 491 @@ -73,9 +73,10 @@ def dml_iivm_subgroups_fixture(generate_data_iivm, learner, score, normalize_ipw ml_m, ml_r, n_folds, + score=score, subgroups=subgroups, normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold), draw_sample_splitting=False, ) # synchronize the sample splitting @@ -99,7 +100,7 @@ def dml_iivm_subgroups_fixture(generate_data_iivm, learner, score, normalize_ipw all_smpls, score, normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, always_takers=subgroups["always_takers"], never_takers=subgroups["never_takers"], ) diff --git 
a/doubleml/irm/tests/test_irm.py b/doubleml/irm/tests/test_irm.py index 856c7f59..f3b60ea9 100644 --- a/doubleml/irm/tests/test_irm.py +++ b/doubleml/irm/tests/test_irm.py @@ -9,6 +9,7 @@ import doubleml as dml from doubleml.irm.datasets import make_irm_data +from doubleml.utils.propensity_score_processing import PSProcessorConfig from doubleml.utils.resampling import DoubleMLResampling from ...tests._utils import draw_smpls @@ -40,12 +41,12 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.2, 0.15]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @pytest.fixture(scope="module") -def dml_irm_fixture(generate_data_irm, learner, score, normalize_ipw, trimming_threshold): +def dml_irm_fixture(generate_data_irm, learner, score, normalize_ipw, clipping_threshold): boot_methods = ["normal"] n_folds = 2 n_rep_boot = 499 @@ -62,6 +63,7 @@ def dml_irm_fixture(generate_data_irm, learner, score, normalize_ipw, trimming_t all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=d) obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d) + ps_processor_config = PSProcessorConfig(clipping_threshold=clipping_threshold) np.random.seed(3141) dml_irm_obj = dml.DoubleMLIRM( obj_dml_data, @@ -71,7 +73,7 @@ def dml_irm_fixture(generate_data_irm, learner, score, normalize_ipw, trimming_t score=score, normalize_ipw=normalize_ipw, draw_sample_splitting=False, - trimming_threshold=trimming_threshold, + ps_processor_config=ps_processor_config, ) # synchronize the sample splitting @@ -88,7 +90,7 @@ def dml_irm_fixture(generate_data_irm, learner, score, normalize_ipw, trimming_t all_smpls, score, normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) np.random.seed(3141) @@ -101,7 +103,7 @@ def dml_irm_fixture(generate_data_irm, learner, score, normalize_ipw, trimming_t score=score, normalize_ipw=normalize_ipw, draw_sample_splitting=False, - 
trimming_threshold=trimming_threshold, + ps_processor_config=ps_processor_config, ) # synchronize the sample splitting @@ -235,8 +237,8 @@ def test_dml_irm_cate_gate(cov_type): # First stage estimation ml_g = RandomForestRegressor(n_estimators=10) ml_m = RandomForestClassifier(n_estimators=10) - - dml_irm_obj = dml.DoubleMLIRM(obj_dml_data, ml_m=ml_m, ml_g=ml_g, trimming_threshold=0.05, n_folds=5) + ps_processor_config = PSProcessorConfig(clipping_threshold=0.05) + dml_irm_obj = dml.DoubleMLIRM(obj_dml_data, ml_m=ml_m, ml_g=ml_g, ps_processor_config=ps_processor_config, n_folds=5) dml_irm_obj.fit() # create a random basis @@ -279,7 +281,12 @@ def dml_irm_weights_fixture(n_rep): # collect data np.random.seed(42) obj_dml_data = make_irm_data(n_obs=n, dim_x=2) - kwargs = {"trimming_threshold": 0.05, "n_folds": 5, "n_rep": n_rep, "draw_sample_splitting": False} + kwargs = { + "ps_processor_config": PSProcessorConfig(clipping_threshold=0.05), + "n_folds": 5, + "n_rep": n_rep, + "draw_sample_splitting": False, + } smpls = DoubleMLResampling(n_folds=5, n_rep=n_rep, n_obs=n, stratify=obj_dml_data.d).split_samples() diff --git a/doubleml/irm/tests/test_irm_classifier.py b/doubleml/irm/tests/test_irm_classifier.py index 9389439d..afe8ca65 100644 --- a/doubleml/irm/tests/test_irm_classifier.py +++ b/doubleml/irm/tests/test_irm_classifier.py @@ -7,6 +7,7 @@ from sklearn.linear_model import LogisticRegression import doubleml as dml +from doubleml.utils.propensity_score_processing import PSProcessorConfig from ...tests._utils import draw_smpls from ._utils_irm_manual import boot_irm, fit_irm @@ -37,12 +38,12 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.01, 0.05]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @pytest.fixture(scope="module") -def dml_irm_classifier_fixture(generate_data_irm_binary, learner, score, normalize_ipw, trimming_threshold): +def dml_irm_classifier_fixture(generate_data_irm_binary, 
learner, score, normalize_ipw, clipping_threshold): boot_methods = ["normal"] n_folds = 2 n_rep_boot = 499 @@ -65,7 +66,7 @@ def dml_irm_classifier_fixture(generate_data_irm_binary, learner, score, normali n_folds, score=score, normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + ps_processor_config=PSProcessorConfig(clipping_threshold=clipping_threshold), draw_sample_splitting=False, ) # synchronize the sample splitting @@ -82,7 +83,7 @@ def dml_irm_classifier_fixture(generate_data_irm_binary, learner, score, normali all_smpls, score, normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) res_dict = { diff --git a/doubleml/irm/tests/test_irm_external_predictions.py b/doubleml/irm/tests/test_irm_external_predictions.py index 5d0412d5..17bf44dc 100644 --- a/doubleml/irm/tests/test_irm_external_predictions.py +++ b/doubleml/irm/tests/test_irm_external_predictions.py @@ -64,7 +64,12 @@ def doubleml_irm_fixture(irm_score, n_rep, set_ml_m_ext, set_ml_g_ext): np.random.seed(3141) dml_irm_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": dml_irm.coef[0], "coef_ext": dml_irm_ext.coef[0]} + res_dict = { + "coef_normal": dml_irm.coef[0], + "coef_ext": dml_irm_ext.coef[0], + "se": dml_irm.se[0], + "se_ext": dml_irm_ext.se[0], + } return res_dict @@ -72,3 +77,8 @@ def doubleml_irm_fixture(irm_score, n_rep, set_ml_m_ext, set_ml_g_ext): @pytest.mark.ci def test_doubleml_irm_coef(doubleml_irm_fixture): assert math.isclose(doubleml_irm_fixture["coef_normal"], doubleml_irm_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4) + + +@pytest.mark.ci +def test_doubleml_irm_se(doubleml_irm_fixture): + assert math.isclose(doubleml_irm_fixture["se"], doubleml_irm_fixture["se_ext"], rel_tol=1e-9, abs_tol=1e-4) diff --git a/doubleml/irm/tests/test_irm_ps_processor.py b/doubleml/irm/tests/test_irm_ps_processor.py new file mode 100644 index 00000000..33ae66f4 --- /dev/null +++ 
b/doubleml/irm/tests/test_irm_ps_processor.py @@ -0,0 +1,61 @@ +import numpy as np +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +from doubleml import DoubleMLData, DoubleMLIRM +from doubleml.utils.propensity_score_processing import PSProcessorConfig + + +@pytest.mark.ci +@pytest.mark.parametrize( + "ps_config", + [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ], +) +def test_irm_ml_m_predictions_ps_processor(generate_data_irm, ps_config): + x, y, d = generate_data_irm + dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) + np.random.seed(3141) + dml_irm = DoubleMLIRM( + obj_dml_data=dml_data, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + ps_processor_config=ps_config, + n_rep=1, + ) + dml_irm.fit(store_predictions=True) + ml_m_preds = dml_irm.predictions["ml_m"][:, 0, 0] + # Just check that predictions are within [clipping_threshold, 1-clipping_threshold] + assert np.all(ml_m_preds >= ps_config.clipping_threshold) + assert np.all(ml_m_preds <= 1 - ps_config.clipping_threshold) + + +@pytest.mark.ci +def test_irm_ml_m_predictions_ps_processor_differences(generate_data_irm): + x, y, d = generate_data_irm + dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) + np.random.seed(3141) + configs = [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ] + preds = 
[] + for cfg in configs: + dml_irm = DoubleMLIRM( + obj_dml_data=dml_data, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + ps_processor_config=cfg, + n_rep=1, + ) + dml_irm.fit(store_predictions=True) + preds.append(dml_irm.predictions["ml_m"][:, 0, 0]) + # Check that at least two configurations yield different predictions (element-wise) + diffs = [not np.allclose(preds[i], preds[j], atol=1e-6) for i in range(len(preds)) for j in range(i + 1, len(preds))] + assert any(diffs) diff --git a/doubleml/irm/tests/test_irm_vs_apos.py b/doubleml/irm/tests/test_irm_vs_apos.py index a91c8c05..aab0e09e 100644 --- a/doubleml/irm/tests/test_irm_vs_apos.py +++ b/doubleml/irm/tests/test_irm_vs_apos.py @@ -8,6 +8,7 @@ import doubleml as dml from doubleml.utils._propensity_score import _propensity_score_adjustment +from doubleml.utils.propensity_score_processing import PSProcessorConfig @pytest.fixture( @@ -35,12 +36,12 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.2, 0.15]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @pytest.fixture(scope="module") -def dml_irm_apos_fixture(generate_data_irm, learner, n_rep, normalize_ipw, trimming_threshold): +def dml_irm_apos_fixture(generate_data_irm, learner, n_rep, normalize_ipw, clipping_threshold): # collect data (x, y, d) = generate_data_irm @@ -54,7 +55,7 @@ def dml_irm_apos_fixture(generate_data_irm, learner, n_rep, normalize_ipw, trimm kwargs = { "n_folds": n_folds, "n_rep": n_rep, - "trimming_threshold": trimming_threshold, + "ps_processor_config": PSProcessorConfig(clipping_threshold=clipping_threshold), "normalize_ipw": normalize_ipw, } @@ -159,7 +160,7 @@ def test_apos_vs_irm_sensitivity(dml_irm_apos_fixture): @pytest.fixture(scope="module") -def dml_irm_apos_weighted_fixture(generate_data_irm, learner, n_rep, normalize_ipw, trimming_threshold): +def dml_irm_apos_weighted_fixture(generate_data_irm, learner, n_rep, normalize_ipw, clipping_threshold): # 
collect data (x, y, d) = generate_data_irm @@ -173,7 +174,7 @@ def dml_irm_apos_weighted_fixture(generate_data_irm, learner, n_rep, normalize_i kwargs = { "n_folds": n_folds, "n_rep": n_rep, - "trimming_threshold": trimming_threshold, + "ps_processor_config": PSProcessorConfig(clipping_threshold=clipping_threshold), "normalize_ipw": normalize_ipw, } diff --git a/doubleml/irm/tests/test_irm_weighted_scores.py b/doubleml/irm/tests/test_irm_weighted_scores.py index 0592c3d3..56d841cc 100644 --- a/doubleml/irm/tests/test_irm_weighted_scores.py +++ b/doubleml/irm/tests/test_irm_weighted_scores.py @@ -6,6 +6,7 @@ import doubleml as dml from doubleml.utils._propensity_score import _normalize_ipw +from doubleml.utils.propensity_score_processing import PSProcessorConfig def old_score_elements(y, d, g_hat0, g_hat1, m_hat, score, normalize_ipw): @@ -65,12 +66,12 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.2, 0.15]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @pytest.fixture(scope="module") -def old_vs_weighted_score_fixture(generate_data_irm, learner, score, normalize_ipw, trimming_threshold): +def old_vs_weighted_score_fixture(generate_data_irm, learner, score, normalize_ipw, clipping_threshold): n_folds = 2 # collect data @@ -83,7 +84,13 @@ def old_vs_weighted_score_fixture(generate_data_irm, learner, score, normalize_i np.random.seed(3141) dml_irm_obj = dml.DoubleMLIRM( - obj_dml_data, ml_g, ml_m, n_folds, score=score, normalize_ipw=normalize_ipw, trimming_threshold=trimming_threshold + obj_dml_data, + ml_g, + ml_m, + n_folds, + score=score, + normalize_ipw=normalize_ipw, + ps_processor_config=PSProcessorConfig(clipping_threshold=clipping_threshold), ) dml_irm_obj.fit() diff --git a/doubleml/irm/tests/test_irm_with_missings.py b/doubleml/irm/tests/test_irm_with_missings.py index a6c30cae..838ea98a 100644 --- a/doubleml/irm/tests/test_irm_with_missings.py +++ 
b/doubleml/irm/tests/test_irm_with_missings.py @@ -9,6 +9,7 @@ from xgboost import XGBClassifier, XGBRegressor import doubleml as dml +from doubleml.utils.propensity_score_processing import PSProcessorConfig from ...tests._utils import draw_smpls from ._utils_irm_manual import boot_irm, fit_irm @@ -43,12 +44,12 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.01, 0.05]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @pytest.fixture(scope="module") -def dml_irm_w_missing_fixture(generate_data_irm_w_missings, learner_xgboost, score, normalize_ipw, trimming_threshold): +def dml_irm_w_missing_fixture(generate_data_irm_w_missings, learner_xgboost, score, normalize_ipw, clipping_threshold): boot_methods = ["normal"] n_folds = 2 n_rep_boot = 499 @@ -66,7 +67,13 @@ def dml_irm_w_missing_fixture(generate_data_irm_w_missings, learner_xgboost, sco np.random.seed(3141) obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d, force_all_x_finite="allow-nan") dml_irm_obj = dml.DoubleMLIRM( - obj_dml_data, ml_g, ml_m, n_folds, score=score, normalize_ipw=normalize_ipw, trimming_threshold=trimming_threshold + obj_dml_data, + ml_g, + ml_m, + n_folds, + score=score, + normalize_ipw=normalize_ipw, + ps_processor_config=PSProcessorConfig(clipping_threshold=clipping_threshold), ) # synchronize the sample splitting dml_irm_obj.set_sample_splitting(all_smpls=all_smpls) @@ -83,7 +90,7 @@ def dml_irm_w_missing_fixture(generate_data_irm_w_missings, learner_xgboost, sco all_smpls, score, normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) res_dict = { diff --git a/doubleml/irm/tests/test_lpq.py b/doubleml/irm/tests/test_lpq.py index 3e0049b8..2f90156e 100644 --- a/doubleml/irm/tests/test_lpq.py +++ b/doubleml/irm/tests/test_lpq.py @@ -41,7 +41,7 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.05]) -def trimming_threshold(request): +def 
clipping_threshold(request): return request.param @@ -51,7 +51,7 @@ def kde(request): @pytest.fixture(scope="module") -def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner, normalize_ipw, trimming_threshold, kde): +def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner, normalize_ipw, clipping_threshold, kde): n_folds = 3 # collect data @@ -73,7 +73,7 @@ def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner, n_folds=n_folds, n_rep=1, normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold), draw_sample_splitting=False, ) # synchronize the sample splitting @@ -94,7 +94,7 @@ def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner, normalize_ipw=normalize_ipw, kde=_default_kde, n_rep=1, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) else: dml_lpq_obj = dml.DoubleMLLPQ( @@ -107,7 +107,7 @@ def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner, n_rep=1, normalize_ipw=normalize_ipw, kde=kde, - trimming_threshold=trimming_threshold, + ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold), draw_sample_splitting=False, ) @@ -129,7 +129,7 @@ def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner, normalize_ipw=normalize_ipw, kde=kde, n_rep=1, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) res_dict = { diff --git a/doubleml/irm/tests/test_lpq_ps_processor.py b/doubleml/irm/tests/test_lpq_ps_processor.py new file mode 100644 index 00000000..acd539ab --- /dev/null +++ b/doubleml/irm/tests/test_lpq_ps_processor.py @@ -0,0 +1,65 @@ +import numpy as np +import pytest +from sklearn.linear_model import LogisticRegression + +import doubleml as dml +from doubleml.utils.propensity_score_processing import PSProcessorConfig + + 
+@pytest.mark.ci +@pytest.mark.parametrize( + "ps_config", + [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ], +) +def test_lpq_ml_m_predictions_ps_processor(generate_data_local_quantiles, ps_config): + x, y, d, z = generate_data_local_quantiles + dml_data = dml.DoubleMLData.from_arrays(x=x, y=y, d=d, z=z) + np.random.seed(3141) + dml_lpq = dml.DoubleMLLPQ( + obj_dml_data=dml_data, + ml_g=LogisticRegression(), + ml_m=LogisticRegression(), + treatment=1, + quantile=0.5, + ps_processor_config=ps_config, + n_rep=1, + ) + dml_lpq.fit(store_predictions=True) + ml_m_preds = dml_lpq.predictions["ml_m_z"][:, 0, 0] + # Just check that predictions are within [clipping_threshold, 1-clipping_threshold] + assert np.all(ml_m_preds >= ps_config.clipping_threshold) + assert np.all(ml_m_preds <= 1 - ps_config.clipping_threshold) + + +@pytest.mark.ci +def test_lpq_ml_m_predictions_ps_processor_differences(generate_data_local_quantiles): + x, y, d, z = generate_data_local_quantiles + dml_data = dml.DoubleMLData.from_arrays(x=x, y=y, d=d, z=z) + np.random.seed(3141) + configs = [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ] + preds = [] + for cfg in configs: + dml_lpq = dml.DoubleMLLPQ( + obj_dml_data=dml_data, + ml_g=LogisticRegression(), + ml_m=LogisticRegression(), + treatment=1, + quantile=0.5, + 
ps_processor_config=cfg, + n_rep=1, + ) + dml_lpq.fit(store_predictions=True) + preds.append(dml_lpq.predictions["ml_m_z"][:, 0, 0]) + # Check that at least two configurations yield different predictions (element-wise) + diffs = [not np.allclose(preds[i], preds[j], atol=1e-6) for i in range(len(preds)) for j in range(i + 1, len(preds))] + assert any(diffs) diff --git a/doubleml/irm/tests/test_lpq_tune.py b/doubleml/irm/tests/test_lpq_tune.py index c2b7d192..30c9b718 100644 --- a/doubleml/irm/tests/test_lpq_tune.py +++ b/doubleml/irm/tests/test_lpq_tune.py @@ -73,7 +73,7 @@ def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner, n_folds=n_folds, n_rep=1, normalize_ipw=normalize_ipw, - trimming_threshold=0.01, + ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=0.01), draw_sample_splitting=False, ) @@ -150,7 +150,7 @@ def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner, all_smpls=all_smpls, treatment=treatment, n_rep=1, - trimming_threshold=0.01, + clipping_threshold=0.01, normalize_ipw=normalize_ipw, m_z_params=m_z_params, m_d_z0_params=m_d_z0_params, diff --git a/doubleml/irm/tests/test_pq.py b/doubleml/irm/tests/test_pq.py index 62e69d53..b3505cf5 100644 --- a/doubleml/irm/tests/test_pq.py +++ b/doubleml/irm/tests/test_pq.py @@ -35,12 +35,12 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.01, 0.05]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @pytest.fixture(scope="module") -def dml_pq_fixture(generate_data_quantiles, treatment, quantile, learner, normalize_ipw, trimming_threshold): +def dml_pq_fixture(generate_data_quantiles, treatment, quantile, learner, normalize_ipw, clipping_threshold): n_folds = 3 # collect data @@ -59,7 +59,7 @@ def dml_pq_fixture(generate_data_quantiles, treatment, quantile, learner, normal quantile=quantile, n_folds=n_folds, n_rep=1, - trimming_threshold=trimming_threshold, + 
ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold), normalize_ipw=normalize_ipw, draw_sample_splitting=False, ) @@ -80,7 +80,7 @@ def dml_pq_fixture(generate_data_quantiles, treatment, quantile, learner, normal all_smpls, treatment, n_rep=1, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, normalize_ipw=normalize_ipw, ) diff --git a/doubleml/irm/tests/test_pq_ps_processor.py b/doubleml/irm/tests/test_pq_ps_processor.py new file mode 100644 index 00000000..c40786d7 --- /dev/null +++ b/doubleml/irm/tests/test_pq_ps_processor.py @@ -0,0 +1,65 @@ +import numpy as np +import pytest +from sklearn.linear_model import LogisticRegression + +from doubleml import DoubleMLData, DoubleMLPQ +from doubleml.utils.propensity_score_processing import PSProcessorConfig + + +@pytest.mark.ci +@pytest.mark.parametrize( + "ps_config", + [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ], +) +def test_pq_ml_m_predictions_ps_processor(generate_data_quantiles, ps_config): + x, y, d = generate_data_quantiles + dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) + np.random.seed(3141) + dml_pq = DoubleMLPQ( + obj_dml_data=dml_data, + ml_g=LogisticRegression(), + ml_m=LogisticRegression(), + treatment=1, + quantile=0.5, + ps_processor_config=ps_config, + n_rep=1, + ) + dml_pq.fit(store_predictions=True) + ml_m_preds = dml_pq.predictions["ml_m"][:, 0, 0] + # Just check that predictions are within [clipping_threshold, 1-clipping_threshold] + assert np.all(ml_m_preds >= ps_config.clipping_threshold) + assert np.all(ml_m_preds <= 1 - ps_config.clipping_threshold) + + +@pytest.mark.ci +def 
test_pq_ml_m_predictions_ps_processor_differences(generate_data_quantiles): + x, y, d = generate_data_quantiles + dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) + np.random.seed(3141) + configs = [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ] + preds = [] + for cfg in configs: + dml_pq = DoubleMLPQ( + obj_dml_data=dml_data, + ml_g=LogisticRegression(), + ml_m=LogisticRegression(), + treatment=1, + quantile=0.5, + ps_processor_config=cfg, + n_rep=1, + ) + dml_pq.fit(store_predictions=True) + preds.append(dml_pq.predictions["ml_m"][:, 0, 0]) + # Check that at least two configurations yield different predictions (element-wise) + diffs = [not np.allclose(preds[i], preds[j], atol=1e-6) for i in range(len(preds)) for j in range(i + 1, len(preds))] + assert any(diffs) diff --git a/doubleml/irm/tests/test_pq_tune.py b/doubleml/irm/tests/test_pq_tune.py index 815c17d4..47e7bc18 100644 --- a/doubleml/irm/tests/test_pq_tune.py +++ b/doubleml/irm/tests/test_pq_tune.py @@ -73,7 +73,7 @@ def dml_pq_fixture(generate_data_quantiles, treatment, quantile, learner_g, lear n_folds=n_folds, n_rep=1, normalize_ipw=normalize_ipw, - trimming_threshold=0.01, + ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=0.01), draw_sample_splitting=False, ) @@ -132,7 +132,7 @@ def dml_pq_fixture(generate_data_quantiles, treatment, quantile, learner_g, lear all_smpls=all_smpls, treatment=treatment, n_rep=1, - trimming_threshold=0.01, + clipping_threshold=0.01, normalize_ipw=normalize_ipw, g_params=g_params, m_params=m_params, diff --git a/doubleml/irm/tests/test_qte_exceptions.py b/doubleml/irm/tests/test_qte_exceptions.py index 
f4e95110..75f08f6b 100644 --- a/doubleml/irm/tests/test_qte_exceptions.py +++ b/doubleml/irm/tests/test_qte_exceptions.py @@ -56,25 +56,6 @@ def test_exception_score(): _ = DoubleMLQTE(dml_data_irm, LogisticRegression(), LogisticRegression(), score=2) -@pytest.mark.ci -def test_exception_trimming_rule(): - msg = "Invalid trimming_rule discard. Valid trimming_rule truncate." - with pytest.raises(ValueError, match=msg): - _ = DoubleMLQTE(dml_data_irm, LogisticRegression(), LogisticRegression(), trimming_rule="discard") - - msg = "trimming_threshold has to be a float. Object of type passed." - with pytest.raises(TypeError, match=msg): - _ = DoubleMLQTE( - dml_data_irm, LogisticRegression(), LogisticRegression(), trimming_rule="truncate", trimming_threshold="0.1" - ) - - msg = "Invalid trimming_threshold 0.6. trimming_threshold has to be between 0 and 0.5." - with pytest.raises(ValueError, match=msg): - _ = DoubleMLQTE( - dml_data_irm, LogisticRegression(), LogisticRegression(), trimming_rule="truncate", trimming_threshold=0.6 - ) - - @pytest.mark.ci def test_exception_quantiles(): msg = r"Quantiles have be between 0 or 1. Quantiles \[0.2 2. \] passed." 
diff --git a/doubleml/irm/tests/test_ssm.py b/doubleml/irm/tests/test_ssm.py index c561d9fe..735c6471 100644 --- a/doubleml/irm/tests/test_ssm.py +++ b/doubleml/irm/tests/test_ssm.py @@ -26,14 +26,14 @@ def normalize_ipw(request): return request.param -@pytest.fixture(scope="module", params=[0.01]) -def trimming_threshold(request): +@pytest.fixture(scope="module", params=[0.01, 0.05]) +def clipping_threshold(request): return request.param @pytest.fixture(scope="module") def dml_selection_fixture( - generate_data_selection_mar, generate_data_selection_nonignorable, learner, score, trimming_threshold, normalize_ipw + generate_data_selection_mar, generate_data_selection_nonignorable, learner, score, clipping_threshold, normalize_ipw ): n_folds = 3 @@ -55,11 +55,27 @@ def dml_selection_fixture( np.random.seed(42) if score == "missing-at-random": obj_dml_data = dml.DoubleMLSSMData.from_arrays(x, y, d, z=None, s=s) - dml_sel_obj = dml.DoubleMLSSM(obj_dml_data, ml_g, ml_pi, ml_m, n_folds=n_folds, score=score) + dml_sel_obj = dml.DoubleMLSSM( + obj_dml_data, + ml_g, + ml_pi, + ml_m, + n_folds=n_folds, + score=score, + ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold), + ) else: assert score == "nonignorable" obj_dml_data = dml.DoubleMLSSMData.from_arrays(x, y, d, z=z, s=s) - dml_sel_obj = dml.DoubleMLSSM(obj_dml_data, ml_g, ml_pi, ml_m, n_folds=n_folds, score=score) + dml_sel_obj = dml.DoubleMLSSM( + obj_dml_data, + ml_g, + ml_pi, + ml_m, + n_folds=n_folds, + score=score, + ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold), + ) np.random.seed(42) dml_sel_obj.set_sample_splitting(all_smpls=all_smpls) @@ -78,7 +94,7 @@ def dml_selection_fixture( all_smpls, score, trimming_rule="truncate", - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, normalize_ipw=normalize_ipw, ) diff --git a/doubleml/irm/tests/test_ssm_exceptions.py b/doubleml/irm/tests/test_ssm_exceptions.py index 
6df76908..039ed921 100644 --- a/doubleml/irm/tests/test_ssm_exceptions.py +++ b/doubleml/irm/tests/test_ssm_exceptions.py @@ -7,6 +7,7 @@ from doubleml import DoubleMLSSM from doubleml.data.base_data import DoubleMLBaseData from doubleml.irm.datasets import make_ssm_data +from doubleml.utils.propensity_score_processing import PSProcessorConfig np.random.seed(3141) n = 100 @@ -62,22 +63,6 @@ def test_ssm_exception_scores(): _ = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m, score=0) -@pytest.mark.ci -def test_ssm_exception_trimming_rule(): - msg = "Invalid trimming_rule discard. Valid trimming_rule truncate." - with pytest.raises(ValueError, match=msg): - _ = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m, trimming_rule="discard") - - # check the trimming_threshold exceptions - msg = "trimming_threshold has to be a float. Object of type passed." - with pytest.raises(TypeError, match=msg): - _ = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m, trimming_rule="truncate", trimming_threshold="0.1") - - msg = "Invalid trimming_threshold 0.6. trimming_threshold has to be between 0 and 0.5." - with pytest.raises(ValueError, match=msg): - _ = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m, trimming_rule="truncate", trimming_threshold=0.6) - - @pytest.mark.ci def test_ssm_exception_ipw_normalization(): msg = "Normalization indicator has to be boolean. Object of type passed." 
@@ -200,7 +185,7 @@ def set_params(self): pass -class _DummyNoClassifier(_DummyNoGetParams): +class _DummyNoClassifier(_DummyNoGetParams, BaseEstimator): def get_params(self): pass @@ -307,7 +292,7 @@ def test_double_ml_exception_evaluate_learner(): ml_g=Lasso(), ml_pi=LogisticRegression(), ml_m=LogisticRegression(), - trimming_threshold=0.05, + ps_processor_config=PSProcessorConfig(clipping_threshold=0.05), n_folds=5, score="missing-at-random", ) diff --git a/doubleml/irm/tests/test_ssm_ps_processor.py b/doubleml/irm/tests/test_ssm_ps_processor.py new file mode 100644 index 00000000..c0627699 --- /dev/null +++ b/doubleml/irm/tests/test_ssm_ps_processor.py @@ -0,0 +1,63 @@ +import numpy as np +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +import doubleml as dml +from doubleml.utils.propensity_score_processing import PSProcessorConfig + + +@pytest.mark.ci +@pytest.mark.parametrize( + "ps_config", + [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ], +) +def test_ssm_ml_m_predictions_ps_processor(generate_data_selection_mar, ps_config): + x, y, d, _, s = generate_data_selection_mar + dml_data = dml.DoubleMLSSMData.from_arrays(x, y, d, z=None, s=s) + np.random.seed(3141) + dml_ssm = dml.DoubleMLSSM( + obj_dml_data=dml_data, + ml_g=LinearRegression(), + ml_pi=LogisticRegression(), + ml_m=LogisticRegression(), + ps_processor_config=ps_config, + n_rep=1, + ) + dml_ssm.fit(store_predictions=True) + ml_m_preds = dml_ssm.predictions["ml_m"][:, 0, 0] + # Just check that predictions are within [clipping_threshold, 1-clipping_threshold] + assert np.all(ml_m_preds >= ps_config.clipping_threshold) + assert 
np.all(ml_m_preds <= 1 - ps_config.clipping_threshold) + + +@pytest.mark.ci +def test_ssm_ml_m_predictions_ps_processor_differences(generate_data_selection_mar): + x, y, d, _, s = generate_data_selection_mar + dml_data = dml.DoubleMLSSMData.from_arrays(x, y, d, z=None, s=s) + np.random.seed(3141) + configs = [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ] + preds = [] + for cfg in configs: + dml_ssm = dml.DoubleMLSSM( + obj_dml_data=dml_data, + ml_g=LinearRegression(), + ml_pi=LogisticRegression(), + ml_m=LogisticRegression(), + ps_processor_config=cfg, + n_rep=1, + ) + dml_ssm.fit(store_predictions=True) + preds.append(dml_ssm.predictions["ml_m"][:, 0, 0]) + # Check that at least two configurations yield different predictions (element-wise) + diffs = [not np.allclose(preds[i], preds[j], atol=1e-6) for i in range(len(preds)) for j in range(i + 1, len(preds))] + assert any(diffs) diff --git a/doubleml/rdd/rdd.py b/doubleml/rdd/rdd.py index da54d61c..fffa9a0a 100644 --- a/doubleml/rdd/rdd.py +++ b/doubleml/rdd/rdd.py @@ -91,16 +91,6 @@ class RDFlex: >>> ml_g = RandomForestRegressor() >>> ml_m = RandomForestClassifier() >>> rdflex_obj = dml.rdd.RDFlex(obj_dml_data, ml_g, ml_m, fuzzy=True) - >>> print(rdflex_obj.fit()) - Method Coef. S.E. 
t-stat P>|t| 95% CI - ------------------------------------------------------------------------- - Conventional 0.939 0.225 4.168 3.071e-05 [0.498, 1.381] - Robust - - 3.589 3.316e-04 [0.416, 1.417] - Design Type: Fuzzy - Cutoff: 0 - First Stage Kernel: triangular - Final Bandwidth: [0.74754257] - """ def __init__( diff --git a/doubleml/tests/_utils.py b/doubleml/tests/_utils.py index 907d03d1..60416246 100644 --- a/doubleml/tests/_utils.py +++ b/doubleml/tests/_utils.py @@ -44,7 +44,7 @@ def fit_predict(y, x, ml_model, params, smpls, train_cond=None): return y_hat -def fit_predict_proba(y, x, ml_model, params, smpls, trimming_threshold=0, train_cond=None): +def fit_predict_proba(y, x, ml_model, params, smpls, clipping_threshold=0, train_cond=None): y_hat = [] for idx, (train_index, test_index) in enumerate(smpls): if params is not None: @@ -55,9 +55,9 @@ def fit_predict_proba(y, x, ml_model, params, smpls, trimming_threshold=0, train train_index_cond = np.intersect1d(train_cond, train_index) preds = ml_model.fit(x[train_index_cond], y[train_index_cond]).predict_proba(x[test_index])[:, 1] - if trimming_threshold > 0: - preds[preds < trimming_threshold] = trimming_threshold - preds[preds > 1 - trimming_threshold] = 1 - trimming_threshold + if clipping_threshold > 0: + preds[preds < clipping_threshold] = clipping_threshold + preds[preds > 1 - clipping_threshold] = 1 - clipping_threshold y_hat.append(preds) return y_hat diff --git a/doubleml/tests/test_exceptions.py b/doubleml/tests/test_exceptions.py index e725a562..94b5f824 100644 --- a/doubleml/tests/test_exceptions.py +++ b/doubleml/tests/test_exceptions.py @@ -24,6 +24,7 @@ from doubleml.did.datasets import make_did_SZ2020 from doubleml.irm.datasets import make_iivm_data, make_irm_data from doubleml.plm.datasets import make_pliv_CHS2015, make_pliv_multiway_cluster_CKMS2021, make_plr_CCDDHNR2018 +from doubleml.utils import PSProcessorConfig from ._utils import DummyDataClass @@ -378,114 +379,6 @@ def 
test_doubleml_exception_scores(): _ = DoubleMLDIDCS(dml_data_did_cs, Lasso(), LogisticRegression(), score=2) -@pytest.mark.ci -def test_doubleml_exception_trimming_rule(): - msg = "Invalid trimming_rule discard. Valid trimming_rule truncate." - with pytest.raises(ValueError, match=msg): - _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_rule="discard") - with pytest.raises(ValueError, match=msg): - _ = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression(), trimming_rule="discard") - with pytest.raises(ValueError, match=msg): - _ = DoubleMLPQ(dml_data_irm, LogisticRegression(), LogisticRegression(), treatment=1, trimming_rule="discard") - with pytest.raises(ValueError, match=msg): - _ = DoubleMLLPQ(dml_data_iivm, LogisticRegression(), LogisticRegression(), treatment=1, trimming_rule="discard") - with pytest.raises(ValueError, match=msg): - _ = DoubleMLCVAR(dml_data_irm, LogisticRegression(), LogisticRegression(), treatment=1, trimming_rule="discard") - with pytest.raises(ValueError, match=msg): - _ = DoubleMLQTE(dml_data_irm, LogisticRegression(), LogisticRegression(), trimming_rule="discard") - with pytest.raises(ValueError, match=msg): - _ = DoubleMLDID(dml_data_did, Lasso(), LogisticRegression(), trimming_rule="discard") - with pytest.raises(ValueError, match=msg): - _ = DoubleMLDIDCS(dml_data_did_cs, Lasso(), LogisticRegression(), trimming_rule="discard") - - # check the trimming_threshold exceptions - msg = "trimming_threshold has to be a float. Object of type passed." 
- with pytest.raises(TypeError, match=msg): - _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_rule="truncate", trimming_threshold="0.1") - with pytest.raises(TypeError, match=msg): - _ = DoubleMLIIVM( - dml_data_iivm, - Lasso(), - LogisticRegression(), - LogisticRegression(), - trimming_rule="truncate", - trimming_threshold="0.1", - ) - with pytest.raises(TypeError, match=msg): - _ = DoubleMLPQ( - dml_data_irm, - LogisticRegression(), - LogisticRegression(), - treatment=1, - trimming_rule="truncate", - trimming_threshold="0.1", - ) - with pytest.raises(TypeError, match=msg): - _ = DoubleMLLPQ( - dml_data_iivm, - LogisticRegression(), - LogisticRegression(), - treatment=1, - trimming_rule="truncate", - trimming_threshold="0.1", - ) - with pytest.raises(TypeError, match=msg): - _ = DoubleMLCVAR( - dml_data_irm, Lasso(), LogisticRegression(), treatment=1, trimming_rule="truncate", trimming_threshold="0.1" - ) - with pytest.raises(TypeError, match=msg): - _ = DoubleMLQTE( - dml_data_irm, LogisticRegression(), LogisticRegression(), trimming_rule="truncate", trimming_threshold="0.1" - ) - with pytest.raises(TypeError, match=msg): - _ = DoubleMLDID(dml_data_did, Lasso(), LogisticRegression(), trimming_rule="truncate", trimming_threshold="0.1") - with pytest.raises(TypeError, match=msg): - _ = DoubleMLDIDCS(dml_data_did_cs, Lasso(), LogisticRegression(), trimming_rule="truncate", trimming_threshold="0.1") - - msg = "Invalid trimming_threshold 0.6. trimming_threshold has to be between 0 and 0.5." 
- with pytest.raises(ValueError, match=msg): - _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_rule="truncate", trimming_threshold=0.6) - with pytest.raises(ValueError, match=msg): - _ = DoubleMLIIVM( - dml_data_iivm, - Lasso(), - LogisticRegression(), - LogisticRegression(), - trimming_rule="truncate", - trimming_threshold=0.6, - ) - with pytest.raises(ValueError, match=msg): - _ = DoubleMLPQ( - dml_data_irm, - LogisticRegression(), - LogisticRegression(), - treatment=1, - trimming_rule="truncate", - trimming_threshold=0.6, - ) - with pytest.raises(ValueError, match=msg): - _ = DoubleMLLPQ( - dml_data_iivm, - LogisticRegression(), - LogisticRegression(), - treatment=1, - trimming_rule="truncate", - trimming_threshold=0.6, - ) - with pytest.raises(ValueError, match=msg): - _ = DoubleMLCVAR( - dml_data_irm, Lasso(), LogisticRegression(), treatment=1, trimming_rule="truncate", trimming_threshold=0.6 - ) - with pytest.raises(ValueError, match=msg): - _ = DoubleMLQTE( - dml_data_irm, LogisticRegression(), LogisticRegression(), trimming_rule="truncate", trimming_threshold=0.6 - ) - with pytest.raises(ValueError, match=msg): - _ = DoubleMLDID(dml_data_did, Lasso(), LogisticRegression(), trimming_rule="truncate", trimming_threshold=0.6) - with pytest.raises(ValueError, match=msg): - _ = DoubleMLDIDCS(dml_data_did_cs, Lasso(), LogisticRegression(), trimming_rule="truncate", trimming_threshold=0.6) - - @pytest.mark.ci def test_doubleml_exception_weights(): msg = "weights must be a numpy array or dictionary. weights of type was passed." 
@@ -961,9 +854,9 @@ def set_params(self): pass -class _DummyNoClassifier(_DummyNoGetParams): - def get_params(self): - pass +class _DummyNoClassifier(_DummyNoGetParams, BaseEstimator): + def get_params(self, deep=True): + return {} def predict_proba(self): pass @@ -1063,28 +956,25 @@ def test_doubleml_exception_learner(): # construct a classifier which is not identifiable as classifier via is_classifier by sklearn # it then predicts labels and therefore an exception will be thrown log_reg = LogisticRegressionManipulatedPredict() - # TODO(0.11) can be removed if the sklearn dependency is bumped to 1.6.0 - log_reg._estimator_type = None - msg = ( + msg_warn = ( r"Learner provided for ml_m is probably invalid: LogisticRegressionManipulatedPredict\(\) is \(probably\) " "neither a regressor nor a classifier. Method predict is used for prediction." ) - with pytest.warns(UserWarning, match=msg): + with pytest.warns(UserWarning, match=msg_warn): dml_plr_hidden_classifier = DoubleMLPLR(dml_data_irm, Lasso(), log_reg) msg = ( r"For the binary variable d, predictions obtained with the ml_m learner LogisticRegressionManipulatedPredict\(\) " "are also observed to be binary with values 0 and 1. Make sure that for classifiers probabilities and not " "labels are predicted." 
) - with pytest.raises(ValueError, match=msg): - dml_plr_hidden_classifier.fit() + with pytest.warns(UserWarning, match=msg_warn): + with pytest.raises(ValueError, match=msg): + dml_plr_hidden_classifier.fit() # construct a classifier which is not identifiable as classifier via is_classifier by sklearn # it then predicts labels and therefore an exception will be thrown # whether predict() or predict_proba() is being called can also be manipulated via the unrelated max_iter variable log_reg = LogisticRegressionManipulatedPredict() - # TODO(0.11) can be removed if the sklearn dependency is bumped to 1.6.0 - log_reg._estimator_type = None msg = ( r"Learner provided for ml_g is probably invalid: LogisticRegressionManipulatedPredict\(\) is \(probably\) " "neither a regressor nor a classifier. Method predict is used for prediction." @@ -1151,7 +1041,12 @@ def test_doubleml_sensitivity_not_yet_implemented(): @pytest.mark.ci def test_doubleml_sensitivity_inputs(): - dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_threshold=0.1) + dml_irm = DoubleMLIRM( + dml_data_irm, + Lasso(), + LogisticRegression(), + ps_processor_config=PSProcessorConfig(clipping_threshold=0.1), + ) dml_irm.fit() # test cf_y @@ -1231,7 +1126,9 @@ def test_doubleml_sensitivity_inputs(): def test_doubleml_sensitivity_reestimation_warning(): - dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_threshold=0.1) + dml_irm = DoubleMLIRM( + dml_data_irm, Lasso(), LogisticRegression(), ps_processor_config=PSProcessorConfig(clipping_threshold=0.1) + ) dml_irm.fit() dml_irm.sensitivity_elements["nu2"] = -1.0 * dml_irm.sensitivity_elements["nu2"] @@ -1242,7 +1139,9 @@ def test_doubleml_sensitivity_reestimation_warning(): def test_doubleml_sensitivity_summary(): - dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_threshold=0.1) + dml_irm = DoubleMLIRM( + dml_data_irm, Lasso(), LogisticRegression(), 
ps_processor_config=PSProcessorConfig(clipping_threshold=0.1) + ) msg = r"Apply sensitivity_analysis\(\) before sensitivity_summary." with pytest.raises(ValueError, match=msg): _ = dml_irm.sensitivity_summary @@ -1250,7 +1149,9 @@ def test_doubleml_sensitivity_summary(): @pytest.mark.ci def test_doubleml_sensitivity_benchmark(): - dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_threshold=0.1) + dml_irm = DoubleMLIRM( + dml_data_irm, Lasso(), LogisticRegression(), ps_processor_config=PSProcessorConfig(clipping_threshold=0.1) + ) dml_irm.fit() # test input @@ -1272,7 +1173,12 @@ def test_doubleml_sensitivity_benchmark(): @pytest.mark.ci def test_doubleml_sensitivity_plot_input(): - dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_threshold=0.1) + dml_irm = DoubleMLIRM( + dml_data_irm, + Lasso(), + LogisticRegression(), + ps_processor_config=PSProcessorConfig(clipping_threshold=0.1), + ) dml_irm.fit() msg = r"Apply sensitivity_analysis\(\) to include senario in sensitivity_plot. " @@ -1411,7 +1317,9 @@ def test_doubleml_warning_blp(): @pytest.mark.ci def test_doubleml_exception_gate(): - dml_irm_obj = DoubleMLIRM(dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5) + dml_irm_obj = DoubleMLIRM( + dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), ps_processor_config=PSProcessorConfig(clipping_threshold=0.1) + ) dml_irm_obj.fit() msg = "Groups must be of DataFrame type. Groups of type was passed." 
@@ -1426,7 +1334,12 @@ def test_doubleml_exception_gate(): dml_irm_obj.gate(groups=groups) dml_irm_obj = DoubleMLIRM( - dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATTE" + dml_data_irm, + ml_g=Lasso(), + ml_m=LogisticRegression(), + ps_processor_config=PSProcessorConfig(clipping_threshold=0.1), + n_folds=5, + score="ATTE", ) dml_irm_obj.fit() groups = pd.DataFrame(np.random.choice([True, False], size=dml_data_irm.n_obs)) @@ -1435,7 +1348,13 @@ def test_doubleml_exception_gate(): dml_irm_obj.gate(groups=groups) dml_irm_obj = DoubleMLIRM( - dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATE", n_rep=2 + dml_data_irm, + ml_g=Lasso(), + ml_m=LogisticRegression(), + ps_processor_config=PSProcessorConfig(clipping_threshold=0.1), + n_folds=5, + score="ATE", + n_rep=2, ) dml_irm_obj.fit() @@ -1447,7 +1366,12 @@ def test_doubleml_exception_gate(): @pytest.mark.ci def test_doubleml_exception_cate(): dml_irm_obj = DoubleMLIRM( - dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATTE" + dml_data_irm, + ml_g=Lasso(), + ml_m=LogisticRegression(), + ps_processor_config=PSProcessorConfig(clipping_threshold=0.05), + n_folds=5, + score="ATTE", ) dml_irm_obj.fit() @@ -1456,7 +1380,13 @@ def test_doubleml_exception_cate(): dml_irm_obj.cate(basis=2) dml_irm_obj = DoubleMLIRM( - dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATE", n_rep=2 + dml_data_irm, + ml_g=Lasso(), + ml_m=LogisticRegression(), + ps_processor_config=PSProcessorConfig(clipping_threshold=0.05), + n_folds=5, + score="ATE", + n_rep=2, ) dml_irm_obj.fit() msg = "Only implemented for one repetition. Number of repetitions is 2." 
@@ -1504,7 +1434,12 @@ def test_doubleml_exception_plr_gate(): @pytest.mark.ci def test_double_ml_exception_evaluate_learner(): dml_irm_obj = DoubleMLIRM( - dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATTE" + dml_data_irm, + ml_g=Lasso(), + ml_m=LogisticRegression(), + ps_processor_config=PSProcessorConfig(clipping_threshold=0.05), + n_folds=5, + score="ATTE", ) msg = r"Apply fit\(\) before evaluate_learners\(\)." @@ -1532,7 +1467,13 @@ def eval_fct(y_pred, y_true): @pytest.mark.ci def test_doubleml_exception_policytree(): - dml_irm_obj = DoubleMLIRM(dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5) + dml_irm_obj = DoubleMLIRM( + dml_data_irm, + ml_g=Lasso(), + ml_m=LogisticRegression(), + ps_processor_config=PSProcessorConfig(clipping_threshold=0.05), + n_folds=5, + ) dml_irm_obj.fit() msg = "Covariates must be of DataFrame type. Covariates of type was passed." @@ -1546,7 +1487,12 @@ def test_doubleml_exception_policytree(): dml_irm_obj.policy_tree(features=pd.DataFrame(np.random.normal(0, 1, size=(dml_data_irm.n_obs, 3))), depth=0.1) dml_irm_obj = DoubleMLIRM( - dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATTE" + dml_data_irm, + ml_g=Lasso(), + ml_m=LogisticRegression(), + ps_processor_config=PSProcessorConfig(clipping_threshold=0.05), + n_folds=5, + score="ATTE", ) dml_irm_obj.fit() @@ -1555,7 +1501,13 @@ def test_doubleml_exception_policytree(): dml_irm_obj.policy_tree(features=2, depth=1) dml_irm_obj = DoubleMLIRM( - dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATE", n_rep=2 + dml_data_irm, + ml_g=Lasso(), + ml_m=LogisticRegression(), + ps_processor_config=PSProcessorConfig(clipping_threshold=0.05), + n_folds=5, + score="ATE", + n_rep=2, ) dml_irm_obj.fit() msg = "Only implemented for one repetition. Number of repetitions is 2." 
@@ -1566,7 +1518,13 @@ def test_doubleml_exception_policytree(): @pytest.mark.ci def test_double_ml_external_predictions(): dml_irm_obj = DoubleMLIRM( - dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATE", n_rep=2 + dml_data_irm, + ml_g=Lasso(), + ml_m=LogisticRegression(), + ps_processor_config=PSProcessorConfig(clipping_threshold=0.05), + n_folds=5, + score="ATE", + n_rep=2, ) msg = "external_predictions must be a dictionary. ml_m of type was passed." @@ -1574,7 +1532,13 @@ def test_double_ml_external_predictions(): dml_irm_obj.fit(external_predictions="ml_m") dml_irm_obj = DoubleMLIRM( - dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATE", n_rep=1 + dml_data_irm, + ml_g=Lasso(), + ml_m=LogisticRegression(), + ps_processor_config=PSProcessorConfig(clipping_threshold=0.05), + n_folds=5, + score="ATE", + n_rep=1, ) predictions = {"d": "test", "d_f": "test"} diff --git a/doubleml/tests/test_model_defaults.py b/doubleml/tests/test_model_defaults.py index b04117eb..14650f39 100644 --- a/doubleml/tests/test_model_defaults.py +++ b/doubleml/tests/test_model_defaults.py @@ -92,8 +92,8 @@ def test_irm_defaults(): _fit_bootstrap(dml_irm) _assert_resampling_default_settings(dml_irm) assert dml_irm.score == "ATE" - assert dml_irm.trimming_rule == "truncate" - assert dml_irm.trimming_threshold == 1e-2 + assert isinstance(dml_irm.ps_processor_config, dml.utils.PSProcessorConfig) + assert isinstance(dml_irm.ps_processor, dml.utils.PSProcessor) assert not dml_irm.normalize_ipw assert set(dml_irm.weights.keys()) == set(["weights"]) assert np.array_equal(dml_irm.weights["weights"], np.ones((dml_irm._dml_data.n_obs,))) @@ -106,8 +106,8 @@ def test_iivm_defaults(): _assert_resampling_default_settings(dml_iivm) assert dml_iivm.score == "LATE" assert dml_iivm.subgroups == {"always_takers": True, "never_takers": True} - assert dml_iivm.trimming_rule == "truncate" - assert 
dml_iivm.trimming_threshold == 1e-2 + assert isinstance(dml_iivm.ps_processor_config, dml.utils.PSProcessorConfig) + assert isinstance(dml_iivm.ps_processor, dml.utils.PSProcessor) assert not dml_iivm.normalize_ipw @@ -119,8 +119,8 @@ def test_cvar_defaults(): assert dml_cvar.quantile == 0.5 assert dml_cvar.treatment == 1 assert dml_cvar.score == "CVaR" - assert dml_cvar.trimming_rule == "truncate" - assert dml_cvar.trimming_threshold == 1e-2 + assert isinstance(dml_cvar.ps_processor_config, dml.utils.PSProcessorConfig) + assert isinstance(dml_cvar.ps_processor, dml.utils.PSProcessor) @pytest.mark.ci @@ -131,8 +131,8 @@ def test_pq_defaults(): assert dml_pq.quantile == 0.5 assert dml_pq.treatment == 1 assert dml_pq.score == "PQ" - assert dml_pq.trimming_rule == "truncate" - assert dml_pq.trimming_threshold == 1e-2 + assert isinstance(dml_pq.ps_processor_config, dml.utils.PSProcessorConfig) + assert isinstance(dml_pq.ps_processor, dml.utils.PSProcessor) assert dml_pq.normalize_ipw @@ -144,8 +144,8 @@ def test_lpq_defaults(): assert dml_lpq.quantile == 0.5 assert dml_lpq.treatment == 1 assert dml_lpq.score == "LPQ" - assert dml_lpq.trimming_rule == "truncate" - assert dml_lpq.trimming_threshold == 1e-2 + assert isinstance(dml_lpq.ps_processor_config, dml.utils.PSProcessorConfig) + assert isinstance(dml_lpq.ps_processor, dml.utils.PSProcessor) assert dml_lpq.normalize_ipw @@ -159,8 +159,8 @@ def test_qte_defaults(): # not fix since its a differen object added in future versions _assert_resampling_default_settings(dml_qte) assert dml_qte.quantiles == 0.5 assert dml_qte.score == "PQ" - assert dml_qte.trimming_rule == "truncate" - assert dml_qte.trimming_threshold == 1e-2 + assert isinstance(dml_qte.ps_processor_config, dml.utils.PSProcessorConfig) + assert isinstance(dml_qte.ps_processor, dml.utils.PSProcessor) assert dml_qte.normalize_ipw @@ -171,8 +171,7 @@ def test_did_defaults(): _assert_resampling_default_settings(dml_did) assert dml_did.score == "observational" 
assert dml_did.in_sample_normalization - assert dml_did.trimming_rule == "truncate" - assert dml_did.trimming_threshold == 1e-2 + assert dml_did.clipping_threshold == 1e-2 @pytest.mark.ci @@ -182,8 +181,7 @@ def test_did_cs_defaults(): _assert_resampling_default_settings(dml_did_cs) assert dml_did.score == "observational" assert dml_did_cs.in_sample_normalization - assert dml_did_cs.trimming_rule == "truncate" - assert dml_did_cs.trimming_threshold == 1e-2 + assert dml_did_cs.clipping_threshold == 1e-2 @pytest.mark.ci @@ -192,8 +190,8 @@ def test_ssm_defaults(): _fit_bootstrap(dml_ssm) _assert_resampling_default_settings(dml_ssm) assert dml_ssm.score == "missing-at-random" - assert dml_ssm.trimming_rule == "truncate" - assert dml_ssm.trimming_threshold == 1e-2 + assert isinstance(dml_ssm.ps_processor_config, dml.utils.PSProcessorConfig) + assert isinstance(dml_ssm.ps_processor, dml.utils.PSProcessor) assert not dml_ssm.normalize_ipw @@ -203,8 +201,8 @@ def test_apo_defaults(): _fit_bootstrap(dml_apo) _assert_resampling_default_settings(dml_apo) assert dml_apo.score == "APO" - assert dml_apo.trimming_rule == "truncate" - assert dml_apo.trimming_threshold == 1e-2 + assert isinstance(dml_apo.ps_processor_config, dml.utils.PSProcessorConfig) + assert isinstance(dml_apo.ps_processor, dml.utils.PSProcessor) assert not dml_apo.normalize_ipw assert set(dml_apo.weights.keys()) == set(["weights"]) assert np.array_equal(dml_apo.weights["weights"], np.ones((dml_apo._dml_data.n_obs,))) @@ -216,10 +214,10 @@ def test_apos_defaults(): assert dml_apos.boot_method is None assert dml_apos.framework is None assert dml_apos.boot_t_stat is None - _fit_bootstrap(dml_qte) + _fit_bootstrap(dml_apos) assert dml_apos.score == "APO" - assert dml_apos.trimming_rule == "truncate" - assert dml_apos.trimming_threshold == 1e-2 + assert isinstance(dml_apos.ps_processor_config, dml.utils.PSProcessorConfig) + assert isinstance(dml_apos.ps_processor, dml.utils.PSProcessor) assert not 
dml_apos.normalize_ipw assert np.array_equal(dml_apos.weights, np.ones((dml_apos._dml_data.n_obs,))) diff --git a/doubleml/utils/__init__.py b/doubleml/utils/__init__.py index 386586ce..4f6269dd 100644 --- a/doubleml/utils/__init__.py +++ b/doubleml/utils/__init__.py @@ -7,6 +7,7 @@ from .gain_statistics import gain_statistics from .global_learner import GlobalClassifier, GlobalRegressor from .policytree import DoubleMLPolicyTree +from .propensity_score_processing import PSProcessor, PSProcessorConfig from .resampling import DoubleMLClusterResampling, DoubleMLResampling __all__ = [ @@ -19,4 +20,6 @@ "gain_statistics", "GlobalClassifier", "GlobalRegressor", + "PSProcessor", + "PSProcessorConfig", ] diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py new file mode 100644 index 00000000..c04c8f18 --- /dev/null +++ b/doubleml/utils/propensity_score_processing.py @@ -0,0 +1,247 @@ +import warnings +from dataclasses import dataclass +from typing import Optional, Union + +import numpy as np +from sklearn.isotonic import IsotonicRegression +from sklearn.model_selection import cross_val_predict +from sklearn.utils.multiclass import type_of_target + + +@dataclass +class PSProcessorConfig: + clipping_threshold: float = 1e-2 + extreme_threshold: float = 1e-12 + calibration_method: Optional[str] = None + cv_calibration: bool = False + + +# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). +def init_ps_processor( + ps_processor_config: Optional[PSProcessorConfig], trimming_rule: Optional[str], trimming_threshold: Optional[float] +): + if trimming_rule != "truncate": + warnings.warn( + "'trimming_rule' is deprecated and will be removed in a future version. 
" + "Use 'ps_processor_config' with 'clipping_threshold' instead.", + DeprecationWarning, + stacklevel=3, + ) + if trimming_threshold != 1e-2: + warnings.warn( + "'trimming_threshold' is deprecated and will be removed in a future version. " + "Use 'ps_processor_config' with 'clipping_threshold' instead.", + DeprecationWarning, + stacklevel=3, + ) + if ps_processor_config is not None: + config = ps_processor_config + else: + config = PSProcessorConfig(clipping_threshold=trimming_threshold if trimming_threshold is not None else 1e-2) + processor = PSProcessor.from_config(config) + return config, processor + + +class PSProcessor: + """ + Processor for propensity score calibration, clipping, and validation. + + Parameters + ---------- + clipping_threshold : float, default=1e-2 + Minimum and maximum bound for propensity scores after clipping. + + extreme_threshold : float, default=1e-12 + Threshold below which propensity scores are considered extreme. + Used for generating warnings. + + calibration_method : {'isotonic', None}, optional + If provided, applies the specified calibration method to + the propensity scores before clipping. + + cv_calibration : bool, default=False + Whether to use cross-validation for calibration. + Only applies if a calibration method is specified. 
+ + Examples + -------- + >>> import numpy as np + >>> from doubleml.utils import PSProcessor + >>> ps = np.array([0.001, 0.2, 0.5, 0.8, 0.999]) + >>> treatment = np.array([0, 1, 1, 0, 1]) + >>> processor = PSProcessor(clipping_threshold=0.01) + >>> adjusted = processor.adjust_ps(ps, treatment) + >>> print(np.round(adjusted, 3)) + [0.01 0.2 0.5 0.8 0.99] + """ + + _VALID_CALIBRATION_METHODS = {None, "isotonic"} + + def __init__( + self, + clipping_threshold: float = 1e-2, + extreme_threshold: float = 1e-12, + calibration_method: Optional[str] = None, + cv_calibration: bool = False, + ): + self._clipping_threshold = clipping_threshold + self._extreme_threshold = extreme_threshold + self._calibration_method = calibration_method + self._cv_calibration = cv_calibration + + self._validate_config() + + @classmethod + def from_config(cls, config: PSProcessorConfig): + return cls( + clipping_threshold=config.clipping_threshold, + extreme_threshold=config.extreme_threshold, + calibration_method=config.calibration_method, + cv_calibration=config.cv_calibration, + ) + + # ------------------------------------------------------------------------- + # Properties + # ------------------------------------------------------------------------- + + @property + def clipping_threshold(self) -> float: + """Get the clipping threshold.""" + return self._clipping_threshold + + @property + def extreme_threshold(self) -> float: + """Get the extreme threshold.""" + return self._extreme_threshold + + @property + def calibration_method(self) -> Optional[str]: + """Get the calibration method.""" + return self._calibration_method + + @property + def cv_calibration(self) -> bool: + """Get whether cross-validation calibration is used.""" + return self._cv_calibration + + # ------------------------------------------------------------------------- + # Core functionality + # ------------------------------------------------------------------------- + def adjust_ps( + self, + propensity_scores: 
np.ndarray, + treatment: np.ndarray, + cv: Optional[Union[int, list]] = None, + learner_name: Optional[str] = None, + ) -> np.ndarray: + """ + Adjust propensity scores via validation, clipping, and warnings. + + Parameters + ---------- + propensity_scores : np.ndarray + Raw propensity score predictions. + treatment : np.ndarray + Treatment assignments (1 for treated, 0 for control). + cv : int or list, optional + Cross-validation strategy for calibration. Used only if calibration is applied. + learner_name : str, optional + Name of the learner providing the propensity scores, used in warnings. + + Returns + ------- + np.ndarray + Clipped and validated propensity scores. + """ + self._validate_propensity_scores( + propensity_scores, + learner_name, + ) + self._validate_treatment(treatment) + + calibrated_ps = self._apply_calibration(propensity_scores, treatment, cv=cv) + clipped_scores = np.clip(calibrated_ps, a_min=self.clipping_threshold, a_max=1 - self.clipping_threshold) + + return clipped_scores + + # ------------------------------------------------------------------------- + # Private helper methods + # ------------------------------------------------------------------------- + def _apply_calibration( + self, + propensity_scores: np.ndarray, + treatment: np.ndarray, + cv: Optional[Union[int, list]] = None, + ) -> np.ndarray: + """Apply calibration method to propensity scores if specified.""" + if self.calibration_method is None: + calibrated_ps = propensity_scores + elif self.calibration_method == "isotonic": + calibration_model = IsotonicRegression(out_of_bounds="clip", y_min=0.0, y_max=1.0) + + if self.cv_calibration: + calibrated_ps = cross_val_predict( + estimator=calibration_model, X=propensity_scores.reshape(-1, 1), y=treatment, cv=cv, method="predict" + ) + else: + calibration_model.fit(propensity_scores.reshape(-1, 1), treatment) + calibrated_ps = calibration_model.predict(propensity_scores.reshape(-1, 1)) + else: + # This point should never be reached 
due to prior validation + raise ValueError( + f"Unsupported calibration method: {self.calibration_method}. " + f"Valid methods are: {self._VALID_CALIBRATION_METHODS}" + ) + + return calibrated_ps + + def _validate_config(self) -> None: + """Validate configuration parameters.""" + if not isinstance(self.clipping_threshold, float): + raise TypeError("clipping_threshold must be a float.") + if not (0 < self.clipping_threshold < 0.5): + raise ValueError("clipping_threshold must be between 0 and 0.5.") + + if not (0 < self.extreme_threshold < 0.5): + raise ValueError("extreme_threshold must be between 0 and 0.5.") + + if self.calibration_method not in self._VALID_CALIBRATION_METHODS: + raise ValueError(f"calibration_method must be one of {self._VALID_CALIBRATION_METHODS}.") + + if not isinstance(self.cv_calibration, bool): + raise TypeError("cv_calibration must be of bool type.") + if self.cv_calibration and self.calibration_method is None: + raise ValueError("cv_calibration=True requires a calibration_method.") + + def _validate_propensity_scores( + self, + preds: np.ndarray, + learner_name: Optional[str] = None, + ) -> None: + """Validate if propensity predictions are valid.""" + learner_msg = f" from learner {learner_name}" if learner_name is not None else "" + + if not isinstance(preds, np.ndarray): + raise TypeError(f"Propensity predictions {learner_msg} must be of type np.ndarray. " f"Type {type(preds)} found.") + + if preds.ndim != 1: + raise ValueError(f"Propensity predictions {learner_msg} must be 1-dimensional. 
" f"Shape {preds.shape} found.") + + if any((preds < self.extreme_threshold) | (preds > 1 - self.extreme_threshold)): + warnings.warn( + f"Propensity predictions {learner_msg} " f"are close to zero or one (eps={self.extreme_threshold}).", + UserWarning, + ) + + def _validate_treatment(self, treatment: np.ndarray) -> None: + """Validate treatment vector.""" + if not isinstance(treatment, np.ndarray): + raise TypeError(f"Treatment assignments must be of type np.ndarray. " f"Type {type(treatment)} found.") + + if treatment.ndim != 1: + raise ValueError(f"Treatment assignments must be 1-dimensional. " f"Shape {treatment.shape} found.") + + binary_treat = type_of_target(treatment) == "binary" + zero_one_treat = np.all((np.power(treatment, 2) - treatment) == 0) + if not (binary_treat and zero_one_treat): + raise ValueError("Treatment vector must be binary (0 and 1).") diff --git a/doubleml/utils/tests/test_ps_processor.py b/doubleml/utils/tests/test_ps_processor.py new file mode 100644 index 00000000..a04354f1 --- /dev/null +++ b/doubleml/utils/tests/test_ps_processor.py @@ -0,0 +1,135 @@ +import warnings +from unittest.mock import patch + +import numpy as np +import pytest +from sklearn.isotonic import IsotonicRegression +from sklearn.model_selection import KFold, cross_val_predict + +from doubleml.utils.propensity_score_processing import PSProcessor, PSProcessorConfig, init_ps_processor + + +# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). 
+@pytest.mark.ci +def test_init_ps_processor_with_deprecated(): + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + cfg, proc = init_ps_processor(None, "truncate", 0.02) + assert any("deprecated" in str(warn.message) for warn in w) + assert isinstance(cfg, PSProcessorConfig) + assert proc.clipping_threshold == 0.02 + + +@pytest.mark.ci +def test_init_ps_processor_with_config(): + config = PSProcessorConfig(clipping_threshold=0.05) + cfg, proc = init_ps_processor(config, None, None) + assert isinstance(cfg, PSProcessorConfig) + assert isinstance(proc, PSProcessor) + assert proc.clipping_threshold == 0.05 + + +@pytest.mark.ci +def test_from_config_initialization(): + """Test initialization of PSProcessor from PSProcessorConfig.""" + config = PSProcessorConfig( + clipping_threshold=0.05, + extreme_threshold=1e-8, + calibration_method="isotonic", + cv_calibration=True, + ) + processor = PSProcessor.from_config(config) + assert processor.clipping_threshold == 0.05 + assert processor.extreme_threshold == 1e-8 + assert processor.calibration_method == "isotonic" + assert processor.cv_calibration is True + + +@pytest.mark.ci +def test_adjust_basic_clipping(): + """Test basic clipping functionality.""" + processor = PSProcessor(clipping_threshold=0.1) + + scores = np.array([0.05, 0.2, 0.8, 0.95]) + treatment = np.array([0, 1, 1, 0]) + adjusted = processor.adjust_ps(scores, treatment) + + expected = np.array([0.1, 0.2, 0.8, 0.9]) + np.testing.assert_array_equal(adjusted, expected) + + +@pytest.mark.ci +def test_adjust_no_clipping_needed(): + """Test when no clipping is needed.""" + processor = PSProcessor(clipping_threshold=0.01) + + scores = np.array([0.2, 0.3, 0.7, 0.8]) + treatment = np.array([0, 1, 1, 0]) + adjusted = processor.adjust_ps(scores, treatment) + + np.testing.assert_array_equal(adjusted, scores) + + +@pytest.mark.ci +def test_isotonic_calibration_without_cv(): + """Test isotonic calibration without cross-validation.""" + ps = 
np.random.uniform(0, 1, size=100) + treatment = np.random.binomial(1, 0.5, size=100) + + clipping_threshold = 0.01 + processor = PSProcessor( + calibration_method="isotonic", + cv_calibration=False, + clipping_threshold=clipping_threshold, + ) + + isotonic_manual = IsotonicRegression(out_of_bounds="clip", y_min=0.0, y_max=1.0) + isotonic_manual.fit(ps.reshape(-1, 1), treatment) + expected_ps_manual = isotonic_manual.predict(ps.reshape(-1, 1)) + expected_ps_manual = np.clip(expected_ps_manual, clipping_threshold, 1 - clipping_threshold) + + adjusted_ps = processor.adjust_ps(ps, treatment) + np.testing.assert_array_equal(adjusted_ps, expected_ps_manual) + + +@pytest.fixture(scope="module", params=[None, 3, "iterable", "splitter"]) +def cv(request): + return request.param + + +@pytest.mark.ci +def test_isotonic_calibration_with_cv(cv): + """Test that cross-validated isotonic calibration matches cross_val_predict followed by clipping.""" + n_obs = 100 + ps = np.random.uniform(0, 1, size=n_obs) + treatment = np.random.binomial(1, 0.5, size=n_obs) + if cv == "iterable": + cv = [(train, test) for train, test in KFold(n_splits=3).split(ps)] + elif cv == "splitter": + cv = KFold(n_splits=3) + + clipping_threshold = 0.01 + processor = PSProcessor(calibration_method="isotonic", cv_calibration=True, clipping_threshold=clipping_threshold) + + isotonic_manual = IsotonicRegression(out_of_bounds="clip", y_min=0.0, y_max=1.0) + ps_cv = cross_val_predict(isotonic_manual, ps.reshape(-1, 1), treatment, cv=cv) + expected_ps_manual = np.clip(ps_cv, clipping_threshold, 1 - clipping_threshold) + + adjusted_ps = processor.adjust_ps(ps, treatment, cv=cv) + np.testing.assert_array_equal(adjusted_ps, expected_ps_manual) + + +@pytest.mark.ci +def test_no_calibration(): + """Test that no calibration is applied and in-range scores are unchanged when calibration_method is None.""" + processor = PSProcessor(calibration_method=None, clipping_threshold=0.01) + + scores = np.array([0.2, 0.3, 0.7, 0.8]) + treatment = np.array([0, 1, 1, 0]) + + # Should not call any calibration 
methods; NOTE(review): confirm the patch target is where PSProcessor looks it up + with patch("sklearn.isotonic.IsotonicRegression") as mock_isotonic: + adjusted = processor.adjust_ps(scores, treatment) + mock_isotonic.assert_not_called() + + np.testing.assert_array_equal(adjusted, scores) diff --git a/doubleml/utils/tests/test_ps_processor_exceptions.py b/doubleml/utils/tests/test_ps_processor_exceptions.py new file mode 100644 index 00000000..0c3971a7 --- /dev/null +++ b/doubleml/utils/tests/test_ps_processor_exceptions.py @@ -0,0 +1,127 @@ +import numpy as np +import pytest + +from doubleml.utils.propensity_score_processing import PSProcessor + +# ------------------------------------------------------------------------- +# Tests for __init__ method +# ------------------------------------------------------------------------- + + +@pytest.mark.ci +def test_init_clipping_threshold_type_error(): + """Test that a non-float clipping_threshold (here a str) raises TypeError.""" + with pytest.raises(TypeError, match="clipping_threshold must be a float."): + PSProcessor(clipping_threshold="0.01") + + +@pytest.mark.ci +def test_init_clipping_threshold_value_error(): + """Test that clipping_threshold values of 0.0 and 0.6 raise ValueError.""" + with pytest.raises(ValueError, match="clipping_threshold must be between 0 and 0.5"): + PSProcessor(clipping_threshold=0.0) # exactly 0 + + with pytest.raises(ValueError, match="clipping_threshold must be between 0 and 0.5"): + PSProcessor(clipping_threshold=0.6) # above 0.5 + + +@pytest.mark.ci +def test_init_extreme_threshold_value_error(): + """Test that extreme_threshold values of 0.0 and 0.6 raise ValueError.""" + with pytest.raises(ValueError, match="extreme_threshold must be between 0 and 0.5"): + PSProcessor(extreme_threshold=0.0) # exactly 0 + + with pytest.raises(ValueError, match="extreme_threshold must be between 0 and 0.5"): + PSProcessor(extreme_threshold=0.6) # above 0.5 + + +@pytest.mark.ci +def test_init_calibration_method_value_error(): + """Test that an unrecognized calibration_method raises ValueError.""" + with 
pytest.raises(ValueError, match="calibration_method must be one of"): + PSProcessor(calibration_method="invalid_method") + + +@pytest.mark.ci +def test_init_cv_calibration_type_error(): + """Test that a non-bool cv_calibration (here a str) raises TypeError.""" + with pytest.raises(TypeError, match="cv_calibration must be of bool type."): + PSProcessor(cv_calibration="True") + + +@pytest.mark.ci +def test_init_cv_calibration_value_error(): + """Test that cv_calibration=True combined with calibration_method=None raises ValueError.""" + with pytest.raises(ValueError, match="cv_calibration=True requires a calibration_method."): + PSProcessor(calibration_method=None, cv_calibration=True) + + +# ------------------------------------------------------------------------- +# Tests for propensity score & treatment validation +# ------------------------------------------------------------------------- + + +@pytest.mark.ci +def test_validate_propensity_scores_type_error_with_learner(): + """Test that the TypeError raised for list-typed propensity scores includes the learner name.""" + processor = PSProcessor() + with pytest.raises(TypeError, match="from learner test_learner"): + processor.adjust_ps([0.1, 0.2], np.array([0, 1]), learner_name="test_learner") + + +@pytest.mark.ci +def test_validate_propensity_scores_dimension_error(): + """Test that 2-dimensional propensity scores raise ValueError.""" + processor = PSProcessor() + with pytest.raises(ValueError, match="must be 1-dimensional"): + processor.adjust_ps(np.array([[0.1, 0.2]]), np.array([0, 1])) + + +@pytest.mark.ci +def test_validate_propensity_scores_extreme_warning(): + """Test that scores of 0.01 and 0.99 trigger a UserWarning when extreme_threshold is 0.05.""" + processor = PSProcessor(extreme_threshold=0.05) + with pytest.warns(UserWarning, match="close to zero or one"): + processor.adjust_ps(np.array([0.01, 0.99]), np.array([0, 1])) + + +@pytest.mark.ci +def test_validate_treatment_type_error(): + """Test that a treatment passed as a list instead of np.ndarray raises TypeError.""" + processor = PSProcessor() + with pytest.raises(TypeError, match="Treatment assignments 
must be of type np.ndarray"): + processor.adjust_ps(np.array([0.2, 0.8]), [0, 1]) + + +@pytest.mark.ci +def test_validate_treatment_dimension_error(): + """Test that a 2-dimensional treatment array raises ValueError.""" + processor = PSProcessor() + with pytest.raises(ValueError, match="must be 1-dimensional"): + processor.adjust_ps(np.array([0.2, 0.8]), np.array([[0, 1]])) + + +@pytest.mark.ci +def test_validate_treatment_binary_error(): + """Test that treatment values other than 0 and 1 (here a 2) raise ValueError.""" + processor = PSProcessor() + with pytest.raises(ValueError, match="must be binary"): + processor.adjust_ps(np.array([0.2, 0.8]), np.array([0, 2])) + + +# ------------------------------------------------------------------------- +# Other exception tests +# ------------------------------------------------------------------------- + + +@pytest.mark.ci +def test_apply_calibration_unsupported_method_error(): + """Test that _apply_calibration raises ValueError when _calibration_method is set to an unsupported value.""" + processor = PSProcessor() + processor._calibration_method = "unsupported_method" + + propensity_scores = np.array([0.2, 0.8]) + treatment = np.array([0, 1]) + + with pytest.raises(ValueError, match="Unsupported calibration method: unsupported_method"): + processor._apply_calibration(propensity_scores, treatment) diff --git a/pytest.ini b/pytest.ini index 3582830c..f7125f42 100644 --- a/pytest.ini +++ b/pytest.ini @@ -15,3 +15,4 @@ filterwarnings = ignore:.*Sensitivity analysis not implemented for callable scores.*:UserWarning ignore:.*Subsample has not common support. Results are based on adjusted propensities.*:UserWarning ignore:.*Treatment probability within bandwidth left from cutoff higher than right from cutoff.\nTreatment assignment might be based on the wrong side of the cutoff.*:UserWarning + ignore:.*The estimated nu2 for d is not positive.*:UserWarning