Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
202808d
first ps version
SvenKlaassen Oct 21, 2025
2ff5ecb
add exceptions for ps processor
SvenKlaassen Oct 22, 2025
9772694
add representation tests
SvenKlaassen Oct 22, 2025
b0f52a0
clean up ps processor class to only have clipping threshold and extre…
SvenKlaassen Oct 22, 2025
6b746d4
add treatment to adjust method calls
SvenKlaassen Oct 22, 2025
478a04e
include calibration via isotonic regression
SvenKlaassen Oct 22, 2025
61a529b
add cv to ps calibration
SvenKlaassen Oct 22, 2025
cd3605b
remove print from rdd example in docstring as test fails
SvenKlaassen Oct 22, 2025
dce29b4
change to direct arguments for propensity score processor
SvenKlaassen Oct 22, 2025
08a0073
add data class with config for psprocessor
SvenKlaassen Oct 23, 2025
4c5f850
add init_ps_processor function
SvenKlaassen Oct 23, 2025
f284fa4
update irm class and tests with ps_processor
SvenKlaassen Oct 24, 2025
dac26ec
add test for ps_processor with irm
SvenKlaassen Oct 24, 2025
5ec4648
update apo and tests for ps_processor_config
SvenKlaassen Oct 24, 2025
8e57bc7
add ps_processor to apos
SvenKlaassen Oct 24, 2025
0493024
add ps_processor test for apo
SvenKlaassen Oct 24, 2025
45a167d
Merge branch 'main' into s-add-propensity-score-adjustments
SvenKlaassen Oct 24, 2025
5b60bbf
update IIVM with psprocessor
SvenKlaassen Oct 27, 2025
a30f1ae
add ps_processor to ssm
SvenKlaassen Oct 27, 2025
5d1a822
add test for iivm ps processor with fixture
SvenKlaassen Oct 27, 2025
f97be67
add ps_processor to cvar
SvenKlaassen Oct 27, 2025
c485731
add ps_processor to pq models
SvenKlaassen Oct 27, 2025
2221201
add ps_processor to LPQ models
SvenKlaassen Oct 27, 2025
d800d3f
add ps_processor to qte
SvenKlaassen Oct 27, 2025
eecb623
update for sklearn warning
SvenKlaassen Oct 27, 2025
bd3d795
update exception tests for trimming and remove/supress warnings
SvenKlaassen Oct 27, 2025
07e5c27
add ps_processor to did binary rename trimming to clipping in did
SvenKlaassen Oct 27, 2025
118bc85
add ps_processor to did_cs_binary and clipping to did_cs
SvenKlaassen Oct 27, 2025
5fbfa26
update clipping terminology and enhance ps_processor usage in DID cla…
SvenKlaassen Oct 27, 2025
474b81f
add ps_processor test for did_cs_binary
SvenKlaassen Oct 27, 2025
adfe40d
add ps_processor to did_multi
SvenKlaassen Oct 27, 2025
ca07266
update defaults tests for PSProcessor and clipping_threshold
SvenKlaassen Oct 27, 2025
5d13b0a
add deprecation warnings for DoubleMLDID and DoubleMLDIDCS and Doubl…
SvenKlaassen Oct 27, 2025
c69af49
update type hint for cv parameter in PSProcessor to use Union[int, list]
SvenKlaassen Oct 27, 2025
f7dcc98
update type hint for cv parameter in PSProcessor to use Union[int, list]
SvenKlaassen Oct 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doubleml/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from .ssm_data import DoubleMLSSMData


# TODO: Remove DoubleMLClusterData with version 0.12.0
class DoubleMLClusterData(DoubleMLData):
"""
Backwards compatibility wrapper for DoubleMLData with cluster_cols.
Expand Down
10 changes: 9 additions & 1 deletion doubleml/data/did_data.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import io
import warnings

import pandas as pd
from sklearn.utils import assert_all_finite
Expand All @@ -7,6 +8,7 @@
from doubleml.data.base_data import DoubleMLData


# TODO: Remove DoubleMLDIDData with version 0.12.0
class DoubleMLDIDData(DoubleMLData):
"""Double machine learning data-backend for Difference-in-Differences models.

Expand Down Expand Up @@ -81,7 +83,13 @@ def __init__(
use_other_treat_as_covariate=True,
force_all_x_finite=True,
force_all_d_finite=True,
): # Initialize _t_col to None first to avoid AttributeError during parent init
):
warnings.warn(
"DoubleMLDIDData is deprecated and will be removed with version 0.12.0. " "Use DoubleMLPanelData instead.",
FutureWarning,
stacklevel=2,
)
# Initialize _t_col to None first to avoid AttributeError during parent init
self._t_col = None

# Store whether x_cols was originally None to reset it later
Expand Down
40 changes: 16 additions & 24 deletions doubleml/did/did.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
from doubleml.data.did_data import DoubleMLDIDData
from doubleml.double_ml import DoubleML
from doubleml.double_ml_score_mixins import LinearScoreMixin
from doubleml.utils._checks import _check_finite_predictions, _check_is_propensity, _check_score, _check_trimming
from doubleml.utils._checks import _check_finite_predictions, _check_is_propensity, _check_score
from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls
from doubleml.utils._propensity_score import _trimm


# TODO: Remove DoubleMLDIDData with version 0.12.0
class DoubleMLDID(LinearScoreMixin, DoubleML):
"""Double machine learning for difference-in-differences models with panel data (two time periods).

Expand Down Expand Up @@ -50,12 +50,8 @@ class DoubleMLDID(LinearScoreMixin, DoubleML):
Indicates whether to use a slightly different normalization from Sant'Anna and Zhao (2020).
Default is ``True``.

trimming_rule : str
A str (``'truncate'`` is the only choice) specifying the trimming approach.
Default is ``'truncate'``.

trimming_threshold : float
The threshold used for trimming.
clipping_threshold : float
The threshold used for clipping.
Default is ``1e-2``.

draw_sample_splitting : bool
Expand Down Expand Up @@ -89,10 +85,14 @@ def __init__(
n_rep=1,
score="observational",
in_sample_normalization=True,
trimming_rule="truncate",
trimming_threshold=1e-2,
clipping_threshold=1e-2,
draw_sample_splitting=True,
):
warnings.warn(
"DoubleMLDID is deprecated and will be removed with version 0.12.0. " "Please use DoubleMLDIDBinary instead.",
DeprecationWarning,
stacklevel=2,
)
super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting)

self._check_data(self._dml_data)
Expand Down Expand Up @@ -142,9 +142,7 @@ def __init__(
self._predict_method["ml_m"] = "predict_proba"
self._initialize_ml_nuisance_params()

self._trimming_rule = trimming_rule
self._trimming_threshold = trimming_threshold
_check_trimming(self._trimming_rule, self._trimming_threshold)
self._clipping_threshold = clipping_threshold
self._sensitivity_implemented = True
self._external_predictions_implemented = True

Expand All @@ -156,18 +154,11 @@ def in_sample_normalization(self):
return self._in_sample_normalization

@property
def trimming_rule(self):
def clipping_threshold(self):
"""
Specifies the used trimming rule.
Specifies the used clipping threshold.
"""
return self._trimming_rule

@property
def trimming_threshold(self):
"""
Specifies the used trimming threshold.
"""
return self._trimming_threshold
return self._clipping_threshold

def _initialize_ml_nuisance_params(self):
if self.score == "observational":
Expand Down Expand Up @@ -269,9 +260,10 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
method=self._predict_method["ml_m"],
return_models=return_models,
)

_check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls)
_check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12)
m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold)
m_hat["preds"] = np.clip(m_hat["preds"], self.clipping_threshold, 1 - self.clipping_threshold)

# nuisance estimates of the uncond. treatment prob.
p_hat = np.full_like(d, d.mean(), dtype="float64")
Expand Down
64 changes: 48 additions & 16 deletions doubleml/did/did_binary.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import warnings
from typing import Optional

import numpy as np
from sklearn.utils import check_X_y
Expand All @@ -19,14 +20,13 @@
from doubleml.utils._checks import (
_check_bool,
_check_finite_predictions,
_check_is_propensity,
_check_score,
_check_trimming,
)
from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls
from doubleml.utils._propensity_score import _trimm
from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor


# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
class DoubleMLDIDBinary(LinearScoreMixin, DoubleML):
"""Double machine learning for difference-in-differences models with panel data (binary setting in terms of group and time
combinations).
Expand Down Expand Up @@ -83,13 +83,16 @@ class DoubleMLDIDBinary(LinearScoreMixin, DoubleML):
Indicates whether to use a slightly different normalization from Sant'Anna and Zhao (2020).
Default is ``True``.

trimming_rule : str
A str (``'truncate'`` is the only choice) specifying the trimming approach.
Default is ``'truncate'``.
trimming_rule : str, optional, deprecated
(DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach.
Use `ps_processor_config` instead. Will be removed in a future version.

trimming_threshold : float
The threshold used for trimming.
Default is ``1e-2``.
trimming_threshold : float, optional, deprecated
(DEPRECATED) The threshold used for trimming.
Use `ps_processor_config` instead. Will be removed in a future version.

ps_processor_config : PSProcessorConfig, optional
Configuration for propensity score processing (clipping, calibration, etc.).

draw_sample_splitting : bool
Indicates whether the sample splitting should be drawn during initialization of the object.
Expand All @@ -115,8 +118,9 @@ def __init__(
n_rep=1,
score="observational",
in_sample_normalization=True,
trimming_rule="truncate",
trimming_threshold=1e-2,
trimming_rule="truncate", # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
trimming_threshold=1e-2, # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
ps_processor_config: Optional[PSProcessorConfig] = None,
draw_sample_splitting=True,
print_periods=False,
):
Expand Down Expand Up @@ -232,9 +236,12 @@ def __init__(
self._predict_method["ml_m"] = "predict_proba"
self._initialize_ml_nuisance_params()

# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
self._ps_processor_config, self._ps_processor = init_ps_processor(
ps_processor_config, trimming_rule, trimming_threshold
)
self._trimming_rule = trimming_rule
self._trimming_threshold = trimming_threshold
_check_trimming(self._trimming_rule, self._trimming_threshold)
self._trimming_threshold = self._ps_processor.clipping_threshold

self._sensitivity_implemented = True
self._external_predictions_implemented = True
Expand Down Expand Up @@ -321,19 +328,44 @@ def in_sample_normalization(self):
"""
return self._in_sample_normalization

@property
def ps_processor_config(self):
"""
Configuration for propensity score processing (clipping, calibration, etc.).
"""
return self._ps_processor_config

@property
def ps_processor(self):
"""
Propensity score processor.
"""
return self._ps_processor

# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
@property
def trimming_rule(self):
"""
Specifies the used trimming rule.
"""
warnings.warn(
"'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2
)
return self._trimming_rule

# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
@property
def trimming_threshold(self):
"""
Specifies the used trimming threshold.
"""
return self._trimming_threshold
warnings.warn(
"'trimming_threshold' is deprecated and will be removed in a future version. "
"Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.",
DeprecationWarning,
stacklevel=2,
)
return self._ps_processor.clipping_threshold

@property
def n_obs_subset(self):
Expand Down Expand Up @@ -499,9 +531,9 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
method=self._predict_method["ml_m"],
return_models=return_models,
)

_check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls)
_check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12)
m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold)
m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], d, cv=smpls, learner_name="ml_m")

# nuisance estimates of the uncond. treatment prob.
p_hat = np.full_like(d, d.mean(), dtype="float64")
Expand Down
45 changes: 18 additions & 27 deletions doubleml/did/did_cs.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
from doubleml.data.did_data import DoubleMLDIDData
from doubleml.double_ml import DoubleML
from doubleml.double_ml_score_mixins import LinearScoreMixin
from doubleml.utils._checks import _check_finite_predictions, _check_is_propensity, _check_score, _check_trimming
from doubleml.utils._checks import _check_finite_predictions, _check_is_propensity, _check_score
from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls_2d
from doubleml.utils._propensity_score import _trimm


# TODO: Remove DoubleMLDIDData with version 0.12.0
class DoubleMLDIDCS(LinearScoreMixin, DoubleML):
"""Double machine learning for difference-in-difference with repeated cross-sections.

Expand Down Expand Up @@ -50,12 +50,8 @@ class DoubleMLDIDCS(LinearScoreMixin, DoubleML):
Indicates whether to use a slightly different normalization from Sant'Anna and Zhao (2020).
Default is ``True``.

trimming_rule : str
A str (``'truncate'`` is the only choice) specifying the trimming approach.
Default is ``'truncate'``.

trimming_threshold : float
The threshold used for trimming.
clipping_threshold : float
The threshold used for clipping.
Default is ``1e-2``.

draw_sample_splitting : bool
Expand Down Expand Up @@ -87,10 +83,14 @@ def __init__(
n_rep=1,
score="observational",
in_sample_normalization=True,
trimming_rule="truncate",
trimming_threshold=1e-2,
clipping_threshold=1e-2,
draw_sample_splitting=True,
):
warnings.warn(
"DoubleMLDIDCS is deprecated and will be removed with version 0.12.0. " "Please use DoubleMLDIDCSBinary instead.",
DeprecationWarning,
stacklevel=2,
)
super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting)

self._check_data(self._dml_data)
Expand Down Expand Up @@ -140,10 +140,7 @@ def __init__(
self._predict_method["ml_m"] = "predict_proba"
self._initialize_ml_nuisance_params()

self._trimming_rule = trimming_rule
self._trimming_threshold = trimming_threshold
_check_trimming(self._trimming_rule, self._trimming_threshold)

self._clipping_threshold = clipping_threshold
self._sensitivity_implemented = True
self._external_predictions_implemented = True

Expand All @@ -155,18 +152,11 @@ def in_sample_normalization(self):
return self._in_sample_normalization

@property
def trimming_rule(self):
"""
Specifies the used trimming rule.
"""
return self._trimming_rule

@property
def trimming_threshold(self):
def clipping_threshold(self):
"""
Specifies the used trimming threshold.
Specifies the used clipping threshold.
"""
return self._trimming_threshold
return self._clipping_threshold

def _initialize_ml_nuisance_params(self):
if self.score == "observational":
Expand Down Expand Up @@ -312,9 +302,10 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
method=self._predict_method["ml_m"],
return_models=return_models,
)
_check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls)
_check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12)
m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold)

_check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls)
_check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12)
m_hat["preds"] = np.clip(m_hat["preds"], self.clipping_threshold, 1 - self.clipping_threshold)

psi_a, psi_b = self._score_elements(
y,
Expand Down
Loading