From 85bff0a36bb99b0c6c756ea5944eefec09ddd30a Mon Sep 17 00:00:00 2001 From: bbd5721 Date: Sun, 22 Jun 2025 20:18:22 +0200 Subject: [PATCH 01/27] Prepared logistic simulation --- .gitignore | 1 + monte-cover/src/montecover/plm/__init__.py | 2 + .../src/montecover/plm/logistic_ate.py | 124 ++++++++++++++++++ results/plm/logistic_ate_config.yml | 38 ++++++ results/plm/logistic_ate_metadata.csv | 2 + scripts/plm/logistic_ate.py | 13 ++ scripts/plm/logistic_ate_config.yml | 74 +++++++++++ 7 files changed, 254 insertions(+) create mode 100644 monte-cover/src/montecover/plm/logistic_ate.py create mode 100644 results/plm/logistic_ate_config.yml create mode 100644 results/plm/logistic_ate_metadata.csv create mode 100644 scripts/plm/logistic_ate.py create mode 100644 scripts/plm/logistic_ate_config.yml diff --git a/.gitignore b/.gitignore index 24f7c5cc..93d4dfaf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ __pycache__/ +.idea/ # Logs monte-cover/logs/ diff --git a/monte-cover/src/montecover/plm/__init__.py b/monte-cover/src/montecover/plm/__init__.py index 167b36d8..3707ee6f 100644 --- a/monte-cover/src/montecover/plm/__init__.py +++ b/monte-cover/src/montecover/plm/__init__.py @@ -5,6 +5,7 @@ from montecover.plm.plr_ate_sensitivity import PLRATESensitivityCoverageSimulation from montecover.plm.plr_cate import PLRCATECoverageSimulation from montecover.plm.plr_gate import PLRGATECoverageSimulation +from montecover.plm.logistic_ate import LogisticATECoverageSimulation __all__ = [ "PLRATECoverageSimulation", @@ -12,4 +13,5 @@ "PLRGATECoverageSimulation", "PLRCATECoverageSimulation", "PLRATESensitivityCoverageSimulation", + "LogisticATECoverageSimulation", ] diff --git a/monte-cover/src/montecover/plm/logistic_ate.py b/monte-cover/src/montecover/plm/logistic_ate.py new file mode 100644 index 00000000..bef474ed --- /dev/null +++ b/monte-cover/src/montecover/plm/logistic_ate.py @@ -0,0 +1,124 @@ +from typing import Any, Dict, Optional + +import doubleml as dml +from 
doubleml.datasets import make_logistic_LZZ2020 + +from montecover.base import BaseSimulation +from montecover.utils import create_learner_from_config + + +class LogisticATECoverageSimulation(BaseSimulation): + """Simulation class for coverage properties of DoubleMLPLR for ATE estimation.""" + + def __init__( + self, + config_file: str, + suppress_warnings: bool = True, + log_level: str = "INFO", + log_file: Optional[str] = None, + ): + super().__init__( + config_file=config_file, + suppress_warnings=suppress_warnings, + log_level=log_level, + log_file=log_file, + ) + + # Calculate oracle values + self._calculate_oracle_values() + + def _process_config_parameters(self): + """Process simulation-specific parameters from config""" + # Process ML models in parameter grid + assert "learners" in self.dml_parameters, "No learners specified in the config file" + + required_learners = ["ml_m", "ml_M", "ml_t"] + for learner in self.dml_parameters["learners"]: + for ml in required_learners: + assert ml in learner, f"No {ml} specified in the config file" + + def _calculate_oracle_values(self): + """Calculate oracle values for the simulation.""" + self.logger.info("Calculating oracle values") + + self.oracle_values = dict() + self.oracle_values["theta"] = self.dgp_parameters["theta"] + + def run_single_rep(self, dml_data, dml_params) -> Dict[str, Any]: + """Run a single repetition with the given parameters.""" + # Extract parameters + learner_config = dml_params["learners"] + learner_m_name, ml_m = create_learner_from_config(learner_config["ml_m"]) + learner_M_name, ml_M = create_learner_from_config(learner_config["ml_M"]) + learner_t_name, ml_t = create_learner_from_config(learner_config["ml_t"]) + score = dml_params["score"] + + # Model + dml_model = dml.DoubleMLLogit( + obj_dml_data=dml_data, + ml_m=ml_m, + ml_M=ml_M, + ml_t=ml_t, + score=score,) + + dml_model.fit() + + result = { + "coverage": [], + } + for level in self.confidence_parameters["level"]: + level_result = 
dict() + level_result["coverage"] = self._compute_coverage( + thetas=dml_model.coef, + oracle_thetas=self.oracle_values["theta"], + confint=dml_model.confint(level=level), + joint_confint=None, + ) + + # add parameters to the result + for res in level_result.values(): + res.update( + { + "Learner m": learner_m_name, + "Learner M": learner_M_name, + "Learner t": learner_t_name, + "Score": score, + "level": level, + } + ) + for key, res in level_result.items(): + result[key].append(res) + + return result + + def summarize_results(self): + """Summarize the simulation results.""" + self.logger.info("Summarizing simulation results") + + # Group by parameter combinations + groupby_cols = ["Learner m", "Learner M", "Learner T", "Score", "level"] + aggregation_dict = { + "Coverage": "mean", + "CI Length": "mean", + "Bias": "mean", + "repetition": "count", + } + + # Aggregate results (possibly multiple result dfs) + result_summary = dict() + for result_name, result_df in self.results.items(): + result_summary[result_name] = result_df.groupby(groupby_cols).agg(aggregation_dict).reset_index() + self.logger.debug(f"Summarized {result_name} results") + + return result_summary + + def _generate_dml_data(self, dgp_params) -> dml.DoubleMLData: + """Generate data for the simulation.""" + data = make_logistic_LZZ2020( + alpha=dgp_params["theta"], + n_obs=dgp_params["n_obs"], + dim_x=dgp_params["dim_x"], + return_type="DataFrame", + ) + dml_data = dml.DoubleMLData(data, "y", "d", "p") + return dml_data diff --git a/results/plm/logistic_ate_config.yml b/results/plm/logistic_ate_config.yml new file mode 100644 index 00000000..94cf9e1c --- /dev/null +++ b/results/plm/logistic_ate_config.yml @@ -0,0 +1,38 @@ +simulation_parameters: + repetitions: 1000 + max_runtime: 19800 + random_seed: 42 + n_jobs: -2 +dgp_parameters: + theta: + - 0.5 + n_obs: + - 500 + dim_x: + - 20 +learner_definitions: + lasso: + name: LassoCV + rf: &id001 + name: RF Regr. 
+ params: + n_estimators: 200 + max_features: 10 + max_depth: 5 + min_samples_leaf: 20 + lgbm: + name: LGBM Regr. + params: + n_estimators: 500 + learning_rate: 0.01 +dml_parameters: + learners: + - ml_m: *id001 + ml_M: *id001 + ml_t: *id001 + score: + - nuisance_space +confidence_parameters: + level: + - 0.95 + - 0.9 diff --git a/results/plm/logistic_ate_metadata.csv b/results/plm/logistic_ate_metadata.csv new file mode 100644 index 00000000..99ae2900 --- /dev/null +++ b/results/plm/logistic_ate_metadata.csv @@ -0,0 +1,2 @@ +DoubleML Version,Script,Date,Total Runtime (minutes),Python Version,Config File +0.10.dev0,LogisticATECoverageSimulation,2025-06-22 18:53,0.22107456922531127,3.12.2,scripts/plm/logistic_ate_config.yml diff --git a/scripts/plm/logistic_ate.py b/scripts/plm/logistic_ate.py new file mode 100644 index 00000000..8c03556d --- /dev/null +++ b/scripts/plm/logistic_ate.py @@ -0,0 +1,13 @@ +from montecover.plm import LogisticATECoverageSimulation + +# Create and run simulation with config file +sim = LogisticATECoverageSimulation( + config_file="scripts/plm/logistic_ate_config.yml", + log_level="INFO", + log_file="logs/plm/logistic_ate_sim.log", +) +sim.run_simulation() +sim.save_results(output_path="results/plm/", file_prefix="logistic_ate") + +# Save config file for reproducibility +sim.save_config("results/plm/logistic_ate_config.yml") \ No newline at end of file diff --git a/scripts/plm/logistic_ate_config.yml b/scripts/plm/logistic_ate_config.yml new file mode 100644 index 00000000..5d14ce23 --- /dev/null +++ b/scripts/plm/logistic_ate_config.yml @@ -0,0 +1,74 @@ +# Simulation parameters for PLR ATE Coverage + +simulation_parameters: + repetitions: 1000 + max_runtime: 19800 # 5.5 hours in seconds + random_seed: 42 + n_jobs: -2 + +dgp_parameters: + theta: [0.5] # Treatment effect + n_obs: [500] # Sample size + dim_x: [20] # Number of covariates + +# Define reusable learner configurations +learner_definitions: + lasso: &lasso + name: "LassoCV" + + 
rf: &rf + name: "RF Regr." + params: + n_estimators: 200 + max_features: 10 + max_depth: 5 + min_samples_leaf: 20 + + rf-class: &rf-class + name: "RF Clas." + params: + n_estimators: 200 + max_features: 10 + max_depth: 5 + min_samples_leaf: 20 + + lgbm: &lgbm + name: "LGBM Regr." + params: + n_estimators: 500 + learning_rate: 0.01 + +dml_parameters: + learners: +# - ml_m: *lasso +# ml_M: *lasso +# ml_t: *lasso + - ml_m: *rf + ml_M: *rf-class + ml_t: *rf +# - ml_m: *lgbm +# ml_M: *lgbm +# ml_t: *lgbm +# - ml_m: *rf +# ml_M: *lgbm +# ml_t: *lgbm +# - ml_m: *lgbm +# ml_M: *rf +# ml_t: *lgbm +# - ml_m: *lgbm +# ml_M: *lgbm +# ml_t: *rf +# - ml_m: *lgbm +# ml_M: *rf +# ml_t: *rf +# - ml_m: *rf +# ml_M: *lgbm +# ml_t: *rf +# - ml_m: *rf +# ml_M: *rf +# ml_t: *lgbm + + score: ["nuisance_space"] + +confidence_parameters: + level: [0.95, 0.90] # Confidence levels From 6e5ac77f7b804614e3e2ac19a379f07ccd73a835 Mon Sep 17 00:00:00 2001 From: bbd5721 Date: Wed, 27 Aug 2025 10:25:35 +0200 Subject: [PATCH 02/27] Fixes to make coverage simulation work --- monte-cover/src/montecover/plm/logistic_ate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monte-cover/src/montecover/plm/logistic_ate.py b/monte-cover/src/montecover/plm/logistic_ate.py index bef474ed..10e51784 100644 --- a/monte-cover/src/montecover/plm/logistic_ate.py +++ b/monte-cover/src/montecover/plm/logistic_ate.py @@ -96,7 +96,7 @@ def summarize_results(self): self.logger.info("Summarizing simulation results") # Group by parameter combinations - groupby_cols = ["Learner m", "Learner M", "Learner T", "Score", "level"] + groupby_cols = ["Learner m", "Learner M", "Learner t", "Score", "level"] aggregation_dict = { "Coverage": "mean", "CI Length": "mean", From 605fb0afef8def8f3f9e585c41ae3c21b58fed20 Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Thu, 28 Aug 2025 11:07:42 +0200 Subject: [PATCH 03/27] Resolved dataset creation bug --- monte-cover/src/montecover/plm/logistic_ate.py | 5 ++--- 
results/plm/logistic_ate_config.yml | 10 ++++------ scripts/plm/logistic_ate_config.yml | 12 ++++-------- 3 files changed, 10 insertions(+), 17 deletions(-) diff --git a/monte-cover/src/montecover/plm/logistic_ate.py b/monte-cover/src/montecover/plm/logistic_ate.py index 10e51784..265ad790 100644 --- a/monte-cover/src/montecover/plm/logistic_ate.py +++ b/monte-cover/src/montecover/plm/logistic_ate.py @@ -114,11 +114,10 @@ def summarize_results(self): def _generate_dml_data(self, dgp_params) -> dml.DoubleMLData: """Generate data for the simulation.""" - data = make_logistic_LZZ2020( + dml_data = make_logistic_LZZ2020( alpha=dgp_params["theta"], n_obs=dgp_params["n_obs"], dim_x=dgp_params["dim_x"], - return_type="DataFrame", + return_type="DoubleMLData", ) - dml_data = dml.DoubleMLData(data, "y", "d", "p") return dml_data diff --git a/results/plm/logistic_ate_config.yml b/results/plm/logistic_ate_config.yml index 94cf9e1c..4b2b1693 100644 --- a/results/plm/logistic_ate_config.yml +++ b/results/plm/logistic_ate_config.yml @@ -15,11 +15,9 @@ learner_definitions: name: LassoCV rf: &id001 name: RF Regr. - params: - n_estimators: 200 - max_features: 10 - max_depth: 5 - min_samples_leaf: 20 + rf-class: &id002 + name: RF Clas. + params: null lgbm: name: LGBM Regr. params: @@ -28,7 +26,7 @@ learner_definitions: dml_parameters: learners: - ml_m: *id001 - ml_M: *id001 + ml_M: *id002 ml_t: *id001 score: - nuisance_space diff --git a/scripts/plm/logistic_ate_config.yml b/scripts/plm/logistic_ate_config.yml index 5d14ce23..8e73a4d7 100644 --- a/scripts/plm/logistic_ate_config.yml +++ b/scripts/plm/logistic_ate_config.yml @@ -19,18 +19,14 @@ learner_definitions: rf: &rf name: "RF Regr." params: - n_estimators: 200 - max_features: 10 - max_depth: 5 - min_samples_leaf: 20 + n_estimators: 100 + max_features: "sqrt" rf-class: &rf-class name: "RF Clas." 
params: - n_estimators: 200 - max_features: 10 - max_depth: 5 - min_samples_leaf: 20 + n_estimators: 100 + max_features: "sqrt" lgbm: &lgbm name: "LGBM Regr." From 056b07799497db968ebad61a0367c443d3aee952 Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Thu, 28 Aug 2025 14:50:28 +0200 Subject: [PATCH 04/27] Changed sim config to include lgbm and lasso, instrument score --- scripts/plm/logistic_ate_config.yml | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/scripts/plm/logistic_ate_config.yml b/scripts/plm/logistic_ate_config.yml index 8e73a4d7..83b299ec 100644 --- a/scripts/plm/logistic_ate_config.yml +++ b/scripts/plm/logistic_ate_config.yml @@ -34,17 +34,23 @@ learner_definitions: n_estimators: 500 learning_rate: 0.01 + lgbm: &lgbm-class + name: "LGBM Clas." + params: + n_estimators: 500 + learning_rate: 0.01 + dml_parameters: learners: -# - ml_m: *lasso -# ml_M: *lasso -# ml_t: *lasso + - ml_m: *lasso + ml_M: *lasso + ml_t: *lasso - ml_m: *rf ml_M: *rf-class ml_t: *rf -# - ml_m: *lgbm -# ml_M: *lgbm -# ml_t: *lgbm + - ml_m: *lgbm + ml_M: *lgbm-class + ml_t: *lgbm # - ml_m: *rf # ml_M: *lgbm # ml_t: *lgbm @@ -64,7 +70,7 @@ dml_parameters: # ml_M: *rf # ml_t: *lgbm - score: ["nuisance_space"] + score: ["nuisance_space", "instrument"] confidence_parameters: level: [0.95, 0.90] # Confidence levels From 56858978847ee8e4658ddd0ab4d984460133ff90 Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Thu, 28 Aug 2025 16:00:35 +0200 Subject: [PATCH 05/27] Changed sim config to include lgbm and lasso, instrument score --- scripts/plm/logistic_ate_config.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/plm/logistic_ate_config.yml b/scripts/plm/logistic_ate_config.yml index 83b299ec..a2785021 100644 --- a/scripts/plm/logistic_ate_config.yml +++ b/scripts/plm/logistic_ate_config.yml @@ -16,6 +16,9 @@ learner_definitions: lasso: &lasso name: "LassoCV" + logistic: &logistic + name: "Logistic" + rf: &rf 
name: "RF Regr." params: @@ -34,7 +37,7 @@ learner_definitions: n_estimators: 500 learning_rate: 0.01 - lgbm: &lgbm-class + lgbm-class: &lgbm-class name: "LGBM Clas." params: n_estimators: 500 @@ -43,7 +46,7 @@ learner_definitions: dml_parameters: learners: - ml_m: *lasso - ml_M: *lasso + ml_M: *logistic ml_t: *lasso - ml_m: *rf ml_M: *rf-class From 8fd390982f586513e16633d1000a2b2447f3400b Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Thu, 28 Aug 2025 22:12:58 +0200 Subject: [PATCH 06/27] Full combination of learners --- scripts/plm/logistic_ate_config.yml | 54 +++++++++++++++++++---------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/scripts/plm/logistic_ate_config.yml b/scripts/plm/logistic_ate_config.yml index a2785021..8cb08b0f 100644 --- a/scripts/plm/logistic_ate_config.yml +++ b/scripts/plm/logistic_ate_config.yml @@ -54,24 +54,42 @@ dml_parameters: - ml_m: *lgbm ml_M: *lgbm-class ml_t: *lgbm -# - ml_m: *rf -# ml_M: *lgbm -# ml_t: *lgbm -# - ml_m: *lgbm -# ml_M: *rf -# ml_t: *lgbm -# - ml_m: *lgbm -# ml_M: *lgbm -# ml_t: *rf -# - ml_m: *lgbm -# ml_M: *rf -# ml_t: *rf -# - ml_m: *rf -# ml_M: *lgbm -# ml_t: *rf -# - ml_m: *rf -# ml_M: *rf -# ml_t: *lgbm + - ml_m: *rf + ml_M: *lgbm + ml_t: *lgbm-class + - ml_m: *lgbm + ml_M: *rf-class + ml_t: *lgbm + - ml_m: *lgbm + ml_M: *lgbm-class + ml_t: *rf + - ml_m: *lgbm + ml_M: *rf-class + ml_t: *rf + - ml_m: *rf + ml_M: *lgbm-class + ml_t: *rf + - ml_m: *rf + ml_M: *rf-class + ml_t: *lgbm + - ml_m: *lasso + ml_M: *lgbm + ml_t: *lgbm-class + - ml_m: *lgbm + ml_M: *logistic + ml_t: *lgbm + - ml_m: *lgbm + ml_M: *lgbm-class + ml_t: *lasso + - ml_m: *lasso + ml_M: *rf-class + ml_t: *rf + - ml_m: *rf + ml_M: *logistic + ml_t: *rf + - ml_m: *rf + ml_M: *rf-class + ml_t: *lasso score: ["nuisance_space", "instrument"] From f99294aefb8cff3b80b6513141b94a8802fcd523 Mon Sep 17 00:00:00 2001 From: bbd5721 Date: Mon, 1 Sep 2025 14:58:45 +0200 Subject: [PATCH 07/27] Full combination of learners fixes --- 
scripts/plm/logistic_ate_config.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/plm/logistic_ate_config.yml b/scripts/plm/logistic_ate_config.yml index 8cb08b0f..aef09ff9 100644 --- a/scripts/plm/logistic_ate_config.yml +++ b/scripts/plm/logistic_ate_config.yml @@ -55,8 +55,8 @@ dml_parameters: ml_M: *lgbm-class ml_t: *lgbm - ml_m: *rf - ml_M: *lgbm - ml_t: *lgbm-class + ml_M: *lgbm-class + ml_t: *lgbm - ml_m: *lgbm ml_M: *rf-class ml_t: *lgbm @@ -73,8 +73,8 @@ dml_parameters: ml_M: *rf-class ml_t: *lgbm - ml_m: *lasso - ml_M: *lgbm - ml_t: *lgbm-class + ml_M: *lgbm-class + ml_t: *lgbm - ml_m: *lgbm ml_M: *logistic ml_t: *lgbm From 787402b2bb2c8f87b63bba176aa643f66c4bb201 Mon Sep 17 00:00:00 2001 From: bbd5721 Date: Wed, 3 Sep 2025 10:29:53 +0200 Subject: [PATCH 08/27] Catch convergence warnings --- monte-cover/src/montecover/plm/logistic_ate.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/monte-cover/src/montecover/plm/logistic_ate.py b/monte-cover/src/montecover/plm/logistic_ate.py index 265ad790..585234ca 100644 --- a/monte-cover/src/montecover/plm/logistic_ate.py +++ b/monte-cover/src/montecover/plm/logistic_ate.py @@ -1,3 +1,4 @@ +import warnings from typing import Any, Dict, Optional import doubleml as dml @@ -16,6 +17,7 @@ def __init__( suppress_warnings: bool = True, log_level: str = "INFO", log_file: Optional[str] = None, + use_failed_scores: bool = False, ): super().__init__( config_file=config_file, @@ -27,6 +29,8 @@ def __init__( # Calculate oracle values self._calculate_oracle_values() + self._use_failed_scores = use_failed_scores + def _process_config_parameters(self): """Process simulation-specific parameters from config""" # Process ML models in parameter grid @@ -61,7 +65,15 @@ def run_single_rep(self, dml_data, dml_params) -> Dict[str, Any]: ml_t=ml_t, score=score,) - dml_model.fit() + if self._use_failed_scores: + dml_model.fit() + else: + warnings.filterwarnings("error") + 
try: + dml_model.fit() + except Warning as w: + return None + warnings.resetwarnings() result = { "coverage": [], From 43fed25bba9e577a7fc007883f24da9e0a190b78 Mon Sep 17 00:00:00 2001 From: bbd5721 Date: Wed, 3 Sep 2025 10:39:01 +0200 Subject: [PATCH 09/27] Short config for test --- scripts/plm/logistic_ate_config.yml | 89 +++++++++++++++-------------- 1 file changed, 45 insertions(+), 44 deletions(-) diff --git a/scripts/plm/logistic_ate_config.yml b/scripts/plm/logistic_ate_config.yml index aef09ff9..cca3d122 100644 --- a/scripts/plm/logistic_ate_config.yml +++ b/scripts/plm/logistic_ate_config.yml @@ -2,7 +2,7 @@ simulation_parameters: repetitions: 1000 - max_runtime: 19800 # 5.5 hours in seconds + max_runtime: 86400 # 24 hours in seconds random_seed: 42 n_jobs: -2 @@ -48,50 +48,51 @@ dml_parameters: - ml_m: *lasso ml_M: *logistic ml_t: *lasso - - ml_m: *rf - ml_M: *rf-class - ml_t: *rf - - ml_m: *lgbm - ml_M: *lgbm-class - ml_t: *lgbm - - ml_m: *rf - ml_M: *lgbm-class - ml_t: *lgbm - - ml_m: *lgbm - ml_M: *rf-class - ml_t: *lgbm - - ml_m: *lgbm - ml_M: *lgbm-class - ml_t: *rf - - ml_m: *lgbm - ml_M: *rf-class - ml_t: *rf - - ml_m: *rf - ml_M: *lgbm-class - ml_t: *rf - - ml_m: *rf - ml_M: *rf-class - ml_t: *lgbm - - ml_m: *lasso - ml_M: *lgbm-class - ml_t: *lgbm - - ml_m: *lgbm - ml_M: *logistic - ml_t: *lgbm - - ml_m: *lgbm - ml_M: *lgbm-class - ml_t: *lasso - - ml_m: *lasso - ml_M: *rf-class - ml_t: *rf - - ml_m: *rf - ml_M: *logistic - ml_t: *rf - - ml_m: *rf - ml_M: *rf-class - ml_t: *lasso +# - ml_m: *rf +# ml_M: *rf-class +# ml_t: *rf +# - ml_m: *lgbm +# ml_M: *lgbm-class +# ml_t: *lgbm +# - ml_m: *rf +# ml_M: *lgbm-class +# ml_t: *lgbm +# - ml_m: *lgbm +# ml_M: *rf-class +# ml_t: *lgbm +# - ml_m: *lgbm +# ml_M: *lgbm-class +# ml_t: *rf +# - ml_m: *lgbm +# ml_M: *rf-class +# ml_t: *rf +# - ml_m: *rf +# ml_M: *lgbm-class +# ml_t: *rf +# - ml_m: *rf +# ml_M: *rf-class +# ml_t: *lgbm +# - ml_m: *lasso +# ml_M: *lgbm-class +# ml_t: *lgbm +# - ml_m: *lgbm +# 
ml_M: *logistic +# ml_t: *lgbm +# - ml_m: *lgbm +# ml_M: *lgbm-class +# ml_t: *lasso +# - ml_m: *lasso +# ml_M: *rf-class +# ml_t: *rf +# - ml_m: *rf +# ml_M: *logistic +# ml_t: *rf +# - ml_m: *rf +# ml_M: *rf-class +# ml_t: *lasso - score: ["nuisance_space", "instrument"] +# score: ["nuisance_space", "instrument"] + score: ["nuisance_space"] confidence_parameters: level: [0.95, 0.90] # Confidence levels From 03173bcfd1a0bc24ef47358e663f21c0909a03d5 Mon Sep 17 00:00:00 2001 From: bbd5721 Date: Wed, 3 Sep 2025 14:17:18 +0200 Subject: [PATCH 10/27] Short config for test update --- .../src/montecover/plm/logistic_ate.py | 1 + results/plm/logistic_ate_config.yml | 24 ++++++++++++++----- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/monte-cover/src/montecover/plm/logistic_ate.py b/monte-cover/src/montecover/plm/logistic_ate.py index 585234ca..14c33c85 100644 --- a/monte-cover/src/montecover/plm/logistic_ate.py +++ b/monte-cover/src/montecover/plm/logistic_ate.py @@ -72,6 +72,7 @@ def run_single_rep(self, dml_data, dml_params) -> Dict[str, Any]: try: dml_model.fit() except Warning as w: + self.logger.debug(f"Warning during fitting: {w}. Returning None for this repetition.") return None warnings.resetwarnings() diff --git a/results/plm/logistic_ate_config.yml b/results/plm/logistic_ate_config.yml index 4b2b1693..829567dd 100644 --- a/results/plm/logistic_ate_config.yml +++ b/results/plm/logistic_ate_config.yml @@ -1,6 +1,6 @@ simulation_parameters: - repetitions: 1000 - max_runtime: 19800 + repetitions: 10 + max_runtime: 86400 random_seed: 42 n_jobs: -2 dgp_parameters: @@ -11,18 +11,30 @@ dgp_parameters: dim_x: - 20 learner_definitions: - lasso: + lasso: &id001 name: LassoCV - rf: &id001 + logistic: &id002 + name: Logistic + rf: name: RF Regr. - rf-class: &id002 + params: + n_estimators: 100 + max_features: sqrt + rf-class: name: RF Clas. - params: null + params: + n_estimators: 100 + max_features: sqrt lgbm: name: LGBM Regr. 
params: n_estimators: 500 learning_rate: 0.01 + lgbm-class: + name: LGBM Clas. + params: + n_estimators: 500 + learning_rate: 0.01 dml_parameters: learners: - ml_m: *id001 From 61982ab96be141362d7c2e24b9d75eaf66e302ca Mon Sep 17 00:00:00 2001 From: bbd5721 Date: Wed, 3 Sep 2025 14:21:55 +0200 Subject: [PATCH 11/27] Added logging --- monte-cover/src/montecover/plm/logistic_ate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monte-cover/src/montecover/plm/logistic_ate.py b/monte-cover/src/montecover/plm/logistic_ate.py index 14c33c85..3de81a13 100644 --- a/monte-cover/src/montecover/plm/logistic_ate.py +++ b/monte-cover/src/montecover/plm/logistic_ate.py @@ -101,7 +101,7 @@ def run_single_rep(self, dml_data, dml_params) -> Dict[str, Any]: ) for key, res in level_result.items(): result[key].append(res) - + self.logger.info(result) return result def summarize_results(self): From 03abe6f41a6ff9d991b2a013d6148cf223ef4dfa Mon Sep 17 00:00:00 2001 From: bbd5721 Date: Wed, 3 Sep 2025 14:23:32 +0200 Subject: [PATCH 12/27] Added logging --- monte-cover/src/montecover/plm/logistic_ate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monte-cover/src/montecover/plm/logistic_ate.py b/monte-cover/src/montecover/plm/logistic_ate.py index 3de81a13..78fdedd6 100644 --- a/monte-cover/src/montecover/plm/logistic_ate.py +++ b/monte-cover/src/montecover/plm/logistic_ate.py @@ -101,7 +101,7 @@ def run_single_rep(self, dml_data, dml_params) -> Dict[str, Any]: ) for key, res in level_result.items(): result[key].append(res) - self.logger.info(result) + self.logger.info(f"Results for loop {result}") return result def summarize_results(self): From ea0fb3f5ae876e75c0053a9db43dfbeb8368659a Mon Sep 17 00:00:00 2001 From: bbd5721 Date: Wed, 3 Sep 2025 14:25:19 +0200 Subject: [PATCH 13/27] Added logging --- scripts/plm/logistic_ate_config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/plm/logistic_ate_config.yml 
b/scripts/plm/logistic_ate_config.yml index cca3d122..c1226503 100644 --- a/scripts/plm/logistic_ate_config.yml +++ b/scripts/plm/logistic_ate_config.yml @@ -4,7 +4,7 @@ simulation_parameters: repetitions: 1000 max_runtime: 86400 # 24 hours in seconds random_seed: 42 - n_jobs: -2 + n_jobs: 1 #-2 dgp_parameters: theta: [0.5] # Treatment effect From c2da782fe0ed1ca89df7aed0059e8be64d70ce93 Mon Sep 17 00:00:00 2001 From: bbd5721 Date: Wed, 3 Sep 2025 14:32:23 +0200 Subject: [PATCH 14/27] Print statements --- monte-cover/src/montecover/plm/logistic_ate.py | 3 ++- scripts/plm/logistic_ate.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/monte-cover/src/montecover/plm/logistic_ate.py b/monte-cover/src/montecover/plm/logistic_ate.py index 78fdedd6..58b6355e 100644 --- a/monte-cover/src/montecover/plm/logistic_ate.py +++ b/monte-cover/src/montecover/plm/logistic_ate.py @@ -25,7 +25,7 @@ def __init__( log_level=log_level, log_file=log_file, ) - + print("In LogisticATECoverageSimulation init") # Calculate oracle values self._calculate_oracle_values() @@ -51,6 +51,7 @@ def _calculate_oracle_values(self): def run_single_rep(self, dml_data, dml_params) -> Dict[str, Any]: """Run a single repetition with the given parameters.""" # Extract parameters + print("Running single rep") learner_config = dml_params["learners"] learner_m_name, ml_m = create_learner_from_config(learner_config["ml_m"]) learner_M_name, ml_M = create_learner_from_config(learner_config["ml_M"]) diff --git a/scripts/plm/logistic_ate.py b/scripts/plm/logistic_ate.py index 8c03556d..5a668780 100644 --- a/scripts/plm/logistic_ate.py +++ b/scripts/plm/logistic_ate.py @@ -6,6 +6,7 @@ log_level="INFO", log_file="logs/plm/logistic_ate_sim.log", ) +print("Calling file") sim.run_simulation() sim.save_results(output_path="results/plm/", file_prefix="logistic_ate") From 64d8931f47a50c00e9c6d375f521eb377d7412d4 Mon Sep 17 00:00:00 2001 From: bbd5721 Date: Wed, 3 Sep 2025 14:36:28 +0200 Subject: [PATCH 
15/27] Print statements --- monte-cover/src/montecover/plm/logistic_ate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monte-cover/src/montecover/plm/logistic_ate.py b/monte-cover/src/montecover/plm/logistic_ate.py index 58b6355e..be368a8f 100644 --- a/monte-cover/src/montecover/plm/logistic_ate.py +++ b/monte-cover/src/montecover/plm/logistic_ate.py @@ -102,7 +102,7 @@ def run_single_rep(self, dml_data, dml_params) -> Dict[str, Any]: ) for key, res in level_result.items(): result[key].append(res) - self.logger.info(f"Results for loop {result}") + print(f"Results for loop {result}") return result def summarize_results(self): From ce6c859d89381ae9d6f85f21f2bd9aa097448065 Mon Sep 17 00:00:00 2001 From: bbd5721 Date: Wed, 3 Sep 2025 14:36:54 +0200 Subject: [PATCH 16/27] Print statements --- monte-cover/src/montecover/plm/logistic_ate.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/monte-cover/src/montecover/plm/logistic_ate.py b/monte-cover/src/montecover/plm/logistic_ate.py index be368a8f..8d618a6a 100644 --- a/monte-cover/src/montecover/plm/logistic_ate.py +++ b/monte-cover/src/montecover/plm/logistic_ate.py @@ -73,7 +73,8 @@ def run_single_rep(self, dml_data, dml_params) -> Dict[str, Any]: try: dml_model.fit() except Warning as w: - self.logger.debug(f"Warning during fitting: {w}. Returning None for this repetition.") + self.logger.info(f"Warning during fitting: {w}. 
Returning None for this repetition.") + print("Fit warning") return None warnings.resetwarnings() From fd35542154792cb537d729ce1ef040cd84d23a4c Mon Sep 17 00:00:00 2001 From: bbd5721 Date: Wed, 3 Sep 2025 14:58:35 +0200 Subject: [PATCH 17/27] Updated handling of failed convergence --- .../src/montecover/plm/logistic_ate.py | 20 +++++++------------ scripts/plm/logistic_ate_config.yml | 2 +- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/monte-cover/src/montecover/plm/logistic_ate.py b/monte-cover/src/montecover/plm/logistic_ate.py index 8d618a6a..9b373fe5 100644 --- a/monte-cover/src/montecover/plm/logistic_ate.py +++ b/monte-cover/src/montecover/plm/logistic_ate.py @@ -25,7 +25,7 @@ def __init__( log_level=log_level, log_file=log_file, ) - print("In LogisticATECoverageSimulation init") + # Calculate oracle values self._calculate_oracle_values() @@ -51,7 +51,6 @@ def _calculate_oracle_values(self): def run_single_rep(self, dml_data, dml_params) -> Dict[str, Any]: """Run a single repetition with the given parameters.""" # Extract parameters - print("Running single rep") learner_config = dml_params["learners"] learner_m_name, ml_m = create_learner_from_config(learner_config["ml_m"]) learner_M_name, ml_M = create_learner_from_config(learner_config["ml_M"]) @@ -64,19 +63,14 @@ def run_single_rep(self, dml_data, dml_params) -> Dict[str, Any]: ml_m=ml_m, ml_M=ml_M, ml_t=ml_t, - score=score,) + score=score, + error_on_convergence_failure= not self._use_failed_scores,) - if self._use_failed_scores: + try: dml_model.fit() - else: - warnings.filterwarnings("error") - try: - dml_model.fit() - except Warning as w: - self.logger.info(f"Warning during fitting: {w}. 
Returning None for this repetition.") - print("Fit warning") - return None - warnings.resetwarnings() + except RuntimeError as e: + self.logger.info(f"Exception during fit: {e}") + return None result = { "coverage": [], diff --git a/scripts/plm/logistic_ate_config.yml b/scripts/plm/logistic_ate_config.yml index c1226503..cca3d122 100644 --- a/scripts/plm/logistic_ate_config.yml +++ b/scripts/plm/logistic_ate_config.yml @@ -4,7 +4,7 @@ simulation_parameters: repetitions: 1000 max_runtime: 86400 # 24 hours in seconds random_seed: 42 - n_jobs: 1 #-2 + n_jobs: -2 dgp_parameters: theta: [0.5] # Treatment effect From 1498d0ed3db7a5eef7b16bd1326eac0957d53bcd Mon Sep 17 00:00:00 2001 From: bbd5721 Date: Wed, 3 Sep 2025 15:03:15 +0200 Subject: [PATCH 18/27] Removed debug msg --- monte-cover/src/montecover/plm/logistic_ate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monte-cover/src/montecover/plm/logistic_ate.py b/monte-cover/src/montecover/plm/logistic_ate.py index 9b373fe5..dc660cfa 100644 --- a/monte-cover/src/montecover/plm/logistic_ate.py +++ b/monte-cover/src/montecover/plm/logistic_ate.py @@ -97,7 +97,7 @@ def run_single_rep(self, dml_data, dml_params) -> Dict[str, Any]: ) for key, res in level_result.items(): result[key].append(res) - print(f"Results for loop {result}") + return result def summarize_results(self): From 67fb397e57fa9d7e6f97119386e2bf3c8291a98e Mon Sep 17 00:00:00 2001 From: bbd5721 Date: Wed, 3 Sep 2025 15:05:49 +0200 Subject: [PATCH 19/27] Full config --- scripts/plm/logistic_ate_config.yml | 87 ++++++++++++++--------------- 1 file changed, 43 insertions(+), 44 deletions(-) diff --git a/scripts/plm/logistic_ate_config.yml b/scripts/plm/logistic_ate_config.yml index cca3d122..10b8fcef 100644 --- a/scripts/plm/logistic_ate_config.yml +++ b/scripts/plm/logistic_ate_config.yml @@ -48,51 +48,50 @@ dml_parameters: - ml_m: *lasso ml_M: *logistic ml_t: *lasso -# - ml_m: *rf -# ml_M: *rf-class -# ml_t: *rf -# - ml_m: *lgbm -# ml_M: 
*lgbm-class -# ml_t: *lgbm -# - ml_m: *rf -# ml_M: *lgbm-class -# ml_t: *lgbm -# - ml_m: *lgbm -# ml_M: *rf-class -# ml_t: *lgbm -# - ml_m: *lgbm -# ml_M: *lgbm-class -# ml_t: *rf -# - ml_m: *lgbm -# ml_M: *rf-class -# ml_t: *rf -# - ml_m: *rf -# ml_M: *lgbm-class -# ml_t: *rf -# - ml_m: *rf -# ml_M: *rf-class -# ml_t: *lgbm -# - ml_m: *lasso -# ml_M: *lgbm-class -# ml_t: *lgbm -# - ml_m: *lgbm -# ml_M: *logistic -# ml_t: *lgbm -# - ml_m: *lgbm -# ml_M: *lgbm-class -# ml_t: *lasso -# - ml_m: *lasso -# ml_M: *rf-class -# ml_t: *rf -# - ml_m: *rf -# ml_M: *logistic -# ml_t: *rf -# - ml_m: *rf -# ml_M: *rf-class -# ml_t: *lasso + - ml_m: *rf + ml_M: *rf-class + ml_t: *rf + - ml_m: *lgbm + ml_M: *lgbm-class + ml_t: *lgbm + - ml_m: *rf + ml_M: *lgbm-class + ml_t: *lgbm + - ml_m: *lgbm + ml_M: *rf-class + ml_t: *lgbm + - ml_m: *lgbm + ml_M: *lgbm-class + ml_t: *rf + - ml_m: *lgbm + ml_M: *rf-class + ml_t: *rf + - ml_m: *rf + ml_M: *lgbm-class + ml_t: *rf + - ml_m: *rf + ml_M: *rf-class + ml_t: *lgbm + - ml_m: *lasso + ml_M: *lgbm-class + ml_t: *lgbm + - ml_m: *lgbm + ml_M: *logistic + ml_t: *lgbm + - ml_m: *lgbm + ml_M: *lgbm-class + ml_t: *lasso + - ml_m: *lasso + ml_M: *rf-class + ml_t: *rf + - ml_m: *rf + ml_M: *logistic + ml_t: *rf + - ml_m: *rf + ml_M: *rf-class + ml_t: *lasso -# score: ["nuisance_space", "instrument"] - score: ["nuisance_space"] + score: ["nuisance_space", "instrument"] confidence_parameters: level: [0.95, 0.90] # Confidence levels From 4e78105ce509853b2bf85c90daa3527388700a6d Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Fri, 5 Sep 2025 01:42:13 +0200 Subject: [PATCH 20/27] Simulation results --- results/plm/logistic_ate_config.yml | 53 ++++++++++++++++++++--- results/plm/logistic_ate_coverage.csv | 61 +++++++++++++++++++++++++++ results/plm/logistic_ate_metadata.csv | 2 +- 3 files changed, 110 insertions(+), 6 deletions(-) create mode 100644 results/plm/logistic_ate_coverage.csv diff --git a/results/plm/logistic_ate_config.yml 
b/results/plm/logistic_ate_config.yml index 829567dd..b203b920 100644 --- a/results/plm/logistic_ate_config.yml +++ b/results/plm/logistic_ate_config.yml @@ -1,5 +1,5 @@ simulation_parameters: - repetitions: 10 + repetitions: 1000 max_runtime: 86400 random_seed: 42 n_jobs: -2 @@ -15,22 +15,22 @@ learner_definitions: name: LassoCV logistic: &id002 name: Logistic - rf: + rf: &id003 name: RF Regr. params: n_estimators: 100 max_features: sqrt - rf-class: + rf-class: &id004 name: RF Clas. params: n_estimators: 100 max_features: sqrt - lgbm: + lgbm: &id005 name: LGBM Regr. params: n_estimators: 500 learning_rate: 0.01 - lgbm-class: + lgbm-class: &id006 name: LGBM Clas. params: n_estimators: 500 @@ -40,8 +40,51 @@ dml_parameters: - ml_m: *id001 ml_M: *id002 ml_t: *id001 + - ml_m: *id003 + ml_M: *id004 + ml_t: *id003 + - ml_m: *id005 + ml_M: *id006 + ml_t: *id005 + - ml_m: *id003 + ml_M: *id006 + ml_t: *id005 + - ml_m: *id005 + ml_M: *id004 + ml_t: *id005 + - ml_m: *id005 + ml_M: *id006 + ml_t: *id003 + - ml_m: *id005 + ml_M: *id004 + ml_t: *id003 + - ml_m: *id003 + ml_M: *id006 + ml_t: *id003 + - ml_m: *id003 + ml_M: *id004 + ml_t: *id005 + - ml_m: *id001 + ml_M: *id006 + ml_t: *id005 + - ml_m: *id005 + ml_M: *id002 + ml_t: *id005 + - ml_m: *id005 + ml_M: *id006 + ml_t: *id001 + - ml_m: *id001 + ml_M: *id004 + ml_t: *id003 + - ml_m: *id003 + ml_M: *id002 + ml_t: *id003 + - ml_m: *id003 + ml_M: *id004 + ml_t: *id001 score: - nuisance_space + - instrument confidence_parameters: level: - 0.95 diff --git a/results/plm/logistic_ate_coverage.csv b/results/plm/logistic_ate_coverage.csv new file mode 100644 index 00000000..920c3cf8 --- /dev/null +++ b/results/plm/logistic_ate_coverage.csv @@ -0,0 +1,61 @@ +Learner m,Learner M,Learner t,Score,level,Coverage,CI Length,Bias,repetition +LGBM Regr.,LGBM Clas.,LGBM Regr.,instrument,0.9,0.8867735470941884,0.6783720219284418,0.17182702238154213,998 +LGBM Regr.,LGBM Clas.,LGBM 
Regr.,instrument,0.95,0.9458917835671342,0.8083301208774294,0.17182702238154213,998 +LGBM Regr.,LGBM Clas.,LGBM Regr.,nuisance_space,0.9,0.886,0.5883608609896965,0.1546569991698314,1000 +LGBM Regr.,LGBM Clas.,LGBM Regr.,nuisance_space,0.95,0.942,0.7010752072754521,0.1546569991698314,1000 +LGBM Regr.,LGBM Clas.,LassoCV,instrument,0.9,0.8856569709127382,0.687914636116578,0.17843968090261725,997 +LGBM Regr.,LGBM Clas.,LassoCV,instrument,0.95,0.9398194583751254,0.819700847014181,0.17843968090261725,997 +LGBM Regr.,LGBM Clas.,LassoCV,nuisance_space,0.9,0.853,0.613277414594929,0.17455974016950299,1000 +LGBM Regr.,LGBM Clas.,LassoCV,nuisance_space,0.95,0.922,0.7307651121307722,0.17455974016950299,1000 +LGBM Regr.,LGBM Clas.,RF Regr.,instrument,0.9,0.833,0.6645257584558233,0.1981803920481237,1000 +LGBM Regr.,LGBM Clas.,RF Regr.,instrument,0.95,0.913,0.7918312803227949,0.1981803920481237,1000 +LGBM Regr.,LGBM Clas.,RF Regr.,nuisance_space,0.9,0.749,0.6389887792744618,0.2310882489727634,1000 +LGBM Regr.,LGBM Clas.,RF Regr.,nuisance_space,0.95,0.847,0.7614020927955242,0.2310882489727634,1000 +LGBM Regr.,Logistic,LGBM Regr.,instrument,0.9,0.8808808808808809,0.6011544597174262,0.15730144394486342,999 +LGBM Regr.,Logistic,LGBM Regr.,instrument,0.95,0.9269269269269269,0.7163197204212697,0.15730144394486342,999 +LGBM Regr.,Logistic,LGBM Regr.,nuisance_space,0.9,0.802,0.533982278217265,0.1735015501567642,1000 +LGBM Regr.,Logistic,LGBM Regr.,nuisance_space,0.95,0.893,0.6362791293643562,0.1735015501567642,1000 +LGBM Regr.,RF Clas.,LGBM Regr.,instrument,0.9,0.8808808808808809,0.6117037321129385,0.14924058625395906,999 +LGBM Regr.,RF Clas.,LGBM Regr.,instrument,0.95,0.938938938938939,0.7288899537961552,0.14924058625395906,999 +LGBM Regr.,RF Clas.,LGBM Regr.,nuisance_space,0.9,0.887,0.5255256282131954,0.12946206156000842,1000 +LGBM Regr.,RF Clas.,LGBM Regr.,nuisance_space,0.95,0.948,0.6262024093655342,0.12946206156000842,1000 +LGBM Regr.,RF Clas.,RF 
Regr.,instrument,0.9,0.893,0.6133564813843166,0.15711608477124128,1000 +LGBM Regr.,RF Clas.,RF Regr.,instrument,0.95,0.943,0.7308593260213176,0.15711608477124128,1000 +LGBM Regr.,RF Clas.,RF Regr.,nuisance_space,0.9,0.86,0.5540472193413977,0.15675464483344737,1000 +LGBM Regr.,RF Clas.,RF Regr.,nuisance_space,0.95,0.935,0.6601879813806316,0.15675464483344737,1000 +LassoCV,LGBM Clas.,LGBM Regr.,instrument,0.9,0.8062563067608476,0.6448097763855765,0.19653637418785105,991 +LassoCV,LGBM Clas.,LGBM Regr.,instrument,0.95,0.8890010090817356,0.7683382386658661,0.19653637418785105,991 +LassoCV,LGBM Clas.,LGBM Regr.,nuisance_space,0.9,0.72165991902834,0.5619651019188039,0.19918381058581103,988 +LassoCV,LGBM Clas.,LGBM Regr.,nuisance_space,0.95,0.840080971659919,0.6696227203940329,0.19918381058581103,988 +LassoCV,Logistic,LassoCV,instrument,0.9,0.9126506024096386,0.6493687054509357,0.15965331285568357,996 +LassoCV,Logistic,LassoCV,instrument,0.95,0.9618473895582329,0.7737705377043753,0.15965331285568357,996 +LassoCV,Logistic,LassoCV,nuisance_space,0.9,0.8682092555331992,0.5768393638614188,0.1458288654760023,994 +LassoCV,Logistic,LassoCV,nuisance_space,0.95,0.9356136820925554,0.6873464966781094,0.1458288654760023,994 +LassoCV,RF Clas.,RF Regr.,instrument,0.9,0.8667334669338678,0.5890487369844828,0.14213629243588016,998 +LassoCV,RF Clas.,RF Regr.,instrument,0.95,0.93687374749499,0.7018948620784813,0.14213629243588016,998 +LassoCV,RF Clas.,RF Regr.,nuisance_space,0.9,0.8908908908908909,0.5583249926493753,0.13040987029805642,999 +LassoCV,RF Clas.,RF Regr.,nuisance_space,0.95,0.9369369369369369,0.6652852626707622,0.13040987029805642,999 +RF Regr.,LGBM Clas.,LGBM Regr.,instrument,0.9,0.883,0.4286586066458282,0.10700456800013383,1000 +RF Regr.,LGBM Clas.,LGBM Regr.,instrument,0.95,0.939,0.510778233955119,0.10700456800013383,1000 +RF Regr.,LGBM Clas.,LGBM Regr.,nuisance_space,0.9,0.798,0.3832967523848996,0.11829755780901112,1000 +RF Regr.,LGBM Clas.,LGBM 
Regr.,nuisance_space,0.95,0.871,0.45672625074725515,0.11829755780901112,1000 +RF Regr.,LGBM Clas.,RF Regr.,instrument,0.9,0.866,0.42225079909506574,0.11434483968291848,1000 +RF Regr.,LGBM Clas.,RF Regr.,instrument,0.95,0.919,0.5031428603184782,0.11434483968291848,1000 +RF Regr.,LGBM Clas.,RF Regr.,nuisance_space,0.9,0.881,0.41648308996281536,0.10985709399222088,1000 +RF Regr.,LGBM Clas.,RF Regr.,nuisance_space,0.95,0.938,0.49627021099133717,0.10985709399222088,1000 +RF Regr.,Logistic,RF Regr.,instrument,0.9,0.856,0.38502789712056834,0.10721182765222284,1000 +RF Regr.,Logistic,RF Regr.,instrument,0.95,0.92,0.45878903692977124,0.10721182765222284,1000 +RF Regr.,Logistic,RF Regr.,nuisance_space,0.9,0.824,0.3771933481281758,0.11331805384094351,1000 +RF Regr.,Logistic,RF Regr.,nuisance_space,0.95,0.9,0.4494535960074909,0.11331805384094351,1000 +RF Regr.,RF Clas.,LGBM Regr.,instrument,0.9,0.828,0.38946263148586363,0.11262093701887263,1000 +RF Regr.,RF Clas.,LGBM Regr.,instrument,0.95,0.884,0.46407334885550183,0.11262093701887263,1000 +RF Regr.,RF Clas.,LGBM Regr.,nuisance_space,0.9,0.804,0.36190660207697933,0.10722868220974552,1000 +RF Regr.,RF Clas.,LGBM Regr.,nuisance_space,0.95,0.867,0.4312383145926426,0.10722868220974552,1000 +RF Regr.,RF Clas.,LassoCV,instrument,0.9,0.859,0.39360445751539874,0.10201463510531926,1000 +RF Regr.,RF Clas.,LassoCV,instrument,0.95,0.922,0.4690086389719632,0.10201463510531926,1000 +RF Regr.,RF Clas.,LassoCV,nuisance_space,0.9,0.847,0.37185525976227807,0.097545400580116,1000 +RF Regr.,RF Clas.,LassoCV,nuisance_space,0.95,0.905,0.44309287139830933,0.097545400580116,1000 +RF Regr.,RF Clas.,RF Regr.,instrument,0.9,0.885,0.3931395611851874,0.09840536307939636,1000 +RF Regr.,RF Clas.,RF Regr.,instrument,0.95,0.94,0.4684546808270991,0.09840536307939636,1000 +RF Regr.,RF Clas.,RF Regr.,nuisance_space,0.9,0.877,0.3834497709276788,0.09720459767352349,1000 +RF Regr.,RF Clas.,RF 
Regr.,nuisance_space,0.95,0.934,0.4569085835870289,0.09720459767352349,1000 diff --git a/results/plm/logistic_ate_metadata.csv b/results/plm/logistic_ate_metadata.csv index 99ae2900..ef34e596 100644 --- a/results/plm/logistic_ate_metadata.csv +++ b/results/plm/logistic_ate_metadata.csv @@ -1,2 +1,2 @@ DoubleML Version,Script,Date,Total Runtime (minutes),Python Version,Config File -0.10.dev0,LogisticATECoverageSimulation,2025-06-22 18:53,0.22107456922531127,3.12.2,scripts/plm/logistic_ate_config.yml +0.10.dev0,LogisticATECoverageSimulation,2025-09-03 22:35,447.33407898743945,3.12.9,scripts/plm/logistic_ate_config.yml From 5207f6010363c2884172b1765352a4b5a6eac8cc Mon Sep 17 00:00:00 2001 From: bbd5721 Date: Fri, 5 Sep 2025 02:15:22 +0200 Subject: [PATCH 21/27] Settings for render --- doc/_quarto-dev.yml | 1 + doc/_website.yml | 1 + doc/plm/logistic.qmd | 114 ++++++++++++++++++++++++++ results/plm/logistic_ate_metadata.csv | 1 + 4 files changed, 117 insertions(+) create mode 100644 doc/plm/logistic.qmd diff --git a/doc/_quarto-dev.yml b/doc/_quarto-dev.yml index 5c3587ab..5e934fc6 100644 --- a/doc/_quarto-dev.yml +++ b/doc/_quarto-dev.yml @@ -21,6 +21,7 @@ website: - plm/plr_gate.qmd - plm/plr_cate.qmd - plm/pliv.qmd + - plm/logistic.qmd # DID - did/did_pa.qmd - did/did_cs.qmd diff --git a/doc/_website.yml b/doc/_website.yml index 4bf06b85..a713e257 100644 --- a/doc/_website.yml +++ b/doc/_website.yml @@ -25,6 +25,7 @@ website: - plm/plr_gate.qmd - plm/plr_cate.qmd - plm/pliv.qmd + - plm/logistic.qmd - text: "DID" menu: - did/did_pa_multi.qmd diff --git a/doc/plm/logistic.qmd b/doc/plm/logistic.qmd new file mode 100644 index 00000000..7e943119 --- /dev/null +++ b/doc/plm/logistic.qmd @@ -0,0 +1,114 @@ +--- +title: "Logistic Models" + +jupyter: python3 +--- + +```{python} +#| echo: false + +import numpy as np +import pandas as pd +from itables import init_notebook_mode +import os +import sys + +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not 
in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import generate_and_show_styled_table + +init_notebook_mode(all_interactive=True) +``` + +## ATE Coverage + +The simulations are based on the the [make_logistic_LZZ2020](https://docs.doubleml.org/stable/api/generated/doubleml.datasets.make_plr_CCDDHNR2018.html)-DGP with $500$ observations. + +::: {.callout-note title="Metadata" collapse="true"} + +```{python} +#| echo: false +metadata_file = '../../results/plm/logistic_ate_metadata.csv' +metadata_df = pd.read_csv(metadata_file) +print(metadata_df.T.to_string(header=False)) +``` + +::: + +```{python} +#| echo: false + +# set up data and rename columns +df_coverage = pd.read_csv("../../results/plm/logistic_ate_coverage.csv", index_col=None) + +if "repetition" in df_coverage.columns and df_coverage["repetition"].nunique() == 1: + n_rep_coverage = df_coverage["repetition"].unique()[0] +elif "n_rep" in df_coverage.columns and df_coverage["n_rep"].nunique() == 1: + n_rep_coverage = df_coverage["n_rep"].unique()[0] +else: + n_rep_coverage = "N/A" # Fallback if n_rep cannot be determined + +display_columns_coverage = ["Learner m", "Learner M", "Learner t", "Bias", "CI Length", "Coverage"] +``` + +### Partialling out + +```{python} +# | echo: false + +generate_and_show_styled_table( + main_df=df_coverage, + filters={"level": 0.95, "Score": "nuisance_space"}, + display_cols=display_columns_coverage, + n_rep=n_rep_coverage, + level_col="level", +# rename_map={"Learner g": "Learner l"}, + coverage_highlight_cols=["Coverage"] +) +``` + +```{python} +#| echo: false + +generate_and_show_styled_table( + main_df=df_coverage, + filters={"level": 0.9, "Score": "nuisance_space"}, + display_cols=display_columns_coverage, + n_rep=n_rep_coverage, + level_col="level", +# rename_map={"Learner g": "Learner l"}, + coverage_highlight_cols=["Coverage"] +) +``` + +### IV-type + +For the IV-type score, the learners `ml_l` and `ml_g` are both set to the same type of learner (here 
**Learner g**). + +```{python} +#| echo: false + +generate_and_show_styled_table( + main_df=df_coverage, + filters={"level": 0.95, "Score": "instrument"}, + display_cols=display_columns_coverage, + n_rep=n_rep_coverage, + level_col="level", + coverage_highlight_cols=["Coverage"] +) +``` + +```{python} +#| echo: false + +generate_and_show_styled_table( + main_df=df_coverage, + filters={"level": 0.9, "Score": "instrument"}, + display_cols=display_columns_coverage, + n_rep=n_rep_coverage, + level_col="level", + coverage_highlight_cols=["Coverage"] +) +``` \ No newline at end of file diff --git a/results/plm/logistic_ate_metadata.csv b/results/plm/logistic_ate_metadata.csv index ef34e596..eead6aa7 100644 --- a/results/plm/logistic_ate_metadata.csv +++ b/results/plm/logistic_ate_metadata.csv @@ -1,2 +1,3 @@ DoubleML Version,Script,Date,Total Runtime (minutes),Python Version,Config File 0.10.dev0,LogisticATECoverageSimulation,2025-09-03 22:35,447.33407898743945,3.12.9,scripts/plm/logistic_ate_config.yml +0.10.dev0,LogisticATECoverageSimulation,2025-09-03 14:16,0.4242911458015442,3.12.11,scripts/plm/logistic_ate_config.yml From b9c3f10a28b73846df012ab7b7256ce215178b04 Mon Sep 17 00:00:00 2001 From: bbd5721 Date: Mon, 29 Sep 2025 12:06:31 -0700 Subject: [PATCH 22/27] Heading fixed, added dgp param to sim --- doc/plm/logistic.qmd | 5 +- scripts/plm/logistic_ate_config.yml | 73 +++++++++++++++-------------- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/doc/plm/logistic.qmd b/doc/plm/logistic.qmd index 7e943119..4d3e4854 100644 --- a/doc/plm/logistic.qmd +++ b/doc/plm/logistic.qmd @@ -53,7 +53,7 @@ else: display_columns_coverage = ["Learner m", "Learner M", "Learner t", "Bias", "CI Length", "Coverage"] ``` -### Partialling out +### Nuisance space ```{python} # | echo: false @@ -83,9 +83,8 @@ generate_and_show_styled_table( ) ``` -### IV-type +### Instrument -For the IV-type score, the learners `ml_l` and `ml_g` are both set to the same type of learner (here 
**Learner g**). ```{python} #| echo: false diff --git a/scripts/plm/logistic_ate_config.yml b/scripts/plm/logistic_ate_config.yml index 10b8fcef..10aa1b91 100644 --- a/scripts/plm/logistic_ate_config.yml +++ b/scripts/plm/logistic_ate_config.yml @@ -10,6 +10,7 @@ dgp_parameters: theta: [0.5] # Treatment effect n_obs: [500] # Sample size dim_x: [20] # Number of covariates + balanced_r0: [False] # Whether to use balanced r0 function # Define reusable learner configurations learner_definitions: @@ -54,42 +55,42 @@ dml_parameters: - ml_m: *lgbm ml_M: *lgbm-class ml_t: *lgbm - - ml_m: *rf - ml_M: *lgbm-class - ml_t: *lgbm - - ml_m: *lgbm - ml_M: *rf-class - ml_t: *lgbm - - ml_m: *lgbm - ml_M: *lgbm-class - ml_t: *rf - - ml_m: *lgbm - ml_M: *rf-class - ml_t: *rf - - ml_m: *rf - ml_M: *lgbm-class - ml_t: *rf - - ml_m: *rf - ml_M: *rf-class - ml_t: *lgbm - - ml_m: *lasso - ml_M: *lgbm-class - ml_t: *lgbm - - ml_m: *lgbm - ml_M: *logistic - ml_t: *lgbm - - ml_m: *lgbm - ml_M: *lgbm-class - ml_t: *lasso - - ml_m: *lasso - ml_M: *rf-class - ml_t: *rf - - ml_m: *rf - ml_M: *logistic - ml_t: *rf - - ml_m: *rf - ml_M: *rf-class - ml_t: *lasso +# - ml_m: *rf +# ml_M: *lgbm-class +# ml_t: *lgbm +# - ml_m: *lgbm +# ml_M: *rf-class +# ml_t: *lgbm +# - ml_m: *lgbm +# ml_M: *lgbm-class +# ml_t: *rf +# - ml_m: *lgbm +# ml_M: *rf-class +# ml_t: *rf +# - ml_m: *rf +# ml_M: *lgbm-class +# ml_t: *rf +# - ml_m: *rf +# ml_M: *rf-class +# ml_t: *lgbm +# - ml_m: *lasso +# ml_M: *lgbm-class +# ml_t: *lgbm +# - ml_m: *lgbm +# ml_M: *logistic +# ml_t: *lgbm +# - ml_m: *lgbm +# ml_M: *lgbm-class +# ml_t: *lasso +# - ml_m: *lasso +# ml_M: *rf-class +# ml_t: *rf +# - ml_m: *rf +# ml_M: *logistic +# ml_t: *rf +# - ml_m: *rf +# ml_M: *rf-class +# ml_t: *lasso score: ["nuisance_space", "instrument"] From 3ba3dc24eb74ba596b0abe0aa67a8c3990a4896d Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Mon, 6 Oct 2025 10:21:20 -0700 Subject: [PATCH 23/27] DGP param pass fix --- 
monte-cover/src/montecover/plm/logistic_ate.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/monte-cover/src/montecover/plm/logistic_ate.py b/monte-cover/src/montecover/plm/logistic_ate.py index dc660cfa..4f43409c 100644 --- a/monte-cover/src/montecover/plm/logistic_ate.py +++ b/monte-cover/src/montecover/plm/logistic_ate.py @@ -123,10 +123,4 @@ def summarize_results(self): def _generate_dml_data(self, dgp_params) -> dml.DoubleMLData: """Generate data for the simulation.""" - dml_data = make_logistic_LZZ2020( - alpha=dgp_params["theta"], - n_obs=dgp_params["n_obs"], - dim_x=dgp_params["dim_x"], - return_type="DoubleMLData", - ) - return dml_data + return make_logistic_LZZ2020(**dgp_params) From 47252582c79327c97ee8f4d646d2a4b9806bc64e Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Mon, 27 Oct 2025 13:36:00 -0700 Subject: [PATCH 24/27] Renaming of Logistic to LPLR --- .../plm/{logistic_ate.py => lplr_ate.py} | 8 ++++---- scripts/plm/logistic_ate.py | 14 -------------- scripts/plm/lplr_ate.py | 14 ++++++++++++++ ...logistic_ate_config.yml => lplr_ate_config.yml} | 0 4 files changed, 18 insertions(+), 18 deletions(-) rename monte-cover/src/montecover/plm/{logistic_ate.py => lplr_ate.py} (95%) delete mode 100644 scripts/plm/logistic_ate.py create mode 100644 scripts/plm/lplr_ate.py rename scripts/plm/{logistic_ate_config.yml => lplr_ate_config.yml} (100%) diff --git a/monte-cover/src/montecover/plm/logistic_ate.py b/monte-cover/src/montecover/plm/lplr_ate.py similarity index 95% rename from monte-cover/src/montecover/plm/logistic_ate.py rename to monte-cover/src/montecover/plm/lplr_ate.py index 4f43409c..15b88e64 100644 --- a/monte-cover/src/montecover/plm/logistic_ate.py +++ b/monte-cover/src/montecover/plm/lplr_ate.py @@ -2,13 +2,13 @@ from typing import Any, Dict, Optional import doubleml as dml -from doubleml.datasets import make_logistic_LZZ2020 +from doubleml.plm.data import make_lplr_LZZ2020 from montecover.base import 
BaseSimulation from montecover.utils import create_learner_from_config -class LogisticATECoverageSimulation(BaseSimulation): +class LPLRATECoverageSimulation(BaseSimulation): """Simulation class for coverage properties of DoubleMLPLR for ATE estimation.""" def __init__( @@ -58,7 +58,7 @@ def run_single_rep(self, dml_data, dml_params) -> Dict[str, Any]: score = dml_params["score"] # Model - dml_model = dml.DoubleMLLogit( + dml_model = dml.DoubleMLLPLR( obj_dml_data=dml_data, ml_m=ml_m, ml_M=ml_M, @@ -123,4 +123,4 @@ def summarize_results(self): def _generate_dml_data(self, dgp_params) -> dml.DoubleMLData: """Generate data for the simulation.""" - return make_logistic_LZZ2020(**dgp_params) + return make_lplr_LZZ2020(**dgp_params) diff --git a/scripts/plm/logistic_ate.py b/scripts/plm/logistic_ate.py deleted file mode 100644 index 5a668780..00000000 --- a/scripts/plm/logistic_ate.py +++ /dev/null @@ -1,14 +0,0 @@ -from montecover.plm import LogisticATECoverageSimulation - -# Create and run simulation with config file -sim = LogisticATECoverageSimulation( - config_file="scripts/plm/logistic_ate_config.yml", - log_level="INFO", - log_file="logs/plm/logistic_ate_sim.log", -) -print("Calling file") -sim.run_simulation() -sim.save_results(output_path="results/plm/", file_prefix="logistic_ate") - -# Save config file for reproducibility -sim.save_config("results/plm/logistic_ate_config.yml") \ No newline at end of file diff --git a/scripts/plm/lplr_ate.py b/scripts/plm/lplr_ate.py new file mode 100644 index 00000000..a98b2d46 --- /dev/null +++ b/scripts/plm/lplr_ate.py @@ -0,0 +1,14 @@ +from montecover.plm import LPLRATECoverageSimulation + +# Create and run simulation with config file +sim = LPLRATECoverageSimulation( + config_file="scripts/plm/lplr_ate_config.yml", + log_level="INFO", + log_file="logs/plm/plr_ate_sim.log", +) +print("Calling file") +sim.run_simulation() +sim.save_results(output_path="results/plm/", file_prefix="lplr_ate") + +# Save config file for 
reproducibility +sim.save_config("results/plm/lplr_ate_config.yml") \ No newline at end of file diff --git a/scripts/plm/logistic_ate_config.yml b/scripts/plm/lplr_ate_config.yml similarity index 100% rename from scripts/plm/logistic_ate_config.yml rename to scripts/plm/lplr_ate_config.yml From ba948669608fb7583640e4971f3dd227c56f07cb Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Mon, 27 Oct 2025 13:38:25 -0700 Subject: [PATCH 25/27] Typo --- scripts/plm/lplr_ate_config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/plm/lplr_ate_config.yml b/scripts/plm/lplr_ate_config.yml index 10aa1b91..da804ed9 100644 --- a/scripts/plm/lplr_ate_config.yml +++ b/scripts/plm/lplr_ate_config.yml @@ -1,4 +1,4 @@ -# Simulation parameters for PLR ATE Coverage +# Simulation parameters for LPLR ATE Coverage simulation_parameters: repetitions: 1000 From 251362345fce9dd9c49d6d22262166d8a0c7fb32 Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Mon, 27 Oct 2025 23:30:51 -0700 Subject: [PATCH 26/27] Renamings --- doc/_quarto-dev.yml | 2 +- doc/_website.yml | 2 +- doc/plm/{logistic.qmd => lplr.qmd} | 8 ++++---- monte-cover/src/montecover/plm/__init__.py | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) rename doc/plm/{logistic.qmd => lplr.qmd} (86%) diff --git a/doc/_quarto-dev.yml b/doc/_quarto-dev.yml index 5e934fc6..b73319b3 100644 --- a/doc/_quarto-dev.yml +++ b/doc/_quarto-dev.yml @@ -21,7 +21,7 @@ website: - plm/plr_gate.qmd - plm/plr_cate.qmd - plm/pliv.qmd - - plm/logistic.qmd + - plm/lplr.qmd # DID - did/did_pa.qmd - did/did_cs.qmd diff --git a/doc/_website.yml b/doc/_website.yml index a713e257..98c2a044 100644 --- a/doc/_website.yml +++ b/doc/_website.yml @@ -25,7 +25,7 @@ website: - plm/plr_gate.qmd - plm/plr_cate.qmd - plm/pliv.qmd - - plm/logistic.qmd + - plm/lplr.qmd - text: "DID" menu: - did/did_pa_multi.qmd diff --git a/doc/plm/logistic.qmd b/doc/plm/lplr.qmd similarity index 86% rename from doc/plm/logistic.qmd rename to 
doc/plm/lplr.qmd index 4d3e4854..200e5782 100644 --- a/doc/plm/logistic.qmd +++ b/doc/plm/lplr.qmd @@ -1,5 +1,5 @@ --- -title: "Logistic Models" +title: "Logistic Partial Linear Regression Models" jupyter: python3 --- @@ -24,13 +24,13 @@ init_notebook_mode(all_interactive=True) ## ATE Coverage -The simulations are based on the the [make_logistic_LZZ2020](https://docs.doubleml.org/stable/api/generated/doubleml.datasets.make_plr_CCDDHNR2018.html)-DGP with $500$ observations. +The simulations are based on the [make_lplr_LZZ2020](https://docs.doubleml.org/stable/api/generated/doubleml.datasets.make_lplr_LZZ2020.html)-DGP with $500$ observations. ::: {.callout-note title="Metadata" collapse="true"} ```{python} #| echo: false -metadata_file = '../../results/plm/logistic_ate_metadata.csv' +metadata_file = '../../results/plm/lplr_ate_metadata.csv' metadata_df = pd.read_csv(metadata_file) print(metadata_df.T.to_string(header=False)) ``` @@ -41,7 +41,7 @@ print(metadata_df.T.to_string(header=False)) #| echo: false # set up data and rename columns -df_coverage = pd.read_csv("../../results/plm/logistic_ate_coverage.csv", index_col=None) +df_coverage = pd.read_csv("../../results/plm/lplr_ate_coverage.csv", index_col=None) if "repetition" in df_coverage.columns and df_coverage["repetition"].nunique() == 1: n_rep_coverage = df_coverage["repetition"].unique()[0] diff --git a/monte-cover/src/montecover/plm/__init__.py b/monte-cover/src/montecover/plm/__init__.py index 3707ee6f..5d995c92 100644 --- a/monte-cover/src/montecover/plm/__init__.py +++ b/monte-cover/src/montecover/plm/__init__.py @@ -5,7 +5,7 @@ from montecover.plm.plr_ate_sensitivity import PLRATESensitivityCoverageSimulation from montecover.plm.plr_cate import PLRCATECoverageSimulation from montecover.plm.plr_gate import PLRGATECoverageSimulation -from montecover.plm.logistic_ate import LogisticATECoverageSimulation +from montecover.plm.lplr_ate import LPLRATECoverageSimulation __all__ = [
"PLRATECoverageSimulation", @@ -13,5 +13,5 @@ "PLRGATECoverageSimulation", "PLRCATECoverageSimulation", "PLRATESensitivityCoverageSimulation", - "LogisticATECoverageSimulation", + "LPLRATECoverageSimulation", ] From bd264a35ac6d864564f89b2d8d0f6eb800d8658c Mon Sep 17 00:00:00 2001 From: Julius Herzig Date: Mon, 27 Oct 2025 23:42:38 -0700 Subject: [PATCH 27/27] Typo --- monte-cover/src/montecover/plm/lplr_ate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monte-cover/src/montecover/plm/lplr_ate.py b/monte-cover/src/montecover/plm/lplr_ate.py index 15b88e64..da962e32 100644 --- a/monte-cover/src/montecover/plm/lplr_ate.py +++ b/monte-cover/src/montecover/plm/lplr_ate.py @@ -2,7 +2,7 @@ from typing import Any, Dict, Optional import doubleml as dml -from doubleml.plm.data import make_lplr_LZZ2020 +from doubleml.plm.datasets import make_lplr_LZZ2020 from montecover.base import BaseSimulation from montecover.utils import create_learner_from_config