From 8ecbaf758526026ebbde0a69205c6bdc8422d99d Mon Sep 17 00:00:00 2001 From: igerber Date: Sun, 26 Apr 2026 14:38:38 -0400 Subject: [PATCH 1/3] Fix latent doc-snippet bugs from PR #389 (HAD ecosystem) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #389 added HAD code snippets to choosing_estimator.rst, troubleshooting.rst, and r_comparison.rst. None of those edits triggered rust-test.yml (which only runs on rust/, diff_diff/, tests/, pyproject.toml, and the workflow file), so tests/test_doc_snippets.py never executed and the snippets shipped with five latent bugs that now surface on every code PR via the Pure Python Fallback job. Bugs addressed: - r_comparison:block6 — bare HAD.fit(data, ...) with the generate_staggered_data fixture failed because the default aggregate='overall' requires exactly 2 periods and the namespace data has 10. Replaced with an inline HAD-shape panel construction (mirrors the upstream choosing_estimator:block7 fix in 55d7a27) plus aggregate='event_study'. - troubleshooting:block20 — the snippet demonstrates first_treat_col= auto-filtering on a staggered panel. The fixture's first_treat values disagree with the dose path (random per-row dose on never-treated units), tripping HAD's first_treat / dose-path consistency validator. Inlined a 120-unit / 10-period staggered HAD-shape panel (30 never + 30 cohort 5 + 60 cohort 8) so the validator passes and the boundary local-linear estimator has enough distinct dose values to fit. - troubleshooting:block17 / block18 / r_comparison:block7 — these are legitimately context-dependent snippets that reference est / results from prior text-flow context (inspection / output-format examples). Added them to _CONTEXT_DEPENDENT_SNIPPETS so the expected NameError is suppressed, matching the pattern already used for block8, the api_bacon blocks, and the existing r_comparison context-dependent set. choosing_estimator:block7 was the sixth failing snippet but was already fixed upstream in 55d7a27 with the inline-construction pattern; this branch rebases onto that. Verification: PYTHONPATH=. DIFF_DIFF_BACKEND=python pytest tests/test_doc_snippets.py reports 111 passed, 4 skipped, 0 failed on this branch (was 6 failed on origin/main before 55d7a27 and 5 failed after). Follow-up (separate PR queued): carve test_doc_snippets.py out into a dedicated docs-tests.yml workflow triggered on docs/** + diff_diff/** + the test file itself, and exclude it from rust-test.yml's pytest invocations so doc bugs are caught on doc PRs (not silently inherited by code PRs). Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/r_comparison.rst | 21 +++++++++++++++++++-- docs/troubleshooting.rst | 24 ++++++++++++++++++++++++ tests/test_doc_snippets.py | 3 +++ 3 files changed, 46 insertions(+), 2 deletions(-) diff --git a/docs/r_comparison.rst b/docs/r_comparison.rst index 9a641691..fdac4d54 100644 --- a/docs/r_comparison.rst +++ b/docs/r_comparison.rst @@ -237,11 +237,28 @@ identification assumptions (the design path is auto-detected separately by .. code-block:: python + import numpy as np + import pandas as pd from diff_diff import HeterogeneousAdoptionDiD + # Build a HAD-shape panel: D=0 in pre-periods (t < F), D > 0 only at F+. + rng = np.random.default_rng(42) + G, F, T = 200, 4, 5 + doses = rng.beta(0.5, 1.0, size=G) + rows = [] + for g in range(G): + for t in range(1, T + 1): + y = (rng.normal() + + (doses[g] + doses[g] ** 2) * (t >= F) + + rng.normal(0, 0.5)) + d = doses[g] if t >= F else 0.0 + rows.append({'unit': g, 'period': t, 'y': y, 'dose': d}) + had_data = pd.DataFrame(rows) + est = HeterogeneousAdoptionDiD() - results = est.fit(data, outcome_col='y', unit_col='unit', - time_col='period', dose_col='dose') + results = est.fit(had_data, outcome_col='y', unit_col='unit', + time_col='period', dose_col='dose', + aggregate='event_study') Key Differences --------------- diff --git a/docs/troubleshooting.rst b/docs/troubleshooting.rst index d438e36c..671cb649 100644 --- a/docs/troubleshooting.rst +++ b/docs/troubleshooting.rst @@ -593,6 +593,30 @@ a ``UserWarning``). The fit raises only when the panel is staggered .. code-block:: python + import numpy as np + import pandas as pd + + # Build a staggered HAD panel for this example: 120 units, three + # cohorts (30 never-treated + 30 treated at period 5 + 60 treated at + # period 8). Dose is zero pre-treatment per unit and a constant + # positive value post-treatment, so the first_treat / dose-path + # consistency validator passes. The 60-unit last cohort gives the + # boundary local-linear estimator enough distinct dose values to fit. + np.random.seed(42) + n_units, n_periods = 120, 10 + first_treat_per_unit = np.array([0] * 30 + [5] * 30 + [8] * 60) + dose_per_unit = np.where( + first_treat_per_unit > 0, np.random.uniform(0.5, 2.0, n_units), 0.0 + ) + rows = [] + for u in range(n_units): + ft = first_treat_per_unit[u] + for t in range(n_periods): + d_ut = dose_per_unit[u] if (ft > 0 and t >= ft) else 0.0 + y_ut = (d_ut > 0) * dose_per_unit[u] * 0.5 + np.random.normal() + rows.append((u, t, d_ut, ft, y_ut)) + data = pd.DataFrame(rows, columns=["unit", "period", "dose", "first_treat", "y"]) + # Primary remedy: pass `first_treat_col` so the estimator auto-filters # to the last-treatment cohort + never-treated and emits a UserWarning. est = HeterogeneousAdoptionDiD() diff --git a/tests/test_doc_snippets.py b/tests/test_doc_snippets.py index 8b02b565..f766af57 100644 --- a/tests/test_doc_snippets.py +++ b/tests/test_doc_snippets.py @@ -366,7 +366,10 @@ def _restore_datasets_module(): "r_comparison:block3", "r_comparison:block4", "r_comparison:block6", + "r_comparison:block7", "troubleshooting:block8", + "troubleshooting:block17", + "troubleshooting:block18", } From 8ccfadf92d3c9f007bed5cf557e314376db7e4c8 Mon Sep 17 00:00:00 2001 From: igerber Date: Sun, 26 Apr 2026 14:54:54 -0400 Subject: [PATCH 2/3] Address PR #396 R1 review (1 P1 + 1 P2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P1: troubleshooting:block17/block18 are not actually context-dependent within docs/troubleshooting.rst — the page has no prior HAD est/results binding before the new HAD Issues section. Suppressing NameError on those IDs hid broken docs (the readers' copy-paste would fail) while CI stopped reporting the failure. Fix: rewrote both snippets self-contained, mirroring the inline HAD- shape panel construction pattern from PR #396's r_comparison:block6 fix and the upstream choosing_estimator:block7 fix in 55d7a27. - block17 (Resolved estimand inspection): inline 200-unit / 5-period HAD panel with beta(0.5, 1.0) doses — d.min() near zero so the Design 1' (continuous_at_zero) detection rule fires and `target_ parameter == "WAS"` for the inspection demo. - block18 (Mass-point design selected): inline 200-unit / 5-period HAD panel where 30% of units share d_lower=0.5 so the modal-fraction-at-d.min() > 2% threshold trips and `_detect_design` resolves to mass_point. Verified locally: design='mass_point', target_parameter='WAS_d_lower'. Both snippets now define `est` and `results` locally; removed troubleshooting:block17 and troubleshooting:block18 from _CONTEXT_DEPENDENT_SNIPPETS. P2: r_comparison:block6 was already in _CONTEXT_DEPENDENT_SNIPPETS from a pre-existing entry, but PR #396's earlier rewrite already made it self-contained. The stale allowlist entry would mask future NameError regressions. Removed. Verification: PYTHONPATH=. DIFF_DIFF_BACKEND=python pytest tests/test_doc_snippets.py reports 111 passed, 4 skipped, 0 failed. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/troubleshooting.rst | 60 ++++++++++++++++++++++++++++++++++---- tests/test_doc_snippets.py | 3 -- 2 files changed, 55 insertions(+), 8 deletions(-) diff --git a/docs/troubleshooting.rst b/docs/troubleshooting.rst index 671cb649..ea7513b9 100644 --- a/docs/troubleshooting.rst +++ b/docs/troubleshooting.rst @@ -498,17 +498,36 @@ is meaningfully positive relative to the dose scale. .. code-block:: python - # Inspect the dose support before fitting import numpy as np - d = data['dose'].to_numpy() - print(data['dose'].describe()) + import pandas as pd + from diff_diff import HeterogeneousAdoptionDiD + + # Build a HAD-shape panel: D=0 in pre-periods (t < F), D > 0 only at F+. + rng = np.random.default_rng(42) + G, F, T = 200, 4, 5 + doses = rng.beta(0.5, 1.0, size=G) + rows = [] + for g in range(G): + for t in range(1, T + 1): + y = (rng.normal() + + (doses[g] + doses[g] ** 2) * (t >= F) + + rng.normal(0, 0.5)) + d = doses[g] if t >= F else 0.0 + rows.append({'unit': g, 'period': t, 'y': y, 'dose': d}) + had_data = pd.DataFrame(rows) + + # Inspect the dose support before fitting + d = had_data['dose'].to_numpy() + print(had_data['dose'].describe()) print(f"d.min() = {d.min():.6g}; " f"0.01 * median(|d|) = {0.01 * np.median(np.abs(d)):.6g}; " f"d.min() < threshold => Design 1' (WAS)") # Check the resolved estimand after fitting - results = est.fit(data, outcome_col='y', unit_col='unit', - time_col='period', dose_col='dose') + est = HeterogeneousAdoptionDiD() + results = est.fit(had_data, outcome_col='y', unit_col='unit', + time_col='period', dose_col='dose', + aggregate='event_study') print(f"Resolved: {results.target_parameter}") # If you intend Design 1' but `d.min()` exceeds the threshold, verify @@ -536,6 +555,37 @@ SE path is not used here). .. code-block:: python + import numpy as np + import pandas as pd + from diff_diff import HeterogeneousAdoptionDiD + + # Build a HAD panel with a heavy boundary mass at d_lower so the + # modal fraction at d.min() exceeds 2% and `_detect_design` resolves + # to `mass_point`. + rng = np.random.default_rng(42) + G, F, T = 200, 4, 5 + d_lower = 0.5 + mass_frac = 0.3 + doses = np.where( + rng.uniform(size=G) < mass_frac, + d_lower, + rng.uniform(d_lower + 0.1, 2.0, size=G), + ) + rows = [] + for g in range(G): + for t in range(1, T + 1): + y = (rng.normal() + + doses[g] * (t >= F) + + rng.normal(0, 0.5)) + d = doses[g] if t >= F else 0.0 + rows.append({'unit': g, 'period': t, 'y': y, 'dose': d}) + had_data = pd.DataFrame(rows) + + est = HeterogeneousAdoptionDiD() + results = est.fit(had_data, outcome_col='y', unit_col='unit', + time_col='period', dose_col='dose', + aggregate='event_study') + # Inspect the resolved design print(f"Design: {results.design}") # 'mass_point' here diff --git a/tests/test_doc_snippets.py b/tests/test_doc_snippets.py index f766af57..e03ced49 100644 --- a/tests/test_doc_snippets.py +++ b/tests/test_doc_snippets.py @@ -365,11 +365,8 @@ def _restore_datasets_module(): "r_comparison:block2", "r_comparison:block3", "r_comparison:block4", - "r_comparison:block6", "r_comparison:block7", "troubleshooting:block8", - "troubleshooting:block17", - "troubleshooting:block18", } From 0328b4a5ff558494fa3a413c293a9ce0d7b4c157 Mon Sep 17 00:00:00 2001 From: igerber Date: Sun, 26 Apr 2026 15:07:38 -0400 Subject: [PATCH 3/3] Address PR #396 R2 review (1 P1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P1: The "Resolved estimand is not what I expected" inspection snippet computed `d.min()` over `had_data['dose']` (the full panel column), but HAD's `_detect_design()` resolves on `D_{g,F}` — per-unit dose at the first treated period — not the full panel. Per `diff_diff/had.py:1834-1888` (event-study path) and `diff_diff/had.py:4089-4091` (`_detect_design(d_arr)` where `d_arr` is `D_{g,F}`), the detector ignores the structural pre-period zeros that HAD requires (`D_{g,t} = 0` for `t < F`). Consequence: on every valid HAD panel, `had_data['dose'].min()` is always 0 and the snippet would report "Design 1' (WAS)" regardless of the true resolution — exactly the false sense of confirmation the troubleshooting page is meant to dispel. Fix: rewrote the snippet to extract `d_at_F = had_data.loc[ had_data['period'] == F].set_index('unit')['dose']` (per-unit post-period dose, mirroring `had.py:1886-1888`) and compute the threshold check on that series. Renamed printed labels from `d.min()` to `D_{g,F}.min()` and `0.01 * median(|d|)` to `0.01 * median(|D_{g,F}|)` so the diagnostic syntactically matches the registry's rule statement. Updated the surrounding **Cause** prose at `docs/troubleshooting.rst :486-495` to (a) state explicitly that detection runs on `D_{g,F}` not the panel column, (b) note that pre-period zeros on the panel column are structural and uninformative for design choice, and (c) restate the threshold rule and modal-fraction check on `D_{g,F}`. Verification: PYTHONPATH=. DIFF_DIFF_BACKEND=python pytest tests/test_doc_snippets.py reports 111 passed, 4 skipped, 0 failed. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/troubleshooting.rst | 46 +++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/docs/troubleshooting.rst b/docs/troubleshooting.rst index ea7513b9..99f97ba8 100644 --- a/docs/troubleshooting.rst +++ b/docs/troubleshooting.rst @@ -483,16 +483,20 @@ HeterogeneousAdoptionDiD (HAD) Issues **Problem:** ``HeterogeneousAdoptionDiD`` resolves ``target_parameter`` to ``"WAS_d_lower"`` when you expected ``"WAS"`` (or vice versa). -**Cause:** HAD auto-detects the design path from the dose distribution. The -``_detect_design`` rule resolves to Design 1' (``continuous_at_zero``, -targets WAS) when EITHER ``d.min() == 0`` exactly OR ``d.min()`` is a small -positive value below ``0.01 * median(|d|)`` (the small-share-of-treated -escape clause). Otherwise (``d.min()`` larger than that threshold) the -estimator routes to Design 1, with a further check for mass-point structure -(modal fraction at ``d.min()`` exceeding 2% routes to ``mass_point``; -otherwise ``continuous_near_d_lower``); both Design 1 paths target -``WAS_{d_lower}``. So a Design 1 resolution only fires when ``d.min()`` -is meaningfully positive relative to the dose scale. +**Cause:** HAD auto-detects the design path from the unit-level +post-treatment dose ``D_{g,F}`` (the dose at the first treated period +``F``, one value per unit), NOT from the full panel ``dose`` column. The +panel column carries structural pre-period zeros (HAD requires +``D_{g,t} = 0`` for ``t < F``), so ``had_data['dose'].min()`` is always +zero on a valid HAD panel and tells you nothing about the resolved +design. ``_detect_design`` then resolves on ``D_{g,F}`` and picks Design +1' (``continuous_at_zero``, targets WAS) when EITHER +``D_{g,F}.min() == 0`` exactly OR ``D_{g,F}.min()`` is a small positive +value below ``0.01 * median(|D_{g,F}|)`` (the small-share-of-treated +escape clause). Otherwise the estimator routes to Design 1, with a +further check for mass-point structure (modal fraction at ``D_{g,F}.min()`` +exceeding 2% routes to ``mass_point``; otherwise +``continuous_near_d_lower``); both Design 1 paths target ``WAS_{d_lower}``. **Solutions:** @@ -516,12 +520,16 @@ is meaningfully positive relative to the dose scale. rows.append({'unit': g, 'period': t, 'y': y, 'dose': d}) had_data = pd.DataFrame(rows) - # Inspect the dose support before fitting - d = had_data['dose'].to_numpy() - print(had_data['dose'].describe()) - print(f"d.min() = {d.min():.6g}; " - f"0.01 * median(|d|) = {0.01 * np.median(np.abs(d)):.6g}; " - f"d.min() < threshold => Design 1' (WAS)") + # Inspect the support the detector actually uses: per-unit dose at the + # first treated period F. Pre-period zeros on the panel column are + # structural and ignored by `_detect_design()`. + d_at_F = had_data.loc[had_data['period'] == F].set_index('unit')['dose'] + print(d_at_F.describe()) + d_min = float(d_at_F.min()) + d_thr = 0.01 * float(np.median(np.abs(d_at_F))) + print(f"D_{{g,F}}.min() = {d_min:.6g}; " + f"0.01 * median(|D_{{g,F}}|) = {d_thr:.6g}; " + f"D_{{g,F}}.min() < threshold => Design 1' (WAS)") # Check the resolved estimand after fitting est = HeterogeneousAdoptionDiD() @@ -530,9 +538,9 @@ is meaningfully positive relative to the dose scale. aggregate='event_study') print(f"Resolved: {results.target_parameter}") - # If you intend Design 1' but `d.min()` exceeds the threshold, verify - # the dose-variable encoding (e.g. log-transformed doses where 0 was - # mapped to a small positive value larger than 1% of the median). + # If you intend Design 1' but `D_{g,F}.min()` exceeds the threshold, + # verify the dose-variable encoding (e.g. log-transformed doses where + # 0 was mapped to a small positive value larger than 1% of the median). "Mass-point design selected" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~