PolicyEngine · anth-volk · Apr 14, 2026 · Apr 15, 2026 · Apr 24, 2026 · Apr 28, 2026
diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
@@ -0,0 +1,7 @@
+# Copilot Instructions
+
+Follow the repository's canonical engineering skills under
+`docs/engineering/skills/`.
+
+For tests, read `docs/engineering/skills/testing.md` before adding, moving, or
+reviewing test files. Do not duplicate or override that testing guidance here.
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
@@ -23,11 +23,11 @@ jobs:
     runs-on: ubuntu-latest
     needs: check-fork
     steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
+      - uses: actions/checkout@v6
+      - uses: actions/setup-python@v6
         with:
           python-version: "3.14"
-      - uses: astral-sh/setup-uv@v5
+      - uses: astral-sh/setup-uv@v8.1.0
       - name: Check lock file is up-to-date
         run: |
           uv lock --locked || {
@@ -39,21 +39,33 @@ jobs:
     runs-on: ubuntu-latest
     needs: check-fork
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
       - run: pip install "ruff>=0.9.0"  # quoted: a bare >= is a shell redirect to a file named "=0.9.0"
       - run: ruff format --check .
 
+  quality-guards:  # runs scripts/run_quality_guards.py after check-fork succeeds
+    name: Quality guards
+    runs-on: ubuntu-latest
+    needs: check-fork
+    steps:
+      - uses: actions/checkout@v6
+      - uses: actions/setup-python@v6
+        with:
+          python-version: "3.14"  # quoted so YAML keeps it a string, not a float
+      - name: Run quality guards
+        run: python scripts/run_quality_guards.py  # no dependency install step precedes this in the job
+
   check-changelog:
     runs-on: ubuntu-latest
     needs: check-fork
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
         with:
           fetch-depth: 0
-      - uses: actions/setup-python@v5
+      - uses: actions/setup-python@v6
         with:
           python-version: "3.14"
-      - uses: astral-sh/setup-uv@v5
+      - uses: astral-sh/setup-uv@v8.1.0
       - run: uv sync --dev
       - name: Check for changelog fragment
         run: uv run towncrier check --compare-with origin/main
@@ -62,11 +74,11 @@ jobs:
     runs-on: ubuntu-latest
     needs: [check-fork, lint]
     steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
+      - uses: actions/checkout@v6
+      - uses: actions/setup-python@v6
         with:
           python-version: "3.14"
-      - uses: astral-sh/setup-uv@v5
+      - uses: astral-sh/setup-uv@v8.1.0
       - run: uv sync --dev
       - name: Run unit tests with coverage
         env:
@@ -86,18 +98,11 @@ jobs:
         env:
           CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
 
-  optimized-integration-tests:
+  target-integration-tests:
     runs-on: ubuntu-latest
-    needs:
-      [
-        check-fork,
-        check-lock-freshness,
-        lint,
-        check-changelog,
-        unit-tests,
-        smoke-test,
-        docs-build,
-      ]
+    needs: [check-fork, lint, unit-tests, smoke-test, decide-test-scope]
+    if: needs.decide-test-scope.outputs.run_integration == 'true'
+    name: Integration tests
     env:
       MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
       MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
@@ -107,36 +112,51 @@ jobs:
       MODAL_APP_NAME: policyengine-us-data-pipeline
       MODAL_LOCAL_AREA_APP_NAME: policyengine-us-data-local-area
       MODAL_H5_TEST_HARNESS_APP_NAME: policyengine-us-data-h5-test-harness
-    name: Optimized integration tests (PR staging)
     steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
+      - uses: actions/checkout@v6
+      - uses: actions/setup-python@v6
         with:
           python-version: "3.14"
-      - name: Install optimized test deps
-        run: pip install modal pytest numpy pandas
+      - uses: astral-sh/setup-uv@v8.1.0
+      - run: uv sync --dev
+      - name: Install integration test deps
+        run: uv pip install modal pytest numpy pandas
       - name: Ensure PR Modal environment exists
-        run: python .github/scripts/ensure_modal_environment.py
+        run: uv run python .github/scripts/ensure_modal_environment.py
       - name: Sync Modal secrets to PR environment
-        run: python .github/scripts/sync_modal_secrets.py
+        run: uv run python .github/scripts/sync_modal_secrets.py
       - name: Deploy Modal pipeline app to PR staging
-        run: modal deploy --env="${MODAL_ENVIRONMENT}" modal_app/pipeline.py
+        run: uv run modal deploy --env="${MODAL_ENVIRONMENT}" modal_app/pipeline.py
       - name: Deploy Modal local-area app to PR staging
-        run: modal deploy --env="${MODAL_ENVIRONMENT}" modal_app/local_area.py
+        run: uv run modal deploy --env="${MODAL_ENVIRONMENT}" modal_app/local_area.py
       - name: Deploy Modal H5 test harness to PR staging
-        run: modal deploy --env="${MODAL_ENVIRONMENT}" modal_app/h5_test_harness.py
-      - name: Run optimized integration tests against PR staging
-        run: python -m pytest tests/optimized/ -v
+        run: uv run modal deploy --env="${MODAL_ENVIRONMENT}" modal_app/h5_test_harness.py
+      - name: Run integration tests
+        run: >
+          uv run pytest
+          tests/integration/test_tiny_pipeline_workspace.py
+          tests/integration/test_tiny_stage_1_artifacts.py
+          tests/integration/test_tiny_stage_2_artifacts.py
+          tests/integration/test_tiny_stage_3_artifacts.py
+          tests/integration/test_tiny_stage_4_artifacts.py
+          tests/integration/test_tiny_stage_5_artifacts.py
+          tests/integration/test_tiny_pipeline_e2e.py
+          tests/integration/test_tiny_pipeline_h5_e2e.py
+          tests/integration/local_h5/
+          tests/integration/test_modal_pipeline_seams.py
+          tests/integration/test_tiny_h5_pipeline.py
+          tests/integration/test_modal_pipeline_e2e.py
+          -v
       - name: Cleanup PR Modal environment
         if: always()
-        run: python .github/scripts/delete_modal_environment.py
+        run: uv run python .github/scripts/delete_modal_environment.py
 
   smoke-test:
     runs-on: ubuntu-latest
     needs: [check-fork, lint]
     steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
+      - uses: actions/checkout@v6
+      - uses: actions/setup-python@v6
         with:
           python-version: "3.14"
       - run: python -m pip install .
@@ -147,14 +167,14 @@ jobs:
     runs-on: ubuntu-latest
     needs: [check-fork]
     steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
+      - uses: actions/checkout@v6
+      - uses: actions/setup-python@v6
         with:
           python-version: "3.14"
-      - uses: actions/setup-node@v4
+      - uses: actions/setup-node@v6
         with:
           node-version: "24"
-      - uses: astral-sh/setup-uv@v5
+      - uses: astral-sh/setup-uv@v8.1.0
       - run: uv sync --dev
       - name: Test documentation builds
         run: uv run make documentation
@@ -165,51 +185,15 @@ jobs:
     outputs:
       run_integration: ${{ steps.check.outputs.run_integration }}
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
         with:
           fetch-depth: 0
       - name: Check changed files for integration scope
         id: check
         run: |
           CHANGED=$(git diff --name-only origin/main...HEAD)
-          if echo "$CHANGED" | grep -qE '^(policyengine_us_data/|modal_app/|tests/integration/)'; then
+          if echo "$CHANGED" | grep -qE '^(\.github/scripts/|\.github/workflows/pr\.yaml|modal_app/|policyengine_us_data/|tests/integration/|tests/support/|pyproject\.toml|uv\.lock)'; then
             echo "run_integration=true" >> "$GITHUB_OUTPUT"
           else
             echo "run_integration=false" >> "$GITHUB_OUTPUT"
           fi
-
-  integration-tests:
-    runs-on: ubuntu-latest
-    needs: [check-fork, lint, decide-test-scope]
-    if: needs.decide-test-scope.outputs.run_integration == 'true'
-    env:
-      MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
-      MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
-      MODAL_ENVIRONMENT: main
-      HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-      - uses: actions/setup-python@v5
-        with:
-          python-version: "3.14"
-      - run: pip install modal
-      - name: Build datasets and run integration tests on Modal
-        run: |
-          STAGE_ARGS=""
-          if git diff --name-only origin/main...HEAD | grep -qx 'pyproject.toml'; then
-            VERSION=$(python .github/fetch_version.py)
-            STAGE_ARGS="--upload --stage-only --run-id=${VERSION}"
-            {
-              echo "## Release Artifact Staging"
-              echo ""
-              echo "- package version: \`${VERSION}\`"
-              echo "- staged HF prefix: \`staging/${VERSION}/\`"
-              echo "- promote with: \`uv run python policyengine_us_data/storage/upload_completed_datasets.py --promote-only --run-id=${VERSION} --version=${VERSION}\`"
-            } >> "$GITHUB_STEP_SUMMARY"
-          fi
-
-          modal run --env="${MODAL_ENVIRONMENT}" modal_app/data_build.py \
-            --branch=${{ github.head_ref || github.ref_name }} \
-            ${STAGE_ARGS}
diff --git a/.github/workflows/push.yaml b/.github/workflows/push.yaml
@@ -12,9 +12,9 @@ jobs:
       - run: pip install "ruff>=0.9.0"  # quoted: a bare >= is a shell redirect to a file named "=0.9.0"
       - run: ruff format --check .
 
-  # ── Build and linear integration tests ──────────────────────
-  build-and-linear-integration-tests:
-    name: Build and linear integration tests
+  # ── Dataset build ───────────────────────────────────────────
+  build-datasets:
+    name: Build datasets
     runs-on: ubuntu-latest
     needs: lint
     if: github.event.head_commit.message != 'Update package version'
@@ -29,7 +29,7 @@ jobs:
         with:
           python-version: "3.14"
       - run: pip install modal
-      - name: Run linear integration tests on Modal
+      - name: Build datasets on Modal
         run: |
           modal run --env="${MODAL_ENVIRONMENT}" modal_app/data_build.py \
             --upload \

diff --git a/AGENTS.md b/AGENTS.md
@@ -0,0 +1,14 @@
+# Codex Instructions
+
+These instructions apply repository-wide.
+
+## Skills system
+
+Canonical AI-facing engineering skills live under `docs/engineering/skills/`.
+Use those files as the source of truth across Codex, Claude, Copilot, and other
+AI tools.
+
+When adding, moving, or reviewing tests, read
+`docs/engineering/skills/testing.md`. Do not put pytest files under
+`policyengine_us_data/tests/`, do not import from `tests.conftest`, and do not
+import helpers across test lanes.
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -7,25 +7,33 @@
 
 ## Testing
 
+Canonical testing guidance lives in `docs/engineering/skills/testing.md`. If
+this file conflicts with that skill, follow the skill and update this adapter.
+
 ### Running Tests
 - `make test-unit` - Run unit tests only (fast, no data dependencies)
 - `make test-integration` - Run integration tests (requires built H5 datasets)
 - `make test` - Run all tests
 - `pytest tests/unit/ -v` - Unit tests directly
 - `pytest tests/integration/test_cps.py -v` - Specific integration test
+- `python scripts/run_quality_guards.py` - Run layout/import quality guards
 
 ### Test Organization
-Tests are in the top-level `tests/` directory, split into two sub-directories:
+Tests are in the top-level `tests/` directory, split into these sub-directories:
 
 - **`tests/unit/`** — Self-contained tests that use synthetic data, mocks, patches, or checked-in fixtures. Run in seconds with no external dependencies.
   - `unit/datasets/` — unit tests for dataset code
   - `unit/calibration/` — unit tests for calibration code
 
 - **`tests/integration/`** — Tests that require built H5 datasets, HuggingFace downloads, Microsimulation objects, or database ETL. Named after the dataset they test.
+- **`tests/optimized/`** — Tests that exercise deployed Modal/staging seams.
 
 ### Test Placement Rules
+- **NEVER** put pytest files under `policyengine_us_data/tests/`; CI does not collect that tree
 - **NEVER** put tests that require H5 files or Microsimulation in `unit/`
 - **NEVER** put tests that use only synthetic data or mocks in `integration/`
+- **NEVER** import from `tests.conftest`; fixtures are discovered automatically and helper functions belong in local support modules
+- **NEVER** import helpers across test lanes, such as `tests.unit` from an integration test
 - Integration test files are named after their dataset dependency: `test_cps.py` tests `cps_2024.h5`
 - Sanity checks (value ranges, population counts) belong in the per-dataset integration test file, not in a separate sanity file
 - When adding a new integration test, add it to the existing per-dataset file if one exists

diff --git a/changelog.d/760.added.md b/changelog.d/760.added.md
@@ -0,0 +1 @@
+Added local H5 traceability metadata and scope fingerprinting for calibration artifacts.
diff --git a/changelog.d/test-quality-guards.changed.md b/changelog.d/test-quality-guards.changed.md
@@ -0,0 +1 @@
+Added quality guards for test layout and documented the testing skill for AI tooling.
diff --git a/docs/engineering/skills/README.md b/docs/engineering/skills/README.md
@@ -0,0 +1,13 @@
+# Engineering Skills
+
+This directory is the canonical source for AI-facing engineering rules.
+
+Tool-specific instruction files such as `AGENTS.md`, `CLAUDE.md`, and
+`.github/copilot-instructions.md` should point here instead of duplicating
+implementation-specific guidance. When a rule changes, update the skill here
+first, then keep adapters thin.
+
+Current skills:
+
+- `testing.md`: test layout, fixture scope, helper placement, and quality guard
+  expectations.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Added local H5 traceability metadata and scope fingerprinting for calibration artifacts.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Added quality guards for test layout and documented the testing skill for AI tooling.