From 536c4ce89f3147481fb5e5ec16eb1066c6c7cdf8 Mon Sep 17 00:00:00 2001 From: minion1227 Date: Tue, 30 Jun 2026 19:19:26 -0700 Subject: [PATCH 1/2] fix(models): reject empty text/name/title on claim/entity/page MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit the non-empty contract for `Claim.text`, `Entity.name`, and `Page.title` lived only in the `propose_*` helpers, so `store.put_*`, `store.update_*`, and bundle/sync import (via `_validate_content`) all silently accepted empty or whitespace-only values and landed artifacts carrying zero of the field's semantic content. add `@field_validator`s on each field, mirroring the existing `Claim.evidence` min-citation validator (#81/#82): they reject blank input at construction time, so every write path — direct construction, put/update, and import — inherits the check at once. the validators gate on emptiness only and preserve surrounding whitespace of non-blank values. `Source.locator` is deliberately left out of scope — its format question is bigger and deserves its own issue, as the report notes. closes #155 --- CHANGELOG.md | 1 + src/vouch/models.py | 30 +++++++++++++++++++ tests/test_storage.py | 67 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f4d85de..eb9a94bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -96,6 +96,7 @@ All notable changes to vouch are documented here. Format follows KB under `eval/fixture-kb/`, and an `eval` workflow gating retrieval changes (#226). ### Fixed +- `Claim.text`, `Entity.name`, and `Page.title` now reject empty / whitespace-only values at the model layer via `@field_validator`s, mirroring the `Claim.evidence` min-citation validator (#81/#82). the non-empty contract previously lived only in the `propose_*` helpers, so `store.put_*`, `store.update_*`, and bundle/sync import all silently accepted blank-content artifacts; enforcing on the model closes every write path at once (fixes #155). - `parse_since` (the `--since` parser behind `vouch metrics`/`vouch audit`) now raises a clean `MetricsError` for a duration too large to represent (e.g. `--since 1000000000000d`), instead of letting an uncaught `OverflowError` traceback escape — restoring the documented "clean error, not a traceback" contract. - `sync_apply` now loads the sync source exactly once and passes the same `_SyncSource` instance into `sync_check`, closing a TOCTOU window where a bundle replaced on disk between the two `_load_source` calls could cause the validation and write phases to operate on different snapshots. Also eliminates redundant directory walks (KB sources) and triple tarball opens (bundle sources). Fixes #217. - `vault_to_kb` now passes `slug_hint=page_id` to `propose_page` so vault edit proposals target the existing page id from frontmatter instead of a slugified copy of the title (fixes #219). diff --git a/src/vouch/models.py b/src/vouch/models.py index 23f94aa4..d08ed8c0 100644 --- a/src/vouch/models.py +++ b/src/vouch/models.py @@ -231,6 +231,18 @@ def _at_least_one_citation(cls, v: list[str]) -> list[str]: "(README §'Object model'; CONTRIBUTING §'Things we won't merge')" ) return v + + @field_validator("text") + @classmethod + def _text_non_empty(cls, v: str) -> str: + # Same shape as _at_least_one_citation: the non-empty contract lived + # only in proposals.propose_claim, so store.put_claim, + # store.update_claim, and bundle.import_apply via _validate_content + # accepted text="" / whitespace and landed a claim carrying zero + # semantic content. Enforce on the model to close all paths at once. + if not v.strip(): + raise ValueError("claim text must not be empty") + return v entities: list[str] = Field(default_factory=list) supersedes: list[str] = Field(default_factory=list) superseded_by: str | None = None @@ -266,6 +278,15 @@ class Entity(BaseModel): created_at: datetime = Field(default_factory=utcnow) updated_at: datetime = Field(default_factory=utcnow) + @field_validator("name") + @classmethod + def _name_non_empty(cls, v: str) -> str: + # See Claim._text_non_empty — the propose_entity check alone left + # store.put_entity and bundle import accepting name="" / whitespace. + if not v.strip(): + raise ValueError("entity name must not be empty") + return v + class Relation(BaseModel): """Typed edge between entities / claims / pages.""" @@ -314,6 +335,15 @@ def _normalize_type(cls, v: Any) -> str: raise ValueError("page type must be a non-empty string") return v.strip() + @field_validator("title") + @classmethod + def _title_non_empty(cls, v: str) -> str: + # See Claim._text_non_empty — the propose_page check alone left + # store.put_page and bundle import accepting title="" / whitespace. + if not v.strip(): + raise ValueError("page title must not be empty") + return v + # --- audit + sessions ----------------------------------------------------- diff --git a/tests/test_storage.py b/tests/test_storage.py index ce1d643f..7c3098ad 100644 --- a/tests/test_storage.py +++ b/tests/test_storage.py @@ -196,6 +196,47 @@ def test_update_claim_rejects_empty_evidence(store: KBStore) -> None: assert (store.kb_dir / "claims" / "c1.yaml").read_text() == persisted_before +@pytest.mark.parametrize("bad", ["", " ", "\n\t "]) +def test_claim_model_rejects_empty_text(store: KBStore, bad: str) -> None: + """Regression for #155: same shape as the #81 evidence validator, on the + text field. Empty / whitespace-only text is rejected at the model layer so + direct construction, store.put_claim, and bundle import all inherit it.""" + src = store.put_source(b"e") + with pytest.raises(ValidationError, match="text must not be empty"): + Claim(id="c1", text=bad, evidence=[src.id]) + + +def test_put_claim_rejects_empty_text(store: KBStore) -> None: + src = store.put_source(b"e") + with pytest.raises(ValidationError, match="text must not be empty"): + store.put_claim(Claim(id="c1", text=" ", evidence=[src.id])) + assert not (store.kb_dir / "claims" / "c1.yaml").exists() + + +def test_update_claim_rejects_empty_text(store: KBStore) -> None: + """A previously-populated claim cannot be mutated down to blank text and + silently re-persisted — update_claim re-validates via model_validate.""" + src = store.put_source(b"e") + store.put_claim(Claim(id="c1", text="cited", evidence=[src.id])) + persisted_before = (store.kb_dir / "claims" / "c1.yaml").read_text() + + c = store.get_claim("c1") + c.text = " " + + with pytest.raises(ValidationError, match="text must not be empty"): + store.update_claim(c) + assert (store.kb_dir / "claims" / "c1.yaml").read_text() == persisted_before + + +def test_claim_text_preserves_surrounding_whitespace_when_non_blank( + store: KBStore, +) -> None: + """The validator gates on emptiness only — it must not strip content.""" + src = store.put_source(b"e") + c = Claim(id="c1", text=" padded text ", evidence=[src.id]) + assert c.text == " padded text " + + # --- pages ---------------------------------------------------------------- @@ -217,6 +258,19 @@ def test_page_with_frontmatter_round_trip(store: KBStore) -> None: assert back.type == PageType.CONCEPT +@pytest.mark.parametrize("bad", ["", " ", "\n"]) +def test_page_model_rejects_empty_title(bad: str) -> None: + """Regression for #155 — empty / whitespace titles rejected on the model.""" + with pytest.raises(ValidationError, match="title must not be empty"): + Page(id="p1", title=bad) + + +def test_put_page_rejects_empty_title(store: KBStore) -> None: + with pytest.raises(ValidationError, match="title must not be empty"): + store.put_page(Page(id="p1", title=" ")) + assert not (store.kb_dir / "pages" / "p1.md").exists() + + # --- entities + relations ------------------------------------------------- @@ -226,6 +280,19 @@ def test_entity_round_trip(store: KBStore) -> None: assert back.name == "Foo" +@pytest.mark.parametrize("bad", ["", " ", "\t\n"]) +def test_entity_model_rejects_empty_name(bad: str) -> None: + """Regression for #155 — empty / whitespace names rejected on the model.""" + with pytest.raises(ValidationError, match="name must not be empty"): + Entity(id="e1", name=bad, type=EntityType.CONCEPT) + + +def test_put_entity_rejects_empty_name(store: KBStore) -> None: + with pytest.raises(ValidationError, match="name must not be empty"): + store.put_entity(Entity(id="e1", name=" ", type=EntityType.CONCEPT)) + assert not (store.kb_dir / "entities" / "e1.yaml").exists() + + def test_relation_round_trip(store: KBStore) -> None: store.put_entity(Entity(id="a", name="A", type=EntityType.PROJECT)) store.put_entity(Entity(id="b", name="B", type=EntityType.PROJECT)) From 89815c0088771cf70d9d6406138768b011c78a74 Mon Sep 17 00:00:00 2001 From: minion1227 Date: Tue, 30 Jun 2026 19:25:45 -0700 Subject: [PATCH 2/2] refactor(models): extract shared _require_non_empty validator helper address review on #300: the three #155 field validators (Claim.text / Entity.name / Page.title) were structurally identical strip-checks. extract a module-level `_require_non_empty(v, label)` helper they all delegate to, keeping the per-field error messages. also fix a mid-entry sentence casing in the CHANGELOG. no behaviour change. --- CHANGELOG.md | 2 +- src/vouch/models.py | 24 +++++++++++++++--------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eb9a94bc..4cacf1aa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -96,7 +96,7 @@ All notable changes to vouch are documented here. Format follows KB under `eval/fixture-kb/`, and an `eval` workflow gating retrieval changes (#226). ### Fixed -- `Claim.text`, `Entity.name`, and `Page.title` now reject empty / whitespace-only values at the model layer via `@field_validator`s, mirroring the `Claim.evidence` min-citation validator (#81/#82). the non-empty contract previously lived only in the `propose_*` helpers, so `store.put_*`, `store.update_*`, and bundle/sync import all silently accepted blank-content artifacts; enforcing on the model closes every write path at once (fixes #155). +- `Claim.text`, `Entity.name`, and `Page.title` now reject empty / whitespace-only values at the model layer via `@field_validator`s, mirroring the `Claim.evidence` min-citation validator (#81/#82). The non-empty contract previously lived only in the `propose_*` helpers, so `store.put_*`, `store.update_*`, and bundle/sync import all silently accepted blank-content artifacts; enforcing on the model closes every write path at once (fixes #155). - `parse_since` (the `--since` parser behind `vouch metrics`/`vouch audit`) now raises a clean `MetricsError` for a duration too large to represent (e.g. `--since 1000000000000d`), instead of letting an uncaught `OverflowError` traceback escape — restoring the documented "clean error, not a traceback" contract. - `sync_apply` now loads the sync source exactly once and passes the same `_SyncSource` instance into `sync_check`, closing a TOCTOU window where a bundle replaced on disk between the two `_load_source` calls could cause the validation and write phases to operate on different snapshots. Also eliminates redundant directory walks (KB sources) and triple tarball opens (bundle sources). Fixes #217. - `vault_to_kb` now passes `slug_hint=page_id` to `propose_page` so vault edit proposals target the existing page id from frontmatter instead of a slugified copy of the title (fixes #219). diff --git a/src/vouch/models.py b/src/vouch/models.py index d08ed8c0..3855d6e4 100644 --- a/src/vouch/models.py +++ b/src/vouch/models.py @@ -82,6 +82,18 @@ def _coerce_artifact_scope(value: object) -> object: return value +def _require_non_empty(v: str, label: str) -> str: + """Reject empty / whitespace-only required text fields (#155). + + Shared by the ``Claim.text`` / ``Entity.name`` / ``Page.title`` + validators. Gates on emptiness only — non-blank values pass through + unchanged, so surrounding whitespace is preserved. + """ + if not v.strip(): + raise ValueError(f"{label} must not be empty") + return v + + class ArtifactScope(BaseModel): """Structured scope: visibility tier plus optional project/agent binding.""" @@ -240,9 +252,7 @@ def _text_non_empty(cls, v: str) -> str: # store.update_claim, and bundle.import_apply via _validate_content # accepted text="" / whitespace and landed a claim carrying zero # semantic content. Enforce on the model to close all paths at once. - if not v.strip(): - raise ValueError("claim text must not be empty") - return v + return _require_non_empty(v, "claim text") entities: list[str] = Field(default_factory=list) supersedes: list[str] = Field(default_factory=list) superseded_by: str | None = None @@ -283,9 +293,7 @@ class Entity(BaseModel): def _name_non_empty(cls, v: str) -> str: # See Claim._text_non_empty — the propose_entity check alone left # store.put_entity and bundle import accepting name="" / whitespace. - if not v.strip(): - raise ValueError("entity name must not be empty") - return v + return _require_non_empty(v, "entity name") class Relation(BaseModel): @@ -340,9 +348,7 @@ def _normalize_type(cls, v: Any) -> str: def _title_non_empty(cls, v: str) -> str: # See Claim._text_non_empty — the propose_page check alone left # store.put_page and bundle import accepting title="" / whitespace. - if not v.strip(): - raise ValueError("page title must not be empty") - return v + return _require_non_empty(v, "page title") # --- audit + sessions -----------------------------------------------------