diff --git a/content-guards/README.md b/content-guards/README.md index de08a4a..ce483c9 100644 --- a/content-guards/README.md +++ b/content-guards/README.md @@ -8,6 +8,9 @@ See [ARCHITECTURE.md](ARCHITECTURE.md) for integration diagrams. - **markdown-validator**: Validates markdown with markdownlint - **token-validator**: Enforces configurable file token limits +- **sensitive-content-guard**: Blocks 7 categories of sensitive literals + (IPv4, IPv6, emails, user paths, private keys, AWS account IDs, real + domains) in Write/Edit content; first-block, second-allow flow per detector - **webfetch-guard**: Blocks outdated year references in web queries - **readme-validator**: Checks README files for required sections and badge health - **issue-limiter**: Prevents GitHub issue backlog overflow with 24h rate limiting @@ -17,6 +20,39 @@ See [ARCHITECTURE.md](ARCHITECTURE.md) for integration diagrams. No manual invocation required. All hooks activate automatically: - **token-validator** — blocks files exceeding token limits (PreToolUse: Write, Edit) +- **sensitive-content-guard** — blocks 7 categories of sensitive literals + in Write/Edit content (PreToolUse: Write, Edit). First attempt blocks + with a per-detector hint; a retry within 5 minutes for the same + `(file, detector, value)` is treated as the agent's acknowledgment and + allowed through. State persists in + `$XDG_CACHE_HOME/content-guards/sensitive-content-state.json`. + + Detectors and their allowlist anchors: + - **`ipv4`** — IPv4 outside `192.168.0.0/24`, loopback, `0.0.0.0`, + broadcast (`255.255.255.x`), link-local metadata (`169.254.169.254`). + - **`ipv6`** — IPv6 outside `::`/`::1`, `fe80::*` (link-local), + `fc00::/7` (ULA), `2001:db8::*` (RFC 3849 doc prefix), `ff00::*` + (multicast). + - **`email`** — real email addresses outside `noreply@github.com`, + `*@users.noreply.github.com`, `*@example.{com,org,net,local}`, + `*@test`, `*@localhost`, and `` shapes. 
+ - **`absolute_user_path`** — hard-coded `/Users//` or + `/home//` outside `${USER}`, `$USER`, or `` placeholder + shapes. + - **`private_key_header`** — PEM private key markers + (`-----BEGIN … PRIVATE KEY-----`); always blocked. + - **`aws_account_id`** — bare 12-digit numbers on lines mentioning + `account_id`, `arn:aws:`, `aws_account_id`, or `:account:`; allows + `123456789012` (AWS's documented sample) and repeated-digit shapes. + - **`real_domain`** — only flags tokens whose TLD is in a focused + allowlist of ~29 popular public TLDs (`com`, `net`, `org`, `io`, + `ai`, `dev`, `app`, `co`, `cloud`, `gov`, `edu`, etc. — see + `REAL_TLDS` in the script). Anything outside that set + (filenames like `foo.py`, version strings) is left alone. + Also allows `*.example.*`, `*.test`, `*.localhost`, `*.invalid`, + `*.local`, and the project's short explicit allowlist + (`github.com`, `api.github.com`, `raw.githubusercontent.com`, + `docs.jacobpevans.com`, `runs-on.com`, `healthchecks.io`). - **webfetch-guard** — blocks outdated year references in web queries (PreToolUse: WebFetch, WebSearch) - **issue-limiter** — rate limits `gh issue create` and `gh pr create` (PreToolUse: Bash) - **branch-limiter** — limits concurrent open branches (PreToolUse: Bash) diff --git a/content-guards/hooks/hooks.json b/content-guards/hooks/hooks.json index 7435660..1a141cc 100644 --- a/content-guards/hooks/hooks.json +++ b/content-guards/hooks/hooks.json @@ -8,6 +8,11 @@ "type": "command", "command": "${CLAUDE_PLUGIN_ROOT}/scripts/validate-token-limits.py", "timeout": 30 + }, + { + "type": "command", + "command": "${CLAUDE_PLUGIN_ROOT}/scripts/validate-sensitive-content.py", + "timeout": 30 } ] }, diff --git a/content-guards/scripts/validate-sensitive-content.py b/content-guards/scripts/validate-sensitive-content.py new file mode 100755 index 0000000..cc12a75 --- /dev/null +++ b/content-guards/scripts/validate-sensitive-content.py @@ -0,0 +1,294 @@ +#!/usr/bin/env python3 +"""PreToolUse 
hook: block sensitive content in Write/Edit. See README.""" +from __future__ import annotations + +import json +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Callable, Optional + +VERSION_PIN = re.compile(r"\brev:\s*v?\d") +HASH_LINE = re.compile(r"\b(?:sha\d+|md5|cas)[-: ]", re.IGNORECASE) +EMAIL_PLACEHOLDER = re.compile(r"<[A-Za-z][A-Za-z0-9._-]*@[A-Za-z0-9._-]*>") +LINK_REF = re.compile(r"^\s*\[[^\]]+\]:\s") +REPO_LINE = re.compile(r"^\s*repo:\s") +IMAGE_LINE = re.compile(r"^\s*image:\s") + +_OCT = r"(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)" +IPV4 = re.compile(rf"(? bool: + return any(p.match(v) for p in _IPV4_OK) + + +def _ipv6_allowed(v: str) -> bool: + v = v.lower() + if v in ("::", "::1"): + return True + if v.startswith(("fe80:", "fe80::", "2001:db8:", "2001:db8::")): + return True + return bool(re.match(r"^(?:f[cd][0-9a-f]{0,2}|ff[0-9a-f]{2}):", v)) + + +def _email_allowed(v: str) -> bool: + v = v.lower() + return (v == "noreply@github.com" + or v.endswith(_EMAIL_SUFFIX) or v.startswith(_EMAIL_PREFIX)) + + +def _user_path_allowed(v: str) -> bool: + lower = v.lower() + if "" in lower or "$user" in lower or "${user}" in lower: + return True + return bool(_REAL_USER) and v.endswith(f"/{_REAL_USER}/") + + +def _aws_allowed(v: str) -> bool: + return v in {"123456789012", "000000000000"} or len(set(v)) == 1 + + +def _domain_allowed(v: str) -> bool: + v = v.lower() + if v in _DOMAIN_EXACT or v.endswith(_DOMAIN_SUFFIX): + return True + # TLD not in REAL_TLDS = not a domain we care about (filename, version, etc.) 
@dataclass
class Detector:
    """One category of sensitive literal and the rules for reporting it.

    The scanner walks the content line by line and, for each detector, runs
    ``pattern`` over the line; every match is passed through ``normalize`` and
    then ``is_allowed`` before it is reported.
    """

    # Short identifier; also used in state keys and in the report header.
    name: str
    # Compiled regex whose full match (group 0) is the candidate value.
    pattern: re.Pattern
    # Returns True for values on this detector's allowlist (not reported).
    is_allowed: Callable[[str], bool]
    # Remediation hint appended to the report when hints are requested.
    message_hint: str
    # Optional: when it returns True for a line, the line is not scanned.
    skip_line: Optional[Callable[[str], bool]] = None
    # Optional: when set, a line is scanned only if this returns True for it.
    line_context: Optional[Callable[[str], bool]] = None
    # Applied to each raw match before the allowlist check and de-duplication;
    # the default returns the value unchanged.
    normalize: Callable[[str], str] = field(default=lambda value: value)
def load_state() -> dict[str, float]:
    """Read the acknowledgment state from ``STATE_FILE``.

    Returns:
        A mapping of state key to the timestamp (seconds) stored for it.
        An empty dict is returned when the file is missing, unreadable, not
        valid JSON, or does not contain a JSON object, so a damaged state
        file never crashes the hook.  Entries whose value is not a real
        number are dropped.
    """
    try:
        data = json.loads(STATE_FILE.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return {}
    if not isinstance(data, dict):
        # Valid JSON of the wrong shape (list, string, number, null).
        return {}
    return {
        key: float(stamp)
        for key, stamp in data.items()
        # bool is a subclass of int; JSON true/false is not a timestamp.
        if isinstance(stamp, (int, float)) and not isinstance(stamp, bool)
    }
def main() -> int:
    """Run the PreToolUse hook: read the payload from stdin and decide.

    Flow:
      1. Parse the JSON payload from stdin.  Anything that is not a JSON
         object, any tool other than Write/Edit, or a malformed
         ``tool_input`` is let through silently (fail-open, no output).
      2. Scan the written content for violations.
      3. Violations not yet recorded in the state file for this
         ``(file, detector, value)`` are recorded and the call is denied.
         If every violation is already recorded (and not expired), the call
         is allowed with a warning.

    Returns:
        Always 0; the decision is communicated through the JSON printed by
        ``emit``.
    """
    try:
        hook_input = json.load(sys.stdin)
    except ValueError:  # json.JSONDecodeError and UnicodeDecodeError
        return 0
    if not isinstance(hook_input, dict):
        # Valid JSON but not an object (e.g. a list or string): fail open
        # instead of raising AttributeError on .get().
        return 0
    tool_name = hook_input.get("tool_name", "")
    if tool_name not in ("Write", "Edit"):
        return 0
    tool_input = hook_input.get("tool_input") or {}
    if not isinstance(tool_input, dict):
        return 0
    raw_path = str(tool_input.get("file_path") or "")
    # realpath makes relative and absolute spellings of a path share state.
    file_path = os.path.realpath(raw_path) if raw_path else ""
    content = extract_content(tool_name, tool_input)
    if not content:
        return 0
    violations = find_violations(content)
    if not violations:
        return 0

    now = time.time()
    state = prune(load_state(), now)
    keys = {(d.name, v): f"{file_path}:{d.name}:{v}" for d, v in violations}
    unwarned = [(d, v) for d, v in violations if keys[(d.name, v)] not in state]

    if not unwarned:
        # Every violation was already blocked once within the TTL window.
        emit("allow", (
            f"WARNING (acknowledged): sensitive content in {tool_name} of "
            f"{file_path}:\n{_format(_group(violations), hint=False)}\n\n"
            f"Proceeding because this is a retry within the {TTL_SECONDS // 60}-min "
            "window. Confirm the file is not committed publicly."
        ))
        return 0

    # Record only the new violations, then block and report just those.
    for det, value in unwarned:
        state[keys[(det.name, value)]] = now
    save_state(state)
    emit("deny", (
        f"BLOCKED (first attempt): sensitive content in {tool_name} of "
        f"{file_path}:\n{_format(_group(unwarned), hint=True)}\n\n"
        "These values look like real artifacts and would leak if committed.\n"
        f"Retry within {TTL_SECONDS // 60} min to acknowledge and proceed."
    ))
    return 0
'{"tool_name":"Write","tool_input":{"file_path":"/x.json","content":"cas-sha256:abcd:1234:5678:beef"}}' + [ "$status" -eq 0 ] && [ -z "$output" ] +} + +@test "ipv6 first-block-second-allow" { + IN='{"tool_name":"Write","tool_input":{"file_path":"/i6s.py","content":"a=\"2620:0:860::1\""}}' + run python3 "$SCRIPT" <<< "$IN" + echo "$output" | grep -q '"permissionDecision": "deny"' + run python3 "$SCRIPT" <<< "$IN" + echo "$output" | grep -q '"permissionDecision": "allow"' +} + +# --- email ------------------------------------------------------------------ + +@test "email allow: noreply@github.com" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.py","content":"a=\"noreply@github.com\""}}' + [ "$status" -eq 0 ] && [ -z "$output" ] +} + +@test "email allow: foo@users.noreply.github.com" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.py","content":"e=\"foo@users.noreply.github.com\""}}' + [ "$status" -eq 0 ] && [ -z "$output" ] +} + +@test "email allow: bar@example.com" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.py","content":"e=\"bar@example.com\""}}' + [ "$status" -eq 0 ] && [ -z "$output" ] +} + +@test "email allow: placeholder shape " { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.md","content":"contact "}}' + [ "$status" -eq 0 ] && [ -z "$output" ] +} + +@test "email block: real alice@realdomain.io" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.py","content":"e=\"alice@realdomain.io\""}}' + echo "$output" | grep -q '"permissionDecision": "deny"' + echo "$output" | grep -q 'email' +} + +@test "email first-block-second-allow" { + IN='{"tool_name":"Write","tool_input":{"file_path":"/es.py","content":"e=\"bob@realcompany.io\""}}' + run python3 "$SCRIPT" <<< "$IN" + echo "$output" | grep -q '"permissionDecision": "deny"' + run python3 "$SCRIPT" <<< "$IN" + echo "$output" | grep -q '"permissionDecision": "allow"' +} + +# --- absolute_user_path 
----------------------------------------------------- + +@test "user_path allow: placeholder" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.md","content":"cd /Users//p"}}' + [ "$status" -eq 0 ] && [ -z "$output" ] +} + +@test "user_path allow: \\$USER var" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.sh","content":"cd /home/$USER/w"}}' + [ "$status" -eq 0 ] && [ -z "$output" ] +} + +@test "user_path block: hard-coded /Users/alice/" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.md","content":"cd /Users/alice/p"}}' + echo "$output" | grep -q '"permissionDecision": "deny"' + echo "$output" | grep -q 'absolute_user_path' +} + +@test "user_path first-block-second-allow" { + IN='{"tool_name":"Write","tool_input":{"file_path":"/ps.md","content":"cd /Users/carol/p"}}' + run python3 "$SCRIPT" <<< "$IN" + echo "$output" | grep -q '"permissionDecision": "deny"' + run python3 "$SCRIPT" <<< "$IN" + echo "$output" | grep -q '"permissionDecision": "allow"' +} + +# --- private_key_header ----------------------------------------------------- + +@test "private_key block: BEGIN RSA PRIVATE KEY" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.pem","content":"-----BEGIN RSA PRIVATE KEY-----"}}' + echo "$output" | grep -q '"permissionDecision": "deny"' + echo "$output" | grep -q 'private_key_header' +} + +@test "private_key block: bare BEGIN PRIVATE KEY (PKCS8)" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.pem","content":"-----BEGIN PRIVATE KEY-----"}}' + echo "$output" | grep -q '"permissionDecision": "deny"' +} + +@test "private_key allow: no header literal text" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.md","content":"do not commit private keys"}}' + [ "$status" -eq 0 ] && [ -z "$output" ] +} + +@test "private_key first-block-second-allow" { + IN='{"tool_name":"Write","tool_input":{"file_path":"/ks.pem","content":"-----BEGIN OPENSSH PRIVATE KEY-----"}}' + run python3 
"$SCRIPT" <<< "$IN" + echo "$output" | grep -q '"permissionDecision": "deny"' + run python3 "$SCRIPT" <<< "$IN" + echo "$output" | grep -q '"permissionDecision": "allow"' +} + +# --- aws_account_id --------------------------------------------------------- + +@test "aws allow: 123456789012 sample" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.tf","content":"account_id = \"123456789012\""}}' + [ "$status" -eq 0 ] && [ -z "$output" ] +} + +@test "aws allow: 12-digit with no AWS context" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.txt","content":"phone: 555123456789"}}' + [ "$status" -eq 0 ] && [ -z "$output" ] +} + +@test "aws block: 987654321098 with account_id" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.tf","content":"account_id = \"987654321098\""}}' + echo "$output" | grep -q '"permissionDecision": "deny"' + echo "$output" | grep -q 'aws_account_id' +} + +@test "aws block: ARN with real account id" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.tf","content":"arn:aws:iam::246813579246:role/x"}}' + echo "$output" | grep -q '"permissionDecision": "deny"' +} + +@test "aws first-block-second-allow" { + IN='{"tool_name":"Write","tool_input":{"file_path":"/as.tf","content":"aws_account_id = 555444333222"}}' + run python3 "$SCRIPT" <<< "$IN" + echo "$output" | grep -q '"permissionDecision": "deny"' + run python3 "$SCRIPT" <<< "$IN" + echo "$output" | grep -q '"permissionDecision": "allow"' +} + +# --- real_domain ------------------------------------------------------------ + +@test "domain allow: example.com" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.py","content":"u=\"https://example.com/a\""}}' + [ "$status" -eq 0 ] && [ -z "$output" ] +} + +@test "domain allow: github.com (allowlist)" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.py","content":"u=\"https://github.com/f/b\""}}' + [ "$status" -eq 0 ] && [ -z "$output" ] +} + +@test "domain allow: 
docs.jacobpevans.com" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.md","content":"see https://docs.jacobpevans.com/f"}}' + [ "$status" -eq 0 ] && [ -z "$output" ] +} + +@test "domain allow: db.foo.test" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.py","content":"h=\"db.foo.test\""}}' + [ "$status" -eq 0 ] && [ -z "$output" ] +} + +@test "domain allow: filename foo.md is not a domain" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.py","content":"p=\"docs/foo.md\""}}' + [ "$status" -eq 0 ] && [ -z "$output" ] +} + +@test "domain allow: repo: line skipped" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.yaml","content":"repo: https://realdomain.io/f"}}' + [ "$status" -eq 0 ] && [ -z "$output" ] +} + +@test "domain allow: image: line skipped" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.yaml","content":"image: docker.io/library/nginx:1.25"}}' + [ "$status" -eq 0 ] && [ -z "$output" ] +} + +@test "domain block: realbusiness.io" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.py","content":"u=\"https://realbusiness.io/a\""}}' + echo "$output" | grep -q '"permissionDecision": "deny"' + echo "$output" | grep -q 'real_domain' +} + +@test "domain first-block-second-allow" { + IN='{"tool_name":"Write","tool_input":{"file_path":"/ds.py","content":"u=\"https://realbusiness.io\""}}' + run python3 "$SCRIPT" <<< "$IN" + echo "$output" | grep -q '"permissionDecision": "deny"' + run python3 "$SCRIPT" <<< "$IN" + echo "$output" | grep -q '"permissionDecision": "allow"' +} + +# --- state-key isolation ---------------------------------------------------- + +@test "state isolation: acknowledged IPv4 does not pre-allow new email" { + F='{"tool_name":"Write","tool_input":{"file_path":"/mix.py","content":"x=\"10.0.1.200\""}}' + S='{"tool_name":"Write","tool_input":{"file_path":"/mix.py","content":"x=\"10.0.1.200\"\ne=\"al@realdom.io\""}}' + run python3 "$SCRIPT" <<< "$F" + echo 
"$output" | grep -q '"permissionDecision": "deny"' + run python3 "$SCRIPT" <<< "$S" + echo "$output" | grep -q '"permissionDecision": "deny"' + echo "$output" | grep -q 'email' + echo "$output" | grep -q 'al@realdom.io' +} diff --git a/tests/content-guards/sensitive-content/sensitive-content.bats b/tests/content-guards/sensitive-content/sensitive-content.bats new file mode 100644 index 0000000..3f38599 --- /dev/null +++ b/tests/content-guards/sensitive-content/sensitive-content.bats @@ -0,0 +1,193 @@ +#!/usr/bin/env bats +# Test suite for content-guards/scripts/validate-sensitive-content.py +# IPv4 detector + tool filtering + state machine (regression preservation +# from the original no-real-ips hook). Per-detector cases for ipv6, email, +# absolute_user_path, private_key_header, aws_account_id, real_domain live +# in detectors.bats in the same directory. +# +# Run with: bats tests/content-guards/sensitive-content/sensitive-content.bats + +setup() { + REPO_ROOT="$(cd "$(dirname "$BATS_TEST_FILENAME")/../../.." && pwd)" + SCRIPT="$REPO_ROOT/content-guards/scripts/validate-sensitive-content.py" + STATE_FILE="$(mktemp)" + rm -f "$STATE_FILE" + export SENSITIVE_CONTENT_STATE_FILE="$STATE_FILE" + + if [[ ! 
-f "$SCRIPT" ]]; then + echo "ERROR: Script not found at $SCRIPT" >&2 + return 1 + fi +} + +teardown() { + rm -f "$STATE_FILE" +} + +run_hook() { + run python3 "$SCRIPT" <<< "$1" +} + +# --------------------------------------------------------------------------- +# TC1: Non Write/Edit tools are silently allowed (exit 0, no output) +# --------------------------------------------------------------------------- + +@test "TC1: Bash tool is allowed silently" { + run_hook '{"tool_name":"Bash","tool_input":{"command":"echo 10.0.1.200"}}' + [ "$status" -eq 0 ] + [ -z "$output" ] +} + +@test "TC1b: Read tool is allowed silently" { + run_hook '{"tool_name":"Read","tool_input":{"file_path":"/x"}}' + [ "$status" -eq 0 ] + [ -z "$output" ] +} + +@test "TC1c: invalid JSON is allowed silently (fail-open)" { + run python3 "$SCRIPT" <<< "not json" + [ "$status" -eq 0 ] + [ -z "$output" ] +} + +# --------------------------------------------------------------------------- +# TC2: IPv4 allowlist — every sanctioned range passes +# --------------------------------------------------------------------------- + +@test "TC2a: 192.168.0.x sample range allowed" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.py","content":"url=\"192.168.0.200\""}}' + [ "$status" -eq 0 ] + [ -z "$output" ] +} + +@test "TC2b: loopback 127.0.0.1 allowed" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.py","content":"host=\"127.0.0.1\""}}' + [ "$status" -eq 0 ] + [ -z "$output" ] +} + +@test "TC2c: 0.0.0.0 wildcard allowed" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.yml","content":"bind: 0.0.0.0"}}' + [ "$status" -eq 0 ] + [ -z "$output" ] +} + +@test "TC2d: 169.254.169.254 metadata allowed" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.sh","content":"curl 169.254.169.254/latest"}}' + [ "$status" -eq 0 ] + [ -z "$output" ] +} + +@test "TC2e: version pin rev: v0.10.0.1 is skipped" { + run_hook 
'{"tool_name":"Write","tool_input":{"file_path":"/x.yml","content":"rev: v0.10.0.1"}}' + [ "$status" -eq 0 ] + [ -z "$output" ] +} + +@test "TC2f: 192.168.1.x (not 192.168.0.x) is BLOCKED" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.py","content":"x=\"192.168.1.50\""}}' + echo "$output" | grep -q '"permissionDecision": "deny"' + echo "$output" | grep -q 'ipv4' +} + +# --------------------------------------------------------------------------- +# TC3-TC4: First-block / second-allow flow +# --------------------------------------------------------------------------- + +@test "TC3a: Write with 10.0.1.200 blocked on first attempt" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/repo/tests/test.py","content":"url=\"10.0.1.200\""}}' + echo "$output" | grep -q '"permissionDecision": "deny"' + echo "$output" | grep -q 'BLOCKED (first attempt)' + echo "$output" | grep -q '10.0.1.200' +} + +@test "TC3b: Edit with new_string containing 172.16.0.5 blocked" { + run_hook '{"tool_name":"Edit","tool_input":{"file_path":"/x.yml","old_string":"o","new_string":"host: 172.16.0.5"}}' + echo "$output" | grep -q '"permissionDecision": "deny"' + echo "$output" | grep -q '172.16.0.5' +} + +@test "TC4a: same file + same IP retried allows" { + IN='{"tool_name":"Write","tool_input":{"file_path":"/repo/scratch.py","content":"x=\"10.0.1.200\""}}' + run python3 "$SCRIPT" <<< "$IN" + echo "$output" | grep -q '"permissionDecision": "deny"' + run python3 "$SCRIPT" <<< "$IN" + echo "$output" | grep -q '"permissionDecision": "allow"' + echo "$output" | grep -q 'WARNING (acknowledged)' +} + +@test "TC4b: same IP in a different file blocks again" { + F='{"tool_name":"Write","tool_input":{"file_path":"/a.py","content":"x=\"10.0.1.200\""}}' + S='{"tool_name":"Write","tool_input":{"file_path":"/b.py","content":"x=\"10.0.1.200\""}}' + run python3 "$SCRIPT" <<< "$F" + echo "$output" | grep -q '"permissionDecision": "deny"' + run python3 "$SCRIPT" <<< "$S" + echo "$output" 
| grep -q '"permissionDecision": "deny"' +} + +@test "TC4c: same file, NEW IP on second write blocks the new one" { + F='{"tool_name":"Write","tool_input":{"file_path":"/c.py","content":"x=\"10.0.1.200\""}}' + S='{"tool_name":"Write","tool_input":{"file_path":"/c.py","content":"y=\"172.16.0.5\""}}' + run python3 "$SCRIPT" <<< "$F" + echo "$output" | grep -q '"permissionDecision": "deny"' + run python3 "$SCRIPT" <<< "$S" + echo "$output" | grep -q '"permissionDecision": "deny"' + echo "$output" | grep -q '172.16.0.5' +} + +@test "TC4d: same file, retry with both old (ack) + new IP blocks only new" { + F='{"tool_name":"Write","tool_input":{"file_path":"/d.py","content":"x=\"10.0.1.200\""}}' + S='{"tool_name":"Write","tool_input":{"file_path":"/d.py","content":"a=\"10.0.1.200\"\nb=\"172.16.0.5\""}}' + run python3 "$SCRIPT" <<< "$F" + echo "$output" | grep -q '"permissionDecision": "deny"' + run python3 "$SCRIPT" <<< "$S" + echo "$output" | grep -q '"permissionDecision": "deny"' + echo "$output" | grep -q '172.16.0.5' + echo "$output" | grep -vq 'BLOCKED.*10\.0\.1\.200,' +} + +# --------------------------------------------------------------------------- +# TC5-TC6: Edge cases — empty content, strict octet regex +# --------------------------------------------------------------------------- + +@test "TC5: Write with no content is allowed silently" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.py","content":""}}' + [ "$status" -eq 0 ] + [ -z "$output" ] +} + +@test "TC6a: 999.999.999.999 is not matched (each octet > 255)" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.py","content":"x=\"999.999.999.999\""}}' + [ "$status" -eq 0 ] + [ -z "$output" ] +} + +@test "TC6b: 256.1.1.1 is not matched (first octet > 255)" { + run_hook '{"tool_name":"Write","tool_input":{"file_path":"/x.py","content":"x=\"256.1.1.1\""}}' + [ "$status" -eq 0 ] + [ -z "$output" ] +} + +@test "TC6c: 192.168.0.256 is not matched (last octet > 255)" { + run_hook 
'{"tool_name":"Write","tool_input":{"file_path":"/x.py","content":"x=\"192.168.0.256\""}}' + [ "$status" -eq 0 ] + [ -z "$output" ] +} + +# --------------------------------------------------------------------------- +# TC7: Path normalization +# --------------------------------------------------------------------------- + +@test "TC7: relative and absolute paths to the same file share state" { + T="$(mktemp -d)" + ABS="$T/foo.py" + REL="./foo.py" + F="{\"tool_name\":\"Write\",\"tool_input\":{\"file_path\":\"$ABS\",\"content\":\"x=\\\"10.0.1.200\\\"\"}}" + S="{\"tool_name\":\"Write\",\"tool_input\":{\"file_path\":\"$REL\",\"content\":\"x=\\\"10.0.1.200\\\"\"}}" + run python3 "$SCRIPT" <<< "$F" + echo "$output" | grep -q '"permissionDecision": "deny"' + cd "$T" + run python3 "$SCRIPT" <<< "$S" + echo "$output" | grep -q '"permissionDecision": "allow"' + rm -rf "$T" +}