From 2155cc2f5803ac45bdc7fae2694a89e66add90a1 Mon Sep 17 00:00:00 2001 From: Evan Harmon Date: Tue, 23 Jun 2026 11:43:18 -0500 Subject: [PATCH] fix(standardize-repo): scan only non-ignored files for template markers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit verify-applied.sh's "unrendered template markers" check recursively grepped the whole tree (excluding only .git/node_modules), so an applied IaC repo false-failed on vendored dependencies in gitignored dirs — .venv ships Ansible's own .j2/jinja templates and plugin docs, and .terraform caches provider source. `task verify` and gitleaks (which respects .gitignore) both pass; only this structural check tripped. Enumerate files the way gitleaks does — honoring .gitignore — via `git ls-files --cached --others --exclude-standard`, covering tracked AND untracked-but-not-ignored files so a freshly rendered, not-yet-staged repo is still fully checked. The recursive-grep fallback (with explicit excludes for .venv/.terraform/.task/.worktrees/dist) remains for non-git targets. Verified: PASS on a real applied repo (sommerlawn-infra) that previously false-failed; still catches real leaks in both tracked and untracked files; correctly ignores a gitignored leak. `shellcheck --severity=error` and `shfmt -i 4` are both clean. Surfaced while applying harmon-init to sommerlawn-infra. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../standardize-repo/assets/verify-applied.sh | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/ai/skills/repo/standardize-repo/assets/verify-applied.sh b/ai/skills/repo/standardize-repo/assets/verify-applied.sh index 4dd52ba..79b90a5 100755 --- a/ai/skills/repo/standardize-repo/assets/verify-applied.sh +++ b/ai/skills/repo/standardize-repo/assets/verify-applied.sh @@ -104,11 +104,26 @@ fi # while bash bare-word tests ([[ true ]]) are not. 
Block markers anchor on the # jinja keyword set, including the raw/endraw the template actually emits and the # [%- whitespace-control form used in LICENSE.jinja. +# +# Enumerate files the way gitleaks (step 5) does — honoring .gitignore — so +# vendored dependencies in gitignored dirs cannot false-trip the scan: .venv +# ships Ansible's own .j2/jinja templates and plugin docs, .terraform caches +# provider source, node_modules is third-party. `git ls-files --cached --others +# --exclude-standard` lists tracked AND untracked-but-not-ignored files, so a +# freshly rendered, not-yet-staged repo is still fully checked. Fall back to a +# recursive grep (with explicit excludes) when the target is not a git work tree. varpfx='project_|author_|github_|organization|repo_url|ci_runner|include_|use_|devcontainer|git_init|bunch_add|obsidian_|run_task_install|projects_directory|bunches_directory|license|current_|country|state' blockkw='if|for|set|else|elif|endif|endfor|endset|raw|endraw|macro|endmacro|block|endblock|include|extends|with|endwith|filter|endfilter' -leaks=$(grep -rIlE \ - "\[\[-? ($varpfx)|\{\{-? ($varpfx)|\[%-? ($blockkw) " \ - --exclude-dir=.git --exclude-dir=node_modules . 2>/dev/null || true) +marker_re="\[\[-? ($varpfx)|\{\{-? ($varpfx)|\[%-? ($blockkw) " +if git rev-parse --is-inside-work-tree >/dev/null 2>&1; then + leaks=$(git ls-files --cached --others --exclude-standard -z 2>/dev/null | + xargs -0 grep -IlE "$marker_re" 2>/dev/null || true) +else + leaks=$(grep -rIlE "$marker_re" \ + --exclude-dir=.git --exclude-dir=node_modules --exclude-dir=.venv \ + --exclude-dir=.terraform --exclude-dir=.task --exclude-dir=.worktrees \ + --exclude-dir=dist . 2>/dev/null || true) +fi if [ -n "$leaks" ]; then err "unrendered template markers found in:" # Print one path per line for readability; indented so it groups under the FAIL.